// SHOGUN v3.2.0 (documentation-page header retained from the generated listing)
00001 /* 00002 * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights 00003 * embodied in the content of this file are licensed under the BSD 00004 * (revised) open source license. 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 3 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * Written (W) 2011 Shashwat Lal Das 00012 * Adaptation of Vowpal Wabbit v5.1. 00013 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society. 00014 */ 00015 00016 #include <algorithm> 00017 #include <shogun/classifier/vw/VowpalWabbit.h> 00018 00019 using namespace std; 00020 using namespace shogun; 00021 00022 CVowpalWabbit::CVowpalWabbit() 00023 : COnlineLinearMachine() 00024 { 00025 reg=NULL; 00026 learner=NULL; 00027 init(NULL); 00028 } 00029 00030 CVowpalWabbit::CVowpalWabbit(CStreamingVwFeatures* feat) 00031 : COnlineLinearMachine() 00032 { 00033 reg=NULL; 00034 learner=NULL; 00035 init(feat); 00036 } 00037 00038 CVowpalWabbit::CVowpalWabbit(CVowpalWabbit *vw) 00039 : COnlineLinearMachine() 00040 { 00041 features = vw->features; 00042 env = vw->env; 00043 reg = new CVwRegressor(env); 00044 SG_REF(env); 00045 SG_REF(reg); 00046 00047 quiet = vw->quiet; 00048 no_training = vw->no_training; 00049 dump_interval = vw->dump_interval; 00050 sum_loss_since_last_dump = 0.; 00051 reg_name = vw->reg_name; 00052 reg_dump_text = vw->reg_dump_text; 00053 save_predictions = vw->save_predictions; 00054 prediction_fd = vw->prediction_fd; 00055 00056 w = reg->weight_vectors[0]; 00057 copy(vw->w, vw->w+vw->w_dim, w); 00058 w_dim = vw->w_dim; 00059 bias = vw->bias; 00060 } 00061 00062 CVowpalWabbit::~CVowpalWabbit() 00063 { 00064 SG_UNREF(env); 00065 SG_UNREF(reg); 00066 SG_UNREF(learner); 00067 } 00068 00069 void CVowpalWabbit::reinitialize_weights() 00070 { 00071 if 
(reg->weight_vectors) 00072 { 00073 if (reg->weight_vectors[0]) 00074 SG_FREE(reg->weight_vectors[0]); 00075 SG_FREE(reg->weight_vectors); 00076 } 00077 00078 reg->init(env); 00079 w = reg->weight_vectors[0]; 00080 } 00081 00082 void CVowpalWabbit::set_adaptive(bool adaptive_learning) 00083 { 00084 if (adaptive_learning) 00085 { 00086 env->adaptive = true; 00087 env->set_stride(2); 00088 env->power_t = 0.; 00089 reinitialize_weights(); 00090 } 00091 else 00092 env->adaptive = false; 00093 } 00094 00095 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive) 00096 { 00097 if (exact_adaptive) 00098 { 00099 set_adaptive(true); 00100 env->exact_adaptive_norm = true; 00101 } 00102 else 00103 env->exact_adaptive_norm = false; 00104 } 00105 00106 void CVowpalWabbit::load_regressor(char* file_name) 00107 { 00108 reg->load_regressor(file_name); 00109 w = reg->weight_vectors[0]; 00110 w_dim = 1 << env->num_bits; 00111 } 00112 00113 void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text) 00114 { 00115 reg_name = file_name; 00116 reg_dump_text = is_text; 00117 } 00118 00119 void CVowpalWabbit::set_prediction_out(char* file_name) 00120 { 00121 save_predictions = true; 00122 prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666); 00123 if (prediction_fd < 0) 00124 SG_SERROR("Unable to open prediction file %s for writing!\n", file_name) 00125 } 00126 00127 void CVowpalWabbit::add_quadratic_pair(char* pair) 00128 { 00129 env->pairs.push_back(pair); 00130 } 00131 00132 bool CVowpalWabbit::train_machine(CFeatures* feat) 00133 { 00134 ASSERT(features || feat) 00135 if (feat && (features != (CStreamingVwFeatures*) feat)) 00136 { 00137 SG_UNREF(features); 00138 init((CStreamingVwFeatures*) feat); 00139 } 00140 00141 set_learner(); 00142 00143 VwExample* example = NULL; 00144 vw_size_t current_pass = 0; 00145 00146 const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n"; 00147 00148 if (!quiet) 00149 { 00150 SG_SPRINT(header_fmt, 00151 "average", 
"since", "example", "example", 00152 "current", "current", "current"); 00153 SG_SPRINT(header_fmt, 00154 "loss", "last", "counter", "weight", "label", "predict", "features"); 00155 } 00156 00157 features->start_parser(); 00158 while (env->passes_complete < env->num_passes) 00159 { 00160 while (features->get_next_example()) 00161 { 00162 example = features->get_example(); 00163 00164 // Check if we shouldn't train (generally used for cache creation) 00165 if (!no_training) 00166 { 00167 if (example->pass != current_pass) 00168 { 00169 env->eta *= env->eta_decay_rate; 00170 current_pass = example->pass; 00171 } 00172 00173 predict_and_finalize(example); 00174 00175 learner->train(example, example->eta_round); 00176 example->eta_round = 0.; 00177 00178 output_example(example); 00179 } 00180 00181 features->release_example(); 00182 } 00183 env->passes_complete++; 00184 if (env->passes_complete < env->num_passes) 00185 features->reset_stream(); 00186 } 00187 features->end_parser(); 00188 00189 if (env->l1_regularization > 0.) 00190 { 00191 uint32_t length = 1 << env->num_bits; 00192 vw_size_t stride = env->stride; 00193 float32_t gravity = env->l1_regularization * env->update_sum; 00194 for (uint32_t i = 0; i < length; i++) 00195 reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity); 00196 } 00197 00198 if (reg_name != NULL) 00199 reg->dump_regressor(reg_name, reg_dump_text); 00200 00201 return true; 00202 } 00203 00204 float32_t CVowpalWabbit::predict_and_finalize(VwExample* ex) 00205 { 00206 float32_t prediction; 00207 if (env->l1_regularization != 0.) 
00208 prediction = inline_l1_predict(ex); 00209 else 00210 prediction = inline_predict(ex); 00211 00212 ex->final_prediction = 0; 00213 ex->final_prediction += prediction; 00214 ex->final_prediction = finalize_prediction(ex->final_prediction); 00215 float32_t t = ex->example_t; 00216 00217 if (ex->ld->label != FLT_MAX) 00218 { 00219 ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight; 00220 float64_t update = 0.; 00221 if (env->adaptive && env->exact_adaptive_norm) 00222 { 00223 float32_t sum_abs_x = 0.; 00224 float32_t exact_norm = compute_exact_norm(ex, sum_abs_x); 00225 update = (env->eta * exact_norm)/sum_abs_x; 00226 env->update_sum += update; 00227 ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm); 00228 } 00229 else 00230 { 00231 update = (env->eta)/pow(t, env->power_t) * ex->ld->weight; 00232 ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq); 00233 } 00234 env->update_sum += update; 00235 } 00236 00237 return prediction; 00238 } 00239 00240 void CVowpalWabbit::init(CStreamingVwFeatures* feat) 00241 { 00242 features = feat; 00243 00244 if (feat) 00245 env = feat->get_env(); 00246 else 00247 { 00248 env=new CVwEnvironment(); 00249 SG_REF(env); 00250 } 00251 00252 reg = new CVwRegressor(env); 00253 SG_REF(reg); 00254 00255 quiet = true; 00256 no_training = false; 00257 dump_interval = exp(1.); 00258 sum_loss_since_last_dump = 0.; 00259 reg_name = NULL; 00260 reg_dump_text = true; 00261 save_predictions = false; 00262 prediction_fd = -1; 00263 00264 w = reg->weight_vectors[0]; 00265 w_dim = 1 << env->num_bits; 00266 bias = 0.; 00267 } 00268 00269 void CVowpalWabbit::set_learner() 00270 { 00271 if (env->adaptive) 00272 learner = new CVwAdaptiveLearner(reg, env); 00273 else 00274 learner = new CVwNonAdaptiveLearner(reg, env); 00275 SG_REF(learner); 00276 } 00277 00278 float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex) 00279 { 00280 vw_size_t 
thread_num = 0; 00281 00282 float32_t prediction = ex->ld->get_initial(); 00283 00284 float32_t* weights = reg->weight_vectors[thread_num]; 00285 vw_size_t thread_mask = env->thread_mask; 00286 00287 prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum); 00288 00289 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00290 { 00291 char* i = env->pairs.get_element(k); 00292 00293 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00294 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00295 temp.end = ex->atomics[(int32_t)(i[0])].end; 00296 for (; temp.begin != temp.end; temp.begin++) 00297 prediction += one_pf_quad_predict_trunc(weights, *temp.begin, 00298 ex->atomics[(int32_t)(i[1])], thread_mask, 00299 env->l1_regularization * env->update_sum); 00300 } 00301 00302 return prediction; 00303 } 00304 00305 float32_t CVowpalWabbit::inline_predict(VwExample* &ex) 00306 { 00307 vw_size_t thread_num = 0; 00308 float32_t prediction = ex->ld->initial; 00309 00310 float32_t* weights = reg->weight_vectors[thread_num]; 00311 vw_size_t thread_mask = env->thread_mask; 00312 prediction += features->dense_dot(weights, 0); 00313 00314 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00315 { 00316 char* i = env->pairs.get_element(k); 00317 00318 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00319 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00320 temp.end = ex->atomics[(int32_t)(i[0])].end; 00321 for (; temp.begin != temp.end; temp.begin++) 00322 prediction += one_pf_quad_predict(weights, *temp.begin, 00323 ex->atomics[(int32_t)(i[1])], 00324 thread_mask); 00325 } 00326 00327 return prediction; 00328 } 00329 00330 float32_t CVowpalWabbit::finalize_prediction(float32_t ret) 00331 { 00332 if (isnan(ret)) 00333 return 0.5; 00334 if (ret > env->max_label) 00335 return env->max_label; 00336 if (ret < env->min_label) 00337 return env->min_label; 00338 00339 return ret; 00340 } 00341 00342 void 
CVowpalWabbit::output_example(VwExample* &example) 00343 { 00344 if (!quiet) 00345 { 00346 sum_loss_since_last_dump += example->loss; 00347 if (env->weighted_examples + example->ld->weight > dump_interval) 00348 { 00349 print_update(example); 00350 dump_interval *= 2; 00351 } 00352 } 00353 00354 if (save_predictions) 00355 { 00356 float32_t wt = 0.; 00357 if (reg->weight_vectors) 00358 wt = reg->weight_vectors[0][0]; 00359 00360 output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag); 00361 } 00362 } 00363 00364 void CVowpalWabbit::print_update(VwExample* &ex) 00365 { 00366 SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f %8.4f %8.4f %8lu\n", 00367 (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight), 00368 sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples), 00369 env->example_number + 1, 00370 env->weighted_examples + ex->ld->weight, 00371 ex->ld->label, 00372 ex->final_prediction, 00373 (long unsigned int)ex->num_features); 00374 sum_loss_since_last_dump = 0.0; 00375 old_weighted_examples = env->weighted_examples + ex->ld->weight; 00376 } 00377 00378 00379 void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag) 00380 { 00381 if (f >= 0) 00382 { 00383 char temp[30]; 00384 int32_t num = sprintf(temp, "%f", res); 00385 ssize_t t; 00386 t = write(f, temp, num); 00387 if (t != num) 00388 SG_SERROR("Write error!\n") 00389 00390 if (tag.begin != tag.end) 00391 { 00392 temp[0] = ' '; 00393 t = write(f, temp, 1); 00394 if (t != 1) 00395 SG_SERROR("Write error!\n") 00396 00397 t = write(f, tag.begin, sizeof(char)*tag.index()); 00398 if (t != (ssize_t) (sizeof(char)*tag.index())) 00399 SG_SERROR("Write error!\n") 00400 } 00401 00402 temp[0] = '\n'; 00403 t = write(f, temp, 1); 00404 if (t != 1) 00405 SG_SERROR("Write error!\n") 00406 } 00407 } 00408 00409 void CVowpalWabbit::set_verbose(bool verbose) 00410 { 00411 quiet=verbose==false; 
00412 } 00413 00414 00415 float32_t CVowpalWabbit::compute_exact_norm(VwExample* &ex, float32_t& sum_abs_x) 00416 { 00417 // We must traverse the features in _precisely_ the same order as during training. 00418 vw_size_t thread_mask = env->thread_mask; 00419 vw_size_t thread_num = 0; 00420 00421 float32_t g = reg->loss->get_square_grad(ex->final_prediction, ex->ld->label) * ex->ld->weight; 00422 if (g == 0) return 0.; 00423 00424 float32_t xGx = 0.; 00425 00426 float32_t* weights = reg->weight_vectors[thread_num]; 00427 for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++) 00428 { 00429 for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++) 00430 { 00431 float32_t* w_vec = &weights[f->weight_index & thread_mask]; 00432 float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x); 00433 xGx += t * f->x; 00434 sum_abs_x += fabsf(f->x); 00435 } 00436 } 00437 00438 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++) 00439 { 00440 char* i = env->pairs.get_element(k); 00441 00442 v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])]; 00443 temp.begin = ex->atomics[(int32_t)(i[0])].begin; 00444 temp.end = ex->atomics[(int32_t)(i[0])].end; 00445 for (; temp.begin != temp.end; temp.begin++) 00446 xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x); 00447 } 00448 00449 return xGx; 00450 } 00451 00452 float32_t CVowpalWabbit::compute_exact_norm_quad(float32_t* weights, VwFeature& page_feature, v_array<VwFeature> &offer_features, 00453 vw_size_t mask, float32_t g, float32_t& sum_abs_x) 00454 { 00455 vw_size_t halfhash = quadratic_constant * page_feature.weight_index; 00456 float32_t xGx = 0.; 00457 float32_t update2 = g * page_feature.x * page_feature.x; 00458 for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++) 00459 { 00460 float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask]; 00461 float32_t t = elem->x * CMath::invsqrt(w_vec[1] + 
update2 * elem->x * elem->x); 00462 xGx += t * elem->x; 00463 sum_abs_x += fabsf(elem->x); 00464 } 00465 return xGx; 00466 }