SHOGUN v3.2.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}
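/* Usage sketch (illustrative only; it assumes the standard Shogun 3.x API for
 * CDenseFeatures, CBinaryLabels and CLinearMachine::apply_binary(), and the
 * data values are made-up placeholders):
 *
 *   #include <shogun/base/init.h>
 *   #include <shogun/features/DenseFeatures.h>
 *   #include <shogun/labels/BinaryLabels.h>
 *   #include <shogun/classifier/svm/LibLinear.h>
 *
 *   using namespace shogun;
 *
 *   int main()
 *   {
 *       init_shogun_with_defaults();
 *
 *       // two 2-dimensional training points, one per class (placeholder data)
 *       SGMatrix<float64_t> feat(2, 2);
 *       feat(0, 0)=-1; feat(1, 0)=-1;   // column 0: negative example
 *       feat(0, 1)=+1; feat(1, 1)=+1;   // column 1: positive example
 *       SGVector<float64_t> lab(2);
 *       lab[0]=-1; lab[1]=+1;
 *
 *       CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat);
 *       CBinaryLabels* labels=new CBinaryLabels(lab);
 *
 *       // C=1.0; this constructor stores features/labels and enables the bias
 *       CLibLinear* svm=new CLibLinear(1.0, features, labels);
 *       svm->set_epsilon(1e-5);
 *       svm->train();
 *
 *       CBinaryLabels* out=svm->apply_binary(features);
 *       SG_UNREF(out);
 *       SG_UNREF(svm);
 *       exit_shogun();
 *       return 0;
 *   }
 */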
bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels)
    ASSERT(m_labels->get_label_type() == LT_BINARY)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }
    ASSERT(features)

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
            (liblinear_solver_type == L1R_LR) )
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                    "number of features %d does not match number of "
                    "training labels %d\n",
                    num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                    "number of training labels %d\n",
                    num_vec, num_train_labels);
        }
    }
    if (use_bias)
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    liblinear_problem prob;
    if (use_bias)
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(double, prob.l);
    float64_t* Cs=SG_MALLOC(double, prob.l);
    prob.use_bias=use_bias;
    double Cp=C1;
    double Cn=C2;

    for (int32_t i=0; i<prob.l; i++)
    {
        prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
        if (prob.y[i] == +1)
            Cs[i]=C1;
        else if (prob.y[i] == -1)
            Cs[i]=C2;
        else
            SG_ERROR("labels should be +1/-1 only\n")
    }

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n)

    function *fun_obj=NULL;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n")
            tron_obj.tron(w.vector, m_max_train_time);
            SG_DEBUG("done with tron\n")
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L2R_LR_DUAL:
        {
            solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n")
            break;
    }

    if (use_bias)
        set_bias(w[w.vlen]);
    else
        set_bias(0);

    SG_FREE(prob.y);
    SG_FREE(Cs);

    return true;
}
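/* Note on solver selection (an illustrative sketch; set_liblinear_solver_type()
 * and set_linear_term() are declared in LibLinear.h, while
 * CDenseFeatures::get_transposed() is assumed from the standard Shogun 3.x API):
 * train_machine() above rescales the stopping tolerance only for the TRON and
 * L1 solvers, e.g. with epsilon=1e-2 and a 900/100 label split over l=1000
 * examples the L1 solvers run with eps = 1e-2*min(900,100)/1000 = 1e-3. The
 * L1-regularized solvers additionally expect the features to be handed over
 * already transposed:
 *
 *   CDenseFeatures<float64_t>* feats_t=features->get_transposed();
 *   svm->set_liblinear_solver_type(L1R_L2LOSS_SVC);
 *   svm->set_features(feats_t);
 *   svm->train();
 *
 * and the dual SVC solvers accept a custom linear term in place of the default
 * -e^T alpha (the vector below just reproduces that default):
 *
 *   SGVector<float64_t> q(labels->get_num_labels());
 *   q.set_const(-1.0);
 *   svm->set_linear_term(q);
 */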
// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//     upper_bound_i = Cp if y_i = 1
//     upper_bound_i = Cn if y_i = -1
//     D_ii = 0
// In L2-SVM case:
//     upper_bound_i = INF
//     D_ii = 1/(2*Cp) if y_i = 1
//     D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_l1l2_svc(
    const liblinear_problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i, prob->x,i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w.vector, n);
            if (prob->use_bias)
                G+=w.vector[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w.vector, n);
                if (prob->use_bias)
                    w.vector[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n",iter)
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
                "(also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w.vector[i]*w.vector[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n",v/2)
    SG_INFO("nSV = %d\n",nSV)

    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_l2_svc(
    liblinear_problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = CMath::random(j, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#")
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w.vector[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w.vector[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w.vector[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w.vector[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
                -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n")

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v)
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l1r_lr(
    const liblinear_problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = CMath::random(j, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#")
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w.vector[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w.vector[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w.vector[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n", iter)
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n")

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v)
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

void CLibLinear::solve_l2r_lr_dual(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double *xTx = new double[l];
    int max_iter = 1000;
    int *index = new int[l];
    double *alpha = new double[2*l]; // store alpha and C - alpha
    int32_t *y = SG_MALLOC(int32_t, l);
    int max_inner_iter = 100; // for inner Newton
    double innereps = 1e-2;
    double innereps_min = CMath::min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};
    double Gmax_init = 0;

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
    }

    // Initial alpha can be set here. Note that
    // 0 < alpha[i] < upper_bound[GETI(i)]
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    for(i=0; i<l; i++)
    {
        alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
        alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
    }

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        xTx[i] = prob->x->dot(i, prob->x,i);
        prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);

        if (prob->use_bias)
        {
            w.vector[w_size]+=y[i]*alpha[2*i];
            xTx[i]+=1;
        }
        index[i] = i;
    }

    while (iter < max_iter)
    {
        for (i=0; i<l; i++)
        {
            int j = CMath::random(i, l-1);
            CMath::swap(index[i], index[j]);
        }
        int newton_iter = 0;
        double Gmax = 0;
        for (s=0; s<l; s++)
        {
            i = index[s];
            int32_t yi = y[i];
            double C = upper_bound[GETI(i)];
            double ywTx = 0, xisq = xTx[i];

            ywTx = prob->x->dense_dot(i, w.vector, w_size);
            if (prob->use_bias)
                ywTx+=w.vector[w_size];

            ywTx *= y[i];
            double a = xisq, b = ywTx;

            // Decide to minimize g_1(z) or g_2(z)
            int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
            if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
            {
                ind1 = 2*i+1;
                ind2 = 2*i;
                sign = -1;
            }

            // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
            double alpha_old = alpha[ind1];
            double z = alpha_old;
            if(C - z < 0.5 * C)
                z = 0.1*z;
            double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
            Gmax = CMath::max(Gmax, CMath::abs(gp));

            // Newton method on the sub-problem
            const double eta = 0.1; // xi in the paper
            int inner_iter = 0;
            while (inner_iter <= max_inner_iter)
            {
                if(fabs(gp) < innereps)
                    break;
                double gpp = a + C/(C-z)/z;
                double tmpz = z - gp/gpp;
                if(tmpz <= 0)
                    z *= eta;
                else // tmpz in (0, C)
                    z = tmpz;
                gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
                newton_iter++;
                inner_iter++;
            }

            if(inner_iter > 0) // update w
            {
                alpha[ind1] = z;
                alpha[ind2] = C-z;

                prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);

                if (prob->use_bias)
                    w.vector[w_size]+=sign*(z-alpha_old)*yi;
            }
        }

        // record the initial gradient norm before incrementing iter,
        // as the other solvers do; otherwise Gmax_init would never be set
        if(iter == 0)
            Gmax_init = Gmax;
        iter++;

        SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)

        if(Gmax < eps)
            break;

        if(newton_iter <= l/10)
            innereps = CMath::max(innereps_min, 0.1*innereps);
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n",iter)
    if (iter >= max_iter)
        SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")

    // calculate objective value

    double v = 0;
    for(i=0; i<w_size; i++)
        v += w[i] * w[i];
    v *= 0.5;
    for(i=0; i<l; i++)
        v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
            - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    SG_INFO("Objective value = %lf\n", v)

    delete [] xTx;
    delete [] alpha;
    SG_FREE(y); // y was allocated with SG_MALLOC, so release it with SG_FREE rather than delete[]
    delete [] index;
}


void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n")

    int32_t num_labels=m_labels->get_num_labels();

    if (num_labels!=linear_term.vlen)
    {
        SG_ERROR("Number of labels (%d) does not match number"
                " of entries (%d) in linear term \n", num_labels,
                linear_term.vlen);
    }

    m_linear_term=linear_term;
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n")

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n")

    m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
    SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}
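/* After training, the learnt model can be read back through the CLinearMachine
 * interface (a short sketch; get_w() and get_bias() are inherited accessors).
 * Note that when use_bias is enabled, train_machine() allocates one extra slot
 * past w.vlen and copies it into the bias via set_bias(w[w.vlen]):
 *
 *   SGVector<float64_t> weights=svm->get_w();
 *   float64_t bias=svm->get_bias();
 *   SG_SPRINT("%d weights, bias=%f\n", weights.vlen, bias);
 */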