SHOGUN v3.2.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Christian Widmer
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <vector>

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/transfer/multitask/LibLinearMTL.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
    init();
}

CLibLinearMTL::CLibLinearMTL(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
}

void CLibLinearMTL::init()
{
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
}

CLibLinearMTL::~CLibLinearMTL()
{
}

bool CLibLinearMTL::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }
    ASSERT(features)
    m_labels->ensure_valid();

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    // weight buffer; one extra entry holds the bias term if enabled
    float64_t* training_w=NULL;
    if (use_bias)
        training_w=SG_MALLOC(float64_t, num_feat+1);
    else
        training_w=SG_MALLOC(float64_t, num_feat);

    liblinear_problem prob;
    if (use_bias)
    {
        prob.n=num_feat+1;
        memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
    }
    else
    {
        prob.n=num_feat;
        memset(training_w, 0, sizeof(float64_t)*num_feat);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);

    // count class sizes for logging
    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n)
    SG_INFO("%d positives, %d negatives\n", pos, neg)

    double Cp=C1;
    double Cn=C2;
    solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);

    if (use_bias)
        set_bias(training_w[num_feat]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    w = SGVector<float64_t>(num_feat);
    for (int32_t i=0; i<num_feat; i++)
        w[i] = training_w[i];

    // training_w was allocated with SG_MALLOC above; release it (fixes a leak)
    SG_FREE(training_w);

    return true;
}
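// ----------------------------------------------------------------------
// Usage sketch (illustrative, not part of this file): training a
// multi-task model. The task-structure setters are assumed from the
// member fields used in this file (num_tasks, task_indicator_lhs,
// task_similarity_matrix, graph_laplacian); exact names and signatures
// should be checked against LibLinearMTL.h. Kept inside a comment,
// following the prototype-in-comment convention used later in this file.
/*
CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(X); // X: num_feat x num_vec
CBinaryLabels* labels = new CBinaryLabels(y);                        // y in {-1,+1}

CLibLinearMTL* mtl = new CLibLinearMTL(1.0, feats, labels); // C = 1.0
mtl->set_num_tasks(num_tasks);               // assumed setter
mtl->set_task_indicator_lhs(task_ids);       // assumed: task id per example
mtl->set_task_similarity_matrix(similarity); // assumed: used by the dual solver
mtl->set_graph_laplacian(laplacian);         // assumed: used by compute_primal_obj()
mtl->train();

SGVector<float64_t> w_out = mtl->get_w();
SG_UNREF(mtl);
*/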
SG_INFO("%d training points %d dims\n", prob.l, prob.n) 00135 SG_INFO("%d positives, %d negatives\n", pos, neg) 00136 00137 double Cp=C1; 00138 double Cn=C2; 00139 solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn); 00140 00141 if (use_bias) 00142 set_bias(training_w[num_feat]); 00143 else 00144 set_bias(0); 00145 00146 SG_FREE(prob.y); 00147 00148 w = SGVector<float64_t>(num_feat); 00149 for (int32_t i=0; i<num_feat; i++) 00150 w[i] = training_w[i]; 00151 00152 return true; 00153 } 00154 00155 // A coordinate descent algorithm for 00156 // L1-loss and L2-loss SVM dual problems 00157 // 00158 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, 00159 // s.t. 0 <= alpha_i <= upper_bound_i, 00160 // 00161 // where Qij = yi yj xi^T xj and 00162 // D is a diagonal matrix 00163 // 00164 // In L1-SVM case: 00165 // upper_bound_i = Cp if y_i = 1 00166 // upper_bound_i = Cn if y_i = -1 00167 // D_ii = 0 00168 // In L2-SVM case: 00169 // upper_bound_i = INF 00170 // D_ii = 1/(2*Cp) if y_i = 1 00171 // D_ii = 1/(2*Cn) if y_i = -1 00172 // 00173 // Given: 00174 // x, y, Cp, Cn 00175 // eps is the stopping tolerance 00176 // 00177 // solution will be put in w 00178 00179 #undef GETI 00180 #define GETI(i) (y[i]+1) 00181 // To support weights for instances, use GETI(i) (i) 00182 00183 00184 void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn) 00185 { 00186 00187 00188 00189 int l = prob->l; 00190 int w_size = prob->n; 00191 int i, s, iter = 0; 00192 double C, d, G; 00193 double *QD = SG_MALLOC(double, l); 00194 int *index = SG_MALLOC(int, l); 00195 //double *alpha = SG_MALLOC(double, l); 00196 00197 int32_t *y = SG_MALLOC(int32_t, l); 00198 int active_size = l; 00199 // PG: projected gradient, for shrinking and stopping 00200 double PG; 00201 double PGmax_old = CMath::INFTY; 00202 double PGmin_old = -CMath::INFTY; 00203 double PGmax_new, PGmin_new; 00204 00205 // matrix W 00206 V = SGMatrix<float64_t>(w_size,num_tasks); 00207 00208 // save alpha 00209 alphas = SGVector<float64_t>(l); 00210 00211 00212 // default solver_type: L2R_L2LOSS_SVC_DUAL 00213 double diag[3] = {0.5/Cn, 0, 0.5/Cp}; 00214 double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY}; 00215 if(true) 00216 { 00217 diag[0] = 0; 00218 diag[2] = 0; 00219 upper_bound[0] = Cn; 00220 upper_bound[2] = Cp; 00221 } 00222 00223 int n = prob->n; 00224 00225 if (prob->use_bias) 00226 n--; 00227 00228 // set V to zero 00229 for(int32_t k=0; k<w_size*num_tasks; k++) 00230 { 00231 V.matrix[k] = 0; 00232 } 00233 00234 // init alphas 00235 for(i=0; i<l; i++) 00236 { 00237 alphas[i] = 0; 00238 } 00239 00240 for(i=0; i<l; i++) 00241 { 00242 if(prob->y[i] > 0) 00243 { 00244 y[i] = +1; 00245 } 00246 else 00247 { 00248 y[i] = -1; 00249 } 00250 QD[i] = diag[GETI(i)]; 00251 QD[i] += prob->x->dot(i, prob->x,i); 00252 index[i] = i; 00253 } 00254 00255 CTime start_time; 00256 while (iter < max_iterations && !CSignal::cancel_computations()) 00257 { 00258 if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) 00259 break; 00260 00261 PGmax_new = -CMath::INFTY; 00262 PGmin_new = CMath::INFTY; 00263 00264 for (i=0; i<active_size; i++) 00265 { 00266 int j = CMath::random(i, active_size-1); 00267 CMath::swap(index[i], index[j]); 00268 } 00269 00270 for (s=0;s<active_size;s++) 00271 { 00272 i = index[s]; 00273 int32_t yi = y[i]; 00274 int32_t ti = task_indicator_lhs[i]; 00275 C = upper_bound[GETI(i)]; 00276 00277 // we compute the inner sum by looping over tasks 00278 // this update is the main result of 
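// ----------------------------------------------------------------------
// A minimal sketch of the same projected coordinate-descent update for the
// plain single-task L1-SVM (D_ii = 0), without shrinking, randomization or
// the multi-task coupling used below. All names are illustrative, not part
// of shogun; kept inside a comment like the python prototypes in this file.
/*
#include <algorithm>
#include <vector>

// One pass over all coordinates of min_a 0.5 a^T Q a - e^T a, 0 <= a_i <= C,
// with Q_ij = y_i y_j x_i.x_j. Maintaining w = sum_i a_i y_i x_i makes the
// partial gradient cheap: G_i = y_i w.x_i - 1.
void cd_pass(const std::vector<std::vector<double> >& X,
        const std::vector<int>& y, double C,
        std::vector<double>& alpha, std::vector<double>& w)
{
    for (size_t i=0; i<X.size(); i++)
    {
        double xx=0, wx=0;
        for (size_t k=0; k<w.size(); k++)
        {
            xx += X[i][k]*X[i][k]; // QD[i] without the diagonal shift
            wx += w[k]*X[i][k];
        }
        if (xx <= 0)
            continue;

        const double G = y[i]*wx - 1.0;
        const double a_old = alpha[i];

        // Newton step on coordinate i, projected onto the box [0, C]
        alpha[i] = std::min(std::max(a_old - G/xx, 0.0), C);

        // incremental update keeps w consistent with alpha
        const double d = (alpha[i]-a_old)*y[i];
        for (size_t k=0; k<w.size(); k++)
            w[k] += d*X[i][k];
    }
}
*/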
void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);

    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;
    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // matrix W
    V = SGMatrix<float64_t>(w_size,num_tasks);

    // save alpha
    alphas = SGVector<float64_t>(l);

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    // only the L1-loss case is used here: D_ii = 0 and box constraints [0, C]
    if(true)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    // set V to zero
    for(int32_t k=0; k<w_size*num_tasks; k++)
        V.matrix[k] = 0;

    // init alphas
    for(i=0; i<l; i++)
        alphas[i] = 0;

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;

        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        // random permutation of the active set
        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];
            int32_t ti = task_indicator_lhs[i];
            C = upper_bound[GETI(i)];

            // we compute the inner sum by looping over tasks
            // this update is the main result of MTL_DCD
            typedef std::map<index_t, float64_t>::const_iterator map_iter;

            float64_t inner_sum = 0;
            for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
            {
                // get data from sparse matrix
                int32_t e_i = it->first;
                float64_t sim = it->second;

                // fetch vector
                float64_t* tmp_w = V.get_column_vector(e_i);
                inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);

                //possibly deal with bias
                //if (prob->use_bias)
                //    G+=w[n];
            }

            // compute gradient
            G = inner_sum-1.0;

            // check if point can be removed from active set
            PG = 0;
            if (alphas[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alphas[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                // save previous alpha
                double alpha_old = alphas[i];

                // project onto feasible set
                alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
                d = (alphas[i] - alpha_old)*yi;

                // update corresponding weight vector
                float64_t* tmp_w = V.get_column_vector(ti);
                prob->x->add_to_dense_vec(d, i, tmp_w, n);

                //if (prob->use_bias)
                //    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

        // converged on the active set; re-check the full set before stopping
        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n",iter)
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
                "(also see liblinear FAQ)\n\n");
    }

    // buffers were allocated with SG_MALLOC, so release them with SG_FREE
    SG_FREE(QD);
    SG_FREE(y);
    SG_FREE(index);
}
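// ----------------------------------------------------------------------
// For reference, a sketch of the objectives evaluated by the two functions
// below, in the notation of the solver above (w_t = columns of W from
// get_W(), v_t = columns of V, L = graph_laplacian, M =
// task_similarity_matrix, t(i) = task of example i):
//
//   primal:  P(W) = 0.5*sum_t w_t.w_t
//                 + 0.5*sum_{s,t} L[s,t] * w_s.w_t
//                 + C  *sum_i max(0, 1 - y_i * w_{t(i)}.x_i)
//
//   dual:    D(alpha) = sum_i alpha_i - 0.5*sum_{s,t} M[s,t] * v_s.v_t
//
// with v_t = sum_{i: t(i)=t} alpha_i y_i x_i. Expanding v_s.v_t recovers
// the naive double loop over examples kept (commented out) in
// compute_dual_obj().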
float64_t CLibLinearMTL::compute_primal_obj()
{
    /* python prototype
    num_param = param.shape[0]
    num_dim = len(all_xt[0])
    num_tasks = int(num_param / num_dim)
    num_examples = len(all_xt)

    # vector to matrix
    W = param.reshape(num_tasks, num_dim)

    obj = 0

    reg_obj = 0
    loss_obj = 0

    assert len(all_xt) == len(all_lt) == len(task_indicator)

    # L2 regularizer
    for t in xrange(num_tasks):
        reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

    # MTL regularizer
    for s in xrange(num_tasks):
        for t in xrange(num_tasks):
            reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

    # loss
    for i in xrange(num_examples):
        ti = task_indicator[i]
        t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
        # hinge
        loss_obj += max(0, 1 - t)

    # combine to final objective
    obj = reg_obj + C * loss_obj

    return obj
    */

    SG_INFO("starting to compute primal objective\n")

    // calculate objective value
    SGMatrix<float64_t> W = get_W();

    float64_t obj = 0;
    int32_t num_vec = features->get_num_vectors();
    int32_t w_size = features->get_dim_feature_space();

    // L2 regularizer
    for (int32_t t=0; t<num_tasks; t++)
    {
        float64_t* w_t = W.get_column_vector(t);

        for(int32_t i=0; i<w_size; i++)
            obj += 0.5 * w_t[i]*w_t[i];
    }

    // MTL regularizer
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* w_s = W.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* w_t = W.get_column_vector(t);
            float64_t l = graph_laplacian.matrix[s*num_tasks+t];

            for(int32_t i=0; i<w_size; i++)
                obj += 0.5 * l * w_s[i]*w_t[i];
        }
    }

    // loss
    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti = task_indicator_lhs[i];
        float64_t* w_t = W.get_column_vector(ti);
        float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);

        // hinge loss
        obj += C1 * CMath::max(0.0, 1 - residual);
    }

    SG_INFO("done computing primal objective, obj=%f\n", obj)

    return obj;
}

float64_t CLibLinearMTL::compute_dual_obj()
{
    /* python prototype
    num_xt = len(xt)

    # compute quadratic term
    for i in xrange(num_xt):
        for j in xrange(num_xt):

            s = task_indicator[i]
            t = task_indicator[j]

            obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

    return obj
    */

    SG_INFO("starting to compute DUAL OBJ\n")

    int32_t num_vec=features->get_num_vectors();

    float64_t obj = 0;

    // compute linear term
    for(int32_t i=0; i<num_vec; i++)
        obj += alphas[i];

    // compute quadratic term
    int32_t v_size = features->get_dim_feature_space();

    // efficient computation via the per-task vectors v_t
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* v_s = V.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* v_t = V.get_column_vector(t);
            const float64_t ts = task_similarity_matrix(s, t);

            for(int32_t i=0; i<v_size; i++)
                obj -= 0.5 * ts * v_s[i]*v_t[i];
        }
    }

    /*
    // naive implementation
    float64_t tmp_val2 = 0;

    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti_i = task_indicator_lhs[i];
        for(int32_t j=0; j<num_vec; j++)
        {
            // look up task similarity
            int32_t ti_j = task_indicator_lhs[j];

            const float64_t ts = task_similarity_matrix(ti_i, ti_j);

            // compute objective
            tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
                    ((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features, j);
        }
    }
    */

    return obj;
}

float64_t CLibLinearMTL::compute_duality_gap()
{
    // not implemented; a proper gap would be compute_primal_obj() - compute_dual_obj()
    return 0.0;
}

#endif //HAVE_LAPACK
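// ----------------------------------------------------------------------
// Illustrative sketch (not part of the build): since compute_duality_gap()
// above is a stub, convergence can be monitored externally by comparing the
// two objectives after training; `mtl` stands for a hypothetical trained
// instance.
/*
float64_t primal = mtl->compute_primal_obj();
float64_t dual   = mtl->compute_dual_obj();
float64_t gap    = primal - dual; // >= 0 by weak duality
SG_SPRINT("duality gap: %f\n", gap);
*/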