SHOGUN v3.2.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Soeren Sonnenburg
 */

#include <shogun/lib/config.h>
#ifdef HAVE_LAPACK
#include <shogun/regression/svr/LibLinearRegression.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/lib/Signal.h>

using namespace shogun;

CLibLinearRegression::CLibLinearRegression() :
	CLinearMachine()
{
	init_defaults();
}

CLibLinearRegression::CLibLinearRegression(float64_t C, CDotFeatures* feats, CLabels* labs) :
	CLinearMachine()
{
	init_defaults();
	set_C(C);
	set_features(feats);
	set_labels(labs);
}

void CLibLinearRegression::init_defaults()
{
	set_C(1.0);
	set_epsilon(1e-2);
	set_max_iter(10000);
	set_use_bias(false);
	set_liblinear_regression_type(L2R_L1LOSS_SVR_DUAL);
}

void CLibLinearRegression::register_parameters()
{
	SG_ADD(&m_C, "m_C", "regularization constant", MS_AVAILABLE);
	SG_ADD(&m_epsilon, "m_epsilon", "tolerance epsilon", MS_NOT_AVAILABLE);
	SG_ADD(&m_tube_epsilon, "m_tube_epsilon", "svr tube epsilon", MS_AVAILABLE);
	SG_ADD(&m_max_iter, "m_max_iter", "max number of iterations", MS_NOT_AVAILABLE);
	SG_ADD(&m_use_bias, "m_use_bias", "indicates whether bias should be used", MS_NOT_AVAILABLE);
}

CLibLinearRegression::~CLibLinearRegression()
{
}

bool CLibLinearRegression::train_machine(CFeatures* data)
{
	CSignal::clear_cancel();

	if (data)
		set_features((CDotFeatures*)data);

	ASSERT(features)
	ASSERT(m_labels && m_labels->get_label_type()==LT_REGRESSION)

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_feat=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	if (num_vec!=num_train_labels)
	{
		SG_ERROR("number of vectors %d does not match "
				"number of training labels %d\n",
				num_vec, num_train_labels);
	}

	// with a bias term, reserve one extra slot at the end of w
	if (m_use_bias)
		w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
	else
		w=SGVector<float64_t>(num_feat);

	liblinear_problem prob;
	if (m_use_bias)
	{
		prob.n=w.vlen+1;
		memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
	}
	else
	{
		prob.n=w.vlen;
		memset(w.vector, 0, sizeof(float64_t)*w.vlen);
	}
	prob.l=num_vec;
	prob.x=features;
	prob.use_bias=m_use_bias;
	prob.y=SG_MALLOC(float64_t, prob.l);

	// the solvers below read prob.y and prob.use_bias, so copy the
	// regression targets and the bias setting into the problem
	for (int32_t i=0; i<prob.l; i++)
		prob.y[i]=((CRegressionLabels*)m_labels)->get_label(i);

	switch (m_liblinear_regression_type)
	{
		case L2R_L2LOSS_SVR:
		{
			double* Cs = SG_MALLOC(double, prob.l);
			for(int i = 0; i < prob.l; i++)
				Cs[i] = m_C;

			function *fun_obj=new l2r_l2_svr_fun(&prob, Cs, m_tube_epsilon);
			CTron tron_obj(fun_obj, m_epsilon);
			tron_obj.tron(w.vector, m_max_train_time);
			delete fun_obj;
			SG_FREE(Cs);
			break;
		}
		case L2R_L1LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(&prob);
			break;
		case L2R_L2LOSS_SVR_DUAL:
			solve_l2r_l1l2_svr(&prob);
			break;
		default:
			SG_ERROR("Error: unknown regression type\n")
			break;
	}

	SG_FREE(prob.y);

	return true;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l |\beta_i| + \sum_{i=1}^l y_i \beta_i,
//    s.t.     -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Qij = xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//		upper_bound_i = C
//		lambda_i = 0
// In L2-SVM case:
//		upper_bound_i = INF
//		lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)

void CLibLinearRegression::solve_l2r_l1l2_svr(const liblinear_problem *prob)
{
	int l = prob->l;
	double C = m_C;
	double p = m_tube_epsilon;
	int w_size = prob->n;
	double eps = m_epsilon;
	int i, s, iter = 0;
	int max_iter = 1000;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = CMath::INFTY;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = 0.0;
	double *beta = new double[l];
	double *QD = new double[l];
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = CMath::INFTY;

	if(m_liblinear_regression_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = prob->x->dot(i, prob->x, i);
		prob->x->add_to_dense_vec(beta[i], i, w.vector, w_size);

		if (prob->use_bias)
			w.vector[w_size]+=beta[i];

		index[i] = i;
	}

	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

		for(i=0; i<active_size; i++)
		{
			int j = CMath::random(i, active_size-1);
			CMath::swap(index[i], index[j]);
		}

		for(s=0; s<active_size; s++)
		{
			i = index[s];
			G = -y[i] + lambda[GETI(i)]*beta[i];
			H = QD[i] + lambda[GETI(i)];

			G += prob->x->dense_dot(i, w.vector, w_size);
			if (prob->use_bias)
				G+=w.vector[w_size];

			double Gp = G+p;
			double Gn = G-p;
			double violation = 0;
			if(beta[i] == 0)
			{
				if(Gp < 0)
					violation = -Gp;
				else if(Gn > 0)
					violation = Gn;
				else if(Gp>Gmax_old && Gn<-Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] >= upper_bound[GETI(i)])
			{
				if(Gp > 0)
					violation = Gp;
				else if(Gp < -Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] <= -upper_bound[GETI(i)])
			{
				if(Gn < 0)
					violation = -Gn;
				else if(Gn > Gmax_old)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}
			}
			else if(beta[i] > 0)
				violation = fabs(Gp);
			else
				violation = fabs(Gn);

			Gmax_new = CMath::max(Gmax_new, violation);
			Gnorm1_new += violation;

			// obtain Newton direction d
			if(Gp < H*beta[i])
				d = -Gp/H;
			else if(Gn > H*beta[i])
				d = -Gn/H;
			else
				d = -beta[i];

			if(fabs(d) < 1.0e-12)
				continue;

			double beta_old = beta[i];
			beta[i] = CMath::min(CMath::max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]);
			d = beta[i]-beta_old;

			if(d != 0)
			{
				prob->x->add_to_dense_vec(d, i, w.vector, w_size);

				if (prob->use_bias)
					w.vector[w_size]+=d;
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;

		SG_SABS_PROGRESS(Gnorm1_new, -CMath::log10(Gnorm1_new), -CMath::log10(eps*Gnorm1_init), -CMath::log10(Gnorm1_init), 6)

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				Gmax_old = CMath::INFTY;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	SG_DONE()
	SG_INFO("\noptimization finished, #iter = %d\n", iter)
	if(iter >= max_iter)
		SG_INFO("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n")

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	SG_INFO("Objective value = %lf\n", v)
	SG_INFO("nSV = %d\n", nSV)

	delete [] beta;
	delete [] QD;
	delete [] index;
}

#endif /* HAVE_LAPACK */
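A minimal usage sketch, assuming the SHOGUN 3.x C++ API (CDenseFeatures, CRegressionLabels, CMachine::apply_regression) and toy data invented for illustration; only the CLibLinearRegression constructor and its defaults come from the listing above:

#include <shogun/base/init.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/regression/svr/LibLinearRegression.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// toy data: 2 features, 4 training vectors (one column per vector)
	SGMatrix<float64_t> X(2, 4);
	SGVector<float64_t> y(4);
	for (int32_t i=0; i<4; i++)
	{
		X(0, i)=i;
		X(1, i)=2*i;
		y[i]=3*i+1;    // targets roughly linear in the features
	}

	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
	CRegressionLabels* labs=new CRegressionLabels(y);

	// C=1.0; tube epsilon, tolerance and solver type keep the values
	// from init_defaults() (L2R_L1LOSS_SVR_DUAL, no bias)
	CLibLinearRegression* svr=new CLibLinearRegression(1.0, feats, labs);
	svr->train();

	CRegressionLabels* pred=svr->apply_regression(feats);
	pred->get_labels().display_vector("predictions");

	SG_UNREF(pred);
	SG_UNREF(svr);
	exit_shogun();
	return 0;
}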