SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Harshit Syal 00008 * Copyright (C) 2012 Harshit Syal 00009 */ 00010 #include <shogun/lib/config.h> 00011 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/classifier/svm/NewtonSVM.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/machine/LinearMachine.h> 00016 #include <shogun/features/DotFeatures.h> 00017 #include <shogun/labels/Labels.h> 00018 #include <shogun/labels/BinaryLabels.h> 00019 #include <shogun/mathematics/lapack.h> 00020 00021 //#define DEBUG_NEWTON 00022 //#define V_NEWTON 00023 using namespace shogun; 00024 00025 CNewtonSVM::CNewtonSVM() 00026 : CLinearMachine(), C(1), use_bias(true) 00027 { 00028 } 00029 00030 CNewtonSVM::CNewtonSVM(float64_t c, CDotFeatures* traindat, CLabels* trainlab, int32_t itr) 00031 : CLinearMachine() 00032 { 00033 lambda=1/c; 00034 num_iter=itr; 00035 prec=1e-6; 00036 num_iter=20; 00037 use_bias=true; 00038 C=c; 00039 set_features(traindat); 00040 set_labels(trainlab); 00041 } 00042 00043 00044 CNewtonSVM::~CNewtonSVM() 00045 { 00046 } 00047 00048 00049 bool CNewtonSVM::train_machine(CFeatures* data) 00050 { 00051 ASSERT(m_labels) 00052 ASSERT(m_labels->get_label_type() == LT_BINARY) 00053 00054 if (data) 00055 { 00056 if (!data->has_property(FP_DOT)) 00057 SG_ERROR("Specified features are not of type CDotFeatures\n") 00058 set_features((CDotFeatures*) data); 00059 } 00060 00061 ASSERT(features) 00062 00063 SGVector<float64_t> train_labels=((CBinaryLabels*) m_labels)->get_labels(); 00064 int32_t num_feat=features->get_dim_feature_space(); 00065 int32_t num_vec=features->get_num_vectors(); 00066 00067 //Assigning dimensions for whole class scope 00068 x_n=num_vec; 00069 x_d=num_feat; 00070 00071 ASSERT(num_vec==train_labels.vlen) 00072 00073 float64_t* weights = SG_CALLOC(float64_t, x_d+1); 00074 float64_t* out=SG_MALLOC(float64_t, x_n); 00075 SGVector<float64_t>::fill_vector(out, x_n, 1.0); 00076 00077 int32_t *sv=SG_MALLOC(int32_t, x_n), size_sv=0, iter=0; 00078 float64_t obj, *grad=SG_MALLOC(float64_t, x_d+1); 00079 float64_t t; 00080 00081 while(1) 00082 { 00083 iter++; 00084 00085 if (iter>num_iter) 00086 { 00087 SG_PRINT("Maximum number of Newton steps reached. Try larger lambda") 00088 break; 00089 } 00090 00091 obj_fun_linear(weights, out, &obj, sv, &size_sv, grad); 00092 00093 #ifdef DEBUG_NEWTON 00094 SG_PRINT("fun linear passed !\n") 00095 SG_PRINT("Obj =%f\n", obj) 00096 SG_PRINT("Grad=\n") 00097 00098 for (int32_t i=0; i<x_d+1; i++) 00099 SG_PRINT("grad[%d]=%.16g\n", i, grad[i]) 00100 SG_PRINT("SV=\n") 00101 00102 for (int32_t i=0; i<size_sv; i++) 00103 SG_PRINT("sv[%d]=%d\n", i, sv[i]) 00104 #endif 00105 00106 SGVector<float64_t> sgv; 00107 float64_t* Xsv = SG_MALLOC(float64_t, x_d*size_sv); 00108 for (int32_t k=0; k<size_sv; k++) 00109 { 00110 sgv=features->get_computed_dot_feature_vector(sv[k]); 00111 for (int32_t j=0; j<x_d; j++) 00112 Xsv[k*x_d+j]=sgv.vector[j]; 00113 } 00114 int32_t tx=x_d; 00115 int32_t ty=size_sv; 00116 SGMatrix<float64_t>::transpose_matrix(Xsv, tx, ty); 00117 00118 #ifdef DEBUG_NEWTON 00119 SGMatrix<float64_t>::display_matrix(Xsv, x_d, size_sv); 00120 #endif 00121 00122 float64_t* lcrossdiag=SG_MALLOC(float64_t, (x_d+1)*(x_d+1)); 00123 float64_t* vector=SG_MALLOC(float64_t, x_d+1); 00124 00125 for (int32_t i=0; i<x_d; i++) 00126 vector[i]=lambda; 00127 00128 vector[x_d]=0; 00129 00130 SGMatrix<float64_t>::create_diagonal_matrix(lcrossdiag, vector, x_d+1); 00131 float64_t* Xsv2=SG_MALLOC(float64_t, x_d*x_d); 00132 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, x_d, x_d, size_sv, 00133 1.0, Xsv, size_sv, Xsv, size_sv, 0.0, Xsv2, x_d); 00134 float64_t* sum=SG_CALLOC(float64_t, x_d); 00135 00136 for (int32_t j=0; j<x_d; j++) 00137 { 00138 for (int32_t i=0; i<size_sv; i++) 00139 sum[j]+=Xsv[i+j*size_sv]; 00140 } 00141 00142 float64_t* Xsv2sum=SG_MALLOC(float64_t, (x_d+1)*(x_d+1)); 00143 00144 for (int32_t i=0; i<x_d; i++) 00145 { 00146 for (int32_t j=0; j<x_d; j++) 00147 Xsv2sum[j*(x_d+1)+i]=Xsv2[j*x_d+i]; 00148 00149 Xsv2sum[x_d*(x_d+1)+i]=sum[i]; 00150 } 00151 00152 for (int32_t j=0; j<x_d; j++) 00153 Xsv2sum[j*(x_d+1)+x_d]=sum[j]; 00154 00155 Xsv2sum[x_d*(x_d+1)+x_d]=size_sv; 00156 float64_t* identity_matrix=SG_MALLOC(float64_t, (x_d+1)*(x_d+1)); 00157 00158 SGVector<float64_t>::fill_vector(vector, x_d+1, 1.0); 00159 00160 SGMatrix<float64_t>::create_diagonal_matrix(identity_matrix, vector, x_d+1); 00161 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, x_d+1, x_d+1, 00162 x_d+1, 1.0, lcrossdiag, x_d+1, identity_matrix, x_d+1, 1.0, 00163 Xsv2sum, x_d+1); 00164 00165 float64_t* inverse=SG_MALLOC(float64_t, (x_d+1)*(x_d+1)); 00166 int32_t r=x_d+1; 00167 SGMatrix<float64_t>::pinv(Xsv2sum, r, r, inverse); 00168 00169 float64_t* step=SG_MALLOC(float64_t, r); 00170 float64_t* s2=SG_MALLOC(float64_t, r); 00171 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, r, 1, r, 1.0, 00172 inverse, r, grad, r, 0.0, s2, r); 00173 00174 for (int32_t i=0; i<r; i++) 00175 step[i]=-s2[i]; 00176 00177 line_search_linear(weights, step, out, &t); 00178 00179 #ifdef DEBUG_NEWTON 00180 SG_PRINT("t=%f\n\n", t) 00181 00182 for (int32_t i=0; i<x_n; i++) 00183 SG_PRINT("out[%d]=%.16g\n", i, out[i]) 00184 00185 for (int32_t i=0; i<x_d+1; i++) 00186 SG_PRINT("weights[%d]=%.16g\n", i, weights[i]) 00187 #endif 00188 00189 SGVector<float64_t>::vec1_plus_scalar_times_vec2(weights, t, step, r); 00190 float64_t newton_decrement; 00191 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, r, -0.5, 00192 step, r, grad, r, 0.0, &newton_decrement, 1); 00193 #ifdef V_NEWTON 00194 SG_PRINT("Itr=%d, Obj=%f, No of sv=%d, Newton dec=%0.3f, line search=%0.3f\n\n", 00195 iter, obj, size_sv, newton_decrement, t); 00196 #endif 00197 00198 SG_FREE(Xsv); 00199 SG_FREE(vector); 00200 SG_FREE(lcrossdiag); 00201 SG_FREE(Xsv2); 00202 SG_FREE(Xsv2sum); 00203 SG_FREE(identity_matrix); 00204 SG_FREE(inverse); 00205 SG_FREE(step); 00206 SG_FREE(s2); 00207 00208 if (newton_decrement*2<prec*obj) 00209 break; 00210 } 00211 00212 #ifdef V_NEWTON 00213 SG_PRINT("FINAL W AND BIAS Vector=\n\n") 00214 CMath::display_matrix(weights, x_d+1, 1); 00215 #endif 00216 00217 set_w(SGVector<float64_t>(weights, x_d)); 00218 set_bias(weights[x_d]); 00219 00220 SG_FREE(sv); 00221 SG_FREE(grad); 00222 SG_FREE(out); 00223 00224 return true; 00225 00226 00227 } 00228 00229 void CNewtonSVM::line_search_linear(float64_t* weights, float64_t* d, float64_t* 00230 out, float64_t* tx) 00231 { 00232 SGVector<float64_t> Y=((CBinaryLabels*) m_labels)->get_labels(); 00233 float64_t* outz=SG_MALLOC(float64_t, x_n); 00234 float64_t* temp1=SG_MALLOC(float64_t, x_n); 00235 float64_t* temp1forout=SG_MALLOC(float64_t, x_n); 00236 float64_t* outzsv=SG_MALLOC(float64_t, x_n); 00237 float64_t* Ysv=SG_MALLOC(float64_t, x_n); 00238 float64_t* Xsv=SG_MALLOC(float64_t, x_n); 00239 float64_t* temp2=SG_MALLOC(float64_t, x_d); 00240 float64_t t=0.0; 00241 float64_t* Xd=SG_MALLOC(float64_t, x_n); 00242 00243 for (int32_t i=0; i<x_n; i++) 00244 Xd[i]=features->dense_dot(i, d, x_d); 00245 00246 SGVector<float64_t>::add_scalar(d[x_d], Xd, x_n); 00247 00248 #ifdef DEBUG_NEWTON 00249 CMath::display_vector(d, x_d+1, "Weight vector"); 00250 00251 for (int32_t i=0; i<x_d+1; i++) 00252 SG_SPRINT("Xd[%d]=%.18g\n", i, Xd[i]) 00253 00254 CMath::display_vector(Xd, x_n, "XD vector="); 00255 #endif 00256 00257 float64_t wd; 00258 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, 00259 weights, x_d, d, x_d, 0.0, &wd, 1); 00260 float64_t tempg, dd; 00261 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d, lambda, d, 00262 x_d, d, x_d, 0.0, &dd, 1); 00263 00264 float64_t g, h; 00265 int32_t sv_len=0, *sv=SG_MALLOC(int32_t, x_n); 00266 00267 do 00268 { 00269 SGVector<float64_t>::vector_multiply(temp1, Y.vector, Xd, x_n); 00270 memcpy(temp1forout, temp1, sizeof(float64_t)*x_n); 00271 SGVector<float64_t>::scale_vector(t, temp1forout, x_n); 00272 SGVector<float64_t>::add(outz, 1.0, out, -1.0, temp1forout, x_n); 00273 00274 // Calculation of sv 00275 sv_len=0; 00276 00277 for (int32_t i=0; i<x_n; i++) 00278 { 00279 if (outz[i]>0) 00280 sv[sv_len++]=i; 00281 } 00282 00283 //Calculation of gradient 'g' 00284 for (int32_t i=0; i<sv_len; i++) 00285 { 00286 outzsv[i]=outz[sv[i]]; 00287 Ysv[i]=Y.vector[sv[i]]; 00288 Xsv[i]=Xd[sv[i]]; 00289 } 00290 00291 memset(temp1, 0, sizeof(float64_t)*sv_len); 00292 SGVector<float64_t>::vector_multiply(temp1, outzsv, Ysv, sv_len); 00293 tempg=SGVector<float64_t>::dot(temp1, Xsv, sv_len); 00294 g=wd+(t*dd); 00295 g-=tempg; 00296 00297 // Calculation of second derivative 'h' 00298 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, sv_len, 1.0, 00299 Xsv, sv_len, Xsv, sv_len, 0.0, &h, 1); 00300 h+=dd; 00301 00302 // Calculation of 1D Newton step 'd' 00303 t-=g/h; 00304 00305 if (((g*g)/h)<1e-10) 00306 break; 00307 00308 } while(1); 00309 00310 for (int32_t i=0; i<x_n; i++) 00311 out[i]=outz[i]; 00312 *tx=t; 00313 00314 SG_FREE(sv); 00315 SG_FREE(temp1); 00316 SG_FREE(temp2); 00317 SG_FREE(temp1forout); 00318 SG_FREE(outz); 00319 SG_FREE(outzsv); 00320 SG_FREE(Ysv); 00321 SG_FREE(Xsv); 00322 SG_FREE(Xd); 00323 } 00324 00325 void CNewtonSVM::obj_fun_linear(float64_t* weights, float64_t* out, 00326 float64_t* obj, int32_t* sv, int32_t* numsv, float64_t* grad) 00327 { 00328 SGVector<float64_t> v=((CBinaryLabels*) m_labels)->get_labels(); 00329 00330 for (int32_t i=0; i<x_n; i++) 00331 { 00332 if (out[i]<0) 00333 out[i]=0; 00334 } 00335 00336 #ifdef DEBUG_NEWTON 00337 for (int32_t i=0; i<x_n; i++) 00338 SG_SPRINT("out[%d]=%.16g\n", i, out[i]) 00339 #endif 00340 00341 //create copy of w0 00342 float64_t* w0=SG_MALLOC(float64_t, x_d+1); 00343 memcpy(w0, weights, sizeof(float64_t)*(x_d)); 00344 w0[x_d]=0; //do not penalize b 00345 00346 //create copy of out 00347 float64_t* out1=SG_MALLOC(float64_t, x_n); 00348 00349 //compute steps for obj 00350 SGVector<float64_t>::vector_multiply(out1, out, out, x_n); 00351 float64_t p1=SGVector<float64_t>::sum(out1, x_n)/2; 00352 float64_t C1; 00353 float64_t* w0copy=SG_MALLOC(float64_t, x_d+1); 00354 memcpy(w0copy, w0, sizeof(float64_t)*(x_d+1)); 00355 SGVector<float64_t>::scale_vector(0.5, w0copy, x_d+1); 00356 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, 1, 1, x_d+1, lambda, 00357 w0, x_d+1, w0copy, x_d+1, 0.0, &C1, 1); 00358 *obj=p1+C1; 00359 SGVector<float64_t>::scale_vector(lambda, w0, x_d); 00360 float64_t* temp=SG_CALLOC(float64_t, x_n); //temp = out.*Y 00361 SGVector<float64_t>::vector_multiply(temp, out, v.vector, x_n); 00362 float64_t* temp1=SG_CALLOC(float64_t, x_d); 00363 SGVector<float64_t> vec; 00364 00365 for (int32_t i=0; i<x_n; i++) 00366 { 00367 features->add_to_dense_vec(temp[i], i, temp1, x_d); 00368 #ifdef DEBUG_NEWTON 00369 SG_SPRINT("\ntemp[%d]=%f", i, temp[i]) 00370 CMath::display_vector(vec.vector, x_d, "vector"); 00371 CMath::display_vector(temp1, x_d, "debuging"); 00372 #endif 00373 } 00374 float64_t* p2=SG_MALLOC(float64_t, x_d+1); 00375 00376 for (int32_t i=0; i<x_d; i++) 00377 p2[i]=temp1[i]; 00378 00379 p2[x_d]=SGVector<float64_t>::sum(temp, x_n); 00380 SGVector<float64_t>::add(grad, 1.0, w0, -1.0, p2, x_d+1); 00381 int32_t sv_len=0; 00382 00383 for (int32_t i=0; i<x_n; i++) 00384 { 00385 if (out[i]>0) 00386 sv[sv_len++]=i; 00387 } 00388 00389 *numsv=sv_len; 00390 00391 SG_FREE(w0); 00392 SG_FREE(w0copy); 00393 SG_FREE(out1); 00394 SG_FREE(temp); 00395 SG_FREE(temp1); 00396 SG_FREE(p2); 00397 } 00398 #endif //HAVE_LAPACK