/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Alexander Binder
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 *
 * Update to patch 0.10.0 - thanks to Eric aka Yoo (thereisnoknife@gmail.com)
 */

#include <shogun/classifier/mkl/MKLMulticlassGradient.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

MKLMulticlassGradient::MKLMulticlassGradient()
{
    numkernels=0;
    pnorm=2;
}

MKLMulticlassGradient::~MKLMulticlassGradient()
{
}

MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient& gl)
{
    numkernels=gl.numkernels;
    pnorm=gl.pnorm;
    return (*this);
}

MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient& gl)
{
    numkernels=gl.numkernels;
    pnorm=gl.pnorm;
}

void MKLMulticlassGradient::setup(const int32_t numkernels2)
{
    numkernels=numkernels2;
    if (numkernels<=1)
    {
        SG_ERROR("MKLMulticlassGradient::setup(const int32_t numkernels): input "
                "numkernels out of bounds: %d\n", numkernels);
    }
}

void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
{
    pnorm=norm;
    if (pnorm<1)
        SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm): parameter pnorm<1\n")
}

void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t>& normw2,
        const float64_t sumofpositivealphas)
{
    normsofsubkernels.push_back(normw2);
    sumsofalphas.push_back(sumofpositivealphas);
}

void MKLMulticlassGradient::genbetas(::std::vector<float64_t>& weights, const ::std::vector<float64_t>& gammas)
{
    assert((int32_t)gammas.size()+1==numkernels);

    // pi/2: upper bound for the polar angles (the name pi4 is historical)
    const double pi4=3.141592653589793/2;

    weights.resize(numkernels);

    // numkernels-dimensional polar transform
    weights[0]=1;
    for (int32_t i=0; i<numkernels-1; ++i)
    {
        for (int32_t k=0; k<i+1; ++k)
        {
            weights[k]*=cos(std::min(std::max(0.0,gammas[i]),pi4));
        }
        weights[i+1]=sin(std::min(std::max(0.0,gammas[i]),pi4));
    }

    // pnorm manifold adjustment
    if (pnorm!=2.0)
    {
        for (int32_t i=0; i<numkernels; ++i)
            weights[i]=pow(weights[i],2.0/pnorm);
    }
}
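/*
 * Note on the parametrization used by genbetas() above and
 * gengammagradient() below (a reading of the loops, not part of the
 * original comments): with angles g_i clamped to [0, pi/2] the polar
 * transform yields
 *
 *   b_0 = cos(g_0)*cos(g_1)*...*cos(g_{n-2})
 *   b_k = sin(g_{k-1})*cos(g_k)*...*cos(g_{n-2}),   1 <= k <= n-1,
 *
 * so that sum_k b_k^2 = 1. Raising each coordinate to the power 2/pnorm
 * afterwards maps the point onto the pnorm-sphere, i.e. the resulting
 * weights satisfy sum_k beta_k^pnorm = 1.
 */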
void MKLMulticlassGradient::gengammagradient(::std::vector<float64_t>& gammagradient, const ::std::vector<float64_t>& gammas, const int32_t dim)
{
    assert((int32_t)gammas.size()+1==numkernels);

    // pi/2: upper bound for the polar angles (the name pi4 is historical)
    const double pi4=3.141592653589793/2;

    gammagradient.resize(numkernels);
    std::fill(gammagradient.begin(),gammagradient.end(),0.0);

    // numkernels-dimensional polar transform, differentiated w.r.t. gammas[dim]
    gammagradient[0]=1;
    for (int32_t i=0; i<numkernels-1; ++i)
    {
        if (i!=dim)
        {
            for (int32_t k=0; k<std::min(i+1,dim+2); ++k)
            {
                gammagradient[k]*=pow(cos(std::min(std::max(0.0,gammas[i]),pi4)), 2.0/pnorm);
            }
            if (i<dim)
                gammagradient[i+1]=pow(sin(std::min(std::max(0.0,gammas[i]),pi4)), 2.0/pnorm);
        }
        else // i==dim, higher dims are 0
        {
            for (int32_t k=0; k<i+1; ++k)
            {
                gammagradient[k]*=pow(cos(std::min(std::max(0.0,gammas[i]),pi4)), 2.0/pnorm-1.0)
                        *(-1)*sin(std::min(std::max(0.0,gammas[i]),pi4));
            }
            gammagradient[i+1]=pow(sin(std::min(std::max(0.0,gammas[i]),pi4)), 2.0/pnorm-1)
                    *cos(std::min(std::max(0.0,gammas[i]),pi4));
        }
    }
}

float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t>& weights, const int32_t index)
{
    assert(index>=0);
    assert(index < (int32_t)sumsofalphas.size());
    assert(index < (int32_t)normsofsubkernels.size());

    float64_t obj= -sumsofalphas[index];
    for (int32_t i=0; i<numkernels; ++i)
    {
        obj+=0.5*normsofsubkernels[index][i]*weights[i];
    }
    return obj;
}
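/*
 * Reading of objectives(): constraint i stores the quantities recorded by
 * addconstraint() and contributes
 *
 *   obj_i(beta) = -sumsofalphas[i] + 0.5 * sum_k beta_k * normsofsubkernels[i][k].
 *
 * linesearch() below ascends min_i obj_i(beta) over the pnorm-sphere,
 * moving in the angle parametrization described above.
 */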
void MKLMulticlassGradient::linesearch(std::vector<float64_t>& finalbeta, const std::vector<float64_t>& oldweights)
{
    // pi/2: upper bound for the polar angles (the name pi4 is historical)
    const float64_t pi4=3.141592653589793/2;

    float64_t fingrad=1e-7;
    int32_t maxhalfiter=20;
    int32_t totaliters=6;
    float64_t maxrelobjdiff=1e-6;

    std::vector<float64_t> finalgamma,curgamma;

    curgamma.resize(numkernels-1);
    if (oldweights.empty())
    {
        std::fill(curgamma.begin(),curgamma.end(),pi4/2);
    }
    else
    {
        // initialize curgamma from oldweights: raise to the power pnorm/2,
        // take asin of the highest dimension and divide everything below by its cos
        std::vector<float64_t> tmpbeta(numkernels);
        for (int32_t i=numkernels-1; i>=0; --i)
        {
            tmpbeta[i]=pow(oldweights[i],pnorm/2);
        }

        for (int32_t i=numkernels-1; i>=1; --i)
        {
            curgamma[i-1]=asin(tmpbeta[i]);

            if (i<numkernels-1)
            {
                if (cos(curgamma[i])<=0)
                {
                    SG_SINFO("linesearch(...): at i %d cos(curgamma[i])<=0 %f\n", i, cos(curgamma[i]))
                    //curgamma[i-1]=pi4/2;
                }
            }

            for (int32_t k=numkernels-2; k>=1; --k) // k==0 not necessary
            {
                if (cos(curgamma[i-1])>0)
                {
                    tmpbeta[k]/=cos(curgamma[i-1]);
                    if (tmpbeta[k]>1)
                    {
                        SG_SINFO("linesearch(...): at k %d tmpbeta[k]>1 %f\n", k, tmpbeta[k])
                    }
                    tmpbeta[k]=std::min(1.0,std::max(0.0,tmpbeta[k]));
                }
            }
        }
    }

    for (size_t i=0; i<curgamma.size(); ++i)
    {
        SG_SINFO("linesearch(...): curgamma[i] %f\n",curgamma[i])
    }

    bool finished=false;
    int32_t longiters=0;
    while (!finished)
    {
        ++longiters;
        std::vector<float64_t> curbeta;
        genbetas(curbeta,curgamma);

        // find smallest objective
        int32_t minind=0;
        float64_t minval=objectives(curbeta,minind);
        SG_SINFO("linesearch(...): objective at i=0 %f\n",minval)
        for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
        {
            float64_t tmpval=objectives(curbeta,i);
            SG_SINFO("linesearch(...): objective at i=%d %f\n",i,tmpval)
            if (tmpval<minval)
            {
                minval=tmpval;
                minind=i;
            }
        }
        float64_t lobj=minval;

        // compute gradient for smallest objective
        std::vector<float64_t> curgrad;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            ::std::vector<float64_t> gammagradient;
            gengammagradient(gammagradient,curgamma,i);
            curgrad.push_back(objectives(gammagradient,minind));
        }

        // find boundary hit point (check for each dim) within [0, pi/2]
        std::vector<float64_t> maxalphas(numkernels-1,0);
        float64_t maxgrad=0;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            maxgrad=std::max(maxgrad,fabs(curgrad[i]));
            if (curgrad[i]<0)
            {
                maxalphas[i]=(0-curgamma[i])/curgrad[i];
            }
            else if (curgrad[i]>0)
            {
                maxalphas[i]=(pi4-curgamma[i])/curgrad[i];
            }
            else
            {
                maxalphas[i]=1024*1024;
            }
        }

        float64_t maxalpha=maxalphas[0];
        for (int32_t i=1; i<numkernels-1; ++i)
        {
            maxalpha=std::min(maxalpha,maxalphas[i]);
        }

        if ((maxalpha>1024*1023) || (maxgrad<fingrad))
        {
            // curgrad[i] approx 0 for all i: terminate
            finished=true;
            finalgamma=curgamma;
        }
        else // of if(maxalpha>1024*1023)
        {
            // line search: shrink the right end until the min over all objectives
            // increases compared to the starting point, then bisect between the
            // left and right halves until converged;
            // maximizes over al in [0, maxalpha] along curgamma + al*curgrad
            float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2;

            std::vector<float64_t> tmpgamma=curgamma, tmpbeta;
            for (int32_t i=1; i<numkernels-1; ++i)
            {
                tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
            }
            genbetas(tmpbeta,tmpgamma);
            float64_t curobj=objectives(tmpbeta,0);
            for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
            {
                curobj=std::min(curobj,objectives(tmpbeta,i));
            }

            int32_t curhalfiter=0;
            while ((curobj<minval) && (curhalfiter<maxhalfiter) && (fabs(curobj/minval-1)>maxrelobjdiff))
            {
                rightalpha=midalpha;
                midalpha=(leftalpha+rightalpha)/2;
                ++curhalfiter;
                tmpgamma=curgamma;
                for (int32_t i=1; i<numkernels-1; ++i)
                {
                    tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
                }
                genbetas(tmpbeta,tmpgamma);
                curobj=objectives(tmpbeta,0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj=std::min(curobj,objectives(tmpbeta,i));
                }
            }

            float64_t robj=curobj;
            float64_t tmpobj=std::max(lobj,robj);
            do
            {
                tmpobj=std::max(lobj,robj);

                tmpgamma=curgamma;
                for (int32_t i=1; i<numkernels-1; ++i)
                {
                    tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i];
                }
                genbetas(tmpbeta,tmpgamma);
                curobj=objectives(tmpbeta,0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj=std::min(curobj,objectives(tmpbeta,i));
                }

                if (lobj>robj)
                {
                    rightalpha=midalpha;
                    robj=curobj;
                }
                else
                {
                    leftalpha=midalpha;
                    lobj=curobj;
                }
                midalpha=(leftalpha+rightalpha)/2;
            }
            while (fabs(curobj/tmpobj-1)>maxrelobjdiff);

            finalgamma=tmpgamma;
            curgamma=tmpgamma;
        } // else of if(maxalpha>1024*1023)

        if (longiters>=totaliters)
        {
            finished=true;
        }
    }

    // project the result back onto the pnorm-sphere
    genbetas(finalbeta,finalgamma);
    float64_t nor=0;
    for (int32_t i=0; i<numkernels; ++i)
    {
        nor+=pow(finalbeta[i],pnorm);
    }
    if (nor>0)
    {
        nor=pow(nor,1.0/pnorm);
        for (int32_t i=0; i<numkernels; ++i)
        {
            finalbeta[i]/=nor;
        }
    }
}
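/*
 * Reading of linesearch2() below (despite the name, it performs no search):
 * it applies a closed-form pnorm-MKL style update. Up to normalization,
 *
 *   beta_k  ~  ( normsofsubkernels.back()[k] * oldweights[k]^2 / pnorm )^( 1/(pnorm+1) ),
 *
 * then adds a constant R = epsRegul * sqrt(sum_k (old_k - new_k)^2 / pnorm)
 * to every weight and renormalizes onto the pnorm-sphere.
 */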
void MKLMulticlassGradient::linesearch2(std::vector<float64_t>& finalbeta, const std::vector<float64_t>& oldweights)
{
    const float64_t epsRegul = 0.01;

    int32_t num_kernels=(int32_t)oldweights.size();
    int32_t nofKernelsGood=num_kernels;

    finalbeta=oldweights;

    for (int32_t p=0; p<num_kernels; ++p)
    {
        //SG_PRINT("MKL-direct: sumw[%3d] = %e ( oldbeta = %e )\n", p, sumw[p], old_beta[p] )
        if (oldweights[p] >= 0.0)
        {
            finalbeta[p] = normsofsubkernels.back()[p]*oldweights[p]*oldweights[p] / pnorm;
            finalbeta[p] = CMath::pow(finalbeta[p], 1.0/(pnorm+1.0));
        }
        else
        {
            finalbeta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(finalbeta[p] >= 0)
    }

    // --- normalize
    float64_t Z = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
        Z += CMath::pow(finalbeta[p], pnorm);

    Z = CMath::pow(Z, -1.0/pnorm);
    ASSERT(Z >= 0)
    for (int32_t p=0; p<num_kernels; ++p)
        finalbeta[p] *= Z;

    // --- regularize & renormalize
    float64_t preR = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
        preR += CMath::pow(oldweights[p]-finalbeta[p], 2.0);

    const float64_t R = CMath::sqrt(preR/pnorm) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", pnorm)
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood)
        SG_PRINT("MKL-direct: Z = %e\n", Z)
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul)
        for (int32_t p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(oldweights[p]-finalbeta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, oldweights[p]-finalbeta[p], oldweights[p], finalbeta[p])
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR)
        SG_PRINT("MKL-direct: preR/p = %e\n", preR/pnorm)
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/pnorm))
        SG_PRINT("MKL-direct: R = %e\n", R)
        SG_ERROR("Assertion R >= 0 failed!\n")
    }

    Z = 0.0;
    for (int32_t p=0; p<num_kernels; ++p)
    {
        finalbeta[p] += R;
        Z += CMath::pow(finalbeta[p], pnorm);
        ASSERT(finalbeta[p] >= 0)
    }
    Z = CMath::pow(Z, -1.0/pnorm);
    ASSERT(Z >= 0)
    for (int32_t p=0; p<num_kernels; ++p)
    {
        finalbeta[p] *= Z;
        ASSERT(finalbeta[p] >= 0.0)
        if (finalbeta[p] > 1.0)
            finalbeta[p] = 1.0;
    }
}
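/*
 * computeweights() below performs a single closed-form update via
 * linesearch2(); the block commented out at its end is the earlier
 * iterative scheme built on linesearch(), kept for reference.
 */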
void MKLMulticlassGradient::computeweights(std::vector<float64_t>& weights2)
{
    if (pnorm<1)
        SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t>& weights2): parameter pnorm<1\n")

    SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n",pnorm)

    std::vector<float64_t> initw(weights2);
    linesearch2(weights2,initw);

    SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights \n")
    for (size_t i=0; i<weights2.size(); ++i)
    {
        SG_SINFO(" %f",weights2[i])
    }
    SG_SINFO(" \n")

    /*
    int32_t maxnumlinesrch=15;
    float64_t maxdiff=1e-6;

    bool finished=false;
    int32_t numiter=0;
    do
    {
        ++numiter;
        std::vector<float64_t> initw(weights2);
        linesearch(weights2,initw);
        float64_t norm=0;
        if (!initw.empty())
        {
            for (size_t i=0; i<weights2.size(); ++i)
            {
                norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
            }
            norm=sqrt(norm);
        }
        else
        {
            norm=maxdiff+1;
        }

        if ((norm<maxdiff) || (numiter>=maxnumlinesrch))
        {
            finished=true;
        }
        //for (size_t i=0; i<weights2.size(); ++i)
        //{
        //    SG_SINFO("MKLMulticlassGradient::computeweights(...): oldweights %f\n",initw[i])
        //}
        SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights at iter %d normdiff %f\n",numiter,norm)
        for (size_t i=0; i<weights2.size(); ++i)
        {
            SG_SINFO(" %f",weights2[i])
        }
        SG_SINFO(" \n")
    }
    while (false==finished);
    */
}
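/*
 * Illustrative usage sketch (not part of this file; num_kernels, normw2 and
 * sumofpositivealphas stand for hypothetical surrounding variables):
 *
 *   MKLMulticlassGradient opt;
 *   opt.setup(num_kernels);                          // number of subkernels, must be >1
 *   opt.set_mkl_norm(2.0);                           // pnorm, must be >=1
 *   opt.addconstraint(normw2, sumofpositivealphas);  // one entry per recorded constraint
 *
 *   std::vector<float64_t> weights(num_kernels, 1.0/num_kernels);
 *   opt.computeweights(weights);                     // weights is overwritten with the new betas
 */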