MKLMulticlassGradient.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Alexander Binder
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 *
 * Update to patch 0.10.0 - thanks to Eric aka Yoo (thereisnoknife@gmail.com)
 *
 */

#include <shogun/classifier/mkl/MKLMulticlassGradient.h>
#include <shogun/mathematics/Math.h>

#include <algorithm>
#include <cassert>
#include <cmath>

using namespace shogun;

MKLMulticlassGradient::MKLMulticlassGradient()
{
    numkernels=0;
    pnorm=2;
}

MKLMulticlassGradient::~MKLMulticlassGradient()
{
}

MKLMulticlassGradient MKLMulticlassGradient::operator=(MKLMulticlassGradient & gl)
{
    numkernels=gl.numkernels;
    pnorm=gl.pnorm;
    return (*this);
}

MKLMulticlassGradient::MKLMulticlassGradient(MKLMulticlassGradient & gl)
{
    numkernels=gl.numkernels;
    pnorm=gl.pnorm;
}

void MKLMulticlassGradient::setup(const int32_t numkernels2)
{
    numkernels=numkernels2;
    if (numkernels<=1)
    {
        SG_ERROR("MKLMulticlassGradient::setup(const int32_t numkernels2): input "
                "numkernels out of bounds: %d\n",numkernels);
    }
}

void MKLMulticlassGradient::set_mkl_norm(float64_t norm)
{
    pnorm=norm;
    if (pnorm<1)
        SG_ERROR("MKLMulticlassGradient::set_mkl_norm(float64_t norm): parameter pnorm<1\n")
}


void MKLMulticlassGradient::addconstraint(const ::std::vector<float64_t> & normw2,
        const float64_t sumofpositivealphas)
{
    normsofsubkernels.push_back(normw2);
    sumsofalphas.push_back(sumofpositivealphas);
}

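/*
 * Sketch of the transform implemented by genbetas below: it is the
 * standard spherical parametrization followed by a p-norm adjustment.
 * With n = numkernels and angles gamma_0,...,gamma_{n-2} clamped to
 * [0, pi/2],
 *
 *   beta_0 = cos(gamma_0)*cos(gamma_1)*...*cos(gamma_{n-2})
 *   beta_j = sin(gamma_{j-1})*cos(gamma_j)*...*cos(gamma_{n-2}),  j >= 1
 *
 * so that sum_j beta_j^2 = 1. For pnorm != 2 each coordinate is then
 * mapped to beta_j^(2/pnorm), which yields sum_j beta_j^pnorm = 1,
 * i.e. weights on the unit sphere of the p-norm.
 */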
void MKLMulticlassGradient::genbetas( ::std::vector<float64_t> & weights ,const ::std::vector<float64_t> & gammas)
{
    assert((int32_t)gammas.size()+1==numkernels);

    const float64_t pihalf=3.141592653589793/2;

    weights.resize(numkernels);

    // numkernels-dimensional polar transform
    weights[0]=1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        for (int32_t k=0; k<i+1; ++k)
        {
            weights[k]*=cos( std::min(std::max(0.0,gammas[i]),pihalf) );
        }
        weights[i+1]=sin( std::min(std::max(0.0,gammas[i]),pihalf) );
    }

    // p-norm manifold adjustment
    if (pnorm!=2.0)
    {
        for (int32_t i=0; i<numkernels; ++i)
            weights[i]=pow(weights[i],2.0/pnorm);
    }
}

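/*
 * gengammagradient computes the partial derivative of the p-adjusted
 * polar transform above with respect to the single angle gammas[dim],
 * by the product rule: every factor cos(gamma_i)^(2/pnorm) with
 * i != dim is kept, and the dim-th factor is differentiated. Only the
 * coordinates 0..dim+1 depend on gammas[dim]; the rest stay zero.
 */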
void MKLMulticlassGradient::gengammagradient( ::std::vector<float64_t> & gammagradient ,const ::std::vector<float64_t> & gammas,const int32_t dim)
{
    assert((int32_t)gammas.size()+1==numkernels);

    const float64_t pihalf=3.141592653589793/2;

    gammagradient.resize(numkernels);
    std::fill(gammagradient.begin(),gammagradient.end(),0.0);

    // numkernels-dimensional polar transform
    gammagradient[0]=1;

    for (int32_t i=0; i<numkernels-1; ++i)
    {
        if (i!=dim)
        {
            for (int32_t k=0; k<std::min(i+1,dim+2); ++k)
            {
                gammagradient[k]*=pow( cos( std::min(std::max(0.0,gammas[i]),pihalf) ), 2.0/pnorm);
            }
            if (i<dim)
                gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pihalf) ),2.0/pnorm);
        }
        else // i==dim: differentiate this factor; higher dims stay 0
        {
            for (int32_t k=0; k<i+1; ++k)
            {
                gammagradient[k]*= pow( cos( std::min(std::max(0.0,gammas[i]),pihalf) ), 2.0/pnorm-1.0)*(-1)*sin( std::min(std::max(0.0,gammas[i]),pihalf) );
            }
            gammagradient[i+1]=pow( sin( std::min(std::max(0.0,gammas[i]),pihalf) ),2.0/pnorm-1)*cos( std::min(std::max(0.0,gammas[i]),pihalf) );
        }
    }
}

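/*
 * Each stored constraint j contributes the objective
 *
 *   obj_j(beta) = -sumsofalphas[j] + (1/2) * sum_i beta_i * normsofsubkernels[j][i]
 *
 * evaluated by objectives() below; the line searches that follow
 * maximize the minimum of these values over all stored constraints.
 */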
float64_t MKLMulticlassGradient::objectives(const ::std::vector<float64_t> & weights, const int32_t index)
{
    assert(index>=0);
    assert(index < (int32_t) sumsofalphas.size());
    assert(index < (int32_t) normsofsubkernels.size());

    float64_t obj= -sumsofalphas[index];
    for (int32_t i=0; i<numkernels; ++i)
    {
        obj+=0.5*normsofsubkernels[index][i]*weights[i];
    }
    return obj;
}


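/*
 * linesearch performs projected gradient ascent on the angles gamma of
 * the polar parametrization: in each outer iteration it locates the
 * constraint with the smallest objective, computes the gradient with
 * respect to gamma via gengammagradient, caps the step where any
 * coordinate would leave [0, pi/2], and refines the step by bisection.
 */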
void MKLMulticlassGradient::linesearch(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
{
    const float64_t pihalf=3.141592653589793/2;

    const float64_t fingrad=1e-7;
    const int32_t maxhalfiter=20;
    const int32_t totaliters=6;
    const float64_t maxrelobjdiff=1e-6;

    std::vector<float64_t> finalgamma,curgamma;

    curgamma.resize(numkernels-1);
    if (oldweights.empty())
    {
        std::fill(curgamma.begin(),curgamma.end(),pihalf/2);
    }
    else
    {
        // initialize curgamma from oldweights: arcsin on the highest
        // dimension (after raising to pnorm/2), then divide everything by its cos
        std::vector<float64_t> tmpbeta(numkernels);
        for (int32_t i=numkernels-1; i>=0; --i)
        {
            tmpbeta[i]=pow(oldweights[i],pnorm/2);
        }

        for (int32_t i=numkernels-1; i>=1; --i)
        {
            curgamma[i-1]=asin(tmpbeta[i]);

            if (i<numkernels-1)
            {
                if (cos(curgamma[i])<=0)
                {
                    SG_SINFO("linesearch(...): at i %d cos(curgamma[i])<=0 %f\n",i, cos(curgamma[i]))
                    //curgamma[i-1]=pihalf/2;
                }
            }

            for (int32_t k=numkernels-2; k>=1; --k) // k==0 not necessary
            {
                if (cos(curgamma[i-1])>0)
                {
                    tmpbeta[k]/=cos(curgamma[i-1]);
                    if (tmpbeta[k]>1)
                    {
                        SG_SINFO("linesearch(...): at k %d tmpbeta[k]>1 %f\n",k, tmpbeta[k])
                    }
                    tmpbeta[k]=std::min(1.0,std::max(0.0, tmpbeta[k]));
                }
            }
        }
    }

    for (size_t i=0; i<curgamma.size(); ++i)
    {
        SG_SINFO("linesearch(...): curgamma[i] %f\n",curgamma[i])
    }


    bool finished=false;
    int32_t longiters=0;
    while (!finished)
    {
        ++longiters;
        std::vector<float64_t> curbeta;
        genbetas(curbeta,curgamma);
        // find the smallest objective
        int32_t minind=0;
        float64_t minval=objectives(curbeta, minind);
        SG_SINFO("linesearch(...): objective at 0: %f\n",minval)
        for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
        {
            float64_t tmpval=objectives(curbeta, i);
            SG_SINFO("linesearch(...): objective at %d: %f\n",i,tmpval)
            if (tmpval<minval)
            {
                minval=tmpval;
                minind=i;
            }
        }
        float64_t lobj=minval;
        // compute the gradient for the smallest objective
        std::vector<float64_t> curgrad;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            ::std::vector<float64_t> gammagradient;
            gengammagradient(gammagradient,curgamma,i);
            curgrad.push_back(objectives(gammagradient, minind));
        }
        // find the boundary hit point (checked for each dim) of the step
        // that keeps every gamma in [0, pi/2]
        std::vector<float64_t> maxalphas(numkernels-1,0);
        float64_t maxgrad=0;
        for (int32_t i=0; i<numkernels-1; ++i)
        {
            maxgrad=std::max(maxgrad,fabs(curgrad[i]));
            if (curgrad[i]<0)
            {
                maxalphas[i]=(0-curgamma[i])/curgrad[i];
            }
            else if (curgrad[i]>0)
            {
                maxalphas[i]=(pihalf-curgamma[i])/curgrad[i];
            }
            else
            {
                maxalphas[i]=1024*1024;
            }
        }

        float64_t maxalpha=maxalphas[0];
        for (int32_t i=1; i<numkernels-1; ++i)
        {
            maxalpha=std::min(maxalpha,maxalphas[i]);
        }

        if ((maxalpha>1024*1023) || (maxgrad<fingrad))
        {
            // curgrad[i] approx 0 for all i: terminate
            finished=true;
            finalgamma=curgamma;
        }
        else // of if (maxalpha>1024*1023)
        {
            // line search: shrink until the min over all objectives
            // increases compared to the starting point, then bisect on
            // the left and right halves until finished.
            // search al in [0, maxalpha] maximizing along curgamma + al*curgrad
            float64_t leftalpha=0, rightalpha=maxalpha, midalpha=(leftalpha+rightalpha)/2;

            std::vector<float64_t> tmpgamma=curgamma, tmpbeta;
            for (int32_t i=1; i<numkernels-1; ++i)
            {
                tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
            }
            genbetas(tmpbeta,tmpgamma);
            float64_t curobj=objectives(tmpbeta, 0);
            for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
            {
                curobj=std::min(curobj,objectives(tmpbeta, i));
            }

            int32_t curhalfiter=0;
            while ((curobj<minval) && (curhalfiter<maxhalfiter) && (fabs(curobj/minval-1)>maxrelobjdiff))
            {
                rightalpha=midalpha;
                midalpha=(leftalpha+rightalpha)/2;
                ++curhalfiter;
                tmpgamma=curgamma;
                for (int32_t i=1; i<numkernels-1; ++i)
                {
                    tmpgamma[i]=tmpgamma[i]+rightalpha*curgrad[i];
                }
                genbetas(tmpbeta,tmpgamma);
                curobj=objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj=std::min(curobj,objectives(tmpbeta, i));
                }
            }

            float64_t robj=curobj;
            float64_t tmpobj=std::max(lobj,robj);
            do
            {
                tmpobj=std::max(lobj,robj);

                tmpgamma=curgamma;
                for (int32_t i=1; i<numkernels-1; ++i)
                {
                    tmpgamma[i]=tmpgamma[i]+midalpha*curgrad[i];
                }
                genbetas(tmpbeta,tmpgamma);
                curobj=objectives(tmpbeta, 0);
                for (int32_t i=1; i<(int32_t)sumsofalphas.size(); ++i)
                {
                    curobj=std::min(curobj,objectives(tmpbeta, i));
                }

                if (lobj>robj)
                {
                    rightalpha=midalpha;
                    robj=curobj;
                }
                else
                {
                    leftalpha=midalpha;
                    lobj=curobj;
                }
                midalpha=(leftalpha+rightalpha)/2;
            }
            while (fabs(curobj/tmpobj-1)>maxrelobjdiff);
            finalgamma=tmpgamma;
            curgamma=tmpgamma;
        } // else of if (maxalpha>1024*1023)

        if (longiters>=totaliters)
        {
            finished=true;
        }
    }
    genbetas(finalbeta,finalgamma);
    float64_t nor=0;
    for (int32_t i=0; i<numkernels; ++i)
    {
        nor+=pow(finalbeta[i],pnorm);
    }
    if (nor>0)
    {
        nor=pow(nor,1.0/pnorm);
        for (int32_t i=0; i<numkernels; ++i)
        {
            finalbeta[i]/=nor;
        }
    }
}


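/*
 * linesearch2 replaces the iterative line search with a closed-form,
 * "MKL-direct" style update computed from the most recent constraint:
 *
 *   finalbeta[p] ~ ( normw2[p] * oldweights[p]^2 / pnorm )^(1/(pnorm+1))
 *
 * The result is normalized to the p-norm unit sphere, shifted by a
 * small regularizer R towards the uniform weighting, and renormalized.
 */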
void MKLMulticlassGradient::linesearch2(std::vector<float64_t> & finalbeta,const std::vector<float64_t> & oldweights)
{
    const float64_t epsRegul = 0.01;

    int32_t num_kernels=(int32_t)oldweights.size();
    int32_t nofKernelsGood=num_kernels;

    finalbeta=oldweights;

    for( int32_t p=0; p<num_kernels; ++p )
    {
        //SG_PRINT("MKL-direct:  sumw[%3d] = %e  ( oldbeta = %e )\n", p, sumw[p], old_beta[p] )
        if( oldweights[p] >= 0.0 )
        {
            finalbeta[p] = normsofsubkernels.back()[p] * oldweights[p]*oldweights[p] / pnorm;
            finalbeta[p] = CMath::pow( finalbeta[p], 1.0 / (pnorm+1.0) );
        }
        else
        {
            finalbeta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT( finalbeta[p] >= 0 )
    }

    // --- normalize
    float64_t Z = 0.0;
    for( int32_t p=0; p<num_kernels; ++p )
        Z += CMath::pow( finalbeta[p], pnorm );

    Z = CMath::pow( Z, -1.0/pnorm );
    ASSERT( Z >= 0 )
    for( int32_t p=0; p<num_kernels; ++p )
        finalbeta[p] *= Z;

    // --- regularize & renormalize
    float64_t preR = 0.0;
    for( int32_t p=0; p<num_kernels; ++p )
        preR += CMath::pow( oldweights[p] - finalbeta[p], 2.0 );

    const float64_t R = CMath::sqrt( preR / pnorm ) * epsRegul;
    if( !( R >= 0 ) )
    {
        SG_PRINT("MKL-direct: p = %.3f\n", pnorm )
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood )
        SG_PRINT("MKL-direct: Z = %e\n", Z )
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul )
        for( int32_t p=0; p<num_kernels; ++p )
        {
            const float64_t t = CMath::pow( oldweights[p] - finalbeta[p], 2.0 );
            SG_PRINT("MKL-direct: t[%3d] = %e  ( diff = %e = %e - %e )\n", p, t, oldweights[p]-finalbeta[p], oldweights[p], finalbeta[p] )
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR )
        SG_PRINT("MKL-direct: preR/p = %e\n", preR/pnorm )
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/pnorm) )
        SG_PRINT("MKL-direct: R = %e\n", R )
        SG_ERROR("Assertion R >= 0 failed!\n" )
    }

    Z = 0.0;
    for( int32_t p=0; p<num_kernels; ++p )
    {
        finalbeta[p] += R;
        Z += CMath::pow( finalbeta[p], pnorm );
        ASSERT( finalbeta[p] >= 0 )
    }
    Z = CMath::pow( Z, -1.0/pnorm );
    ASSERT( Z >= 0 )
    for( int32_t p=0; p<num_kernels; ++p )
    {
        finalbeta[p] *= Z;
        ASSERT( finalbeta[p] >= 0.0 )
        if( finalbeta[p] > 1.0 )
            finalbeta[p] = 1.0;
    }
}

void MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2)
{
    if (pnorm<1)
        SG_ERROR("MKLMulticlassGradient::computeweights(std::vector<float64_t> & weights2): parameter pnorm<1\n")

    SG_SDEBUG("MKLMulticlassGradient::computeweights(...): pnorm %f\n",pnorm)

    std::vector<float64_t> initw(weights2);
    linesearch2(weights2,initw);

    SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights \n")
    for (size_t i=0; i<weights2.size(); ++i)
    {
        SG_SINFO(" %f",weights2[i])
    }
    SG_SINFO(" \n")

    /*
    // legacy iterative path based on linesearch(), kept for reference:
    int32_t maxnumlinesrch=15;
    float64_t maxdiff=1e-6;

    bool finished=false;
    int32_t numiter=0;
    do
    {
        ++numiter;
        std::vector<float64_t> initw(weights2);
        linesearch(weights2,initw);
        float64_t norm=0;
        if (!initw.empty())
        {
            for (size_t i=0; i<weights2.size(); ++i)
            {
                norm+=(weights2[i]-initw[i])*(weights2[i]-initw[i]);
            }
            norm=sqrt(norm);
        }
        else
        {
            norm=maxdiff+1;
        }

        if ((norm<maxdiff) || (numiter>=maxnumlinesrch))
        {
            finished=true;
        }
        SG_SINFO("MKLMulticlassGradient::computeweights(...): newweights at iter %d normdiff %f\n",numiter,norm)
        for (size_t i=0; i<weights2.size(); ++i)
        {
            SG_SINFO(" %f",weights2[i])
        }
        SG_SINFO(" \n")
    }
    while (false==finished);
    */
}
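
/*
 * Minimal usage sketch (hypothetical, not part of this file): the values
 * below are made-up placeholders; in Shogun this class is typically
 * driven by the MKLMulticlass machinery rather than called directly.
 */
#if 0
#include <shogun/classifier/mkl/MKLMulticlassGradient.h>
#include <vector>

void mklgradient_example()
{
    shogun::MKLMulticlassGradient opt;
    opt.setup(3);          // three subkernels
    opt.set_mkl_norm(2.0); // p-norm constraint on the weights

    // one constraint per outer MKL iteration: the squared norms
    // ||w_i||^2 per subkernel plus the sum of positive alphas
    std::vector<float64_t> normw2;
    normw2.push_back(0.5);
    normw2.push_back(1.5);
    normw2.push_back(1.0);
    opt.addconstraint(normw2, 2.0);

    std::vector<float64_t> weights(3, 1.0/3.0);
    opt.computeweights(weights); // overwritten with the new weights
}
#endif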