SHOGUN v3.2.0
SGDQN.cpp
/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
    init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}
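
// Note: a single cost value sets both C1 and C2; train() turns this into
// the SGD-QN regularization constant lambda = 1/(C1*num_vec), so a larger
// C means weaker regularization.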

CSGDQN::~CSGDQN()
{
    SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
    SG_REF(loss_func);
    SG_UNREF(loss);
    loss=loss_func;
}

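/* compute_ratio accumulates, per coordinate, a secant estimate of the
   inverse diagonal Hessian, as in SGD-QN (Bordes, Bottou and Gallinari,
   JMLR 2009):

       B[i] += (W_1[i]-W[i]) / (g_1[i]-g[i])

   where g is the gradient of lambda/2*|w|^2 + loss(y*w.x), so the
   per-coordinate gradient difference expands to
   lambda*(W_1[i]-W[i]) + loss_val*dst[i], with dst the example x and
   loss_val the signed change in the loss derivative. If a coordinate did
   not move, the curvature 1/lambda of the regularizer alone is used. */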
void CSGDQN::compute_ratio(float64_t* W,float64_t* W_1,float64_t* B,float64_t* dst,int32_t dim,float64_t lambda,float64_t loss_val)
{
    for (int32_t i=0; i<dim; i++)
    {
        float64_t diffw=W_1[i]-W[i];
        if (diffw)
            B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
        else
            B[i]+=1/lambda;
    }
}

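/* combine_and_clip folds the fresh estimates B into the running scaling
   vector Bc as the weighted average Bc = c1*Bc + c2*B and clips each
   entry to [v1, v2]. train() passes bounds of [1/(100*lambda), 100/lambda],
   keeping the per-coordinate step scaling within a factor of 100 of the
   1/lambda value that Bc starts from. */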
void CSGDQN::combine_and_clip(float64_t* Bc,float64_t* B,int32_t dim,float64_t c1,float64_t c2,float64_t v1,float64_t v2)
{
    for (int32_t i=0; i<dim; i++)
    {
        if (B[i])
        {
            Bc[i]=Bc[i]*c1+B[i]*c2;
            Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
        }
    }
}

bool CSGDQN::train(CFeatures* data)
{
    ASSERT(m_labels)
    ASSERT(m_labels->get_label_type() == LT_BINARY)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")
        set_features((CDotFeatures*) data);
    }

    ASSERT(features)

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels)
    ASSERT(num_vec>0)

    w=SGVector<float64_t>(features->get_dim_feature_space());
    w.zero();

    float64_t lambda=1.0/(C1*num_vec);

    // Shift t in order to have a reasonable initial learning rate.
    // This assumes |x| \approx 1.
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);
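    // Since eta = 1/t in the loop below and Bc is seeded with 1/lambda,
    // the first effective step size is eta*Bc = (eta0*lambda)*(1/lambda) = eta0.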

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)

    float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
    SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);

    float64_t* result=SG_MALLOC(float64_t, w.vlen);
    float64_t* B=SG_MALLOC(float64_t, w.vlen);

    // Calibrate the weight-decay skip from the data sparsity.
    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec)
    CSignal::clear_cancel();

    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        bool updateB=false;
        for (int32_t i=0; i<num_vec; i++)
        {
            SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
            ASSERT(w.vlen==v.vlen)
            float64_t eta = 1.0/t;
            float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
            float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
            if (updateB)
            {
                // Curvature pass: take the usual scaled gradient step, then
                // measure the change in the loss derivative at the new w to
                // refresh the secant estimates B and Bc.
                if (z < 1 || is_log_loss)
                {
                    SGVector<float64_t> w_1=w.clone();
                    float64_t loss_1=-loss->first_derivative(z,1);
                    SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,eta*loss_1*y,result,1.0,w.vector,w.vlen);
                    float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
                    float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
                    if (diffloss)
                    {
                        compute_ratio(w.vector,w_1.vector,B,v.vector,w.vlen,lambda,y*diffloss);
                        if (t>skip)
                            combine_and_clip(Bc,B,w.vlen,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
                        else
                            combine_and_clip(Bc,B,w.vlen,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
                    }
                }
                updateB=false;
            }
            else
            {
                // Apply the amortized weight decay every `skip` examples and
                // schedule a curvature update for the next iteration.
                if (--count<=0)
                {
                    SGVector<float64_t>::vector_multiply(result,Bc,w.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,-skip*lambda*eta,result,1.0,w.vector,w.vlen);
                    count = skip;
                    updateB=true;
                }

                // Scaled stochastic gradient step on the loss term.
                if (z < 1 || is_log_loss)
                {
                    SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
                    SGVector<float64_t>::add(w.vector,eta*-loss->first_derivative(z,1)*y,result,1.0,w.vector,w.vlen);
                }
            }
            t++;
        }
    }
    SG_FREE(result);
    SG_FREE(B);
    SG_FREE(Bc);

    return true;
}

void CSGDQN::calibrate()
{
    ASSERT(features)
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0)
    ASSERT(c_dim>0)

    SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim)

    int32_t n = 0;
    float64_t r = 0;

    // r accumulates the total number of non-zero features over all n vectors
    for (int32_t j=0; j<num_vec; j++, n++)
        r += features->get_nnz_features_for_vector(j);

    // compute weight decay skip
    skip = (int32_t) ((16 * n * c_dim) / r);
}
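
// For fully dense data r equals n*c_dim, giving skip = 16, i.e. the decay
// in train() runs every 16 examples; at 1% density skip becomes 1600.
// Presumably the intent is to apply the dense decay update less often the
// sparser the gradient steps, keeping its amortized cost comparable.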

void CSGDQN::init()
{
    t=0;
    C1=1;
    C2=1;
    epochs=5;
    skip=1000;
    count=1000;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&epochs, "epochs", "Number of training epochs.");
    m_parameters->add(&skip, "skip", "Weight decay skip.");
    m_parameters->add(&count, "count", "Weight decay skip counter.");
}
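
/* A minimal usage sketch (illustrative only; assumes Shogun's
   CDenseFeatures/CBinaryLabels API and the set_epochs() setter declared
   in SGDQN.h; adapt the data setup to your application):

     SGMatrix<float64_t> mat(num_feat, num_vec);  // filled with training data
     SGVector<float64_t> lab(num_vec);            // filled with +1/-1 labels

     CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
     CBinaryLabels* labels=new CBinaryLabels(lab);

     CSGDQN* svm=new CSGDQN(1.0, feats, labels);  // cost constant C=1.0
     svm->set_epochs(10);
     svm->train();

     CBinaryLabels* out=svm->apply_binary(feats);

     SG_UNREF(out);
     SG_UNREF(svm);
*/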