SHOGUN v3.2.0
/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
	init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
	set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
	SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
	SG_REF(loss_func);
	SG_UNREF(loss);
	loss=loss_func;
}

void CSGDQN::compute_ratio(float64_t* W, float64_t* W_1, float64_t* B, float64_t* dst, int32_t dim, float64_t lambda, float64_t loss_val)
{
	for (int32_t i=0; i<dim; i++)
	{
		float64_t diffw=W_1[i]-W[i];
		if (diffw)
			B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
		else
			B[i]+=1/lambda;
	}
}

void CSGDQN::combine_and_clip(float64_t* Bc, float64_t* B, int32_t dim, float64_t c1, float64_t c2, float64_t v1, float64_t v2)
{
	for (int32_t i=0; i<dim; i++)
	{
		if (B[i])
		{
			Bc[i]=Bc[i]*c1 + B[i]*c2;
			Bc[i]=CMath::min(CMath::max(Bc[i], v1), v2);
		}
	}
}

bool CSGDQN::train(CFeatures* data)
{
	ASSERT(m_labels)
	ASSERT(m_labels->get_label_type() == LT_BINARY)

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")
		set_features((CDotFeatures*) data);
	}

	ASSERT(features)

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels)
	ASSERT(num_vec>0)

	w=SGVector<float64_t>(features->get_dim_feature_space());
	w.zero();

	float64_t lambda=1.0/(C1*num_vec);

	// Shift t in order to have a
	// reasonable initial learning rate.
	// This assumes |x| \approx 1.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0, -loss->first_derivative(-typw, 1));
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)

	// Bc holds the diagonal scaling coefficients, initialised to 1/lambda.
	float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
	SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);

	float64_t* result=SG_MALLOC(float64_t, w.vlen);
	float64_t* B=SG_MALLOC(float64_t, w.vlen);

	// Calibrate
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec)
	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		bool updateB=false;
		for (int32_t i=0; i<num_vec; i++)
		{
			SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
			ASSERT(w.vlen==v.vlen)
			float64_t eta = 1.0/t;
			float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
			float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
			if (updateB==true)
			{
				// Take a scaled gradient step and re-estimate the diagonal
				// scaling B from the change in the loss derivative, then
				// fold it into Bc with clipping.
				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t> w_1=w.clone();
					float64_t loss_1=-loss->first_derivative(z,1);
					SGVector<float64_t>::vector_multiply(result, Bc, v.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, eta*loss_1*y, result, 1.0, w.vector, w.vlen);
					float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
					float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
					if (diffloss)
					{
						compute_ratio(w.vector, w_1.vector, B, v.vector, w.vlen, lambda, y*diffloss);
						if (t>skip)
							combine_and_clip(Bc, B, w.vlen, (t-skip)/(t+skip), 2*skip/(t+skip), 1/(100*lambda), 100/lambda);
						else
							combine_and_clip(Bc, B, w.vlen, t/(t+skip), skip/(t+skip), 1/(100*lambda), 100/lambda);
					}
				}
				updateB=false;
			}
			else
			{
				// Every 'skip' examples, apply the scaled regularization
				// (weight decay) update and schedule a B re-estimation.
				if (--count<=0)
				{
					SGVector<float64_t>::vector_multiply(result, Bc, w.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, -skip*lambda*eta, result, 1.0, w.vector, w.vlen);
					count = skip;
					updateB=true;
				}

				// Scaled stochastic gradient step on the loss term.
				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t>::vector_multiply(result, Bc, v.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, eta*-loss->first_derivative(z,1)*y, result, 1.0, w.vector, w.vlen);
				}
			}
			t++;
		}
	}
	SG_FREE(result);
	SG_FREE(B);
	SG_FREE(Bc);

	return true;
}

void CSGDQN::calibrate()
{
	ASSERT(features)
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0)
	ASSERT(c_dim>0)

	SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim)

	int32_t n = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec; j++, n++)
		r += features->get_nnz_features_for_vector(j);

	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);
}

void CSGDQN::init()
{
	t=0;
	C1=1;
	C2=1;
	epochs=5;
	skip=1000;
	count=1000;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
}
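
A minimal usage sketch follows, not part of the file above: it assumes the Shogun 3.x modular C++ API (init_shogun_with_defaults, CDenseFeatures, CBinaryLabels, apply_binary) and uses made-up toy data; the CSGDQN(C, features, labels) constructor and train() are the ones defined above.

// Usage sketch: train CSGDQN on a tiny, hypothetical dense data set.
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/SGDQN.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// Toy data: 2-dimensional features, 4 examples (one per column),
	// labels in {-1,+1}.
	SGMatrix<float64_t> feat(2, 4);
	feat(0,0)=-1; feat(1,0)=-1;
	feat(0,1)=-1; feat(1,1)= 1;
	feat(0,2)= 1; feat(1,2)=-1;
	feat(0,3)= 1; feat(1,3)= 1;

	SGVector<float64_t> lab(4);
	lab[0]=-1; lab[1]=-1; lab[2]=1; lab[3]=1;

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat);
	CBinaryLabels* labels=new CBinaryLabels(lab);

	// C is the SVM cost constant; train() sets lambda=1/(C*num_vec).
	CSGDQN* svm=new CSGDQN(1.0, features, labels);
	svm->train();

	// Predict on the training features and print the result.
	CBinaryLabels* output=svm->apply_binary(features);
	output->get_labels().display_vector("predictions");

	SG_UNREF(output);
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}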