/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Sergey Lisitsyn
 * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
 */

#include <shogun/lib/slep/slep_mc_plain_lr.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/slep/q1/eppMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <iostream>

using namespace shogun;
using namespace Eigen;
using namespace std;

namespace shogun
{

slep_result_t slep_mc_plain_lr(
	CDotFeatures* features,
	CMulticlassLabels* labels,
	float64_t z,
	const slep_options& options)
{
	int i,j;
	// obtain problem parameters
	int n_feats = features->get_dim_feature_space();
	int n_vecs = features->get_num_vectors();
	int n_classes = labels->get_num_classes();

	// labels vector containing values in range (0 .. n_classes)
	SGVector<float64_t> labels_vector = labels->get_labels();

	// initialize matrices and vectors to be used
	// weight vector
	MatrixXd w = MatrixXd::Zero(n_feats, n_classes);
	// intercepts (biases)
	VectorXd c = VectorXd::Zero(n_classes);

	if (options.last_result)
	{
		SGMatrix<float64_t> last_w = options.last_result->w;
		SGVector<float64_t> last_c = options.last_result->c;
		for (i=0; i<n_classes; i++)
		{
			c[i] = last_c[i];
			for (j=0; j<n_feats; j++)
				w(j,i) = last_w(j,i);
		}
	}
	// iterative process matrices and vectors
	MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
	VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
	// search point weight vector
	MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
	// search point intercepts
	VectorXd search_c = VectorXd::Zero(n_classes);
	// dot products
	MatrixXd Aw = MatrixXd::Zero(n_vecs, n_classes);
	for (j=0; j<n_classes; j++)
		features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
	MatrixXd As = MatrixXd::Zero(n_vecs, n_classes);
	MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
	// gradients
	MatrixXd g = MatrixXd::Zero(n_feats, n_classes);
	VectorXd gc = VectorXd::Zero(n_classes);
	// projection
	MatrixXd v = MatrixXd::Zero(n_feats, n_classes);

	// Lipschitz continuous gradient parameter for line search
	double L = 1.0/(n_vecs*n_classes);
	// coefficients for search point computation
	double alphap = 0, alpha = 1;

	// lambda regularization parameter
	double lambda = z;
	// objective values
	double objective = 0.0;
	double objective_p = 0.0;

	int iter = 0;
	bool done = false;
	CTime time;
	//internal::set_is_malloc_allowed(false);
	while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
	{
		double beta = (alphap-1)/alpha;
		// compute search points
		search_w = w + beta*wwp;
		search_c = c + beta*ccp;

		// update dot products with search point
		As = Aw + beta*(Aw-Awp);

		// compute objective and gradient at search point
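		// The smooth part of the objective is a sum of one-vs-rest logistic
		// losses: for vector i and class j, with sign y = -1 if i belongs to
		// class j and y = +1 otherwise, the contribution is log(1 + exp(aa))
		// where aa = y*(x_i.w_j + c_j). It is evaluated below as
		// log(exp(-bb) + exp(aa-bb)) + bb with bb = max(aa, 0), which equals
		// log(1 + exp(aa)) but cannot overflow. The scalar b = y*(1 - prob)
		// is d(loss)/d(x_i.w_j + c_j) and is accumulated into the intercept
		// gradient gc and the weight gradient g.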
		double fun_s = 0;
		g.setZero();
		gc.setZero();
		// for each vector
		for (i=0; i<n_vecs; i++)
		{
			// class of current vector
			int vec_class = labels_vector[i];
			// for each class
			for (j=0; j<n_classes; j++)
			{
				// compute logistic loss
				double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
				double bb = aa > 0.0 ? aa : 0.0;
				// avoid underflow via log-sum-exp trick
				fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				double prob = 1.0/(1+CMath::exp(aa));
				double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);
				// update gradient of intercepts
				gc[j] += b;
				// update gradient of weight vectors
				features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
			}
		}
		//fun_s /= (n_vecs*n_classes);

		wp = w;
		Awp = Aw;
		cp = c;

		int inner_iter = 0;
		double fun_x = 0;
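		// Backtracking line search: each candidate is the proximal step of the
		// L1/Lq regularizer applied to the gradient step search_w - g/L (via
		// eppMatrix), while the intercepts take a plain, unregularized gradient
		// step search_c - gc/L. A step is accepted once the smooth loss obeys
		// the quadratic upper bound
		//   fun_x <= fun_s + <gradient, x - s> + (L/2)*||x - s||^2
		// (taken over both weights and intercepts), i.e. l_sum <= L*r_sum
		// below; otherwise the Lipschitz estimate L is at least doubled and
		// the step is retried.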
		// line search process
		while (inner_iter<5000)
		{
			// compute line search point
			v = search_w - g/L;
			c = search_c - gc/L;

			// compute projection of gradient
			eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);

			v = w - search_w;

			// update dot products
			for (j=0; j<n_classes; j++)
				features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);

			// compute objective at search point
			fun_x = 0;
			for (i=0; i<n_vecs; i++)
			{
				int vec_class = labels_vector[i];
				for (j=0; j<n_classes; j++)
				{
					double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
					double bb = aa > 0.0 ? aa : 0.0;
					fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				}
			}
			//fun_x /= (n_vecs*n_classes);

			// check for termination of line search
			double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
			double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);

			// stop if projected gradient is less than 1e-20
			if (r_sum <= 1e-20)
			{
				SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum)
				done = true;
				break;
			}

			if (l_sum <= r_sum*L)
				break;
			else
				L = CMath::max(2*L, l_sum/r_sum);

			inner_iter++;
		}

		// update alpha coefficients
		alphap = alpha;
		alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;

		// update wwp and ccp
		wwp = w - wp;
		ccp = c - cp;

		// update objectives
		objective_p = objective;
		objective = fun_x;

		// regularize objective with tree norm
		double L1q_norm = 0.0;
		for (int m=0; m<n_classes; m++)
			L1q_norm += w.col(m).norm();
		objective += lambda*L1q_norm;

		//cout << "Objective = " << objective << endl;

		// check for termination of whole process
		if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
		{
			SG_SINFO("Objective changes less than tolerance\n")
			done = true;
		}

		iter++;
	}
	SG_SINFO("%d iterations passed, objective = %f\n",iter,objective)
	//internal::set_is_malloc_allowed(true);

	// output computed weight vectors and intercepts
	SGMatrix<float64_t> r_w(n_feats,n_classes);
	for (j=0; j<n_classes; j++)
	{
		for (i=0; i<n_feats; i++)
			r_w(i,j) = w(i,j);
	}
	//r_w.display_matrix();
	SGVector<float64_t> r_c(n_classes);
	for (j=0; j<n_classes; j++)
		r_c[j] = c[j];
	return slep_result_t(r_w, r_c);
};
};
#endif
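A minimal sketch of how this solver might be driven from user code, assuming dense 64-bit features and labels encoded as 0 .. n_classes-1. The option values, the train_mc_lr wrapper, and the slep_options::default_options() helper and slep_options.h include path are illustrative assumptions, not taken from this file:

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/slep/slep_options.h>
#include <shogun/lib/slep/slep_mc_plain_lr.h>

using namespace shogun;

// Hypothetical driver: train L1/Lq-regularized multiclass logistic regression
// on a feature matrix (n_feats x n_vecs) and a label vector with values
// in 0 .. n_classes-1.
slep_result_t train_mc_lr(SGMatrix<float64_t> data,
		SGVector<float64_t> label_values, float64_t lambda)
{
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);
	CMulticlassLabels* labels = new CMulticlassLabels(label_values);

	slep_options options = slep_options::default_options();
	options.q = 2.0;            // q of the L1/Lq projection performed by eppMatrix
	options.max_iter = 1000;    // cap on outer accelerated-gradient iterations
	options.tolerance = 1e-6;   // relative objective change used as stopping criterion
	options.last_result = NULL; // no warm start

	// z is used directly as the regularization strength lambda by the solver
	slep_result_t result = slep_mc_plain_lr(features, labels, lambda, options);

	SG_UNREF(features);
	SG_UNREF(labels);
	return result;
}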