SHOGUN
v3.2.0
|
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Fernando J. Iglesias Garcia
 * Copyright (C) 2013 Fernando J. Iglesias Garcia
 */

#ifdef HAVE_EIGEN3
#ifdef HAVE_LAPACK

#include <shogun/metric/LMNN.h>
#include <shogun/metric/LMNNImpl.h>
#include <shogun/mathematics/Math.h>

// trace of the product of two matrices computed fast using trace(A*B)=sum(A.*B')
#define TRACE(A,B) (((A).array()*(B).transpose().array()).sum())

using namespace shogun;
using namespace Eigen;

// Default constructor. Registers parameters via init() and creates an empty
// statistics object; features/labels stay NULL until set by the caller.
CLMNN::CLMNN()
{
	init();

	m_statistics = new CLMNNStatistics();
	SG_REF(m_statistics);
}

// Constructor taking the training features, their multiclass labels and the
// number k of target neighbours per example. Takes a reference on the
// features and labels (released in the destructor).
CLMNN::CLMNN(CDenseFeatures<float64_t>* features, CMulticlassLabels* labels, int32_t k)
{
	init();

	m_features = features;
	m_labels = labels;
	m_k = k;

	SG_REF(m_features)
	SG_REF(m_labels)

	m_statistics = new CLMNNStatistics();
	SG_REF(m_statistics);
}

// Destructor: drop the references taken on features, labels and statistics.
CLMNN::~CLMNN()
{
	SG_UNREF(m_features)
	SG_UNREF(m_labels)
	SG_UNREF(m_statistics);
}

const char* CLMNN::get_name() const
{
	return "LMNN";
}

// Train the LMNN metric by projected (sub-)gradient descent on the linear
// transform L. init_transform is the initial value of L; if empty,
// check_training_setup is expected to initialize it.
void CLMNN::train(SGMatrix<float64_t> init_transform)
{
	SG_DEBUG("Entering CLMNN::train().\n")

	// Validate features/labels/k and the initial transform before starting
	CLMNNImpl::check_training_setup(m_features, m_labels, init_transform);

	// cast is safe, check_training_setup ensures features are dense
	CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features);
	CMulticlassLabels* y = CLabelsFactory::to_multiclass(m_labels);
	SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());

	// Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
	// NOTE(review): Map aliases init_transform's memory, no copy is made here
	MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
			init_transform.num_cols);
	// Compute target or genuine neighbours (k same-class nearest neighbours per example)
	SG_DEBUG("Finding target nearest neighbors.\n")
	SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
	// Initialize (sub-)gradient with the pull term, weighted by (1 - regularization)
	SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
	MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
	// Value of the objective function at every iteration
	SGVector<float64_t> obj(m_maxiter);
	// The step size is modified depending on how the objective changes, leave the
	// step size member unchanged and use a local one
	float64_t stepsize = m_stepsize;
	// Last active set of impostors computed exactly, current and previous impostors sets
	ImpostorsSetType exact_impostors, cur_impostors, prev_impostors;
	// Iteration counter
	uint32_t iter = 0;
	// Criterion for termination
	bool stop = false;
	// Make space for the training statistics
	m_statistics->resize(m_maxiter);

	// Main gradient-descent loop; runs until check_termination signals stop
	while (!stop)
	{
		SG_PROGRESS(iter, 0, m_maxiter)

		// Find current set of impostors (differently-labeled points inside the margin);
		// m_correction controls how often the exact search is redone
		SG_DEBUG("Finding impostors.\n")
		cur_impostors = CLMNNImpl::find_impostors(x,y,L,target_nn,iter,m_correction);
		SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size())

		// (Sub-) gradient computation: update incrementally from the difference
		// between the current and previous impostor sets
		SG_DEBUG("Updating gradient.\n")
		CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization);
		// Take gradient step (optionally constrained to a diagonal transform)
		SG_DEBUG("Taking gradient step.\n")
		CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal);

		// Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
		// plus the number of current impostors to account for the margin
		SG_DEBUG("Computing objective.\n")
		obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size();

		// Correct step size (increase if the objective decreased, shrink otherwise)
		CLMNNImpl::correct_stepsize(stepsize, obj, iter);

		// Check termination criterion (max iterations, step size or objective thresholds)
		stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter, m_stepsize_threshold, m_obj_threshold);

		// Update iteration counter
		iter = iter + 1;
		// Update previous set of impostors
		prev_impostors = cur_impostors;

		// Store statistics for this iteration (iter was already incremented, hence iter-1)
		m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size());

		SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n",
				iter, obj[iter-1], cur_impostors.size(), stepsize)
	}

	// Truncate statistics in case convergence was reached in less than maxiter
	m_statistics->resize(iter);

	// Store the final linear transform; clone because L aliases init_transform's memory
	int32_t nfeats = x->get_num_features();
	float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
	m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);

	SG_DEBUG("Leaving CLMNN::train().\n")
}

// Return the learned linear transform L (valid after train()).
SGMatrix<float64_t> CLMNN::get_linear_transform() const
{
	return m_linear_transform;
}

// Build a Mahalanobis distance object with matrix M = L^T*L over the
// training features. The caller receives a reference (SG_REF'd).
CCustomMahalanobisDistance* CLMNN::get_distance() const
{
	// Compute Mahalanobis distance matrix M = L^T*L

	// Put the linear transform L in Eigen to perform the matrix multiplication
	// L is not copied to another region of memory
	Map<const MatrixXd> map_linear_transform(m_linear_transform.matrix,
			m_linear_transform.num_rows, m_linear_transform.num_cols);
	// TODO exploit that M is symmetric
	MatrixXd M = map_linear_transform.transpose()*map_linear_transform;
	// TODO avoid copying
	SGMatrix<float64_t> mahalanobis_matrix(M.rows(), M.cols());
	for (index_t i = 0; i < M.rows(); i++)
		for (index_t j = 0; j < M.cols(); j++)
			mahalanobis_matrix(i,j) = M(i,j);

	// Create custom Mahalanobis distance with matrix M associated with the training features
	CCustomMahalanobisDistance* distance =
			new CCustomMahalanobisDistance(m_features, m_features, mahalanobis_matrix);
	SG_REF(distance)

	return distance;
}

int32_t CLMNN::get_k() const
{
	return m_k;
}

void CLMNN::set_k(const int32_t k)
{
	REQUIRE(k>0, "The number of target neighbors per example must be larger than zero\n");
	m_k = k;
}

float64_t CLMNN::get_regularization() const
{
	return m_regularization;
}

void CLMNN::set_regularization(const float64_t regularization)
{
	m_regularization = regularization;
}

float64_t CLMNN::get_stepsize() const
{
	return m_stepsize;
}

void CLMNN::set_stepsize(const float64_t stepsize)
{
	REQUIRE(stepsize>0, "The step size used in gradient descent must be larger than zero\n")
	m_stepsize = stepsize;
}

float64_t CLMNN::get_stepsize_threshold() const
{
	return m_stepsize_threshold;
}

void CLMNN::set_stepsize_threshold(const float64_t stepsize_threshold)
{
	REQUIRE(stepsize_threshold>0,
			"The threshold for the step size must be larger than zero\n")
	m_stepsize_threshold = stepsize_threshold;
}

uint32_t CLMNN::get_maxiter() const
{
	return m_maxiter;
}

void CLMNN::set_maxiter(const uint32_t maxiter)
{
	REQUIRE(maxiter>0, "The number of maximum iterations must be larger than zero\n")
	m_maxiter = maxiter;
}

uint32_t CLMNN::get_correction() const
{
	return m_correction;
}
00238 00239 void CLMNN::set_correction(const uint32_t correction) 00240 { 00241 m_correction = correction; 00242 } 00243 00244 float64_t CLMNN::get_obj_threshold() const 00245 { 00246 return m_obj_threshold; 00247 } 00248 00249 void CLMNN::set_obj_threshold(const float64_t obj_threshold) 00250 { 00251 REQUIRE(obj_threshold>0, 00252 "The threshold for the objective must be larger than zero\n") 00253 m_obj_threshold = obj_threshold; 00254 } 00255 00256 bool CLMNN::get_diagonal() const 00257 { 00258 return m_diagonal; 00259 } 00260 00261 void CLMNN::set_diagonal(const bool diagonal) 00262 { 00263 m_diagonal = diagonal; 00264 } 00265 00266 CLMNNStatistics* CLMNN::get_statistics() const 00267 { 00268 SG_REF(m_statistics); 00269 return m_statistics; 00270 } 00271 00272 void CLMNN::init() 00273 { 00274 SG_ADD(&m_linear_transform, "linear_transform", 00275 "Linear transform in matrix form", MS_NOT_AVAILABLE) 00276 SG_ADD((CSGObject**) &m_features, "features", "Training features", 00277 MS_NOT_AVAILABLE) 00278 SG_ADD((CSGObject**) &m_labels, "labels", "Training labels", 00279 MS_NOT_AVAILABLE) 00280 SG_ADD(&m_k, "k", "Number of target neighbours per example", 00281 MS_NOT_AVAILABLE) 00282 SG_ADD(&m_regularization, "regularization", "Regularization", 00283 MS_AVAILABLE) 00284 SG_ADD(&m_stepsize, "stepsize", "Step size in gradient descent", 00285 MS_NOT_AVAILABLE) 00286 SG_ADD(&m_stepsize_threshold, "stepsize_threshold", "Step size threshold", 00287 MS_NOT_AVAILABLE) 00288 SG_ADD(&m_maxiter, "maxiter", "Maximum number of iterations", 00289 MS_NOT_AVAILABLE) 00290 SG_ADD(&m_correction, "correction", 00291 "Iterations between exact impostors search", MS_NOT_AVAILABLE) 00292 SG_ADD(&m_obj_threshold, "obj_threshold", "Objective threshold", 00293 MS_NOT_AVAILABLE) 00294 SG_ADD(&m_diagonal, "m_diagonal", "Diagonal transformation", MS_NOT_AVAILABLE); 00295 SG_ADD((CSGObject**) &m_statistics, "statistics", "Training statistics", 00296 MS_NOT_AVAILABLE); 00297 00298 m_features = NULL; 
00299 m_labels = NULL; 00300 m_k = 1; 00301 m_regularization = 0.5; 00302 m_stepsize = 1e-07; 00303 m_stepsize_threshold = 1e-22; 00304 m_maxiter = 1000; 00305 m_correction = 15; 00306 m_obj_threshold = 1e-9; 00307 m_diagonal = false; 00308 m_statistics = NULL; 00309 } 00310 00311 CLMNNStatistics::CLMNNStatistics() 00312 { 00313 init(); 00314 } 00315 00316 CLMNNStatistics::~CLMNNStatistics() 00317 { 00318 } 00319 00320 const char* CLMNNStatistics::get_name() const 00321 { 00322 return "LMNNStatistics"; 00323 } 00324 00325 void CLMNNStatistics::resize(int32_t size) 00326 { 00327 REQUIRE(size > 0, "The new size in CLMNNStatistics::resize must be larger than zero." 00328 " Given value is %d.\n", size); 00329 00330 obj.resize_vector(size); 00331 stepsize.resize_vector(size); 00332 num_impostors.resize_vector(size); 00333 } 00334 00335 void CLMNNStatistics::set(index_t iter, float64_t obj_iter, float64_t stepsize_iter, 00336 uint32_t num_impostors_iter) 00337 { 00338 REQUIRE(iter >= 0 && iter < obj.vlen, "The iteration index in CLMNNStatistics::set " 00339 "must be larger or equal to zero and less than the size (%d). Given valu is %d.\n", obj.vlen, iter); 00340 00341 obj[iter] = obj_iter; 00342 stepsize[iter] = stepsize_iter; 00343 num_impostors[iter] = num_impostors_iter; 00344 } 00345 00346 void CLMNNStatistics::init() 00347 { 00348 SG_ADD(&obj, "obj", "Objective at each iteration", MS_NOT_AVAILABLE); 00349 SG_ADD(&stepsize, "stepsize", "Step size at each iteration", MS_NOT_AVAILABLE); 00350 SG_ADD(&num_impostors, "num_impostors", "Number of impostors at each iteration", 00351 MS_NOT_AVAILABLE); 00352 } 00353 00354 #endif /* HAVE_LAPACK */ 00355 #endif /* HAVE_EIGEN3 */