SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
LMNN.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Fernando J. Iglesias Garcia
00008  * Copyright (C) 2013 Fernando J. Iglesias Garcia
00009  */
00010 
00011 #ifdef HAVE_EIGEN3
00012 #ifdef HAVE_LAPACK
00013 
00014 #include <shogun/metric/LMNN.h>
00015 #include <shogun/metric/LMNNImpl.h>
00016 #include <shogun/mathematics/Math.h>
00017 
00019 
00020 // trace of the product of two matrices computed fast using trace(A*B)=sum(A.*B')
00021 #define TRACE(A,B)      (((A).array()*(B).transpose().array()).sum())
00022 
00023 using namespace shogun;
00024 using namespace Eigen;
00025 
/** Default constructor: registers parameters, sets defaults, and creates
 * the (always-valid) statistics object. */
CLMNN::CLMNN()
{
    // Register parameters and initialize all members to their defaults
    init();

    // Statistics object is created eagerly so get_statistics() is always valid
    m_statistics = new CLMNNStatistics();
    SG_REF(m_statistics);
}
00033 
00034 CLMNN::CLMNN(CDenseFeatures<float64_t>* features, CMulticlassLabels* labels, int32_t k)
00035 {
00036     init();
00037 
00038     m_features = features;
00039     m_labels = labels;
00040     m_k = k;
00041 
00042     SG_REF(m_features)
00043     SG_REF(m_labels)
00044 
00045     m_statistics = new CLMNNStatistics();
00046     SG_REF(m_statistics);
00047 }
00048 
/** Destructor: releases the references taken in the constructors. */
CLMNN::~CLMNN()
{
    SG_UNREF(m_features)
    SG_UNREF(m_labels)
    SG_UNREF(m_statistics);
}
00055 
/** @return name of the class, used by Shogun's serialization/printing machinery */
const char* CLMNN::get_name() const
{
    return "LMNN";
}
00060 
/** Train the LMNN metric by (sub-)gradient descent on the LMNN objective,
 * starting from init_transform. On return, the learned linear transform L
 * is stored in m_linear_transform (the Mahalanobis matrix is M = L^T*L).
 *
 * @param init_transform initial linear transform to start the descent from
 */
void CLMNN::train(SGMatrix<float64_t> init_transform)
{
    SG_DEBUG("Entering CLMNN::train().\n")

    // Validate features, labels and the initial transform before training
    CLMNNImpl::check_training_setup(m_features, m_labels, init_transform);

    // cast is safe, check_training_setup ensures features are dense
    CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(m_features);
    CMulticlassLabels* y = CLabelsFactory::to_multiclass(m_labels);
    SG_DEBUG("%d input vectors with %d dimensions.\n", x->get_num_vectors(), x->get_num_features());

    // Use Eigen matrix for the linear transform L. The Mahalanobis distance is L^T*L
    MatrixXd L = Map<MatrixXd>(init_transform.matrix, init_transform.num_rows,
            init_transform.num_cols);
    // Compute target or genuine neighbours
    SG_DEBUG("Finding target nearest neighbors.\n")
    SGMatrix<index_t> target_nn = CLMNNImpl::find_target_nn(x, y, m_k);
    // Initialize (sub-)gradient with the pull term over target neighbours
    SG_DEBUG("Summing outer products for (sub-)gradient initialization.\n")
    MatrixXd gradient = (1-m_regularization)*CLMNNImpl::sum_outer_products(x, target_nn);
    // Value of the objective function at every iteration
    SGVector<float64_t> obj(m_maxiter);
    // The step size is modified depending on how the objective changes, leave the
    // step size member unchanged and use a local one
    float64_t stepsize = m_stepsize;
    // Last active set of impostors computed exactly, current and previous impostors sets
    ImpostorsSetType exact_impostors, cur_impostors, prev_impostors;
    // Iteration counter
    uint32_t iter = 0;
    // Criterion for termination
    bool stop = false;
    // Make space for the training statistics
    m_statistics->resize(m_maxiter);

    // Main descent loop; terminates on step size / objective thresholds or maxiter
    while (!stop)
    {
        SG_PROGRESS(iter, 0, m_maxiter)

        // Find current set of impostors (exact search every m_correction iterations)
        SG_DEBUG("Finding impostors.\n")
        cur_impostors = CLMNNImpl::find_impostors(x,y,L,target_nn,iter,m_correction);
        SG_DEBUG("Found %d impostors in the current set.\n", cur_impostors.size())

        // (Sub-) gradient computation, incremental w.r.t. the previous impostor set
        SG_DEBUG("Updating gradient.\n")
        CLMNNImpl::update_gradient(x, gradient, cur_impostors, prev_impostors, m_regularization);
        // Take gradient step
        SG_DEBUG("Taking gradient step.\n")
        CLMNNImpl::gradient_step(L, gradient, stepsize, m_diagonal);

        // Compute the objective, trace of Mahalanobis distance matrix (L squared) times the gradient
        // plus the number of current impostors to account for the margin
        SG_DEBUG("Computing objective.\n")
        obj[iter] = TRACE(L.transpose()*L,gradient) + m_regularization*cur_impostors.size();

        // Correct step size
        CLMNNImpl::correct_stepsize(stepsize, obj, iter);

        // Check termination criterion
        stop = CLMNNImpl::check_termination(stepsize, obj, iter, m_maxiter, m_stepsize_threshold, m_obj_threshold);

        // Update iteration counter
        iter = iter + 1;
        // Update previous set of impostors
        prev_impostors = cur_impostors;

        // Store statistics for this iteration (iter was already incremented,
        // hence the iter-1 indexing)
        m_statistics->set(iter-1, obj[iter-1], stepsize, cur_impostors.size());

        SG_DEBUG("iteration=%d, objective=%.4f, #impostors=%4d, stepsize=%.4E\n",
                iter, obj[iter-1], cur_impostors.size(), stepsize)
    }

    // Truncate statistics in case convergence was reached in less than maxiter
    m_statistics->resize(iter);

    // Copy the learned transform out of Eigen's buffer into the member matrix
    int32_t nfeats = x->get_num_features();
    float64_t* cloned_data = SGMatrix<float64_t>::clone_matrix(L.data(), nfeats, nfeats);
    m_linear_transform = SGMatrix<float64_t>(cloned_data, nfeats, nfeats);

    SG_DEBUG("Leaving CLMNN::train().\n")
}
00148 
/** @return the linear transform L computed by train() */
SGMatrix<float64_t> CLMNN::get_linear_transform() const
{
    return m_linear_transform;
}
00153 
00154 CCustomMahalanobisDistance* CLMNN::get_distance() const
00155 {
00156     // Compute Mahalanobis distance matrix M = L^T*L
00157 
00158     // Put the linear transform L in Eigen to perform the matrix multiplication
00159     // L is not copied to another region of memory
00160     Map<const MatrixXd> map_linear_transform(m_linear_transform.matrix,
00161             m_linear_transform.num_rows, m_linear_transform.num_cols);
00162     // TODO exploit that M is symmetric
00163     MatrixXd M = map_linear_transform.transpose()*map_linear_transform;
00164     // TODO avoid copying
00165     SGMatrix<float64_t> mahalanobis_matrix(M.rows(), M.cols());
00166     for (index_t i = 0; i < M.rows(); i++)
00167         for (index_t j = 0; j < M.cols(); j++)
00168             mahalanobis_matrix(i,j) = M(i,j);
00169 
00170     // Create custom Mahalanobis distance with matrix M associated with the training features
00171 
00172     CCustomMahalanobisDistance* distance =
00173             new CCustomMahalanobisDistance(m_features, m_features, mahalanobis_matrix);
00174     SG_REF(distance)
00175 
00176     return distance;
00177 }
00178 
/** @return number of target neighbours per example */
int32_t CLMNN::get_k() const
{
    return m_k;
}
00183 
/** Set the number of target neighbours per example.
 * @param k must be strictly positive
 */
void CLMNN::set_k(const int32_t k)
{
    REQUIRE(k>0, "The number of target neighbors per example must be larger than zero\n");
    m_k = k;
}
00189 
/** @return regularization weight of the impostor (push) term in the objective */
float64_t CLMNN::get_regularization() const
{
    return m_regularization;
}
00194 
/** Set the regularization weight.
 * NOTE(review): unlike most other setters, no REQUIRE validates the range
 * here; presumably values in [0,1] are expected (it weights the push/pull
 * terms as regularization vs 1-regularization in train()) -- confirm.
 */
void CLMNN::set_regularization(const float64_t regularization)
{
    m_regularization = regularization;
}
00199 
/** @return initial gradient-descent step size */
float64_t CLMNN::get_stepsize() const
{
    return m_stepsize;
}
00204 
/** Set the initial gradient-descent step size.
 * @param stepsize must be strictly positive
 */
void CLMNN::set_stepsize(const float64_t stepsize)
{
    REQUIRE(stepsize>0, "The step size used in gradient descent must be larger than zero\n")
    m_stepsize = stepsize;
}
00210 
/** @return step size below which training terminates */
float64_t CLMNN::get_stepsize_threshold() const
{
    return m_stepsize_threshold;
}
00215 
/** Set the step size termination threshold.
 * @param stepsize_threshold must be strictly positive
 */
void CLMNN::set_stepsize_threshold(const float64_t stepsize_threshold)
{
    REQUIRE(stepsize_threshold>0,
            "The threshold for the step size must be larger than zero\n")
    m_stepsize_threshold = stepsize_threshold;
}
00222 
/** @return maximum number of gradient-descent iterations */
uint32_t CLMNN::get_maxiter() const
{
    return m_maxiter;
}
00227 
/** Set the maximum number of iterations.
 * @param maxiter must be strictly positive
 */
void CLMNN::set_maxiter(const uint32_t maxiter)
{
    REQUIRE(maxiter>0, "The number of maximum iterations must be larger than zero\n")
    m_maxiter = maxiter;
}
00233 
/** @return number of iterations between exact impostor searches */
uint32_t CLMNN::get_correction() const
{
    return m_correction;
}
00238 
/** Set the number of iterations between exact impostor searches.
 * NOTE(review): no REQUIRE here, unlike the other setters; a value of zero
 * may be problematic for the periodic search in CLMNNImpl::find_impostors
 * -- confirm against that implementation.
 */
void CLMNN::set_correction(const uint32_t correction)
{
    m_correction = correction;
}
00243 
/** @return objective-change threshold used for termination */
float64_t CLMNN::get_obj_threshold() const
{
    return m_obj_threshold;
}
00248 
/** Set the objective termination threshold.
 * @param obj_threshold must be strictly positive
 */
void CLMNN::set_obj_threshold(const float64_t obj_threshold)
{
    REQUIRE(obj_threshold>0,
            "The threshold for the objective must be larger than zero\n")
    m_obj_threshold = obj_threshold;
}
00255 
/** @return whether the transform is restricted to be diagonal */
bool CLMNN::get_diagonal() const
{
    return m_diagonal;
}
00260 
/** Restrict (or not) the learned transform to a diagonal matrix. */
void CLMNN::set_diagonal(const bool diagonal)
{
    m_diagonal = diagonal;
}
00265 
/** @return training statistics; the caller receives a reference and is
 *          responsible for SG_UNREF'ing it */
CLMNNStatistics* CLMNN::get_statistics() const
{
    SG_REF(m_statistics);
    return m_statistics;
}
00271 
/** Register all parameters with Shogun's parameter framework and set
 * default values for every member. Called from every constructor. */
void CLMNN::init()
{
    SG_ADD(&m_linear_transform, "linear_transform",
            "Linear transform in matrix form", MS_NOT_AVAILABLE)
    SG_ADD((CSGObject**) &m_features, "features", "Training features",
            MS_NOT_AVAILABLE)
    SG_ADD((CSGObject**) &m_labels, "labels", "Training labels",
            MS_NOT_AVAILABLE)
    SG_ADD(&m_k, "k", "Number of target neighbours per example",
            MS_NOT_AVAILABLE)
    SG_ADD(&m_regularization, "regularization", "Regularization",
            MS_AVAILABLE)
    SG_ADD(&m_stepsize, "stepsize", "Step size in gradient descent",
            MS_NOT_AVAILABLE)
    SG_ADD(&m_stepsize_threshold, "stepsize_threshold", "Step size threshold",
            MS_NOT_AVAILABLE)
    SG_ADD(&m_maxiter, "maxiter", "Maximum number of iterations",
            MS_NOT_AVAILABLE)
    SG_ADD(&m_correction, "correction",
            "Iterations between exact impostors search", MS_NOT_AVAILABLE)
    SG_ADD(&m_obj_threshold, "obj_threshold", "Objective threshold",
            MS_NOT_AVAILABLE)
    // NOTE(review): the registered name "m_diagonal" keeps the member prefix,
    // unlike every other parameter here -- likely an oversight, but renaming
    // it would break serialization compatibility, so it is only flagged.
    SG_ADD(&m_diagonal, "m_diagonal", "Diagonal transformation", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**) &m_statistics, "statistics", "Training statistics",
            MS_NOT_AVAILABLE);

    // Default values
    m_features = NULL;
    m_labels = NULL;
    m_k = 1;
    m_regularization = 0.5;
    m_stepsize = 1e-07;
    m_stepsize_threshold = 1e-22;
    m_maxiter = 1000;
    m_correction = 15;
    m_obj_threshold = 1e-9;
    m_diagonal = false;
    m_statistics = NULL;
}
00310 
/** Default constructor: registers the statistics vectors as parameters. */
CLMNNStatistics::CLMNNStatistics()
{
    init();
}
00315 
/** Destructor. Vectors clean themselves up; nothing to release here. */
CLMNNStatistics::~CLMNNStatistics()
{
}
00319 
/** @return name of the class, used by Shogun's serialization/printing machinery */
const char* CLMNNStatistics::get_name() const
{
    return "LMNNStatistics";
}
00324 
/** Resize all per-iteration statistics vectors in lockstep.
 * @param size new length, must be strictly positive
 */
void CLMNNStatistics::resize(int32_t size)
{
    REQUIRE(size > 0, "The new size in CLMNNStatistics::resize must be larger than zero."
             " Given value is %d.\n", size);

    obj.resize_vector(size);
    stepsize.resize_vector(size);
    num_impostors.resize_vector(size);
}
00334 
00335 void CLMNNStatistics::set(index_t iter, float64_t obj_iter, float64_t stepsize_iter,
00336         uint32_t num_impostors_iter)
00337 {
00338     REQUIRE(iter >= 0 && iter < obj.vlen, "The iteration index in CLMNNStatistics::set "
00339             "must be larger or equal to zero and less than the size (%d). Given valu is %d.\n", obj.vlen, iter);
00340 
00341     obj[iter] = obj_iter;
00342     stepsize[iter] = stepsize_iter;
00343     num_impostors[iter] = num_impostors_iter;
00344 }
00345 
/** Register the per-iteration statistics vectors with the parameter framework. */
void CLMNNStatistics::init()
{
    SG_ADD(&obj, "obj", "Objective at each iteration", MS_NOT_AVAILABLE);
    SG_ADD(&stepsize, "stepsize", "Step size at each iteration", MS_NOT_AVAILABLE);
    SG_ADD(&num_impostors, "num_impostors", "Number of impostors at each iteration",
            MS_NOT_AVAILABLE);
}
00353 
00354 #endif /* HAVE_LAPACK */
00355 #endif /* HAVE_EIGEN3 */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation