SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
LMNNImpl.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Fernando J. Iglesias Garcia
00008  * Copyright (C) 2013 Fernando J. Iglesias Garcia
00009  */
00010 
00011 #ifdef HAVE_EIGEN3
00012 #ifdef HAVE_LAPACK
00013 
00014 #include <shogun/metric/LMNNImpl.h>
00015 #include <shogun/multiclass/KNN.h>
00016 #include <shogun/preprocessor/PruneVarSubMean.h>
00017 #include <shogun/preprocessor/PCA.h>
00018 
00019 #include <iterator>
00020 
00022 
00023 // column-wise sum of the squared elements of a matrix
00024 #define SUMSQCOLS(A)    ((A).array().square().colwise().sum())
00025 
00026 using namespace shogun;
00027 using namespace Eigen;
00028 
00029 CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp)
00030 : example(ex), target(tar), impostor(imp)
00031 {
00032 }
00033 
00034 bool CImpostorNode::operator<(const CImpostorNode& rhs) const
00035 {
00036     if (example == rhs.example)
00037     {
00038         if (target == rhs.target)
00039             return impostor < rhs.impostor;
00040         else
00041             return target < rhs.target;
00042     }
00043     else
00044         return example < rhs.example;
00045 }
00046 
00047 void CLMNNImpl::check_training_setup(CFeatures* features, const CLabels* labels,
00048         SGMatrix<float64_t>& init_transform)
00049 {
00050     REQUIRE(features->has_property(FP_DOT),
00051             "LMNN can only be applied to features that support dot products\n")
00052     REQUIRE(labels->get_label_type()==LT_MULTICLASS,
00053             "LMNN supports only MulticlassLabels\n")
00054     REQUIRE(labels->get_num_labels()==features->get_num_vectors(),
00055             "The number of feature vectors must be equal to the number of labels\n")
00056     //FIXME this requirement should be dropped in the future
00057     REQUIRE(features->get_feature_class()==C_DENSE,
00058             "Currently, LMNN supports only DenseFeatures\n")
00059 
00060     // cast is safe, we ensure above that features are dense
00061     CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(features);
00062 
00064     if (init_transform.num_rows==0)
00065         init_transform = CLMNNImpl::compute_pca_transform(x);
00066 
00067     REQUIRE(init_transform.num_rows==x->get_num_features() &&
00068             init_transform.num_rows==init_transform.num_cols,
00069             "The initial transform must be a square matrix of size equal to the "
00070             "number of features\n")
00071 }
00072 
00073 SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x,
00074         CMulticlassLabels* y, int32_t k)
00075 {
00076     SG_SDEBUG("Entering CLMNNImpl::find_target_nn().\n")
00077 
00078     // get the number of features
00079     int32_t d = x->get_num_features();
00080     SGMatrix<index_t> target_neighbors(k, x->get_num_vectors());
00081     SGVector<float64_t> unique_labels = y->get_unique_labels();
00082     CDenseFeatures<float64_t>* features_slice = new CDenseFeatures<float64_t>();
00083     CMulticlassLabels* labels_slice = new CMulticlassLabels();
00084     SGVector<index_t> idxsmap(x->get_num_vectors());
00085 
00086     // increase ref count because a KNN instance will be created for each slice, and it
00087     // decreses the ref count of its labels and features when destroyed
00088     SG_REF(features_slice)
00089     SG_REF(labels_slice)
00090 
00091     for (index_t i = 0; i < unique_labels.vlen; ++i)
00092     {
00093         int32_t slice_size = 0;
00094         for (int32_t j = 0; j < y->get_num_labels(); ++j)
00095         {
00096             if (y->get_label(j)==unique_labels[i])
00097             {
00098                 idxsmap[slice_size] = j;
00099                 ++slice_size;
00100             }
00101         }
00102 
00103         MatrixXd slice_mat(d, slice_size);
00104         for (int32_t j = 0; j < slice_size; ++j)
00105             slice_mat.col(j) = Map<const VectorXd>(x->get_feature_vector(idxsmap[j]).vector, d);
00106 
00107         features_slice->set_feature_matrix(SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false));
00108 
00109         //FIXME the labels are not actually necessary to get the nearest neighbors, the
00110         //features suffice. The labels are needed when we want to classify.
00111         SGVector<float64_t> labels_vec(slice_size);
00112         labels_vec.set_const(unique_labels[i]);
00113         labels_slice->set_labels(labels_vec);
00114 
00115         CKNN* knn = new CKNN(k+1, new CEuclideanDistance(features_slice, features_slice), labels_slice);
00116         SGMatrix<int32_t> target_slice = knn->nearest_neighbors();
00117         // sanity check
00118         ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size)
00119 
00120         for (index_t j = 0; j < target_slice.num_cols; ++j)
00121         {
00122             for (index_t l = 1; l < target_slice.num_rows; ++l)
00123                 target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
00124         }
00125 
00126         // clean up knn
00127         SG_UNREF(knn)
00128     }
00129 
00130     // clean up features and labels
00131     SG_UNREF(features_slice)
00132     SG_UNREF(labels_slice)
00133 
00134     SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")
00135 
00136     return target_neighbors;
00137 }
00138 
00139 MatrixXd CLMNNImpl::sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn)
00140 {
00141     // get the number of features
00142     int32_t d = x->get_num_features();
00143     // initialize the sum of outer products (sop)
00144     MatrixXd sop(d,d);
00145     sop.setZero();
00146     // map the feature matrix (each column is a feature vector) to an Eigen matrix
00147     Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors());
00148 
00149     // sum the outer products stored in C using the indices specified in target_nn
00150     for (index_t i = 0; i < target_nn.num_cols; ++i)
00151     {
00152         for (index_t j = 0; j < target_nn.num_rows; ++j)
00153         {
00154             VectorXd dx = X.col(i) - X.col(target_nn(j,i));
00155             sop += dx*dx.transpose();
00156         }
00157     }
00158 
00159     return sop;
00160 }
00161 
00162 ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
00163         CMulticlassLabels* y, const MatrixXd& L, const SGMatrix<index_t> target_nn,
00164         const uint32_t iter, const uint32_t correction)
00165 {
00166     SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n")
00167 
00168     // get the number of examples from data
00169     int32_t n = x->get_num_vectors();
00170     // get the number of features
00171     int32_t d = x->get_num_features();
00172     // get the number of neighbors
00173     int32_t k = target_nn.num_rows;
00174 
00175     // map the feature matrix (each column is a feature vector) to an Eigen matrix
00176     Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n);
00177     // transform the feature vectors
00178     MatrixXd LX = L*X;
00179 
00180     // compute square distances plus margin from examples to target neighbors
00181     MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
00182 
00183     // initialize impostors set
00184     ImpostorsSetType N;
00185     // exact impostors set shared between calls to this method
00186     static ImpostorsSetType Nexact;
00187 
00188     // impostors search
00189     REQUIRE(correction>0, "The number of iterations between exact updates of the "
00190             "impostors set must be greater than 0\n")
00191     if ((iter % correction)==0)
00192     {
00193         Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
00194         N = Nexact;
00195     }
00196     else
00197     {
00198         N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
00199     }
00200 
00201     SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n")
00202 
00203     return N;
00204 }
00205 
00206 void CLMNNImpl::update_gradient(CDenseFeatures<float64_t>* x, MatrixXd& G,
00207         const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
00208 {
00209     // compute the difference sets
00210     ImpostorsSetType Np_Nc, Nc_Np;
00211     set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
00212     set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
00213 
00214     // map the feature matrix (each column is a feature vector) to an Eigen matrix
00215     Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
00216 
00217     // remove the gradient contributions of the impostors that were in the previous
00218     // set but disappeared in the current
00219     for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
00220     {
00221         VectorXd dx1 = X.col(it->example) - X.col(it->target);
00222         VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
00223         G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
00224     }
00225 
00226     // add the gradient contributions of the new impostors
00227     for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
00228     {
00229         VectorXd dx1 = X.col(it->example) - X.col(it->target);
00230         VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
00231         G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
00232     }
00233 }
00234 
00235 void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal)
00236 {
00237     if (diagonal)
00238     {
00239         // compute M as the square of L
00240         MatrixXd M = L.transpose()*L;
00241         // do step in M along the gradient direction
00242         M -= stepsize*G;
00243         // keep only the elements in the diagonal of M
00244         VectorXd m = M.diagonal();
00245 
00246         VectorXd zero;
00247         zero.resize(m.size());
00248         zero.setZero();
00249 
00250         // return to representation in L
00251         VectorXd l = m.array().max(zero.array()).array().sqrt();
00252         L = l.asDiagonal();
00253     }
00254     else
00255     {
00256         // do step in L along the gradient direction (no need to project M then)
00257         L -= stepsize*(2*L*G);
00258     }
00259 }
00260 
00261 void CLMNNImpl::correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter)
00262 {
00263     if (iter > 0)
00264     {
00265         // Difference between current and previous objective
00266         float64_t delta = obj[iter] - obj[iter-1];
00267 
00268         if (delta > 0)
00269         {
00270             // The objective has increased, we have probably jumped over the optimum,
00271             // thus, decrease the step size
00272             stepsize *= 0.5;
00273         }
00274         else
00275         {
00276             // The objective has decreased, we are in the right direction,
00277             // increase the step size
00278             stepsize *= 1.01;
00279         }
00280     }
00281 }
00282 
00283 bool CLMNNImpl::check_termination(float64_t stepsize, const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
00284 {
00285     if (iter >= maxiter-1)
00286     {
00287         SG_SWARNING("Maximum number of iterations reached before convergence.");
00288         return true;
00289     }
00290 
00291     if (stepsize < stepsize_threshold)
00292     {
00293         SG_SDEBUG("Step size too small to make more progress. Convergence reached.\n");
00294         return true;
00295     }
00296 
00297     if (iter >= 10)
00298     {
00299         for (int32_t i = 0; i < 3; ++i)
00300         {
00301             if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)
00302                 return false;
00303         }
00304 
00305         SG_SDEBUG("No more progress in the objective. Convergence reached.\n");
00306         return true;
00307     }
00308 
00309     // For the rest of the cases, do not stop LMNN.
00310     return false;
00311 }
00312 
00313 SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>* features)
00314 {
00315     SG_SDEBUG("Initializing LMNN transform using PCA.\n");
00316 
00317     // Substract the mean of the features
00318     // Create clone of the features to keep the input features unmodified
00319     CDenseFeatures<float64_t>* cloned_features =
00320             new CDenseFeatures<float64_t>(features->get_feature_matrix().clone());
00321     CPruneVarSubMean* mean_substractor =
00322             new CPruneVarSubMean(false); // false to avoid variance normalization
00323     mean_substractor->init(cloned_features);
00324     mean_substractor->apply_to_feature_matrix(cloned_features);
00325 
00326     // Obtain the linear transform applying PCA
00327     CPCA* pca = new CPCA();
00328     pca->set_target_dim(cloned_features->get_num_features());
00329     pca->init(cloned_features);
00330     SGMatrix<float64_t> pca_transform = pca->get_transformation_matrix();
00331 
00332     SG_UNREF(pca);
00333     SG_UNREF(mean_substractor);
00334     SG_UNREF(cloned_features);
00335 
00336     return pca_transform;
00337 }
00338 
00339 MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target_nn)
00340 {
00341     // get the number of examples
00342     ASSERT(LX.cols()==target_nn.num_cols)
00343     int32_t n = LX.cols();
00344     // get the number of features
00345     int32_t d = LX.rows();
00346     // get the number of neighbors
00347     int32_t k = target_nn.num_rows;
00348 
00350 
00351     // create Shogun features from LX to later apply subset
00352     SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
00353     CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
00354 
00355     // initialize distances
00356     MatrixXd sqdists(k,n);
00357     sqdists.setZero();
00358     for (int32_t i = 0; i < k; ++i)
00359     {
00360         //FIXME avoid copying the rows of target_nn and access them directly. Maybe
00361         //find_target_nn should be changed to return the output transposed wrt how it is
00362         //done atm.
00363         SGVector<index_t> subset_vec = target_nn.get_row_vector(i);
00364         lx->add_subset(subset_vec);
00365         // after the subset, there are still n columns, i.e. the subset is used to
00366         // modify the order of the columns in x according to the target neighbors
00367         sqdists.row(i) = SUMSQCOLS(LX - Map<const MatrixXd>(lx->get_feature_matrix().matrix, d, n)) + 1;
00368         lx->remove_subset();
00369     }
00370 
00371     // clean up features used to apply subset
00372     SG_UNREF(lx);
00373 
00374     return sqdists;
00375 }
00376 
00377 ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists,
00378         CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k)
00379 {
00380     SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n")
00381 
00382     // initialize empty impostors set
00383     ImpostorsSetType N = ImpostorsSetType();
00384 
00385     // get the number of examples from data
00386     int32_t n = LX.cols();
00387     // get the number of features
00388     int32_t d = LX.rows();
00389     // create Shogun features from LX to later apply subset
00390     SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
00391     CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
00392 
00393     // get a vector with unique label values
00394     SGVector<float64_t> unique = y->get_unique_labels();
00395 
00396     // for each label except from the largest one
00397     for (index_t i = 0; i < unique.vlen-1; ++i)
00398     {
00399         // get the indices of the examples labelled as unique[i]
00400         std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]);
00401         // get the indices of the examples that have a larger label value, so that
00402         // pairwise distances are computed once
00403         std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]);
00404 
00405         // get distance with features indexed by iidxs and gtidxs separated
00406         CEuclideanDistance* euclidean = CLMNNImpl::setup_distance(lx,iidxs,gtidxs);
00407         euclidean->set_disable_sqrt(true);
00408 
00409         for (int32_t j = 0; j < k; ++j)
00410         {
00411             for (std::size_t ii = 0; ii < iidxs.size(); ++ii)
00412             {
00413                 for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)
00414                 {
00415                     // FIXME study if using upper bounded distances can be an improvement
00416                     float64_t distance = euclidean->distance(ii,jj);
00417 
00418                     if (distance <= sqdists(j,iidxs[ii]))
00419                         N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );
00420 
00421                     if (distance <= sqdists(j,gtidxs[jj]))
00422                         N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
00423                 }
00424             }
00425         }
00426 
00427         SG_UNREF(euclidean);
00428     }
00429 
00430     SG_UNREF(lx);
00431 
00432     SG_SDEBUG("Leaving CLMNNImpl::find_impostors_exact().\n")
00433 
00434     return N;
00435 }
00436 
00437 ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
00438         const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn)
00439 {
00440     SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n")
00441 
00442     // initialize empty impostors set
00443     ImpostorsSetType N = ImpostorsSetType();
00444 
00445     // compute square distances from examples to impostors
00446     SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact);
00447 
00448     // find in the exact set of impostors computed last, the triplets that remain impostors
00449     index_t i = 0;
00450     for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
00451     {
00452         // find in target_nn(:,it->example) the position of the target neighbor it->target
00453         index_t target_idx = 0;
00454         while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows)
00455             ++target_idx;
00456 
00457         REQUIRE(target_idx<target_nn.num_rows, "The index of the target neighbour in the "
00458                 "impostors set was not found in the target neighbours matrix. "
00459                 "There must be a bug in find_impostors_exact.\n")
00460 
00461         if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
00462             N.insert(*it);
00463     }
00464 
00465     SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
00466 
00467     return N;
00468 }
00469 
00470 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
00471 {
00472     // get the number of examples
00473     int32_t n = LX.cols();
00474     // get the number of features
00475     int32_t d = LX.rows();
00476     // get the number of impostors
00477     size_t num_impostors = Nexact.size();
00478 
00480 
00481     // create Shogun features from LX and distance
00482     SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
00483     CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
00484     CEuclideanDistance* euclidean = new CEuclideanDistance(lx,lx);
00485     euclidean->set_disable_sqrt(true);
00486 
00487     // initialize vector of square distances
00488     SGVector<float64_t> sqdists(num_impostors);
00489     // compute square distances
00490     index_t i = 0;
00491     for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
00492         sqdists[i++] = euclidean->distance(it->example,it->impostor);
00493 
00494     // clean up distance
00495     SG_UNREF(euclidean);
00496 
00497     return sqdists;
00498 }
00499 
00500 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y,
00501         float64_t yi)
00502 {
00503     // indices of the examples with label equal to yi
00504     std::vector<index_t> idxs;
00505 
00506     for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
00507     {
00508         if (y->get_label(i) == yi)
00509             idxs.push_back(i);
00510     }
00511 
00512     return idxs;
00513 }
00514 
00515 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y,
00516         float64_t yi)
00517 {
00518     // indices of the examples with label equal greater than yi
00519     std::vector<index_t> idxs;
00520 
00521     for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
00522     {
00523         if (y->get_label(i) > yi)
00524             idxs.push_back(i);
00525     }
00526 
00527     return idxs;
00528 }
00529 
00530 CEuclideanDistance* CLMNNImpl::setup_distance(CDenseFeatures<float64_t>* x,
00531         std::vector<index_t>& a, std::vector<index_t>& b)
00532 {
00533     // create new features only containing examples whose indices are in a
00534     x->add_subset(SGVector<index_t>(a.data(), a.size(), false));
00535     CDenseFeatures<float64_t>* afeats = new CDenseFeatures<float64_t>(x->get_feature_matrix());
00536     x->remove_subset();
00537 
00538     // create new features only containing examples whose indices are in b
00539     x->add_subset(SGVector<index_t>(b.data(), b.size(), false));
00540     CDenseFeatures<float64_t>* bfeats = new CDenseFeatures<float64_t>(x->get_feature_matrix());
00541     x->remove_subset();
00542 
00543     // create and return distance
00544     CEuclideanDistance* euclidean = new CEuclideanDistance(afeats,bfeats);
00545     return euclidean;
00546 }
00547 
00548 #endif /* HAVE_LAPACK */
00549 #endif /* HAVE_EIGEN3 */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation