SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Fernando J. Iglesias Garcia 00008 * Copyright (C) 2013 Fernando J. Iglesias Garcia 00009 */ 00010 00011 #ifdef HAVE_EIGEN3 00012 #ifdef HAVE_LAPACK 00013 00014 #include <shogun/metric/LMNNImpl.h> 00015 #include <shogun/multiclass/KNN.h> 00016 #include <shogun/preprocessor/PruneVarSubMean.h> 00017 #include <shogun/preprocessor/PCA.h> 00018 00019 #include <iterator> 00020 00022 00023 // column-wise sum of the squared elements of a matrix 00024 #define SUMSQCOLS(A) ((A).array().square().colwise().sum()) 00025 00026 using namespace shogun; 00027 using namespace Eigen; 00028 00029 CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp) 00030 : example(ex), target(tar), impostor(imp) 00031 { 00032 } 00033 00034 bool CImpostorNode::operator<(const CImpostorNode& rhs) const 00035 { 00036 if (example == rhs.example) 00037 { 00038 if (target == rhs.target) 00039 return impostor < rhs.impostor; 00040 else 00041 return target < rhs.target; 00042 } 00043 else 00044 return example < rhs.example; 00045 } 00046 00047 void CLMNNImpl::check_training_setup(CFeatures* features, const CLabels* labels, 00048 SGMatrix<float64_t>& init_transform) 00049 { 00050 REQUIRE(features->has_property(FP_DOT), 00051 "LMNN can only be applied to features that support dot products\n") 00052 REQUIRE(labels->get_label_type()==LT_MULTICLASS, 00053 "LMNN supports only MulticlassLabels\n") 00054 REQUIRE(labels->get_num_labels()==features->get_num_vectors(), 00055 "The number of feature vectors must be equal to the number of labels\n") 00056 //FIXME this requirement should be dropped in the future 00057 REQUIRE(features->get_feature_class()==C_DENSE, 00058 "Currently, LMNN supports only DenseFeatures\n") 00059 00060 // cast is safe, we ensure above that features are dense 00061 CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(features); 00062 00064 if (init_transform.num_rows==0) 00065 init_transform = CLMNNImpl::compute_pca_transform(x); 00066 00067 REQUIRE(init_transform.num_rows==x->get_num_features() && 00068 init_transform.num_rows==init_transform.num_cols, 00069 "The initial transform must be a square matrix of size equal to the " 00070 "number of features\n") 00071 } 00072 00073 SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x, 00074 CMulticlassLabels* y, int32_t k) 00075 { 00076 SG_SDEBUG("Entering CLMNNImpl::find_target_nn().\n") 00077 00078 // get the number of features 00079 int32_t d = x->get_num_features(); 00080 SGMatrix<index_t> target_neighbors(k, x->get_num_vectors()); 00081 SGVector<float64_t> unique_labels = y->get_unique_labels(); 00082 CDenseFeatures<float64_t>* features_slice = new CDenseFeatures<float64_t>(); 00083 CMulticlassLabels* labels_slice = new CMulticlassLabels(); 00084 SGVector<index_t> idxsmap(x->get_num_vectors()); 00085 00086 // increase ref count because a KNN instance will be created for each slice, and it 00087 // decreses the ref count of its labels and features when destroyed 00088 SG_REF(features_slice) 00089 SG_REF(labels_slice) 00090 00091 for (index_t i = 0; i < unique_labels.vlen; ++i) 00092 { 00093 int32_t slice_size = 0; 00094 for (int32_t j = 0; j < y->get_num_labels(); ++j) 00095 { 00096 if (y->get_label(j)==unique_labels[i]) 00097 { 00098 idxsmap[slice_size] = j; 00099 ++slice_size; 00100 } 00101 } 00102 00103 MatrixXd slice_mat(d, slice_size); 00104 for (int32_t j = 0; j < slice_size; ++j) 00105 slice_mat.col(j) = Map<const VectorXd>(x->get_feature_vector(idxsmap[j]).vector, d); 00106 00107 features_slice->set_feature_matrix(SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false)); 00108 00109 //FIXME the labels are not actually necessary to get the nearest neighbors, the 00110 //features suffice. The labels are needed when we want to classify. 00111 SGVector<float64_t> labels_vec(slice_size); 00112 labels_vec.set_const(unique_labels[i]); 00113 labels_slice->set_labels(labels_vec); 00114 00115 CKNN* knn = new CKNN(k+1, new CEuclideanDistance(features_slice, features_slice), labels_slice); 00116 SGMatrix<int32_t> target_slice = knn->nearest_neighbors(); 00117 // sanity check 00118 ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size) 00119 00120 for (index_t j = 0; j < target_slice.num_cols; ++j) 00121 { 00122 for (index_t l = 1; l < target_slice.num_rows; ++l) 00123 target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ]; 00124 } 00125 00126 // clean up knn 00127 SG_UNREF(knn) 00128 } 00129 00130 // clean up features and labels 00131 SG_UNREF(features_slice) 00132 SG_UNREF(labels_slice) 00133 00134 SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n") 00135 00136 return target_neighbors; 00137 } 00138 00139 MatrixXd CLMNNImpl::sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn) 00140 { 00141 // get the number of features 00142 int32_t d = x->get_num_features(); 00143 // initialize the sum of outer products (sop) 00144 MatrixXd sop(d,d); 00145 sop.setZero(); 00146 // map the feature matrix (each column is a feature vector) to an Eigen matrix 00147 Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors()); 00148 00149 // sum the outer products stored in C using the indices specified in target_nn 00150 for (index_t i = 0; i < target_nn.num_cols; ++i) 00151 { 00152 for (index_t j = 0; j < target_nn.num_rows; ++j) 00153 { 00154 VectorXd dx = X.col(i) - X.col(target_nn(j,i)); 00155 sop += dx*dx.transpose(); 00156 } 00157 } 00158 00159 return sop; 00160 } 00161 00162 ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x, 00163 CMulticlassLabels* y, const MatrixXd& L, const SGMatrix<index_t> target_nn, 00164 const uint32_t iter, const uint32_t correction) 00165 { 00166 SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n") 00167 00168 // get the number of examples from data 00169 int32_t n = x->get_num_vectors(); 00170 // get the number of features 00171 int32_t d = x->get_num_features(); 00172 // get the number of neighbors 00173 int32_t k = target_nn.num_rows; 00174 00175 // map the feature matrix (each column is a feature vector) to an Eigen matrix 00176 Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n); 00177 // transform the feature vectors 00178 MatrixXd LX = L*X; 00179 00180 // compute square distances plus margin from examples to target neighbors 00181 MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn); 00182 00183 // initialize impostors set 00184 ImpostorsSetType N; 00185 // exact impostors set shared between calls to this method 00186 static ImpostorsSetType Nexact; 00187 00188 // impostors search 00189 REQUIRE(correction>0, "The number of iterations between exact updates of the " 00190 "impostors set must be greater than 0\n") 00191 if ((iter % correction)==0) 00192 { 00193 Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k); 00194 N = Nexact; 00195 } 00196 else 00197 { 00198 N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn); 00199 } 00200 00201 SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n") 00202 00203 return N; 00204 } 00205 00206 void CLMNNImpl::update_gradient(CDenseFeatures<float64_t>* x, MatrixXd& G, 00207 const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization) 00208 { 00209 // compute the difference sets 00210 ImpostorsSetType Np_Nc, Nc_Np; 00211 set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin())); 00212 set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin())); 00213 00214 // map the feature matrix (each column is a feature vector) to an Eigen matrix 00215 Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors()); 00216 00217 // remove the gradient contributions of the impostors that were in the previous 00218 // set but disappeared in the current 00219 for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it) 00220 { 00221 VectorXd dx1 = X.col(it->example) - X.col(it->target); 00222 VectorXd dx2 = X.col(it->example) - X.col(it->impostor); 00223 G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose()); 00224 } 00225 00226 // add the gradient contributions of the new impostors 00227 for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it) 00228 { 00229 VectorXd dx1 = X.col(it->example) - X.col(it->target); 00230 VectorXd dx2 = X.col(it->example) - X.col(it->impostor); 00231 G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose()); 00232 } 00233 } 00234 00235 void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal) 00236 { 00237 if (diagonal) 00238 { 00239 // compute M as the square of L 00240 MatrixXd M = L.transpose()*L; 00241 // do step in M along the gradient direction 00242 M -= stepsize*G; 00243 // keep only the elements in the diagonal of M 00244 VectorXd m = M.diagonal(); 00245 00246 VectorXd zero; 00247 zero.resize(m.size()); 00248 zero.setZero(); 00249 00250 // return to representation in L 00251 VectorXd l = m.array().max(zero.array()).array().sqrt(); 00252 L = l.asDiagonal(); 00253 } 00254 else 00255 { 00256 // do step in L along the gradient direction (no need to project M then) 00257 L -= stepsize*(2*L*G); 00258 } 00259 } 00260 00261 void CLMNNImpl::correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter) 00262 { 00263 if (iter > 0) 00264 { 00265 // Difference between current and previous objective 00266 float64_t delta = obj[iter] - obj[iter-1]; 00267 00268 if (delta > 0) 00269 { 00270 // The objective has increased, we have probably jumped over the optimum, 00271 // thus, decrease the step size 00272 stepsize *= 0.5; 00273 } 00274 else 00275 { 00276 // The objective has decreased, we are in the right direction, 00277 // increase the step size 00278 stepsize *= 1.01; 00279 } 00280 } 00281 } 00282 00283 bool CLMNNImpl::check_termination(float64_t stepsize, const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold) 00284 { 00285 if (iter >= maxiter-1) 00286 { 00287 SG_SWARNING("Maximum number of iterations reached before convergence."); 00288 return true; 00289 } 00290 00291 if (stepsize < stepsize_threshold) 00292 { 00293 SG_SDEBUG("Step size too small to make more progress. Convergence reached.\n"); 00294 return true; 00295 } 00296 00297 if (iter >= 10) 00298 { 00299 for (int32_t i = 0; i < 3; ++i) 00300 { 00301 if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold) 00302 return false; 00303 } 00304 00305 SG_SDEBUG("No more progress in the objective. Convergence reached.\n"); 00306 return true; 00307 } 00308 00309 // For the rest of the cases, do not stop LMNN. 00310 return false; 00311 } 00312 00313 SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>* features) 00314 { 00315 SG_SDEBUG("Initializing LMNN transform using PCA.\n"); 00316 00317 // Substract the mean of the features 00318 // Create clone of the features to keep the input features unmodified 00319 CDenseFeatures<float64_t>* cloned_features = 00320 new CDenseFeatures<float64_t>(features->get_feature_matrix().clone()); 00321 CPruneVarSubMean* mean_substractor = 00322 new CPruneVarSubMean(false); // false to avoid variance normalization 00323 mean_substractor->init(cloned_features); 00324 mean_substractor->apply_to_feature_matrix(cloned_features); 00325 00326 // Obtain the linear transform applying PCA 00327 CPCA* pca = new CPCA(); 00328 pca->set_target_dim(cloned_features->get_num_features()); 00329 pca->init(cloned_features); 00330 SGMatrix<float64_t> pca_transform = pca->get_transformation_matrix(); 00331 00332 SG_UNREF(pca); 00333 SG_UNREF(mean_substractor); 00334 SG_UNREF(cloned_features); 00335 00336 return pca_transform; 00337 } 00338 00339 MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target_nn) 00340 { 00341 // get the number of examples 00342 ASSERT(LX.cols()==target_nn.num_cols) 00343 int32_t n = LX.cols(); 00344 // get the number of features 00345 int32_t d = LX.rows(); 00346 // get the number of neighbors 00347 int32_t k = target_nn.num_rows; 00348 00350 00351 // create Shogun features from LX to later apply subset 00352 SGMatrix<float64_t> lx_mat(LX.data(), d, n, false); 00353 CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat); 00354 00355 // initialize distances 00356 MatrixXd sqdists(k,n); 00357 sqdists.setZero(); 00358 for (int32_t i = 0; i < k; ++i) 00359 { 00360 //FIXME avoid copying the rows of target_nn and access them directly. Maybe 00361 //find_target_nn should be changed to return the output transposed wrt how it is 00362 //done atm. 00363 SGVector<index_t> subset_vec = target_nn.get_row_vector(i); 00364 lx->add_subset(subset_vec); 00365 // after the subset, there are still n columns, i.e. the subset is used to 00366 // modify the order of the columns in x according to the target neighbors 00367 sqdists.row(i) = SUMSQCOLS(LX - Map<const MatrixXd>(lx->get_feature_matrix().matrix, d, n)) + 1; 00368 lx->remove_subset(); 00369 } 00370 00371 // clean up features used to apply subset 00372 SG_UNREF(lx); 00373 00374 return sqdists; 00375 } 00376 00377 ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists, 00378 CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k) 00379 { 00380 SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n") 00381 00382 // initialize empty impostors set 00383 ImpostorsSetType N = ImpostorsSetType(); 00384 00385 // get the number of examples from data 00386 int32_t n = LX.cols(); 00387 // get the number of features 00388 int32_t d = LX.rows(); 00389 // create Shogun features from LX to later apply subset 00390 SGMatrix<float64_t> lx_mat(LX.data(), d, n, false); 00391 CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat); 00392 00393 // get a vector with unique label values 00394 SGVector<float64_t> unique = y->get_unique_labels(); 00395 00396 // for each label except from the largest one 00397 for (index_t i = 0; i < unique.vlen-1; ++i) 00398 { 00399 // get the indices of the examples labelled as unique[i] 00400 std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]); 00401 // get the indices of the examples that have a larger label value, so that 00402 // pairwise distances are computed once 00403 std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]); 00404 00405 // get distance with features indexed by iidxs and gtidxs separated 00406 CEuclideanDistance* euclidean = CLMNNImpl::setup_distance(lx,iidxs,gtidxs); 00407 euclidean->set_disable_sqrt(true); 00408 00409 for (int32_t j = 0; j < k; ++j) 00410 { 00411 for (std::size_t ii = 0; ii < iidxs.size(); ++ii) 00412 { 00413 for (std::size_t jj = 0; jj < gtidxs.size(); ++jj) 00414 { 00415 // FIXME study if using upper bounded distances can be an improvement 00416 float64_t distance = euclidean->distance(ii,jj); 00417 00418 if (distance <= sqdists(j,iidxs[ii])) 00419 N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) ); 00420 00421 if (distance <= sqdists(j,gtidxs[jj])) 00422 N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) ); 00423 } 00424 } 00425 } 00426 00427 SG_UNREF(euclidean); 00428 } 00429 00430 SG_UNREF(lx); 00431 00432 SG_SDEBUG("Leaving CLMNNImpl::find_impostors_exact().\n") 00433 00434 return N; 00435 } 00436 00437 ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists, 00438 const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn) 00439 { 00440 SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n") 00441 00442 // initialize empty impostors set 00443 ImpostorsSetType N = ImpostorsSetType(); 00444 00445 // compute square distances from examples to impostors 00446 SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact); 00447 00448 // find in the exact set of impostors computed last, the triplets that remain impostors 00449 index_t i = 0; 00450 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it) 00451 { 00452 // find in target_nn(:,it->example) the position of the target neighbor it->target 00453 index_t target_idx = 0; 00454 while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows) 00455 ++target_idx; 00456 00457 REQUIRE(target_idx<target_nn.num_rows, "The index of the target neighbour in the " 00458 "impostors set was not found in the target neighbours matrix. " 00459 "There must be a bug in find_impostors_exact.\n") 00460 00461 if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) ) 00462 N.insert(*it); 00463 } 00464 00465 SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n") 00466 00467 return N; 00468 } 00469 00470 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact) 00471 { 00472 // get the number of examples 00473 int32_t n = LX.cols(); 00474 // get the number of features 00475 int32_t d = LX.rows(); 00476 // get the number of impostors 00477 size_t num_impostors = Nexact.size(); 00478 00480 00481 // create Shogun features from LX and distance 00482 SGMatrix<float64_t> lx_mat(LX.data(), d, n, false); 00483 CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat); 00484 CEuclideanDistance* euclidean = new CEuclideanDistance(lx,lx); 00485 euclidean->set_disable_sqrt(true); 00486 00487 // initialize vector of square distances 00488 SGVector<float64_t> sqdists(num_impostors); 00489 // compute square distances 00490 index_t i = 0; 00491 for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it) 00492 sqdists[i++] = euclidean->distance(it->example,it->impostor); 00493 00494 // clean up distance 00495 SG_UNREF(euclidean); 00496 00497 return sqdists; 00498 } 00499 00500 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y, 00501 float64_t yi) 00502 { 00503 // indices of the examples with label equal to yi 00504 std::vector<index_t> idxs; 00505 00506 for (index_t i = 0; i < index_t(y->get_num_labels()); ++i) 00507 { 00508 if (y->get_label(i) == yi) 00509 idxs.push_back(i); 00510 } 00511 00512 return idxs; 00513 } 00514 00515 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y, 00516 float64_t yi) 00517 { 00518 // indices of the examples with label equal greater than yi 00519 std::vector<index_t> idxs; 00520 00521 for (index_t i = 0; i < index_t(y->get_num_labels()); ++i) 00522 { 00523 if (y->get_label(i) > yi) 00524 idxs.push_back(i); 00525 } 00526 00527 return idxs; 00528 } 00529 00530 CEuclideanDistance* CLMNNImpl::setup_distance(CDenseFeatures<float64_t>* x, 00531 std::vector<index_t>& a, std::vector<index_t>& b) 00532 { 00533 // create new features only containing examples whose indices are in a 00534 x->add_subset(SGVector<index_t>(a.data(), a.size(), false)); 00535 CDenseFeatures<float64_t>* afeats = new CDenseFeatures<float64_t>(x->get_feature_matrix()); 00536 x->remove_subset(); 00537 00538 // create new features only containing examples whose indices are in b 00539 x->add_subset(SGVector<index_t>(b.data(), b.size(), false)); 00540 CDenseFeatures<float64_t>* bfeats = new CDenseFeatures<float64_t>(x->get_feature_matrix()); 00541 x->remove_subset(); 00542 00543 // create and return distance 00544 CEuclideanDistance* euclidean = new CEuclideanDistance(afeats,bfeats); 00545 return euclidean; 00546 } 00547 00548 #endif /* HAVE_LAPACK */ 00549 #endif /* HAVE_EIGEN3 */