// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Kevin Hughes 00008 * Copyright (C) 2013 Kevin Hughes 00009 * 00010 * Thanks to Fernando Jose Iglesias Garcia (shogun) 00011 * and Matthieu Perrot (scikit-learn) 00012 */ 00013 00014 #include <shogun/lib/common.h> 00015 00016 #ifdef HAVE_EIGEN3 00017 00018 #include <shogun/multiclass/MCLDA.h> 00019 #include <shogun/machine/NativeMulticlassMachine.h> 00020 #include <shogun/features/Features.h> 00021 #include <shogun/labels/Labels.h> 00022 #include <shogun/labels/MulticlassLabels.h> 00023 #include <shogun/mathematics/Math.h> 00024 00025 #include <shogun/mathematics/eigen3.h> 00026 00027 using namespace shogun; 00028 using namespace Eigen; 00029 00030 CMCLDA::CMCLDA(float64_t tolerance, bool store_cov) 00031 : CNativeMulticlassMachine() 00032 { 00033 init(); 00034 m_tolerance=tolerance; 00035 m_store_cov=store_cov; 00036 00037 } 00038 00039 CMCLDA::CMCLDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab, float64_t tolerance, bool store_cov) 00040 : CNativeMulticlassMachine() 00041 { 00042 init(); 00043 00044 m_tolerance=tolerance; 00045 m_store_cov=store_cov; 00046 00047 set_features(traindat); 00048 set_labels(trainlab); 00049 } 00050 00051 CMCLDA::~CMCLDA() 00052 { 00053 SG_UNREF(m_features); 00054 00055 cleanup(); 00056 } 00057 00058 void CMCLDA::init() 00059 { 00060 SG_ADD(&m_tolerance, "m_tolerance", "Tolerance member.", MS_AVAILABLE); 00061 SG_ADD(&m_store_cov, "m_store_cov", "Store covariance member", MS_NOT_AVAILABLE); 00062 SG_ADD((CSGObject**) &m_features, "m_features", "Feature object.", MS_NOT_AVAILABLE); 00063 SG_ADD(&m_means, "m_means", "Mean vectors list", MS_NOT_AVAILABLE); 00064 SG_ADD(&m_cov, "m_cov", "covariance matrix", 
MS_NOT_AVAILABLE); 00065 SG_ADD(&m_xbar, "m_xbar", "total mean", MS_NOT_AVAILABLE); 00066 SG_ADD(&m_scalings, "m_scalings", "scalings", MS_NOT_AVAILABLE); 00067 SG_ADD(&m_rank, "m_rank", "rank", MS_NOT_AVAILABLE); 00068 SG_ADD(&m_coef, "m_coef", "weight vector", MS_NOT_AVAILABLE); 00069 SG_ADD(&m_intercept, "m_intercept", "intercept", MS_NOT_AVAILABLE); 00070 00071 m_features = NULL; 00072 m_num_classes=0; 00073 m_dim=0; 00074 m_rank=0; 00075 } 00076 00077 void CMCLDA::cleanup() 00078 { 00079 m_num_classes = 0; 00080 } 00081 00082 CMulticlassLabels* CMCLDA::apply_multiclass(CFeatures* data) 00083 { 00084 if (data) 00085 { 00086 if (!data->has_property(FP_DOT)) 00087 SG_ERROR("Specified features are not of type CDotFeatures\n") 00088 00089 set_features((CDotFeatures*) data); 00090 } 00091 00092 if (!m_features) 00093 return NULL; 00094 00095 int32_t num_vecs = m_features->get_num_vectors(); 00096 ASSERT(num_vecs > 0) 00097 ASSERT( m_dim == m_features->get_dim_feature_space() ); 00098 00099 // collect features into a matrix 00100 CDenseFeatures< float64_t >* rf = (CDenseFeatures< float64_t >*) m_features; 00101 00102 MatrixXd X(num_vecs, m_dim); 00103 00104 int32_t vlen; 00105 bool vfree; 00106 float64_t* vec; 00107 Map< VectorXd > Em_xbar(m_xbar, m_dim); 00108 for (int i = 0; i < num_vecs; i++) 00109 { 00110 vec = rf->get_feature_vector(i, vlen, vfree); 00111 ASSERT(vec) 00112 00113 Map< VectorXd > Evec(vec, vlen); 00114 00115 X.row(i) = Evec - Em_xbar; 00116 00117 rf->free_feature_vector(vec, i, vfree); 00118 } 00119 00120 #ifdef DEBUG_MCLDA 00121 SG_PRINT("\n>>> Displaying X ...\n") 00122 SGMatrix< float64_t >::display_matrix(X.data(), num_vecs, m_dim); 00123 #endif 00124 00125 // center and scale data 00126 MatrixXd Xs(num_vecs, m_rank); 00127 Map< MatrixXd > Em_scalings(m_scalings.matrix, m_dim, m_rank); 00128 Xs = X*Em_scalings; 00129 00130 #ifdef DEBUG_MCLDA 00131 SG_PRINT("\n>>> Displaying Xs ...\n") 00132 SGMatrix< float64_t >::display_matrix(Xs.data(), 
num_vecs, m_rank); 00133 #endif 00134 00135 // decision function 00136 MatrixXd d(num_vecs, m_num_classes); 00137 Map< MatrixXd > Em_coef(m_coef.matrix, m_num_classes, m_rank); 00138 Map< VectorXd > Em_intercept(m_intercept.vector, m_num_classes); 00139 d = (Xs*Em_coef.transpose()).rowwise() + Em_intercept.transpose(); 00140 00141 #ifdef DEBUG_MCLDA 00142 SG_PRINT("\n>>> Displaying d ...\n") 00143 SGMatrix< float64_t >::display_matrix(d.data(), num_vecs, m_num_classes); 00144 #endif 00145 00146 // argmax to apply labels 00147 CMulticlassLabels* out = new CMulticlassLabels(num_vecs); 00148 for (int i = 0; i < num_vecs; i++) 00149 out->set_label(i, SGVector<float64_t>::arg_max(d.data()+i, num_vecs, m_num_classes)); 00150 00151 return out; 00152 } 00153 00154 bool CMCLDA::train_machine(CFeatures* data) 00155 { 00156 if (!m_labels) 00157 SG_ERROR("No labels allocated in MCLDA training\n") 00158 00159 if (data) 00160 { 00161 if (!data->has_property(FP_DOT)) 00162 SG_ERROR("Speficied features are not of type CDotFeatures\n") 00163 00164 set_features((CDotFeatures*) data); 00165 } 00166 00167 if (!m_features) 00168 SG_ERROR("No features allocated in MCLDA training\n") 00169 00170 SGVector< int32_t > train_labels = ((CMulticlassLabels*) m_labels)->get_int_labels(); 00171 00172 if (!train_labels.vector) 00173 SG_ERROR("No train_labels allocated in MCLDA training\n") 00174 00175 cleanup(); 00176 00177 m_num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes(); 00178 m_dim = m_features->get_dim_feature_space(); 00179 int32_t num_vec = m_features->get_num_vectors(); 00180 00181 if (num_vec != train_labels.vlen) 00182 SG_ERROR("Dimension mismatch between features and labels in MCLDA training") 00183 00184 int32_t* class_idxs = SG_MALLOC(int32_t, num_vec*m_num_classes); 00185 int32_t* class_nums = SG_MALLOC(int32_t, m_num_classes); // number of examples of each class 00186 memset(class_nums, 0, m_num_classes*sizeof(int32_t)); 00187 00188 for (int i = 0; i < 
train_labels.vlen; i++) 00189 { 00190 int32_t class_idx = train_labels.vector[i]; 00191 00192 if (class_idx < 0 || class_idx >= m_num_classes) 00193 { 00194 SG_ERROR("found label out of {0, 1, 2, ..., num_classes-1}...") 00195 return false; 00196 } 00197 else 00198 { 00199 class_idxs[ class_idx*num_vec + class_nums[class_idx]++ ] = i; 00200 } 00201 } 00202 00203 for (int i = 0; i < m_num_classes; i++) 00204 { 00205 if (class_nums[i] <= 0) 00206 { 00207 SG_ERROR("What? One class with no elements\n") 00208 return false; 00209 } 00210 } 00211 00212 CDenseFeatures< float64_t >* rf = (CDenseFeatures< float64_t >*) m_features; 00213 00214 // if ( m_store_cov ) 00215 index_t * cov_dims = SG_MALLOC(index_t, 3); 00216 cov_dims[0] = m_dim; 00217 cov_dims[1] = m_dim; 00218 cov_dims[2] = m_num_classes; 00219 SGNDArray< float64_t > covs(cov_dims, 3); 00220 00221 m_means = SGMatrix< float64_t >(m_dim, m_num_classes, true); 00222 00223 // matrix of all samples 00224 MatrixXd X = MatrixXd::Zero(num_vec, m_dim); 00225 int32_t iX = 0; 00226 00227 m_means.zero(); 00228 m_cov.zero(); 00229 00230 int32_t vlen; 00231 bool vfree; 00232 float64_t* vec; 00233 for (int k = 0; k < m_num_classes; k++) 00234 { 00235 // gather all the samples for class k into buffer and calculate the mean of class k 00236 MatrixXd buffer(class_nums[k], m_dim); 00237 Map< VectorXd > Em_mean(m_means.get_column_vector(k), m_dim); 00238 for (int i = 0; i < class_nums[k]; i++) 00239 { 00240 vec = rf->get_feature_vector(class_idxs[k*num_vec + i], vlen, vfree); 00241 ASSERT(vec) 00242 00243 Map< VectorXd > Evec(vec, vlen); 00244 Em_mean += Evec; 00245 buffer.row(i) = Evec; 00246 00247 rf->free_feature_vector(vec, class_idxs[k*num_vec + i], vfree); 00248 } 00249 00250 Em_mean /= class_nums[k]; 00251 00252 // subtract the mean of class k from each sample of class k and store the centered data in Xc 00253 for (int i = 0; i < class_nums[k]; i++) 00254 { 00255 buffer.row(i) -= Em_mean; 00256 X.row(iX) += buffer.row(i); 
00257 iX++; 00258 } 00259 00260 if (m_store_cov) 00261 { 00262 // calc cov = buffer.T * buffer 00263 Map< MatrixXd > Em_cov_k(covs.get_matrix(k), m_dim, m_dim); 00264 Em_cov_k = buffer.transpose() * buffer; 00265 } 00266 } 00267 00268 #ifdef DEBUG_MCLDA 00269 SG_PRINT("\n>>> Displaying means ...\n") 00270 SGMatrix< float64_t >::display_matrix(m_means.matrix, m_dim, m_num_classes); 00271 #endif 00272 00273 if (m_store_cov) 00274 { 00275 m_cov = SGMatrix< float64_t >(m_dim, m_dim, true); 00276 m_cov.zero(); 00277 Map< MatrixXd > Em_cov(m_cov.matrix, m_dim, m_dim); 00278 00279 for (int k = 0; k < m_num_classes; k++) 00280 { 00281 Map< MatrixXd > Em_cov_k(covs.get_matrix(k), m_dim, m_dim); 00282 Em_cov += Em_cov_k; 00283 } 00284 00285 Em_cov /= m_num_classes; 00286 } 00287 00288 #ifdef DEBUG_MCLDA 00289 if (m_store_cov) 00290 { 00291 SG_PRINT("\n>>> Displaying cov ...\n") 00292 SGMatrix< float64_t >::display_matrix(m_cov.matrix, m_dim, m_dim); 00293 } 00294 #endif 00295 00297 // 1) within (univariate) scaling by with classes std-dev 00298 00299 // std-dev of X 00300 m_xbar = SGVector< float64_t >(m_dim); 00301 m_xbar.zero(); 00302 Map< VectorXd > Em_xbar(m_xbar.vector, m_dim); 00303 Em_xbar = X.colwise().sum(); 00304 Em_xbar /= num_vec; 00305 00306 VectorXd std = VectorXd::Zero(m_dim); 00307 std = (X.rowwise() - Em_xbar.transpose()).array().pow(2).colwise().sum(); 00308 std = std.array() / num_vec; 00309 00310 for (int j = 0; j < m_dim; j++) 00311 if(std[j] == 0) 00312 std[j] = 1; 00313 00314 float64_t fac = 1.0 / (num_vec - m_num_classes); 00315 00316 #ifdef DEBUG_MCLDA 00317 SG_PRINT("\n>>> Displaying m_xbar ...\n") 00318 SGVector< float64_t >::display_vector(m_xbar.vector, m_dim); 00319 00320 SG_PRINT("\n>>> Displaying std ...\n") 00321 SGVector< float64_t >::display_vector(std.data(), m_dim); 00322 #endif 00323 00325 // 2) Within variance scaling 00326 for (int i = 0; i < num_vec; i++) 00327 X.row(i) = sqrt(fac) * X.row(i).array() / std.transpose().array(); 00328 
00329 00330 // SVD of centered (within)scaled data 00331 VectorXd S(m_dim); 00332 MatrixXd V(m_dim, m_dim); 00333 00334 Eigen::JacobiSVD<MatrixXd> eSvd; 00335 eSvd.compute(X,Eigen::ComputeFullV); 00336 memcpy(S.data(), eSvd.singularValues().data(), m_dim*sizeof(float64_t)); 00337 memcpy(V.data(), eSvd.matrixV().data(), m_dim*m_dim*sizeof(float64_t)); 00338 V.transposeInPlace(); 00339 00340 int rank = 0; 00341 while (rank < m_dim && S[rank] > m_tolerance) 00342 { 00343 rank++; 00344 } 00345 00346 if (rank < m_dim) 00347 SG_ERROR("Warning: Variables are collinear\n") 00348 00349 MatrixXd scalings(m_dim, rank); 00350 for (int i = 0; i < m_dim; i++) 00351 for (int j = 0; j < rank; j++) 00352 scalings(i,j) = V(j,i) / std[j] / S[j]; 00353 00354 #ifdef DEBUG_MCLDA 00355 SG_PRINT("\n>>> Displaying scalings ...\n") 00356 SGMatrix< float64_t >::display_matrix(scalings.data(), m_dim, rank); 00357 #endif 00358 00360 // 3) Between variance scaling 00361 00362 // Xc = m_means dot scalings 00363 MatrixXd Xc(m_num_classes, rank); 00364 Map< MatrixXd > Em_means(m_means.matrix, m_dim, m_num_classes); 00365 Xc = (Em_means.transpose()*scalings); 00366 00367 for (int i = 0; i < m_num_classes; i++) 00368 Xc.row(i) *= sqrt(class_nums[i] * fac); 00369 00370 // Centers are living in a space with n_classes-1 dim (maximum) 00371 // Use svd to find projection in the space spanned by the 00372 // (n_classes) centers 00373 S = VectorXd(rank); 00374 V = MatrixXd(rank, rank); 00375 00376 eSvd.compute(Xc,Eigen::ComputeFullV); 00377 memcpy(S.data(), eSvd.singularValues().data(), rank*sizeof(float64_t)); 00378 memcpy(V.data(), eSvd.matrixV().data(), rank*rank*sizeof(float64_t)); 00379 00380 m_rank = 0; 00381 while (m_rank < rank && S[m_rank] > m_tolerance*S[0]) 00382 { 00383 m_rank++; 00384 } 00385 00386 // compose the scalings 00387 m_scalings = SGMatrix< float64_t >(rank, m_rank); 00388 Map< MatrixXd > Em_scalings(m_scalings.matrix, rank, m_rank); 00389 Em_scalings = scalings * V.leftCols(m_rank); 
00390 00391 #ifdef DEBUG_MCLDA 00392 SG_PRINT("\n>>> Displaying m_scalings ...\n") 00393 SGMatrix< float64_t >::display_matrix(m_scalings.matrix, rank, m_rank); 00394 #endif 00395 00396 // weight vectors / centroids 00397 MatrixXd meansc(m_dim, m_num_classes); 00398 meansc = Em_means.colwise() - Em_xbar; 00399 00400 m_coef = SGMatrix< float64_t >(m_num_classes, m_rank); 00401 Map< MatrixXd > Em_coef(m_coef.matrix, m_num_classes, m_rank); 00402 Em_coef = meansc.transpose() * Em_scalings; 00403 00404 #ifdef DEBUG_MCLDA 00405 SG_PRINT("\n>>> Displaying m_coefs ...\n") 00406 SGMatrix< float64_t >::display_matrix(m_coef.matrix, m_num_classes, m_rank); 00407 #endif 00408 00409 // intercept 00410 m_intercept = SGVector< float64_t >(m_num_classes); 00411 m_intercept.zero(); 00412 for (int j = 0; j < m_num_classes; j++) 00413 m_intercept[j] = -0.5*m_coef[j]*m_coef[j] + log(class_nums[j]/float(num_vec)); 00414 00415 #ifdef DEBUG_MCLDA 00416 SG_PRINT("\n>>> Displaying m_intercept ...\n") 00417 SGVector< float64_t >::display_vector(m_intercept.vector, m_num_classes); 00418 #endif 00419 00420 SG_FREE(class_idxs); 00421 SG_FREE(class_nums); 00422 00423 return true; 00424 } 00425 00426 #endif /* HAVE_EIGEN3 */