SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Copyright (c) 2012-2013 Sergey Lisitsyn 00008 */ 00009 00010 #include <shogun/lib/tapkee/tapkee_shogun.hpp> 00011 00012 #ifdef HAVE_EIGEN3 00013 00014 #define CUSTOM_UNIFORM_RANDOM_INDEX_FUNCTION shogun::CMath::random() 00015 #define CUSTOM_UNIFORM_RANDOM_FUNCTION shogun::CMath::random(static_cast<tapkee::ScalarType>(0),static_cast<tapkee::ScalarType>(1)) 00016 #define CUSTOM_GAUSSIAN_RANDOM_FUNCTION shogun::CMath::normal_random(static_cast<tapkee::ScalarType>(0),static_cast<tapkee::ScalarType>(1)) 00017 #define TAPKEE_EIGEN_INCLUDE_FILE <shogun/mathematics/eigen3.h> 00018 00019 #ifdef HAVE_ARPACK 00020 #define TAPKEE_WITH_ARPACK 00021 #endif 00022 #define TAPKEE_USE_LGPL_COVERTREE 00023 #include <shogun/lib/tapkee/tapkee.hpp> 00024 #include <shogun/lib/tapkee/callbacks/pimpl_callbacks.hpp> 00025 00026 using namespace shogun; 00027 00028 class ShogunLoggerImplementation : public tapkee::LoggerImplementation 00029 { 00030 virtual void message_info(const std::string& msg) 00031 { 00032 SG_SINFO((msg+"\n").c_str()) 00033 } 00034 virtual void message_warning(const std::string& msg) 00035 { 00036 SG_SWARNING((msg+"\n").c_str()) 00037 } 00038 virtual void message_error(const std::string& msg) 00039 { 00040 SG_SERROR((msg+"\n").c_str()) 00041 } 00042 virtual void message_debug(const std::string& msg) 00043 { 00044 SG_SDEBUG((msg+"\n").c_str()) 00045 } 00046 virtual void message_benchmark(const std::string& msg) 00047 { 00048 SG_SINFO((msg+"\n").c_str()) 00049 } 00050 }; 00051 00052 struct ShogunFeatureVectorCallback 00053 { 00054 ShogunFeatureVectorCallback(CDotFeatures* f) : dim(0), features(f) { } 00055 inline tapkee::IndexType dimension() const 00056 { 00057 if (features) 00058 return (dim = features->get_dim_feature_space()); 00059 00060 return 0; 00061 } 00062 inline void vector(int i, tapkee::DenseVector& v) const 00063 { 00064 v.setZero(); 00065 features->add_to_dense_vec(1.0,i,v.data(),dim); 00066 } 00067 mutable int32_t dim; 00068 CDotFeatures* features; 00069 }; 00070 00071 00072 CDenseFeatures<float64_t>* shogun::tapkee_embed(const shogun::TAPKEE_PARAMETERS_FOR_SHOGUN& parameters) 00073 { 00074 tapkee::LoggingSingleton::instance().set_logger_impl(new ShogunLoggerImplementation); 00075 tapkee::LoggingSingleton::instance().enable_benchmark(); 00076 tapkee::LoggingSingleton::instance().enable_info(); 00077 00078 pimpl_kernel_callback<CKernel> kernel_callback(parameters.kernel); 00079 pimpl_distance_callback<CDistance> distance_callback(parameters.distance); 00080 ShogunFeatureVectorCallback features_callback(parameters.features); 00081 00082 tapkee::DimensionReductionMethod method; 00083 #ifdef HAVE_ARPACK 00084 tapkee::EigenMethod eigen_method = tapkee::Arpack; 00085 #else 00086 tapkee::EigenMethod eigen_method = tapkee::Dense; 00087 #endif 00088 tapkee::NeighborsMethod neighbors_method = tapkee::CoverTree; 00089 size_t N = 0; 00090 00091 switch (parameters.method) 00092 { 00093 case SHOGUN_KERNEL_LOCALLY_LINEAR_EMBEDDING: 00094 case SHOGUN_LOCALLY_LINEAR_EMBEDDING: 00095 method = tapkee::KernelLocallyLinearEmbedding; 00096 N = parameters.kernel->get_num_vec_lhs(); 00097 break; 00098 case SHOGUN_NEIGHBORHOOD_PRESERVING_EMBEDDING: 00099 method = tapkee::NeighborhoodPreservingEmbedding; 00100 N = parameters.kernel->get_num_vec_lhs(); 00101 break; 00102 case SHOGUN_LOCAL_TANGENT_SPACE_ALIGNMENT: 00103 method = tapkee::KernelLocalTangentSpaceAlignment; 00104 N = parameters.kernel->get_num_vec_lhs(); 00105 break; 00106 case SHOGUN_LINEAR_LOCAL_TANGENT_SPACE_ALIGNMENT: 00107 method = tapkee::LinearLocalTangentSpaceAlignment; 00108 N = parameters.kernel->get_num_vec_lhs(); 00109 break; 00110 case SHOGUN_HESSIAN_LOCALLY_LINEAR_EMBEDDING: 00111 method = tapkee::HessianLocallyLinearEmbedding; 00112 N = parameters.kernel->get_num_vec_lhs(); 00113 break; 00114 case SHOGUN_DIFFUSION_MAPS: 00115 method = tapkee::DiffusionMap; 00116 N = parameters.distance->get_num_vec_lhs(); 00117 break; 00118 case SHOGUN_LAPLACIAN_EIGENMAPS: 00119 method = tapkee::LaplacianEigenmaps; 00120 N = parameters.distance->get_num_vec_lhs(); 00121 break; 00122 case SHOGUN_LOCALITY_PRESERVING_PROJECTIONS: 00123 method = tapkee::LocalityPreservingProjections; 00124 N = parameters.distance->get_num_vec_lhs(); 00125 break; 00126 case SHOGUN_MULTIDIMENSIONAL_SCALING: 00127 method = tapkee::MultidimensionalScaling; 00128 N = parameters.distance->get_num_vec_lhs(); 00129 break; 00130 case SHOGUN_LANDMARK_MULTIDIMENSIONAL_SCALING: 00131 method = tapkee::LandmarkMultidimensionalScaling; 00132 N = parameters.distance->get_num_vec_lhs(); 00133 break; 00134 case SHOGUN_ISOMAP: 00135 method = tapkee::Isomap; 00136 N = parameters.distance->get_num_vec_lhs(); 00137 break; 00138 case SHOGUN_LANDMARK_ISOMAP: 00139 method = tapkee::LandmarkIsomap; 00140 N = parameters.distance->get_num_vec_lhs(); 00141 break; 00142 case SHOGUN_STOCHASTIC_PROXIMITY_EMBEDDING: 00143 method = tapkee::StochasticProximityEmbedding; 00144 N = parameters.distance->get_num_vec_lhs(); 00145 break; 00146 case SHOGUN_FACTOR_ANALYSIS: 00147 method = tapkee::FactorAnalysis; 00148 N = parameters.features->get_num_vectors(); 00149 break; 00150 case SHOGUN_TDISTRIBUTED_STOCHASTIC_NEIGHBOR_EMBEDDING: 00151 method = tapkee::tDistributedStochasticNeighborEmbedding; 00152 N = parameters.features->get_num_vectors(); 00153 break; 00154 case SHOGUN_MANIFOLD_SCULPTING: 00155 method = tapkee::ManifoldSculpting; 00156 N = parameters.features->get_num_vectors(); 00157 break; 00158 } 00159 00160 std::vector<int32_t> indices(N); 00161 for (size_t i=0; i<N; i++) 00162 indices[i] = i; 00163 00164 tapkee::ParametersSet parameters_set = 00165 (tapkee::keywords::method=method, 00166 tapkee::keywords::eigen_method=eigen_method, 00167 tapkee::keywords::neighbors_method=neighbors_method, 00168 tapkee::keywords::num_neighbors=parameters.n_neighbors, 00169 tapkee::keywords::diffusion_map_timesteps = parameters.n_timesteps, 00170 tapkee::keywords::target_dimension = parameters.target_dimension, 00171 tapkee::keywords::spe_num_updates = parameters.spe_num_updates, 00172 tapkee::keywords::nullspace_shift = parameters.eigenshift, 00173 tapkee::keywords::landmark_ratio = parameters.landmark_ratio, 00174 tapkee::keywords::gaussian_kernel_width = parameters.gaussian_kernel_width, 00175 tapkee::keywords::spe_tolerance = parameters.spe_tolerance, 00176 tapkee::keywords::spe_global_strategy = parameters.spe_global_strategy, 00177 tapkee::keywords::max_iteration = parameters.max_iteration, 00178 tapkee::keywords::fa_epsilon = parameters.fa_epsilon, 00179 tapkee::keywords::sne_perplexity = parameters.sne_perplexity, 00180 tapkee::keywords::sne_theta = parameters.sne_theta, 00181 tapkee::keywords::squishing_rate = parameters.squishing_rate 00182 ); 00183 00184 tapkee::TapkeeOutput output = tapkee::embed(indices.begin(),indices.end(), 00185 kernel_callback,distance_callback,features_callback,parameters_set); 00186 tapkee::DenseMatrix result_embedding = output.embedding; 00187 // destroy projecting function 00188 output.projection.clear(); 00189 00190 SGMatrix<float64_t> feature_matrix(parameters.target_dimension,N); 00191 // TODO avoid copying 00192 for (uint32_t i=0; i<N; i++) 00193 { 00194 for (uint32_t j=0; j<parameters.target_dimension; j++) 00195 { 00196 feature_matrix(j,i) = result_embedding(i,j); 00197 } 00198 } 00199 return new CDenseFeatures<float64_t>(feature_matrix); 00200 } 00201 00202 #endif