// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2011-2012 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 * Copyright (C) 2010 Berlin Institute of Technology 00012 */ 00013 00014 #ifndef _SPARSEFEATURES__H__ 00015 #define _SPARSEFEATURES__H__ 00016 00017 #include <shogun/lib/common.h> 00018 #include <shogun/lib/DataType.h> 00019 #include <shogun/lib/SGSparseMatrix.h> 00020 #include <shogun/lib/Cache.h> 00021 #include <shogun/io/File.h> 00022 #include <shogun/io/LibSVMFile.h> 00023 00024 #include <shogun/labels/RegressionLabels.h> 00025 #include <shogun/features/Features.h> 00026 #include <shogun/features/DotFeatures.h> 00027 #include <shogun/features/DenseFeatures.h> 00028 00029 namespace shogun 00030 { 00031 00032 class CFile; 00033 class CLibSVMFile; 00034 class CRegressionLabels; 00035 class CFeatures; 00036 class CDotFeatures; 00037 template <class ST> class CDenseFeatures; 00038 template <class ST> class SGSparseMatrix; 00039 00058 template <class ST> class CSparseFeatures : public CDotFeatures 00059 { 00060 public: 00065 CSparseFeatures(int32_t size=0); 00066 00072 CSparseFeatures(SGSparseMatrix<ST> sparse); 00073 00079 CSparseFeatures(SGMatrix<ST> dense); 00080 00082 CSparseFeatures(const CSparseFeatures & orig); 00083 00088 CSparseFeatures(CFile* loader); 00089 00091 virtual ~CSparseFeatures(); 00092 00097 void free_sparse_feature_matrix(); 00098 00103 void free_sparse_features(); 00104 00109 virtual CFeatures* duplicate() const; 00110 00120 ST get_feature(int32_t num, int32_t index); 00121 00127 SGVector<ST> get_full_feature_vector(int32_t num); 00128 
00134 virtual int32_t get_nnz_features_for_vector(int32_t num); 00135 00145 SGSparseVector<ST> get_sparse_feature_vector(int32_t num); 00146 00159 ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b); 00160 00172 void add_to_dense_vec(float64_t alpha, int32_t num, 00173 float64_t* vec, int32_t dim, bool abs_val=false); 00174 00181 void free_sparse_feature_vector(int32_t num); 00182 00192 SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec); 00193 00201 SGSparseMatrix<ST> get_sparse_feature_matrix(); 00202 00209 CSparseFeatures<ST>* get_transposed(); 00210 00222 SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec); 00223 00231 void set_sparse_feature_matrix(SGSparseMatrix<ST> sm); 00232 00239 SGMatrix<ST> get_full_feature_matrix(); 00240 00250 virtual void set_full_feature_matrix(SGMatrix<ST> full); 00251 00259 virtual bool apply_preprocessor(bool force_preprocessing=false); 00260 00267 void obtain_from_simple(CDenseFeatures<ST>* sf); 00268 00273 virtual int32_t get_num_vectors() const; 00274 00279 int32_t get_num_features() const; 00280 00292 int32_t set_num_features(int32_t num); 00293 00298 virtual EFeatureClass get_feature_class() const; 00299 00304 virtual EFeatureType get_feature_type() const; 00305 00312 void free_feature_vector(int32_t num); 00313 00318 int64_t get_num_nonzero_entries(); 00319 00327 float64_t* compute_squared(float64_t* sq); 00328 00343 float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs, 00344 float64_t* sq_lhs, int32_t idx_a, 00345 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs, 00346 int32_t idx_b); 00347 00354 void load(CFile* loader); 00355 00363 SGVector<float64_t> load_with_labels(CLibSVMFile* loader); 00364 00371 void save(CFile* writer); 00372 00380 void save_with_labels(CLibSVMFile* writer, SGVector<float64_t> labels); 00381 00387 void sort_features(); 00388 00396 virtual int32_t get_dim_feature_space() const; 00397 00407 virtual float64_t dot(int32_t 
vec_idx1, CDotFeatures* df, int32_t vec_idx2); 00408 00417 virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len); 00418 00419 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00420 00421 struct sparse_feature_iterator 00422 { 00424 SGSparseVector<ST> sv; 00425 00427 int32_t vector_index; 00428 00430 int32_t index; 00431 00433 void print_info() 00434 { 00435 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n", 00436 sv.features, vector_index, sv.num_feat_entries, index); 00437 } 00438 }; 00439 #endif 00440 00452 virtual void* get_feature_iterator(int32_t vector_index); 00453 00464 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator); 00465 00471 virtual void free_feature_iterator(void* iterator); 00472 00479 virtual CFeatures* copy_subset(SGVector<index_t> indices); 00480 00482 virtual const char* get_name() const { return "SparseFeatures"; } 00483 00484 protected: 00495 virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num, 00496 int32_t& len, SGSparseVectorEntry<ST>* target=NULL); 00497 00498 private: 00499 void init(); 00500 00501 protected: 00502 00504 SGSparseMatrix<ST> sparse_feature_matrix; 00505 00507 CCache< SGSparseVectorEntry<ST> >* feature_cache; 00508 }; 00509 } 00510 #endif /* _SPARSEFEATURES__H__ */