SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SparseFeatures.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2012 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  * Copyright (C) 2010 Berlin Institute of Technology
00012  */
00013 
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016 
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/DataType.h>
00019 #include <shogun/lib/SGSparseMatrix.h>
00020 #include <shogun/lib/Cache.h>
00021 #include <shogun/io/File.h>
00022 #include <shogun/io/LibSVMFile.h>
00023 
00024 #include <shogun/labels/RegressionLabels.h>
00025 #include <shogun/features/Features.h>
00026 #include <shogun/features/DotFeatures.h>
00027 #include <shogun/features/DenseFeatures.h>
00028 
00029 namespace shogun
00030 {
00031 
/* Forward declarations inside namespace shogun. Each class declared here
 * also has a correspondingly named header in the #include list above.
 * NOTE(review): presumably kept so this header still parses when the
 * includes are short-circuited by an include cycle - confirm before removing. */
00032 class CFile;
00033 class CLibSVMFile;
00034 class CRegressionLabels;
00035 class CFeatures;
00036 class CDotFeatures;
00037 template <class ST> class CDenseFeatures;
00038 template <class ST> class SGSparseMatrix;
00039 
/** Feature class templated on the element type ST that derives from
 * CDotFeatures and keeps its data in an SGSparseMatrix<ST> member
 * (sparse_feature_matrix) alongside a CCache of SGSparseVectorEntry<ST>
 * (feature_cache).
 *
 * Only declarations are visible in this header; the notes below describe
 * what the signatures show, and anything beyond that is marked as an
 * assumption to be confirmed against the implementation.
 */
00058 template <class ST> class CSparseFeatures : public CDotFeatures
00059 {
00060     public:
              /** Constructor taking an integer size, defaulting to 0.
               * NOTE(review): presumably the feature cache size - confirm. */
00065         CSparseFeatures(int32_t size=0);
00066 
              /** Constructor taking a sparse matrix by value. */
00072         CSparseFeatures(SGSparseMatrix<ST> sparse);
00073 
              /** Constructor taking a dense matrix by value.
               * NOTE(review): presumably converted to the sparse
               * representation - confirm. */
00079         CSparseFeatures(SGMatrix<ST> dense);
00080 
              /** Copy constructor. */
00082         CSparseFeatures(const CSparseFeatures & orig);
00083 
              /** Constructor taking a CFile pointer.
               * NOTE(review): presumably loads the features from it, like
               * load() - confirm. */
00088         CSparseFeatures(CFile* loader);
00089 
              /** Virtual destructor. */
00091         virtual ~CSparseFeatures();
00092 
              /** NOTE(review): presumably releases sparse_feature_matrix only
               * - confirm how this differs from free_sparse_features(). */
00097         void free_sparse_feature_matrix();
00098 
              /** NOTE(review): presumably releases the matrix and the feature
               * cache - confirm. */
00103         void free_sparse_features();
00104 
              /** Returns a CFeatures pointer.
               * NOTE(review): presumably a newly allocated copy of this
               * object - confirm ownership. */
00109         virtual CFeatures* duplicate() const;
00110 
              /** Returns a single ST value selected by two integers.
               * NOTE(review): presumably `num` is the vector index and
               * `index` the feature index within it - confirm. */
00120         ST get_feature(int32_t num, int32_t index);
00121 
              /** Returns an SGVector<ST> for vector `num`.
               * NOTE(review): presumably the dense expansion of that sparse
               * vector - confirm. */
00127         SGVector<ST> get_full_feature_vector(int32_t num);
00128 
              /** Returns an integer count for vector `num`.
               * NOTE(review): presumably its number of non-zero entries. */
00134         virtual int32_t get_nnz_features_for_vector(int32_t num);
00135 
              /** Returns vector `num` as an SGSparseVector<ST>.
               * NOTE(review): free_sparse_feature_vector() looks like the
               * matching release call - confirm. */
00145         SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00146 
              /** Overload of dense_dot working in the element type ST.
               * NOTE(review): presumably computes
               * alpha * <vector num, vec> + b with `vec` of length `dim`
               * - confirm. */
00159         ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00160 
              /** Takes a float64_t buffer `vec` of length `dim`; abs_val
               * defaults to false.
               * NOTE(review): presumably adds alpha times vector `num`
               * (or its absolute values) into `vec` - confirm. */
00172         void add_to_dense_vec(float64_t alpha, int32_t num,
00173                 float64_t* vec, int32_t dim, bool abs_val=false);
00174 
              /** NOTE(review): presumably releases a vector obtained via
               * get_sparse_feature_vector(num) - confirm. */
00181         void free_sparse_feature_vector(int32_t num);
00182 
              /** Returns a raw SGSparseVector<ST> pointer; num_feat and
               * num_vec are taken by non-const reference (out-parameters,
               * presumably the matrix dimensions).
               * NOTE(review): ownership of the returned pointer is not
               * visible here - confirm. */
00192         SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00193 
              /** Returns an SGSparseMatrix<ST> by value. */
00201         SGSparseMatrix<ST> get_sparse_feature_matrix();
00202 
              /** Returns a CSparseFeatures<ST> pointer.
               * NOTE(review): presumably a new object holding the transposed
               * matrix - confirm ownership. */
00209         CSparseFeatures<ST>* get_transposed();
00210 
              /** Raw-pointer overload of get_transposed(); num_feat and
               * num_vec are taken by non-const reference.
               * NOTE(review): ownership of the returned array is not visible
               * here - confirm. */
00222         SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00223 
              /** Takes a sparse matrix by value.
               * NOTE(review): presumably replaces sparse_feature_matrix. */
00231         void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00232 
              /** Returns an SGMatrix<ST> by value.
               * NOTE(review): presumably the dense expansion of the whole
               * sparse matrix - confirm. */
00239         SGMatrix<ST> get_full_feature_matrix();
00240 
              /** Takes a dense matrix by value.
               * NOTE(review): presumably converts it and stores the result
               * as the sparse matrix - confirm. */
00250         virtual void set_full_feature_matrix(SGMatrix<ST> full);
00251 
              /** Returns a bool; force_preprocessing defaults to false.
               * NOTE(review): meaning of the return value is not visible
               * here - confirm. */
00259         virtual bool apply_preprocessor(bool force_preprocessing=false);
00260 
              /** Takes a CDenseFeatures<ST> pointer.
               * NOTE(review): presumably fills this object from the dense
               * features - confirm. */
00267         void obtain_from_simple(CDenseFeatures<ST>* sf);
00268 
              /** Const accessor returning the number of vectors. */
00273         virtual int32_t get_num_vectors() const;
00274 
              /** Const accessor returning the number of features. */
00279         int32_t get_num_features() const;
00280 
              /** Takes the new number of features and returns an int32_t.
               * NOTE(review): presumably the previous value is returned
               * - confirm. */
00292         int32_t set_num_features(int32_t num);
00293 
              /** Const accessor returning an EFeatureClass value. */
00298         virtual EFeatureClass get_feature_class() const;
00299 
              /** Const accessor returning an EFeatureType value.
               * NOTE(review): presumably depends on ST - confirm. */
00304         virtual EFeatureType get_feature_type() const;
00305 
              /** NOTE(review): presumably releases vector `num`; relation to
               * free_sparse_feature_vector() is not visible here - confirm. */
00312         void free_feature_vector(int32_t num);
00313 
              /** Returns a 64-bit count.
               * NOTE(review): presumably over the whole matrix - confirm. */
00318         int64_t get_num_nonzero_entries();
00319 
              /** Takes and returns a float64_t pointer.
               * NOTE(review): presumably fills `sq` with one squared norm
               * per vector and returns `sq` - confirm. */
00327         float64_t* compute_squared(float64_t* sq);
00328 
              /** Operates on CSparseFeatures<float64_t> operands regardless
               * of this class's ST.
               * NOTE(review): presumably sq_lhs/sq_rhs are arrays as produced
               * by compute_squared() and idx_a/idx_b select the vectors in
               * lhs/rhs - confirm. */
00343         float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00344                 float64_t* sq_lhs, int32_t idx_a,
00345                 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00346                 int32_t idx_b);
00347 
              /** Takes a CFile pointer.
               * NOTE(review): presumably reads the features from it. */
00354         void load(CFile* loader);
00355 
              /** Takes a CLibSVMFile pointer and returns an
               * SGVector<float64_t>.
               * NOTE(review): presumably the labels read alongside the
               * features - confirm. */
00363         SGVector<float64_t> load_with_labels(CLibSVMFile* loader);
00364 
              /** Takes a CFile pointer.
               * NOTE(review): presumably writes the features to it. */
00371         void save(CFile* writer);
00372 
              /** Takes a CLibSVMFile pointer and a label vector.
               * NOTE(review): presumably writes features together with
               * `labels` - confirm. */
00380         void save_with_labels(CLibSVMFile* writer, SGVector<float64_t> labels);
00381 
              /** NOTE(review): presumably sorts the entries of each sparse
               * vector by feature index - confirm. */
00387         void sort_features();
00388 
              /** Const accessor returning the feature-space dimension. */
00396         virtual int32_t get_dim_feature_space() const;
00397 
              /** Takes a local vector index, another CDotFeatures object and
               * an index into it; returns a float64_t.
               * NOTE(review): presumably their dot product; whether `df` must
               * be of the same type is not visible here - confirm. */
00407         virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00408 
              /** float64_t overload of dense_dot taking a raw buffer `vec2`
               * and its length `vec2_len`. */
00417         virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len);
00418 
00419         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00420 
              /** Iterator state: a sparse vector plus two integer positions.
               * NOTE(review): presumably the object behind the void* used by
               * get_feature_iterator() / get_next_feature() /
               * free_feature_iterator() - confirm. */
00421         struct sparse_feature_iterator
00422         {
                  /** The sparse vector held by the iterator. */
00424             SGSparseVector<ST> sv;
00425 
                  /** Index of the vector. */
00427             int32_t vector_index;
00428 
                  /** NOTE(review): presumably the current position within
                   * sv.features - confirm. */
00430             int32_t index;
00431 
                  /** Prints sv.features, vector_index, sv.num_feat_entries
                   * and index through SG_SPRINT. */
00433             void print_info()
00434             {
00435                 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00436                         sv.features, vector_index, sv.num_feat_entries, index);
00437             }
00438         };
00439         #endif
00440 
              /** Returns an opaque iterator handle for vector `vector_index`.
               * NOTE(review): presumably must be released with
               * free_feature_iterator() - confirm. */
00452         virtual void* get_feature_iterator(int32_t vector_index);
00453 
              /** index and value are non-const references (out-parameters).
               * NOTE(review): presumably returns false once the iterator is
               * exhausted - confirm. */
00464         virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00465 
              /** Takes the opaque handle type returned by
               * get_feature_iterator(). */
00471         virtual void free_feature_iterator(void* iterator);
00472 
              /** Takes a vector of indices and returns a CFeatures pointer.
               * NOTE(review): presumably a new object containing only the
               * selected vectors - confirm ownership. */
00479         virtual CFeatures* copy_subset(SGVector<index_t> indices);
00480 
              /** Returns the literal name "SparseFeatures". */
00482         virtual const char* get_name() const { return "SparseFeatures"; }
00483 
00484     protected:
              /** Overridable hook returning SGSparseVectorEntry<ST>*; `len`
               * is a non-const reference and `target` defaults to NULL.
               * NOTE(review): presumably computes vector `num` on the fly,
               * writing into `target` when given - confirm. */
00495         virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00496             int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00497 
00498     private:
              /** Private initialisation helper.
               * NOTE(review): presumably shared by the constructors. */
00499         void init();
00500 
00501     protected:
00502 
              /** The sparse feature matrix held by this object. */
00504         SGSparseMatrix<ST> sparse_feature_matrix;
00505 
              /** Pointer to a cache of sparse vector entries.
               * NOTE(review): ownership and lifetime are not visible here. */
00507         CCache< SGSparseVectorEntry<ST> >* feature_cache;
00508 };
00509 }
00510 #endif /* _SPARSEFEATURES__H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation