// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Shashwat Lal Das 00008 * Modifications (W) 2013 Thoralf Klein 00009 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00010 */ 00011 #ifndef _STREAMING_SPARSEFEATURES__H__ 00012 #define _STREAMING_SPARSEFEATURES__H__ 00013 00014 #include <shogun/lib/common.h> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/features/streaming/StreamingDotFeatures.h> 00017 #include <shogun/lib/DataType.h> 00018 #include <shogun/io/streaming/InputParser.h> 00019 00020 namespace shogun 00021 { 00044 template <class T> class CStreamingSparseFeatures : public CStreamingDotFeatures 00045 { 00046 public: 00047 00055 CStreamingSparseFeatures(); 00056 00065 CStreamingSparseFeatures(CStreamingFile* file, 00066 bool is_labelled, 00067 int32_t size); 00068 00074 virtual ~CStreamingSparseFeatures(); 00075 00085 virtual void set_vector_reader(); 00086 00096 virtual void set_vector_and_label_reader(); 00097 00103 virtual void start_parser(); 00104 00110 virtual void end_parser(); 00111 00120 virtual bool get_next_example(); 00121 00128 T get_feature(int32_t index); 00129 00135 SGSparseVector<T> get_vector(); 00136 00144 virtual float64_t get_label(); 00145 00152 virtual void release_example(); 00153 00158 virtual void reset_stream(); 00159 00171 int32_t set_num_features(int32_t num); 00172 00180 virtual int32_t get_dim_feature_space() const; 00181 00192 virtual float32_t dot(CStreamingDotFeatures *df); 00193 00204 static T sparse_dot(T alpha, SGSparseVectorEntry<T>* avec, int32_t alen, SGSparseVectorEntry<T>* bvec, int32_t blen); 00205 00215 T dense_dot(T alpha, T* vec, int32_t dim, T b); 00216 00225 virtual float64_t dense_dot(const float64_t* 
vec2, int32_t vec2_len); 00226 00235 virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len); 00236 00246 virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val=false); 00247 00257 virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val=false); 00258 00264 int64_t get_num_nonzero_entries(); 00265 00271 float32_t compute_squared(); 00272 00278 void sort_features(); 00279 00285 virtual int32_t get_num_features(); 00286 00292 virtual int32_t get_nnz_features_for_vector(); 00293 00299 virtual EFeatureType get_feature_type() const; 00300 00306 virtual EFeatureClass get_feature_class() const; 00307 00313 virtual CFeatures* duplicate() const; 00314 00320 virtual const char* get_name() const { return "StreamingSparseFeatures"; } 00321 00327 virtual int32_t get_num_vectors() const; 00328 00329 private: 00334 virtual void init(); 00335 00343 virtual void init(CStreamingFile *file, bool is_labelled, int32_t size); 00344 00345 protected: 00347 CInputParser< SGSparseVectorEntry<T> > parser; 00348 00350 SGSparseVector<T> current_sgvector; 00351 00353 index_t current_vec_index; 00354 00356 float64_t current_label; 00357 00359 int32_t current_num_features; 00360 }; 00361 00362 } 00363 #endif // _STREAMING_SPARSEFEATURES__H__