SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingSparseFeatures.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Modifications (W) 2013 Thoralf Klein
00009  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00010  */
00011 #ifndef _STREAMING_SPARSEFEATURES__H__
00012 #define _STREAMING_SPARSEFEATURES__H__
00013 
00014 #include <shogun/lib/common.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/features/streaming/StreamingDotFeatures.h>
00017 #include <shogun/lib/DataType.h>
00018 #include <shogun/io/streaming/InputParser.h>
00019 
00020 namespace shogun
00021 {
/** Class template CStreamingSparseFeatures<T>.
 *
 * Publicly derives from CStreamingDotFeatures and declares an interface for
 * stepping through examples (get_next_example / release_example), reading
 * the current sparse vector and label, and computing dot products against
 * dense vectors or another CStreamingDotFeatures object.
 *
 * State declared by this class: a CInputParser over SGSparseVectorEntry<T>
 * (parser), an SGSparseVector<T> (current_sgvector), an index
 * (current_vec_index), a label (current_label) and a feature count
 * (current_num_features).
 *
 * Only declarations are visible in this header; the member definitions live
 * elsewhere, so the per-member notes below that go beyond the signature are
 * marked as assumptions to be confirmed against the implementation.
 */
00044 template <class T> class CStreamingSparseFeatures : public CStreamingDotFeatures
00045 {
00046 public:
00047 
          /** Default constructor; takes no arguments. */
00055     CStreamingSparseFeatures();
00056 
          /** Constructor taking an input file, a labelled flag and a size.
           *
           * @param file pointer to the CStreamingFile to read from
           * @param is_labelled presumably whether the examples carry labels
           *        -- confirm against the definition
           * @param size presumably a parser buffer size; units are not
           *        visible here -- TODO confirm
           */
00065     CStreamingSparseFeatures(CStreamingFile* file,
00066                  bool is_labelled,
00067                  int32_t size);
00068 
          /** Virtual destructor. */
00074     virtual ~CStreamingSparseFeatures();
00075 
          /** Virtual, no arguments, no return value.
           * NOTE(review): by its name this selects the reader used for
           * unlabelled vectors -- confirm against the definition.
           */
00085     virtual void set_vector_reader();
00086 
          /** Virtual, no arguments, no return value.
           * NOTE(review): by its name this selects the reader used for
           * vector-plus-label examples -- confirm against the definition.
           */
00096     virtual void set_vector_and_label_reader();
00097 
          /** Virtual, no arguments, no return value.
           * NOTE(review): presumably starts the `parser` member -- confirm.
           */
00103     virtual void start_parser();
00104 
          /** Virtual, no arguments, no return value.
           * NOTE(review): presumably stops the `parser` member -- confirm.
           */
00110     virtual void end_parser();
00111 
          /** Virtual; advances to the next example.
           *
           * @return a bool; presumably true when an example was obtained and
           *         false otherwise -- confirm against the definition
           */
00120     virtual bool get_next_example();
00121 
          /** Returns a single value of type T for the given index.
           *
           * @param index feature index; valid range and the result for an
           *        absent entry are not visible here -- TODO confirm
           * @return value of type T
           */
00128     T get_feature(int32_t index);
00129 
          /** Returns an SGSparseVector<T> by value.
           * NOTE(review): presumably the `current_sgvector` member -- confirm.
           */
00135     SGSparseVector<T> get_vector();
00136 
          /** Virtual; returns a float64_t label.
           * NOTE(review): presumably the `current_label` member -- confirm.
           */
00144     virtual float64_t get_label();
00145 
          /** Virtual, no arguments, no return value.
           * NOTE(review): presumably pairs with get_next_example() to hand
           * the current example back to the parser -- confirm.
           */
00152     virtual void release_example();
00153 
          /** Virtual, no arguments, no return value.
           * NOTE(review): presumably rewinds the input -- confirm.
           */
00158     virtual void reset_stream();
00159 
          /** Non-virtual setter taking a feature count.
           *
           * @param num new number of features
           * @return an int32_t; whether this is the previous or the new
           *         value is not visible here -- TODO confirm
           */
00171     int32_t set_num_features(int32_t num);
00172 
          /** Virtual, const; returns the feature-space dimensionality as
           * an int32_t.
           */
00180     virtual int32_t get_dim_feature_space() const;
00181 
          /** Virtual; dot product against another streaming feature object.
           *
           * @param df the other CStreamingDotFeatures object
           * @return result as float32_t (note: single precision, unlike the
           *         float64_t dense_dot overload below)
           */
00192     virtual float32_t dot(CStreamingDotFeatures *df);
00193 
          /** Static helper over two arrays of sparse entries.
           *
           * @param alpha value of type T; presumably a scaling factor --
           *        confirm
           * @param avec first array of SGSparseVectorEntry<T>
           * @param alen int32_t paired with avec; presumably its length
           * @param bvec second array of SGSparseVectorEntry<T>
           * @param blen int32_t paired with bvec; presumably its length
           * @return value of type T
           */
00204     static T sparse_dot(T alpha, SGSparseVectorEntry<T>* avec, int32_t alen, SGSparseVectorEntry<T>* bvec, int32_t blen);
00205 
          /** Non-virtual dense_dot overload operating in type T.
           *
           * @param alpha value of type T; presumably a scaling factor --
           *        confirm
           * @param vec dense array of T
           * @param dim int32_t paired with vec; presumably its length
           * @param b value of type T; presumably an additive bias --
           *        TODO confirm
           * @return value of type T
           */
00215     T dense_dot(T alpha, T* vec, int32_t dim, T b);
00216 
          /** Virtual dense_dot overload for a read-only float64_t array.
           *
           * @param vec2 dense array (const)
           * @param vec2_len int32_t paired with vec2; presumably its length
           * @return result as float64_t
           */
00225     virtual float64_t dense_dot(const float64_t* vec2, int32_t vec2_len);
00226 
          /** Virtual dense_dot overload for a read-only float32_t array.
           *
           * @param vec2 dense array (const)
           * @param vec2_len int32_t paired with vec2; presumably its length
           * @return result as float32_t
           */
00235     virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len);
00236 
          /** Virtual; float64_t overload. vec2 is a non-const pointer, so it
           * is the array this call may write to.
           *
           * @param alpha float64_t; presumably a scaling factor -- confirm
           * @param vec2 dense array (mutable)
           * @param vec2_len int32_t paired with vec2; presumably its length
           * @param abs_val defaults to false; presumably adds absolute
           *        values when true -- confirm
           */
00246     virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val=false);
00247 
          /** Virtual; float32_t overload with the same parameter layout as
           * the float64_t version. abs_val defaults to false.
           */
00257     virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val=false);
00258 
          /** Returns a count of non-zero entries as int64_t.
           * NOTE(review): whether this covers only the current vector is not
           * visible here -- confirm.
           */
00264     int64_t get_num_nonzero_entries();
00265 
          /** Returns a float32_t.
           * NOTE(review): presumably the sum of squared entries of the
           * current vector -- confirm against the definition.
           */
00271     float32_t compute_squared();
00272 
          /** No arguments, no return value.
           * NOTE(review): presumably sorts the current vector's entries by
           * feature index -- confirm sort key and order.
           */
00278     void sort_features();
00279 
          /** Virtual; returns a feature count as int32_t.
           * NOTE(review): presumably `current_num_features` -- confirm.
           */
00285     virtual int32_t get_num_features();
00286 
          /** Virtual; returns an int32_t.
           * NOTE(review): by its name, the number of non-zero features of
           * the current vector -- confirm.
           */
00292     virtual int32_t get_nnz_features_for_vector();
00293 
          /** Virtual, const; returns the EFeatureType of this object. */
00299     virtual EFeatureType get_feature_type() const;
00300 
          /** Virtual, const; returns the EFeatureClass of this object. */
00306     virtual EFeatureClass get_feature_class() const;
00307 
          /** Virtual, const; returns a CFeatures pointer.
           * NOTE(review): ownership of the returned object is not visible
           * here -- confirm before relying on it.
           */
00313     virtual CFeatures* duplicate() const;
00314 
          /** Returns the string literal "StreamingSparseFeatures". */
00320     virtual const char* get_name() const { return "StreamingSparseFeatures"; }
00321 
          /** Virtual, const; returns a vector count as int32_t.
           * NOTE(review): the value reported for a stream is not visible
           * here -- confirm.
           */
00327     virtual int32_t get_num_vectors() const;
00328 
00329 private:
          /** Private virtual initializer without arguments.
           * NOTE(review): presumably shared setup called by the
           * constructors -- confirm.
           */
00334     virtual void init();
00335 
          /** Private virtual initializer with the same parameter list as the
           * three-argument constructor (file, is_labelled, size).
           */
00343     virtual void init(CStreamingFile *file, bool is_labelled, int32_t size);
00344 
00345 protected:
          /** Input parser parameterized on SGSparseVectorEntry<T>. */
00347     CInputParser< SGSparseVectorEntry<T> > parser;
00348 
          /** Sparse vector member; by name, the current example's vector. */
00350     SGSparseVector<T> current_sgvector;
00351 
          /** Index member; by name, the index of the current vector. */
00353     index_t current_vec_index;
00354 
          /** Label member; by name, the label of the current example. */
00356     float64_t current_label;
00357 
          /** Feature-count member; by name, the current number of features. */
00359     int32_t current_num_features;
00360 };
00361 
00362 }
00363 #endif // _STREAMING_SPARSEFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation