SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingStringFeatures.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 #ifndef _STREAMING_STRINGFEATURES__H__
00011 #define _STREAMING_STRINGFEATURES__H__
00012 
00013 #include <shogun/lib/common.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/base/Parameter.h>
00016 #include <shogun/lib/DataType.h>
00017 #include <shogun/io/streaming/InputParser.h>
00018 
00019 #include <shogun/features/streaming/StreamingFeatures.h>
00020 #include <shogun/features/Alphabet.h>
00021 
00022 namespace shogun
00023 {
00027 template <class T> class CStreamingStringFeatures : public CStreamingFeatures
00028 {
      /* Streaming string features, templated on T: T is the type parameter of
       * the SGString<T> returned by get_vector(), of the T* current_string
       * buffer and of the CInputParser<T> member. Derives from
       * CStreamingFeatures.
       *
       * NOTE(review): only declarations are visible in this listing; the
       * member definitions live elsewhere. Remarks about behaviour below are
       * inferred from names/signatures and are marked as such - confirm
       * against the implementation. */
00029 public:
00030 
          /** Default constructor (no arguments). */
00038     CStreamingStringFeatures();
00039 
          /** Constructor taking a streaming file.
           *
           * @param file        streaming file object
           * @param is_labelled boolean flag; presumably whether the examples
           *                    carry labels (cf. has_labels) - confirm
           * @param size        integer size; presumably a parser buffer
           *                    size - TODO confirm meaning and units
           */
00048     CStreamingStringFeatures(CStreamingFile* file,
00049                  bool is_labelled,
00050                  int32_t size);
00051 
          /** Virtual destructor. */
00057     virtual ~CStreamingStringFeatures();
00058 
          /** Virtual, no arguments, no return value. By its name it selects
           * the reader used for vectors without labels - confirm. */
00068     virtual void set_vector_reader();
00069 
          /** Virtual, no arguments, no return value. By its name it selects
           * the reader used for vectors together with labels - confirm. */
00079     virtual void set_vector_and_label_reader();
00080 
          /** Choose an alphabet by enum value.
           *
           * @param alpha alphabet identifier (EAlphabet)
           */
00087     void use_alphabet(EAlphabet alpha);
00088 
          /** Choose an alphabet by object.
           *
           * @param alpha alphabet object; ownership/ref-counting of the
           *              pointer is not visible here
           */
00095     void use_alphabet(CAlphabet* alpha);
00096 
          /** Configure remapping from alphabet objects.
           *
           * @param ascii_alphabet  alphabet object for the ASCII side
           * @param binary_alphabet alphabet object for the binary side
           *
           * Presumably stored in alpha_ascii / alpha_bin and enabling
           * remap_to_bin - confirm in the implementation.
           */
00104     void set_remap(CAlphabet* ascii_alphabet, CAlphabet* binary_alphabet);
00105 
          /** Configure remapping from alphabet enum values.
           *
           * @param ascii_alphabet  ASCII-side alphabet, defaults to DNA
           * @param binary_alphabet binary-side alphabet, defaults to RAWDNA
           */
00113     void set_remap(EAlphabet ascii_alphabet=DNA, EAlphabet binary_alphabet=RAWDNA);
00114 
          /** @return a CAlphabet pointer; presumably the alphabet member.
           * Whether the caller receives an extra reference is not visible
           * here. */
00119     CAlphabet* get_alphabet();
00120 
          /** @return number of symbols as floatmax_t.
           *
           * NOTE(review): the num_symbols member below is int32_t while this
           * getter returns floatmax_t; whether it reads that member is not
           * visible here. */
00127     floatmax_t get_num_symbols();
00128 
          /** Virtual; presumably starts the input parser - confirm. */
00134     virtual void start_parser();
00135 
          /** Virtual; presumably stops the input parser - confirm. */
00141     virtual void end_parser();
00142 
          /** Virtual; advance to the next example.
           *
           * @return bool; presumably true when an example was obtained and
           *         false when none is left - confirm
           */
00151     virtual bool get_next_example();
00152 
          /** @return an SGString<T> by value; presumably the current
           * example's string (cf. current_sgstring) - confirm. */
00158     SGString<T> get_vector();
00159 
          /** @return a float64_t; presumably the current example's label
           * (cf. current_label) - confirm. */
00167     virtual float64_t get_label();
00168 
          /** Virtual; presumably signals that the current example is no
           * longer needed - confirm. */
00175     virtual void release_example();
00176 
          /** @return an int32_t; presumably the length of the current
           * vector (cf. current_length) - confirm. */
00182     virtual int32_t get_vector_length();
00183 
          /** @return the feature type (EFeatureType); const member. */
00189     virtual EFeatureType get_feature_type() const;
00190 
          /** @return the feature class (EFeatureClass); const member. */
00196     virtual EFeatureClass get_feature_class() const;
00197 
          /** @return a CFeatures pointer; by its name a duplicate of this
           * object. Ownership of the result is not visible here. */
00203     virtual CFeatures* duplicate() const;
00204 
          /** @return the fixed string "StreamingStringFeatures". */
00210     virtual const char* get_name() const { return "StreamingStringFeatures"; }
00211 
          /** @return number of vectors as int32_t; const member. How this is
           * defined for a stream is not visible here. */
00217     virtual int32_t get_num_vectors() const;
00218 
          /** @return number of features as int32_t.
           *
           * NOTE(review): unlike get_num_vectors(), this member is not
           * declared const. */
00224     virtual int32_t get_num_features();
00225 
00226 private:
00227 
          /** Private no-argument initialiser; presumably sets member
           * defaults - confirm. */
00232     void init();
00233 
          /** Private initialiser with the same parameter list as the
           * three-argument constructor.
           *
           * @param file        streaming file object
           * @param is_labelled boolean flag (see constructor)
           * @param size        integer size (see constructor)
           */
00241     void init(CStreamingFile *file, bool is_labelled, int32_t size);
00242 
00243 protected:
00244 
          /** Input parser instantiated for T. */
00246     CInputParser<T> parser;
00247 
          /** Alphabet object pointer (cf. use_alphabet / get_alphabet). */
00249     CAlphabet* alphabet;
00250 
          /** Alphabet pointer; by its name the ASCII side of the remap. */
00252     CAlphabet* alpha_ascii;
00253 
          /** Alphabet pointer; by its name the binary side of the remap. */
00255     CAlphabet* alpha_bin;
00256 
          /** Streaming file pointer; by its name the file being read. */
00258     CStreamingFile* working_file;
00259 
          /** SGString<T> value; by its name the current example. */
00261     SGString<T> current_sgstring;
00262 
          /** Raw T pointer; by its name the current string's data.
           * Ownership is not visible here. */
00264     T* current_string;
00265 
          /** int32_t; by its name the length of the current string. */
00267     int32_t current_length;
00268 
          /** float64_t; by its name the label of the current example. */
00270     float64_t current_label;
00271 
          /** Boolean flag; by its name whether examples have labels. */
00273     bool has_labels;
00274 
          /** Boolean flag; by its name whether remapping to the binary
           * alphabet is enabled. */
00276     bool remap_to_bin;
00277 
          /** int32_t symbol count (note: get_num_symbols() returns
           * floatmax_t). */
00279     int32_t num_symbols;
00280 };
00281 
00282 }
00283 #endif // _STREAMING_STRINGFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation