SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
CSVFile.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evgeniy Andreev (gsomix)
00008  */
00009 
00010 #ifndef __CSVFILE_H__
00011 #define __CSVFILE_H__
00012 
00013 #include <shogun/io/File.h>
00014 
00015 #include <shogun/io/LineReader.h>
00016 #include <shogun/io/Parser.h>
00017 #include <shogun/lib/DelimiterTokenizer.h>
00018 #include <shogun/lib/v_array.h>
00019 
00020 namespace shogun
00021 {
00022 
/**
 * CFile subclass named "CSVFile" (see get_name()).
 *
 * Declares typed getter/setter overload families for vectors, dense matrices,
 * n-d arrays (getters only), sparse matrices and string lists, plus setters
 * for a delimiter character, a transpose flag and a number of lines to skip.
 *
 * NOTE(review): only declarations are visible in this header; everything
 * said below about runtime behavior is an assumption to be confirmed against
 * the implementation file and the CFile base class.
 */
00026 class CCSVFile : public CFile
00027 {
00028 public:
          /** Default constructor (no arguments). */
00030     CCSVFile();
00031 
          /** Construct from a C stdio stream.
           *
           * @param f    stdio FILE handle (presumably already opened - confirm)
           * @param name optional name, NULL by default; its use is not visible
           *             here (presumably forwarded to CFile - confirm)
           */
00037     CCSVFile(FILE* f, const char* name=NULL);
00038 
          /** Construct from an integer file descriptor.
           *
           * @param fd   file descriptor (presumably already opened - confirm)
           * @param mode mode string; accepted values are not visible here
           * @param name optional name, NULL by default
           */
00045     CCSVFile(int fd, const char* mode, const char* name=NULL);
00046 
          /** Construct from a file name.
           *
           * @param fname file name
           * @param rw    single-character mode, defaults to 'r'; other accepted
           *              values are not visible here (presumably 'w' - confirm)
           * @param name  optional name, NULL by default
           */
00053     CCSVFile(const char* fname, char rw='r', const char* name=NULL);
00054 
          /** Virtual destructor.
           * NOTE(review): the class holds four raw pointer members; whether they
           * are released here is not visible in this header - confirm.
           */
00056     virtual ~CCSVFile();
00057 
          /** Set the transpose flag.
           *
           * @param value new flag value (presumably stored in is_data_transposed
           *              - confirm)
           */
00062     void set_transpose(bool value);
00063 
          /** Set the delimiter character.
           *
           * @param delimiter delimiter character (presumably stored in
           *                  m_delimiter and used to split fields - confirm)
           */
00068     void set_delimiter(char delimiter);
00069 
          /** Set how many lines to skip.
           *
           * @param num_lines line count (presumably stored in m_num_to_skip and
           *                  applied at the start of the file - confirm)
           */
00074     void set_lines_to_skip(int32_t num_lines);
00075 
          /** Query statistics about the file.
           *
           * @param num_tokens non-const reference, so presumably an output
           *                   (tokens per line? - confirm)
           * @return an int32_t whose meaning is not visible here (presumably the
           *         number of lines - confirm)
           */
00081     int32_t get_stats(int32_t& num_tokens);
00082 
          /** Vector getters: twelve overloads, one per element type.
           *
           * @param vector reference to a pointer, so presumably set to the data
           *               that was read; buffer ownership is not visible here
           * @param len    reference, presumably set to the element count
           */
00090     virtual void get_vector(int8_t*& vector, int32_t& len);
00091     virtual void get_vector(uint8_t*& vector, int32_t& len);
00092     virtual void get_vector(char*& vector, int32_t& len);
00093     virtual void get_vector(int32_t*& vector, int32_t& len);
00094     virtual void get_vector(uint32_t*& vector, int32_t& len);
00095     virtual void get_vector(float64_t*& vector, int32_t& len);
00096     virtual void get_vector(float32_t*& vector, int32_t& len);
00097     virtual void get_vector(floatmax_t*& vector, int32_t& len);
00098     virtual void get_vector(int16_t*& vector, int32_t& len);
00099     virtual void get_vector(uint16_t*& vector, int32_t& len);
00100     virtual void get_vector(int64_t*& vector, int32_t& len);
00101     virtual void get_vector(uint64_t*& vector, int32_t& len);
00103 
          /** Dense matrix getters: twelve overloads, one per element type.
           *
           * @param matrix   reference to a pointer, presumably set to the data
           *                 that was read; memory layout is not visible here
           * @param num_feat reference, presumably set to the feature count
           * @param num_vec  reference, presumably set to the vector count
           */
00112     virtual void get_matrix(
00113             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00114     virtual void get_matrix(
00115             int8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00116     virtual void get_matrix(
00117             char*& matrix, int32_t& num_feat, int32_t& num_vec);
00118     virtual void get_matrix(
00119             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00120     virtual void get_matrix(
00121             uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00122     virtual void get_matrix(
00123             int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00124     virtual void get_matrix(
00125             uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00126     virtual void get_matrix(
00127             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00128     virtual void get_matrix(
00129             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00130     virtual void get_matrix(
00131             floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00132     virtual void get_matrix(
00133             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00134     virtual void get_matrix(
00135             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00137 
          /** N-d array getters: seven overloads (uint8_t, char, int32_t,
           * float32_t, float64_t, int16_t, uint16_t). Unlike the other getter
           * families there are no int8_t, uint32_t, 64-bit integer or floatmax_t
           * variants declared here.
           *
           * @param array    reference to a pointer, presumably set to the data
           * @param dims     reference to a pointer, presumably set to an array
           *                 of per-dimension sizes
           * @param num_dims reference, presumably set to the length of dims
           *
           * NOTE(review): whether these are actually supported for CSV is not
           * visible in this header - confirm in the implementation.
           */
00146     virtual void get_ndarray(
00147             uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00148     virtual void get_ndarray(
00149             char*& array, int32_t*& dims, int32_t& num_dims);
00150     virtual void get_ndarray(
00151             int32_t*& array, int32_t*& dims, int32_t& num_dims);
00152     virtual void get_ndarray(
00153             float32_t*& array, int32_t*& dims, int32_t& num_dims);
00154     virtual void get_ndarray(
00155             float64_t*& array, int32_t*& dims, int32_t& num_dims);
00156     virtual void get_ndarray(
00157             int16_t*& array, int32_t*& dims, int32_t& num_dims);
00158     virtual void get_ndarray(
00159             uint16_t*& array, int32_t*& dims, int32_t& num_dims);
00161 
          /** Sparse matrix getters: thirteen overloads, one per element type
           * (the same twelve as get_matrix plus bool).
           *
           * @param matrix   reference to a pointer to SGSparseVector<T>,
           *                 presumably set to an array of sparse vectors
           * @param num_feat reference, presumably set to the feature count
           * @param num_vec  reference, presumably set to the vector count
           *
           * NOTE(review): whether these are actually supported for CSV is not
           * visible in this header - confirm in the implementation.
           */
00170     virtual void get_sparse_matrix(
00171             SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00172     virtual void get_sparse_matrix(
00173             SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00174     virtual void get_sparse_matrix(
00175         SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00176     virtual void get_sparse_matrix(
00177             SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00178     virtual void get_sparse_matrix(
00179             SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00180     virtual void get_sparse_matrix(
00181             SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00182     virtual void get_sparse_matrix(
00183             SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00184     virtual void get_sparse_matrix(
00185             SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00186     virtual void get_sparse_matrix(
00187             SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00188     virtual void get_sparse_matrix(
00189             SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00190     virtual void get_sparse_matrix(
00191             SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00192     virtual void get_sparse_matrix(
00193             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00194     virtual void get_sparse_matrix(
00195             SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00197 
          /** String list getters: twelve overloads, one per element type.
           *
           * @param strings        reference to a pointer to SGString<T>,
           *                       presumably set to an array of strings
           * @param num_str        reference, presumably set to the string count
           * @param max_string_len reference, presumably set to the length of the
           *                       longest string
           */
00206     virtual void get_string_list(
00207             SGString<uint8_t>*& strings, int32_t& num_str,
00208             int32_t& max_string_len);
00209     virtual void get_string_list(
00210             SGString<int8_t>*& strings, int32_t& num_str,
00211             int32_t& max_string_len);
00212     virtual void get_string_list(
00213             SGString<char>*& strings, int32_t& num_str,
00214             int32_t& max_string_len);
00215     virtual void get_string_list(
00216             SGString<int32_t>*& strings, int32_t& num_str,
00217             int32_t& max_string_len);
00218     virtual void get_string_list(
00219             SGString<uint32_t>*& strings, int32_t& num_str,
00220             int32_t& max_string_len);
00221     virtual void get_string_list(
00222             SGString<int16_t>*& strings, int32_t& num_str,
00223             int32_t& max_string_len);
00224     virtual void get_string_list(
00225             SGString<uint16_t>*& strings, int32_t& num_str,
00226             int32_t& max_string_len);
00227     virtual void get_string_list(
00228             SGString<int64_t>*& strings, int32_t& num_str,
00229             int32_t& max_string_len);
00230     virtual void get_string_list(
00231             SGString<uint64_t>*& strings, int32_t& num_str,
00232             int32_t& max_string_len);
00233     virtual void get_string_list(
00234             SGString<float32_t>*& strings, int32_t& num_str,
00235             int32_t& max_string_len);
00236     virtual void get_string_list(
00237             SGString<float64_t>*& strings, int32_t& num_str,
00238             int32_t& max_string_len);
00239     virtual void get_string_list(
00240             SGString<floatmax_t>*& strings, int32_t& num_str,
00241             int32_t& max_string_len);
00243 
          /* The type-erased get_vector overload below is disabled (commented out). */
00245     /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/
00246 
          /** Vector setters: twelve overloads, one per element type.
           *
           * @param vector pointer to const elements (the data is not modified
           *               through this pointer)
           * @param len    element count passed by value
           */
00254     virtual void set_vector(const int8_t* vector, int32_t len);
00255     virtual void set_vector(const uint8_t* vector, int32_t len);
00256     virtual void set_vector(const char* vector, int32_t len);
00257     virtual void set_vector(const int32_t* vector, int32_t len);
00258     virtual void set_vector(const uint32_t* vector, int32_t len);
00259     virtual void set_vector(const float32_t* vector, int32_t len);
00260     virtual void set_vector(const float64_t* vector, int32_t len);
00261     virtual void set_vector(const floatmax_t* vector, int32_t len);
00262     virtual void set_vector(const int16_t* vector, int32_t len);
00263     virtual void set_vector(const uint16_t* vector, int32_t len);
00264     virtual void set_vector(const int64_t* vector, int32_t len);
00265     virtual void set_vector(const uint64_t* vector, int32_t len);
00267 
          /** Dense matrix setters: twelve overloads, one per element type.
           *
           * @param matrix   pointer to const elements; memory layout is not
           *                 visible here
           * @param num_feat feature count passed by value
           * @param num_vec  vector count passed by value
           */
00275     virtual void set_matrix(
00276             const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00277     virtual void set_matrix(
00278             const int8_t* matrix, int32_t num_feat, int32_t num_vec);
00279     virtual void set_matrix(
00280             const char* matrix, int32_t num_feat, int32_t num_vec);
00281     virtual void set_matrix(
00282             const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00283     virtual void set_matrix(
00284             const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00285     virtual void set_matrix(
00286             const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00287     virtual void set_matrix(
00288             const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00289     virtual void set_matrix(
00290             const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00291     virtual void set_matrix(
00292             const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00293     virtual void set_matrix(
00294             const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00295     virtual void set_matrix(
00296             const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00297     virtual void set_matrix(
00298             const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
00300 
          /** Sparse matrix setters: thirteen overloads, one per element type
           * (the same twelve as set_matrix plus bool).
           *
           * @param matrix   pointer to const SGSparseVector<T>, presumably an
           *                 array of num_vec sparse vectors - confirm
           * @param num_feat feature count passed by value
           * @param num_vec  vector count passed by value
           *
           * NOTE(review): whether these are actually supported for CSV is not
           * visible in this header - confirm in the implementation.
           */
00308     virtual void set_sparse_matrix(
00309             const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00310     virtual void set_sparse_matrix(
00311             const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00312     virtual void set_sparse_matrix(
00313             const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00314     virtual void set_sparse_matrix(
00315             const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00316     virtual void set_sparse_matrix(
00317             const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00318     virtual void set_sparse_matrix(
00319             const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00320     virtual void set_sparse_matrix(
00321             const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00322     virtual void set_sparse_matrix(
00323             const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00324     virtual void set_sparse_matrix(
00325             const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00326     virtual void set_sparse_matrix(
00327             const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
00328     virtual void set_sparse_matrix(
00329             const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00330     virtual void set_sparse_matrix(
00331             const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00332     virtual void set_sparse_matrix(
00333             const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00335 
          /** String list setters: twelve overloads, one per element type.
           *
           * @param strings pointer to const SGString<T>, presumably an array of
           *                num_str strings - confirm
           * @param num_str string count passed by value
           */
00344     virtual void set_string_list(
00345             const SGString<uint8_t>* strings, int32_t num_str);
00346     virtual void set_string_list(
00347             const SGString<int8_t>* strings, int32_t num_str);
00348     virtual void set_string_list(
00349             const SGString<char>* strings, int32_t num_str);
00350     virtual void set_string_list(
00351             const SGString<int32_t>* strings, int32_t num_str);
00352     virtual void set_string_list(
00353             const SGString<uint32_t>* strings, int32_t num_str);
00354     virtual void set_string_list(
00355             const SGString<int16_t>* strings, int32_t num_str);
00356     virtual void set_string_list(
00357             const SGString<uint16_t>* strings, int32_t num_str);
00358     virtual void set_string_list(
00359             const SGString<int64_t>* strings, int32_t num_str);
00360     virtual void set_string_list(
00361             const SGString<uint64_t>* strings, int32_t num_str);
00362     virtual void set_string_list(
00363             const SGString<float32_t>* strings, int32_t num_str);
00364     virtual void set_string_list(
00365             const SGString<float64_t>* strings, int32_t num_str);
00366     virtual void set_string_list(
00367             const SGString<floatmax_t>* strings, int32_t num_str);
00369 
          /** Static helper; needs no CCSVFile instance.
           *
           * @param delim delimiter character
           * @param s     substring, passed by value
           * @param ret   v_array of substrings passed by non-const reference
           *              (presumably receives the pieces of s split at delim;
           *              whether it is cleared first is not visible - confirm)
           */
00378     static void tokenize(char delim, substring s, v_array<substring> &ret);
00379 
          /** @return the string literal "CSVFile" */
00380     virtual const char* get_name() const { return "CSVFile"; }
00381 
00382 private:
          /** Private initialisation helper (presumably shared by the
           * constructors - confirm). */
00384     void init();
00385 
          /** Private helper; by its name presumably resets the configurable
           * members to default values, which are not visible here - confirm. */
00387     void init_with_defaults();
00388 
          /** Private helper taking a line count (presumably advances the reader
           * past num_lines lines - confirm). */
00390     void skip_lines(int32_t num_lines);
00391 
          /* Second private section (redundant with the one above): data members. */
00392 private:
          /** Raw pointer to a CLineReader; ownership is not visible here. */
00394     CLineReader* m_line_reader;
00395 
          /** Raw pointer to a CParser; ownership is not visible here. */
00397     CParser* m_parser;
00398 
          /** Raw pointer to a CDelimiterTokenizer (by its name presumably the
           * tokenizer that splits the input into lines - confirm). */
00400     CDelimiterTokenizer* m_line_tokenizer;
00401 
          /** Raw pointer to a second CDelimiterTokenizer (presumably splits a
           * line into fields - confirm). */
00403     CDelimiterTokenizer* m_tokenizer;
00404 
          /** Transpose flag (presumably written by set_transpose() - confirm). */
00406     bool is_data_transposed;
00407 
          /** Delimiter character (presumably written by set_delimiter() -
           * confirm). */
00409     char m_delimiter;
00410 
          /** Line-skip count (presumably written by set_lines_to_skip() -
           * confirm). */
00412     int32_t m_num_to_skip;
00413 };
00414 
00415 }
00416 
00417 #endif 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation