SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
LibSVMFile.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evgeniy Andreev (gsomix)
00008  * Written (W) 2010 Soeren Sonnenburg
00009  */
00010 
00011 #ifndef __LIBSVMFILE_H__
00012 #define __LIBSVMFILE_H__
00013 
00014 #include <shogun/io/File.h>
00015 
00016 #include <shogun/io/LineReader.h>
00017 #include <shogun/io/Parser.h>
00018 #include <shogun/lib/DelimiterTokenizer.h>
00019 
00020 namespace shogun
00021 {
00022 
/**
 * File class derived from CFile; get_name() reports "LibSVMFile".
 *
 * What this header shows:
 *  - The sparse-matrix accessors (get_sparse_matrix / set_sparse_matrix,
 *    with and without labels) are declared without a body, so their
 *    definitions live outside this header.
 *  - Every dense vector, matrix, ndarray and string-list accessor is
 *    defined inline with an empty body: calling one does nothing and
 *    leaves its arguments untouched.
 *
 * NOTE(review): presumably this reads/writes the LibSVM text format
 * (one "label index:value ..." record per line) -- confirm against the
 * implementation file.
 * NOTE(review): the class stores raw pointers and declares no copy
 * constructor or assignment operator here; ownership and lifetime are
 * not visible in this header -- confirm in the implementation.
 */
00030 class CLibSVMFile : public CFile
00031 {
00032 public:
          /** Default constructor (defined outside this header). */
00034     CLibSVMFile();
00035 
          /**
           * Construct from a C FILE stream.
           *
           * @param f    C stream handle
           * @param name optional name string, defaults to NULL; its meaning is
           *             not visible here -- TODO confirm against CFile
           */
00041     CLibSVMFile(FILE* f, const char* name=NULL);
00042 
          /**
           * Construct from a file name.
           *
           * @param fname file name
           * @param rw    mode character, defaults to 'r' (presumably "read";
           *              other accepted values are not visible here -- confirm)
           * @param name  optional name string, defaults to NULL
           */
00049     CLibSVMFile(const char* fname, char rw='r', const char* name=NULL);
00050 
          /** Virtual destructor (defined outside this header). */
00052     virtual ~CLibSVMFile();
00053 
          /**
           * Dense vector getters, one overload per element type.
           *
           * All bodies are empty: neither `vector` nor `len` is modified, so a
           * caller must not rely on them being set after the call.
           */
00061     virtual void get_vector(int8_t*& vector, int32_t& len) { };
00062     virtual void get_vector(uint8_t*& vector, int32_t& len) { };
00063     virtual void get_vector(char*& vector, int32_t& len) { };
00064     virtual void get_vector(int32_t*& vector, int32_t& len) { };
00065     virtual void get_vector(uint32_t*& vector, int32_t& len) { };
00066     virtual void get_vector(float64_t*& vector, int32_t& len) { };
00067     virtual void get_vector(float32_t*& vector, int32_t& len) { };
00068     virtual void get_vector(floatmax_t*& vector, int32_t& len) { };
00069     virtual void get_vector(int16_t*& vector, int32_t& len) { };
00070     virtual void get_vector(uint16_t*& vector, int32_t& len) { };
00071     virtual void get_vector(int64_t*& vector, int32_t& len) { };
00072     virtual void get_vector(uint64_t*& vector, int32_t& len) { };
00074 
          /**
           * Dense matrix getters, one overload per element type.
           *
           * All bodies are empty: `matrix`, `num_feat` and `num_vec` are left
           * unmodified.
           */
00083     virtual void get_matrix(
00084             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00085     virtual void get_matrix(
00086             int8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00087     virtual void get_matrix(
00088             char*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00089     virtual void get_matrix(
00090             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00091     virtual void get_matrix(
00092             uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00093     virtual void get_matrix(
00094             int64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00095     virtual void get_matrix(
00096             uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00097     virtual void get_matrix(
00098             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00099     virtual void get_matrix(
00100             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00101     virtual void get_matrix(
00102             floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00103     virtual void get_matrix(
00104             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00105     virtual void get_matrix(
00106             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { };
00108 
          /**
           * N-dimensional array getters for a subset of element types.
           *
           * All bodies are empty: `array`, `dims` and `num_dims` are left
           * unmodified.
           */
00117     virtual void get_ndarray(
00118             uint8_t*& array, int32_t*& dims, int32_t& num_dims) { };
00119     virtual void get_ndarray(
00120             char*& array, int32_t*& dims, int32_t& num_dims) { };
00121     virtual void get_ndarray(
00122             int32_t*& array, int32_t*& dims, int32_t& num_dims) { };
00123     virtual void get_ndarray(
00124             float32_t*& array, int32_t*& dims, int32_t& num_dims) { };
00125     virtual void get_ndarray(
00126             float64_t*& array, int32_t*& dims, int32_t& num_dims){ };
00127     virtual void get_ndarray(
00128             int16_t*& array, int32_t*& dims, int32_t& num_dims){ };
00129     virtual void get_ndarray(
00130             uint16_t*& array, int32_t*& dims, int32_t& num_dims){ };
00132 
          /**
           * Sparse matrix getters without labels, one overload per element type.
           *
           * Declared only; the definitions are outside this header.
           *
           * NOTE(review): going by the parameter names, `matrix` presumably
           * receives an array of `num_vec` sparse vectors and `num_feat` the
           * feature count -- confirm against the implementation.
           */
00141     virtual void get_sparse_matrix(
00142             SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00143     virtual void get_sparse_matrix(
00144             SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00145     virtual void get_sparse_matrix(
00146             SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00147     virtual void get_sparse_matrix(
00148             SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00149     virtual void get_sparse_matrix(
00150             SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00151     virtual void get_sparse_matrix(
00152             SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00153     virtual void get_sparse_matrix(
00154             SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00155     virtual void get_sparse_matrix(
00156             SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00157     virtual void get_sparse_matrix(
00158             SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00159     virtual void get_sparse_matrix(
00160             SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00161     virtual void get_sparse_matrix(
00162             SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00163     virtual void get_sparse_matrix(
00164             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00165     virtual void get_sparse_matrix(
00166             SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00168 
          /**
           * Sparse matrix getters with an additional label output, one overload
           * per element type. Labels are always float64_t regardless of the
           * matrix element type.
           *
           * Declared only; the definitions are outside this header.
           * `load_labels` defaults to true.
           *
           * NOTE(review): what `labels` holds when `load_labels` is false is not
           * visible here -- confirm before relying on it.
           */
00177     virtual void get_sparse_matrix(
00178             SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec,
00179             float64_t*& labels, bool load_labels=true);
00180     virtual void get_sparse_matrix(
00181             SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00182             float64_t*& labels, bool load_labels=true);
00183     virtual void get_sparse_matrix(
00184             SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00185             float64_t*& labels, bool load_labels=true);
00186     virtual void get_sparse_matrix(
00187             SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec,
00188             float64_t*& labels, bool load_labels=true);
00189     virtual void get_sparse_matrix(
00190             SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00191             float64_t*& labels, bool load_labels=true);
00192     virtual void get_sparse_matrix(
00193             SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00194             float64_t*& labels, bool load_labels=true);
00195     virtual void get_sparse_matrix(
00196             SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00197             float64_t*& labels, bool load_labels=true);
00198     virtual void get_sparse_matrix(
00199             SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00200             float64_t*& labels, bool load_labels=true);
00201     virtual void get_sparse_matrix(
00202             SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00203             float64_t*& labels, bool load_labels=true);
00204     virtual void get_sparse_matrix(
00205             SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00206             float64_t*& labels, bool load_labels=true);
00207     virtual void get_sparse_matrix(
00208             SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00209             float64_t*& labels, bool load_labels=true);
00210     virtual void get_sparse_matrix(
00211             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00212             float64_t*& labels, bool load_labels=true);
00213     virtual void get_sparse_matrix(
00214             SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec,
00215             float64_t*& labels, bool load_labels=true);
00216 
          /**
           * String list getters, one overload per element type.
           *
           * All bodies are empty: `strings`, `num_str` and `max_string_len` are
           * left unmodified.
           */
00225     virtual void get_string_list(
00226             SGString<uint8_t>*& strings, int32_t& num_str,
00227             int32_t& max_string_len) { };
00228     virtual void get_string_list(
00229             SGString<int8_t>*& strings, int32_t& num_str,
00230             int32_t& max_string_len) { };
00231     virtual void get_string_list(
00232             SGString<char>*& strings, int32_t& num_str,
00233             int32_t& max_string_len) { };
00234     virtual void get_string_list(
00235             SGString<int32_t>*& strings, int32_t& num_str,
00236             int32_t& max_string_len) { };
00237     virtual void get_string_list(
00238             SGString<uint32_t>*& strings, int32_t& num_str,
00239             int32_t& max_string_len) { };
00240     virtual void get_string_list(
00241             SGString<int16_t>*& strings, int32_t& num_str,
00242             int32_t& max_string_len) { };
00243     virtual void get_string_list(
00244             SGString<uint16_t>*& strings, int32_t& num_str,
00245             int32_t& max_string_len) { };
00246     virtual void get_string_list(
00247             SGString<int64_t>*& strings, int32_t& num_str,
00248             int32_t& max_string_len) { };
00249     virtual void get_string_list(
00250             SGString<uint64_t>*& strings, int32_t& num_str,
00251             int32_t& max_string_len) { };
00252     virtual void get_string_list(
00253             SGString<float32_t>*& strings, int32_t& num_str,
00254             int32_t& max_string_len) { };
00255     virtual void get_string_list(
00256             SGString<float64_t>*& strings, int32_t& num_str,
00257             int32_t& max_string_len) { };
00258     virtual void get_string_list(
00259             SGString<floatmax_t>*& strings, int32_t& num_str,
00260             int32_t& max_string_len) { };
00262 
          /* The type-erased get_vector overload below is commented out, i.e.
           * not part of this class's interface. */
00264     /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/
00265 
          /**
           * Dense vector setters, one overload per element type.
           *
           * All bodies are empty: the call does nothing with `vector` or `len`.
           */
00273     virtual void set_vector(const int8_t* vector, int32_t len) { };
00274     virtual void set_vector(const uint8_t* vector, int32_t len) { };
00275     virtual void set_vector(const char* vector, int32_t len) { };
00276     virtual void set_vector(const int32_t* vector, int32_t len) { };
00277     virtual void set_vector(const uint32_t* vector, int32_t len) { };
00278     virtual void set_vector(const float32_t* vector, int32_t len) { };
00279     virtual void set_vector(const float64_t* vector, int32_t len) { };
00280     virtual void set_vector(const floatmax_t* vector, int32_t len) { };
00281     virtual void set_vector(const int16_t* vector, int32_t len) { };
00282     virtual void set_vector(const uint16_t* vector, int32_t len) { };
00283     virtual void set_vector(const int64_t* vector, int32_t len) { };
00284     virtual void set_vector(const uint64_t* vector, int32_t len) { };
00286 
          /**
           * Dense matrix setters, one overload per element type.
           *
           * All bodies are empty: the call does nothing with its arguments.
           */
00294     virtual void set_matrix(
00295             const uint8_t* matrix, int32_t num_feat, int32_t num_vec) { };
00296     virtual void set_matrix(
00297             const int8_t* matrix, int32_t num_feat, int32_t num_vec) { };
00298     virtual void set_matrix(
00299             const char* matrix, int32_t num_feat, int32_t num_vec) { };
00300     virtual void set_matrix(
00301             const int32_t* matrix, int32_t num_feat, int32_t num_vec) { };
00302     virtual void set_matrix(
00303             const uint32_t* matrix, int32_t num_feat, int32_t num_vec) { };
00304     virtual void set_matrix(
00305             const int64_t* matrix, int32_t num_feat, int32_t num_vec) { };
00306     virtual void set_matrix(
00307             const uint64_t* matrix, int32_t num_feat, int32_t num_vec) { };
00308     virtual void set_matrix(
00309             const float32_t* matrix, int32_t num_feat, int32_t num_vec) { };
00310     virtual void set_matrix(
00311             const float64_t* matrix, int32_t num_feat, int32_t num_vec) { };
00312     virtual void set_matrix(
00313             const floatmax_t* matrix, int32_t num_feat, int32_t num_vec) { };
00314     virtual void set_matrix(
00315             const int16_t* matrix, int32_t num_feat, int32_t num_vec) { };
00316     virtual void set_matrix(
00317             const uint16_t* matrix, int32_t num_feat, int32_t num_vec) { };
00319 
          /**
           * Sparse matrix setters without labels, one overload per element type.
           *
           * Declared only; the definitions are outside this header.
           *
           * NOTE(review): presumably `matrix` points to `num_vec` sparse vectors
           * with `num_feat` features each -- confirm against the implementation.
           */
00327     virtual void set_sparse_matrix(
00328             const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec);
00329     virtual void set_sparse_matrix(
00330             const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00331     virtual void set_sparse_matrix(
00332             const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec);
00333     virtual void set_sparse_matrix(
00334             const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec);
00335     virtual void set_sparse_matrix(
00336             const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00337     virtual void set_sparse_matrix(
00338             const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00339     virtual void set_sparse_matrix(
00340             const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00341     virtual void set_sparse_matrix(
00342             const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00343     virtual void set_sparse_matrix(
00344             const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00345     virtual void set_sparse_matrix(
00346             const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec);
00347     virtual void set_sparse_matrix(
00348             const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00349     virtual void set_sparse_matrix(
00350             const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00351     virtual void set_sparse_matrix(
00352             const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00354 
          /**
           * Sparse matrix setters with labels, one overload per element type.
           * Labels are always passed as float64_t regardless of the matrix
           * element type.
           *
           * Declared only; the definitions are outside this header. Unlike the
           * matching getters there is no flag to omit labels.
           *
           * NOTE(review): presumably `labels` must hold `num_vec` entries, and
           * whether NULL is accepted is not visible here -- confirm.
           */
00362     virtual void set_sparse_matrix(
00363             const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec,
00364             const float64_t* labels);
00365     virtual void set_sparse_matrix(
00366             const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec,
00367             const float64_t* labels);
00368     virtual void set_sparse_matrix(
00369             const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec,
00370             const float64_t* labels);
00371     virtual void set_sparse_matrix(
00372             const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec,
00373             const float64_t* labels);
00374     virtual void set_sparse_matrix(
00375             const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec,
00376             const float64_t* labels);
00377     virtual void set_sparse_matrix(
00378             const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec,
00379             const float64_t* labels);
00380     virtual void set_sparse_matrix(
00381             const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec,
00382             const float64_t* labels);
00383     virtual void set_sparse_matrix(
00384             const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec,
00385             const float64_t* labels);
00386     virtual void set_sparse_matrix(
00387             const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec,
00388             const float64_t* labels);
00389     virtual void set_sparse_matrix(
00390             const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec,
00391             const float64_t* labels);
00392     virtual void set_sparse_matrix(
00393             const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec,
00394             const float64_t* labels);
00395     virtual void set_sparse_matrix(
00396             const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec,
00397             const float64_t* labels);
00398     virtual void set_sparse_matrix(
00399             const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec,
00400             const float64_t* labels);
00402 
00403 
          /**
           * String list setters, one overload per element type.
           *
           * All bodies are empty: the call does nothing with its arguments.
           */
00412     virtual void set_string_list(
00413             const SGString<uint8_t>* strings, int32_t num_str) { };
00414     virtual void set_string_list(
00415             const SGString<int8_t>* strings, int32_t num_str) { };
00416     virtual void set_string_list(
00417             const SGString<char>* strings, int32_t num_str) { };
00418     virtual void set_string_list(
00419             const SGString<int32_t>* strings, int32_t num_str) { };
00420     virtual void set_string_list(
00421             const SGString<uint32_t>* strings, int32_t num_str) { };
00422     virtual void set_string_list(
00423             const SGString<int16_t>* strings, int32_t num_str) { };
00424     virtual void set_string_list(
00425             const SGString<uint16_t>* strings, int32_t num_str) { };
00426     virtual void set_string_list(
00427             const SGString<int64_t>* strings, int32_t num_str) { };
00428     virtual void set_string_list(
00429             const SGString<uint64_t>* strings, int32_t num_str) { };
00430     virtual void set_string_list(
00431             const SGString<float32_t>* strings, int32_t num_str) { };
00432     virtual void set_string_list(
00433             const SGString<float64_t>* strings, int32_t num_str) { };
00434     virtual void set_string_list(
00435             const SGString<floatmax_t>* strings, int32_t num_str) { };
00437 
          /** @return the string literal "LibSVMFile" */
00438     virtual const char* get_name() const { return "LibSVMFile"; }
00439 
00440 private:
          /** Private helper, defined outside this header.
           *  NOTE(review): presumably shared constructor setup -- confirm. */
00442     void init();
00443 
          /** Private helper, defined outside this header.
           *  NOTE(review): presumably assigns default values to the members
           *  below -- confirm. */
00445     void init_with_defaults();
00446 
          /** Private helper returning a 32-bit count, defined outside this header.
           *  NOTE(review): presumably the number of lines in the underlying
           *  file; whether it moves the read position is not visible -- confirm. */
00448     int32_t get_num_lines();
00449 
          /* Access is already private here; the repeated specifier only
           * separates the helper methods above from the data members below. */
00450 private:
          /** Single delimiter character.
           *  NOTE(review): presumably the index/value separator of a LibSVM
           *  entry (':') -- confirm its value in the implementation. */
00452     char m_delimiter;
00453 
          /** Pointer to a CLineReader; where it is allocated and released is
           *  not visible in this header. */
00455     CLineReader* m_line_reader;
00456 
          /** Pointer to a CParser; where it is allocated and released is not
           *  visible in this header. */
00458     CParser* m_parser;
00459 
          /** Tokenizer pointer; presumably splits the input into lines --
           *  TODO confirm. */
00461     CDelimiterTokenizer* m_line_tokenizer;
00462 
          /** Tokenizer pointer; presumably splits a line on whitespace --
           *  TODO confirm. */
00464     CDelimiterTokenizer* m_whitespace_tokenizer;
00465 
          /** Tokenizer pointer; presumably splits an entry on m_delimiter --
           *  TODO confirm. */
00467     CDelimiterTokenizer* m_delimiter_tokenizer;
00468 };
00469 
00470 }
00471 
00472 #endif 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation