SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evgeniy Andreev (gsomix) 00008 * Written (W) 2010 Soeren Sonnenburg 00009 */ 00010 00011 #ifndef __LIBSVMFILE_H__ 00012 #define __LIBSVMFILE_H__ 00013 00014 #include <shogun/io/File.h> 00015 00016 #include <shogun/io/LineReader.h> 00017 #include <shogun/io/Parser.h> 00018 #include <shogun/lib/DelimiterTokenizer.h> 00019 00020 namespace shogun 00021 { 00022 00030 class CLibSVMFile : public CFile 00031 { 00032 public: 00034 CLibSVMFile(); 00035 00041 CLibSVMFile(FILE* f, const char* name=NULL); 00042 00049 CLibSVMFile(const char* fname, char rw='r', const char* name=NULL); 00050 00052 virtual ~CLibSVMFile(); 00053 00061 virtual void get_vector(int8_t*& vector, int32_t& len) { }; 00062 virtual void get_vector(uint8_t*& vector, int32_t& len) { }; 00063 virtual void get_vector(char*& vector, int32_t& len) { }; 00064 virtual void get_vector(int32_t*& vector, int32_t& len) { }; 00065 virtual void get_vector(uint32_t*& vector, int32_t& len) { }; 00066 virtual void get_vector(float64_t*& vector, int32_t& len) { }; 00067 virtual void get_vector(float32_t*& vector, int32_t& len) { }; 00068 virtual void get_vector(floatmax_t*& vector, int32_t& len) { }; 00069 virtual void get_vector(int16_t*& vector, int32_t& len) { }; 00070 virtual void get_vector(uint16_t*& vector, int32_t& len) { }; 00071 virtual void get_vector(int64_t*& vector, int32_t& len) { }; 00072 virtual void get_vector(uint64_t*& vector, int32_t& len) { }; 00074 00083 virtual void get_matrix( 00084 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00085 virtual void get_matrix( 00086 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00087 virtual void get_matrix( 00088 char*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00089 virtual void get_matrix( 00090 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00091 virtual void get_matrix( 00092 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00093 virtual void get_matrix( 00094 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00095 virtual void get_matrix( 00096 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00097 virtual void get_matrix( 00098 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00099 virtual void get_matrix( 00100 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00101 virtual void get_matrix( 00102 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00103 virtual void get_matrix( 00104 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00105 virtual void get_matrix( 00106 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec) { }; 00108 00117 virtual void get_ndarray( 00118 uint8_t*& array, int32_t*& dims, int32_t& num_dims) { }; 00119 virtual void get_ndarray( 00120 char*& array, int32_t*& dims, int32_t& num_dims) { }; 00121 virtual void get_ndarray( 00122 int32_t*& array, int32_t*& dims, int32_t& num_dims) { }; 00123 virtual void get_ndarray( 00124 float32_t*& array, int32_t*& dims, int32_t& num_dims) { }; 00125 virtual void get_ndarray( 00126 float64_t*& array, int32_t*& dims, int32_t& num_dims){ }; 00127 virtual void get_ndarray( 00128 int16_t*& array, int32_t*& dims, int32_t& num_dims){ }; 00129 virtual void get_ndarray( 00130 uint16_t*& array, int32_t*& dims, int32_t& num_dims){ }; 00132 00141 virtual void get_sparse_matrix( 00142 SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec); 00143 virtual void get_sparse_matrix( 00144 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00145 virtual void get_sparse_matrix( 00146 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00147 virtual void get_sparse_matrix( 00148 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec); 00149 virtual void get_sparse_matrix( 00150 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00151 virtual void get_sparse_matrix( 00152 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00153 virtual void get_sparse_matrix( 00154 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00155 virtual void get_sparse_matrix( 00156 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00157 virtual void get_sparse_matrix( 00158 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00159 virtual void get_sparse_matrix( 00160 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00161 virtual void get_sparse_matrix( 00162 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00163 virtual void get_sparse_matrix( 00164 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00165 virtual void get_sparse_matrix( 00166 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00168 00177 virtual void get_sparse_matrix( 00178 SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec, 00179 float64_t*& labels, bool load_labels=true); 00180 virtual void get_sparse_matrix( 00181 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00182 float64_t*& labels, bool load_labels=true); 00183 virtual void get_sparse_matrix( 00184 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00185 float64_t*& labels, bool load_labels=true); 00186 virtual void get_sparse_matrix( 00187 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec, 00188 float64_t*& labels, bool load_labels=true); 00189 virtual void get_sparse_matrix( 00190 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00191 float64_t*& labels, bool load_labels=true); 00192 virtual void get_sparse_matrix( 00193 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00194 float64_t*& labels, bool load_labels=true); 00195 virtual void get_sparse_matrix( 00196 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00197 float64_t*& labels, bool load_labels=true); 00198 virtual void get_sparse_matrix( 00199 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00200 float64_t*& labels, bool load_labels=true); 00201 virtual void get_sparse_matrix( 00202 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00203 float64_t*& labels, bool load_labels=true); 00204 virtual void get_sparse_matrix( 00205 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00206 float64_t*& labels, bool load_labels=true); 00207 virtual void get_sparse_matrix( 00208 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00209 float64_t*& labels, bool load_labels=true); 00210 virtual void get_sparse_matrix( 00211 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00212 float64_t*& labels, bool load_labels=true); 00213 virtual void get_sparse_matrix( 00214 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec, 00215 float64_t*& labels, bool load_labels=true); 00216 00225 virtual void get_string_list( 00226 SGString<uint8_t>*& strings, int32_t& num_str, 00227 int32_t& max_string_len) { }; 00228 virtual void get_string_list( 00229 SGString<int8_t>*& strings, int32_t& num_str, 00230 int32_t& max_string_len) { }; 00231 virtual void get_string_list( 00232 SGString<char>*& strings, int32_t& num_str, 00233 int32_t& max_string_len) { }; 00234 virtual void get_string_list( 00235 SGString<int32_t>*& strings, int32_t& num_str, 00236 int32_t& max_string_len) { }; 00237 virtual void get_string_list( 00238 SGString<uint32_t>*& strings, int32_t& num_str, 00239 int32_t& max_string_len) { }; 00240 virtual void get_string_list( 00241 SGString<int16_t>*& strings, int32_t& num_str, 00242 int32_t& max_string_len) { }; 00243 virtual void get_string_list( 00244 SGString<uint16_t>*& strings, int32_t& num_str, 00245 int32_t& max_string_len) { }; 00246 virtual void get_string_list( 00247 SGString<int64_t>*& strings, int32_t& num_str, 00248 int32_t& max_string_len) { }; 00249 virtual void get_string_list( 00250 SGString<uint64_t>*& strings, int32_t& num_str, 00251 int32_t& max_string_len) { }; 00252 virtual void get_string_list( 00253 SGString<float32_t>*& strings, int32_t& num_str, 00254 int32_t& max_string_len) { }; 00255 virtual void get_string_list( 00256 SGString<float64_t>*& strings, int32_t& num_str, 00257 int32_t& max_string_len) { }; 00258 virtual void get_string_list( 00259 SGString<floatmax_t>*& strings, int32_t& num_str, 00260 int32_t& max_string_len) { }; 00262 00264 /*virtual void get_vector(void*& vector, int32_t& len, DataType& dtype);*/ 00265 00273 virtual void set_vector(const int8_t* vector, int32_t len) { }; 00274 virtual void set_vector(const uint8_t* vector, int32_t len) { }; 00275 virtual void set_vector(const char* vector, int32_t len) { }; 00276 virtual void set_vector(const int32_t* vector, int32_t len) { }; 00277 virtual void set_vector(const uint32_t* vector, int32_t len) { }; 00278 virtual void set_vector(const float32_t* vector, int32_t len) { }; 00279 virtual void set_vector(const float64_t* vector, int32_t len) { }; 00280 virtual void set_vector(const floatmax_t* vector, int32_t len) { }; 00281 virtual void set_vector(const int16_t* vector, int32_t len) { }; 00282 virtual void set_vector(const uint16_t* vector, int32_t len) { }; 00283 virtual void set_vector(const int64_t* vector, int32_t len) { }; 00284 virtual void set_vector(const uint64_t* vector, int32_t len) { }; 00286 00294 virtual void set_matrix( 00295 const uint8_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00296 virtual void set_matrix( 00297 const int8_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00298 virtual void set_matrix( 00299 const char* matrix, int32_t num_feat, int32_t num_vec) { }; 00300 virtual void set_matrix( 00301 const int32_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00302 virtual void set_matrix( 00303 const uint32_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00304 virtual void set_matrix( 00305 const int64_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00306 virtual void set_matrix( 00307 const uint64_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00308 virtual void set_matrix( 00309 const float32_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00310 virtual void set_matrix( 00311 const float64_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00312 virtual void set_matrix( 00313 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00314 virtual void set_matrix( 00315 const int16_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00316 virtual void set_matrix( 00317 const uint16_t* matrix, int32_t num_feat, int32_t num_vec) { }; 00319 00327 virtual void set_sparse_matrix( 00328 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec); 00329 virtual void set_sparse_matrix( 00330 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec); 00331 virtual void set_sparse_matrix( 00332 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec); 00333 virtual void set_sparse_matrix( 00334 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec); 00335 virtual void set_sparse_matrix( 00336 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec); 00337 virtual void set_sparse_matrix( 00338 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec); 00339 virtual void set_sparse_matrix( 00340 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec); 00341 virtual void set_sparse_matrix( 00342 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec); 00343 virtual void set_sparse_matrix( 00344 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec); 00345 virtual void set_sparse_matrix( 00346 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 00347 virtual void set_sparse_matrix( 00348 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec); 00349 virtual void set_sparse_matrix( 00350 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec); 00351 virtual void set_sparse_matrix( 00352 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec); 00354 00362 virtual void set_sparse_matrix( 00363 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec, 00364 const float64_t* labels); 00365 virtual void set_sparse_matrix( 00366 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec, 00367 const float64_t* labels); 00368 virtual void set_sparse_matrix( 00369 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec, 00370 const float64_t* labels); 00371 virtual void set_sparse_matrix( 00372 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec, 00373 const float64_t* labels); 00374 virtual void set_sparse_matrix( 00375 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec, 00376 const float64_t* labels); 00377 virtual void set_sparse_matrix( 00378 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec, 00379 const float64_t* labels); 00380 virtual void set_sparse_matrix( 00381 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec, 00382 const float64_t* labels); 00383 virtual void set_sparse_matrix( 00384 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec, 00385 const float64_t* labels); 00386 virtual void set_sparse_matrix( 00387 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec, 00388 const float64_t* labels); 00389 virtual void set_sparse_matrix( 00390 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec, 00391 const float64_t* labels); 00392 virtual void set_sparse_matrix( 00393 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec, 00394 const float64_t* labels); 00395 virtual void set_sparse_matrix( 00396 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec, 00397 const float64_t* labels); 00398 virtual void set_sparse_matrix( 00399 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec, 00400 const float64_t* labels); 00402 00403 00412 virtual void set_string_list( 00413 const SGString<uint8_t>* strings, int32_t num_str) { }; 00414 virtual void set_string_list( 00415 const SGString<int8_t>* strings, int32_t num_str) { }; 00416 virtual void set_string_list( 00417 const SGString<char>* strings, int32_t num_str) { }; 00418 virtual void set_string_list( 00419 const SGString<int32_t>* strings, int32_t num_str) { }; 00420 virtual void set_string_list( 00421 const SGString<uint32_t>* strings, int32_t num_str) { }; 00422 virtual void set_string_list( 00423 const SGString<int16_t>* strings, int32_t num_str) { }; 00424 virtual void set_string_list( 00425 const SGString<uint16_t>* strings, int32_t num_str) { }; 00426 virtual void set_string_list( 00427 const SGString<int64_t>* strings, int32_t num_str) { }; 00428 virtual void set_string_list( 00429 const SGString<uint64_t>* strings, int32_t num_str) { }; 00430 virtual void set_string_list( 00431 const SGString<float32_t>* strings, int32_t num_str) { }; 00432 virtual void set_string_list( 00433 const SGString<float64_t>* strings, int32_t num_str) { }; 00434 virtual void set_string_list( 00435 const SGString<floatmax_t>* strings, int32_t num_str) { }; 00437 00438 virtual const char* get_name() const { return "LibSVMFile"; } 00439 00440 private: 00442 void init(); 00443 00445 void init_with_defaults(); 00446 00448 int32_t get_num_lines(); 00449 00450 private: 00452 char m_delimiter; 00453 00455 CLineReader* m_line_reader; 00456 00458 CParser* m_parser; 00459 00461 CDelimiterTokenizer* m_line_tokenizer; 00462 00464 CDelimiterTokenizer* m_whitespace_tokenizer; 00465 00467 CDelimiterTokenizer* m_delimiter_tokenizer; 00468 }; 00469 00470 } 00471 00472 #endif