SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingAsciiFile.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/io/streaming/StreamingAsciiFile.h>
00012 #include <shogun/mathematics/Math.h>
00013 
00014 #include <ctype.h>
00015 
00016 using namespace shogun;
00017 
00018 CStreamingAsciiFile::CStreamingAsciiFile()
00019         : CStreamingFile()
00020 {
00021     SG_UNSTABLE("CStreamingAsciiFile::CStreamingAsciiFile()", "\n")
00022     m_delimiter = ' ';
00023 }
00024 
00025 CStreamingAsciiFile::CStreamingAsciiFile(const char* fname, char rw)
00026         : CStreamingFile(fname, rw)
00027 {
00028     m_delimiter = ' ';
00029 }
00030 
00031 CStreamingAsciiFile::~CStreamingAsciiFile()
00032 {
00033 }
00034 
00035 /* Methods for reading dense vectors from an ascii file */
00036 
00037 #define GET_VECTOR(fname, conv, sg_type)                                    \
00038 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat)   \
00039 {                                                                           \
00040         char* buffer = NULL;                                                \
00041         ssize_t bytes_read;                                                 \
00042         int32_t old_len = num_feat;                                         \
00043                                                                             \
00044         SG_SET_LOCALE_C;                                                    \
00045         bytes_read = buf->read_line(buffer);                                \
00046                                                                             \
00047         if (bytes_read<=0)                                                  \
00048         {                                                                   \
00049                 vector=NULL;                                                \
00050                 num_feat=-1;                                                \
00051                 SG_RESET_LOCALE;                                            \
00052                 return;                                                     \
00053         }                                                                   \
00054                                                                             \
00055         /* determine num_feat, populate dynamic array */                    \
00056         int32_t nf=0;                                                       \
00057         num_feat=0;                                                         \
00058                                                                             \
00059         char* ptr_item=NULL;                                                \
00060         char* ptr_data=buffer;                                              \
00061         DynArray<char*>* items=new DynArray<char*>();                       \
00062                                                                             \
00063         while (*ptr_data)                                                   \
00064         {                                                                   \
00065                 if ((*ptr_data=='\n') ||                                    \
00066                     (ptr_data - buffer >= bytes_read))                      \
00067                 {                                                           \
00068                         if (ptr_item)                                       \
00069                                 nf++;                                       \
00070                                                                             \
00071                         append_item(items, ptr_data, ptr_item);             \
00072                         num_feat=nf;                                        \
00073                                                                             \
00074                         nf=0;                                               \
00075                         ptr_item=NULL;                                      \
00076                         break;                                              \
00077                 }                                                           \
00078                 else if (!isblank(*ptr_data) && !ptr_item)                  \
00079                 {                                                           \
00080                         ptr_item=ptr_data;                                  \
00081                 }                                                           \
00082                 else if (isblank(*ptr_data) && ptr_item)                    \
00083                 {                                                           \
00084                         append_item(items, ptr_data, ptr_item);             \
00085                         ptr_item=NULL;                                      \
00086                         nf++;                                               \
00087                 }                                                           \
00088                                                                             \
00089                 ptr_data++;                                                 \
00090         }                                                                   \
00091                                                                             \
00092         SG_DEBUG("num_feat %d\n", num_feat)                                 \
00093                                                                             \
00094         /* now copy data into vector */                                     \
00095         if (old_len < num_feat)                                             \
00096                 vector=SG_REALLOC(sg_type, vector, old_len, num_feat);      \
00097                                                                             \
00098         for (int32_t i=0; i<num_feat; i++)                                  \
00099         {                                                                   \
00100                 char* item=items->get_element(i);                           \
00101                 vector[i]=conv(item);                                       \
00102                 SG_FREE(item);                                              \
00103         }                                                                   \
00104         delete items;                                                       \
00105         SG_RESET_LOCALE;                                                    \
00106 }
00107 
00108 GET_VECTOR(get_bool_vector, str_to_bool, bool)
00109 GET_VECTOR(get_byte_vector, atoi, uint8_t)
00110 GET_VECTOR(get_char_vector, atoi, char)
00111 GET_VECTOR(get_int_vector, atoi, int32_t)
00112 GET_VECTOR(get_short_vector, atoi, int16_t)
00113 GET_VECTOR(get_word_vector, atoi, uint16_t)
00114 GET_VECTOR(get_int8_vector, atoi, int8_t)
00115 GET_VECTOR(get_uint_vector, atoi, uint32_t)
00116 GET_VECTOR(get_long_vector, atoi, int64_t)
00117 GET_VECTOR(get_ulong_vector, atoi, uint64_t)
00118 GET_VECTOR(get_longreal_vector, atoi, floatmax_t)
00119 #undef GET_VECTOR
00120 
00121 #define GET_FLOAT_VECTOR(sg_type)                                           \
00122         void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\
00123         {                                                                   \
00124                 char *line=NULL;                                            \
00125                 SG_SET_LOCALE_C;                                            \
00126                 int32_t num_chars = buf->read_line(line);                   \
00127                 int32_t old_len = len;                                      \
00128                                                                             \
00129                 if (num_chars == 0)                                         \
00130                 {                                                           \
00131                         len = -1;                                           \
00132                         SG_RESET_LOCALE;                                    \
00133                         return;                                             \
00134                 }                                                           \
00135                                                                             \
00136                 substring example_string = {line, line + num_chars};        \
00137                                                                             \
00138                 CCSVFile::tokenize(m_delimiter, example_string, words);     \
00139                                                                             \
00140                 len = words.index();                                        \
00141                 substring* feature_start = &words[0];                       \
00142                                                                             \
00143                 if (len > old_len)                                          \
00144                         vector = SG_REALLOC(sg_type, vector, old_len, len); \
00145                                                                             \
00146                 int32_t j=0;                                                \
00147                 for (substring* i = feature_start; i != words.end; i++)     \
00148                 {                                                           \
00149                         vector[j++] = SGIO::float_of_substring(*i);         \
00150                 }                                                           \
00151                 SG_RESET_LOCALE;                                            \
00152         }
00153 
00154 GET_FLOAT_VECTOR(float32_t)
00155 GET_FLOAT_VECTOR(float64_t)
00156 #undef GET_FLOAT_VECTOR
00157 
00158 /* Methods for reading a dense vector and a label from an ascii file */
00159 
00160 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type)                      \
00161         void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \
00162         {                                                               \
00163                 char* buffer = NULL;                                    \
00164                 ssize_t bytes_read;                                     \
00165                 int32_t old_len = num_feat;                             \
00166                 SG_SET_LOCALE_C;                                        \
00167                                                                         \
00168                 bytes_read = buf->read_line(buffer);                    \
00169                                                                         \
00170                 if (bytes_read<=0)                                      \
00171                 {                                                       \
00172                         vector=NULL;                                    \
00173                         num_feat=-1;                                    \
00174                         SG_RESET_LOCALE;                                \
00175                         return;                                         \
00176                 }                                                       \
00177                                                                         \
00178                 /* determine num_feat, populate dynamic array */        \
00179                 int32_t nf=0;                                           \
00180                 num_feat=0;                                             \
00181                                                                         \
00182                 char* ptr_item=NULL;                                    \
00183                 char* ptr_data=buffer;                                  \
00184                 DynArray<char*>* items=new DynArray<char*>();           \
00185                                                                         \
00186                 while (*ptr_data)                                       \
00187                 {                                                       \
00188                         if ((*ptr_data=='\n') ||                        \
00189                             (ptr_data - buffer >= bytes_read))          \
00190                         {                                               \
00191                                 if (ptr_item)                           \
00192                                         nf++;                           \
00193                                                                         \
00194                                 append_item(items, ptr_data, ptr_item); \
00195                                 num_feat=nf;                            \
00196                                                                         \
00197                                 nf=0;                                   \
00198                                 ptr_item=NULL;                          \
00199                                 break;                                  \
00200                         }                                               \
00201                         else if (!isblank(*ptr_data) && !ptr_item)      \
00202                         {                                               \
00203                                 ptr_item=ptr_data;                      \
00204                         }                                               \
00205                         else if (isblank(*ptr_data) && ptr_item)        \
00206                         {                                               \
00207                                 append_item(items, ptr_data, ptr_item); \
00208                                 ptr_item=NULL;                          \
00209                                 nf++;                                   \
00210                         }                                               \
00211                                                                         \
00212                         ptr_data++;                                     \
00213                 }                                                       \
00214                                                                         \
00215                 SG_DEBUG("num_feat %d\n", num_feat)                 \
00216                 /* The first element is the label */                    \
00217                 label=atof(items->get_element(0));                      \
00218                 /* now copy rest of the data into vector */             \
00219                 if (old_len < num_feat - 1)                             \
00220                         vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1);    \
00221                                                                         \
00222                 for (int32_t i=1; i<num_feat; i++)                      \
00223                 {                                                       \
00224                         char* item=items->get_element(i);               \
00225                         vector[i-1]=conv(item);                         \
00226                         SG_FREE(item);                                  \
00227                 }                                                       \
00228                 delete items;                                           \
00229                 num_feat--;                                             \
00230                 SG_RESET_LOCALE;                                        \
00231         }
00232 
00233 GET_VECTOR_AND_LABEL(get_bool_vector_and_label, str_to_bool, bool)
00234 GET_VECTOR_AND_LABEL(get_byte_vector_and_label, atoi, uint8_t)
00235 GET_VECTOR_AND_LABEL(get_char_vector_and_label, atoi, char)
00236 GET_VECTOR_AND_LABEL(get_int_vector_and_label, atoi, int32_t)
00237 GET_VECTOR_AND_LABEL(get_short_vector_and_label, atoi, int16_t)
00238 GET_VECTOR_AND_LABEL(get_word_vector_and_label, atoi, uint16_t)
00239 GET_VECTOR_AND_LABEL(get_int8_vector_and_label, atoi, int8_t)
00240 GET_VECTOR_AND_LABEL(get_uint_vector_and_label, atoi, uint32_t)
00241 GET_VECTOR_AND_LABEL(get_long_vector_and_label, atoi, int64_t)
00242 GET_VECTOR_AND_LABEL(get_ulong_vector_and_label, atoi, uint64_t)
00243 GET_VECTOR_AND_LABEL(get_longreal_vector_and_label, atoi, floatmax_t)
00244 #undef GET_VECTOR_AND_LABEL
00245 
00246 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type)                             \
00247         void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
00248         {                                                               \
00249                 char *line=NULL;                                        \
00250                 SG_SET_LOCALE_C;                                        \
00251                 int32_t num_chars = buf->read_line(line);               \
00252                 int32_t old_len = len;                                  \
00253                                                                         \
00254                 if (num_chars == 0)                                     \
00255                 {                                                       \
00256                         len = -1;                                       \
00257                         SG_RESET_LOCALE;                                \
00258                         return;                                         \
00259                 }                                                       \
00260                                                                         \
00261                 substring example_string = {line, line + num_chars};    \
00262                                                                         \
00263                 CCSVFile::tokenize(m_delimiter, example_string, words); \
00264                                                                         \
00265                 label = SGIO::float_of_substring(words[0]);             \
00266                                                                         \
00267                 len = words.index() - 1;                                \
00268                 substring* feature_start = &words[1];                   \
00269                                                                         \
00270                 if (len > old_len)                                      \
00271                         vector = SG_REALLOC(sg_type, vector, old_len, len); \
00272                                                                         \
00273                 int32_t j=0;                                            \
00274                 for (substring* i = feature_start; i != words.end; i++) \
00275                 {                                                       \
00276                         vector[j++] = SGIO::float_of_substring(*i);     \
00277                 }                                                       \
00278                 SG_RESET_LOCALE;                                        \
00279         }
00280 
00281 GET_FLOAT_VECTOR_AND_LABEL(float32_t)
00282 GET_FLOAT_VECTOR_AND_LABEL(float64_t)
00283 #undef GET_FLOAT_VECTOR_AND_LABEL
00284 
00285 /* Methods for reading a string vector from an ascii file (see StringFeatures) */
00286 
00287 #define GET_STRING(fname, conv, sg_type)                                \
00288 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len)    \
00289 {                                                                       \
00290         char* buffer = NULL;                                            \
00291         ssize_t bytes_read;                                             \
00292                                                                         \
00293         SG_SET_LOCALE_C;                                                \
00294         bytes_read = buf->read_line(buffer);                            \
00295                                                                         \
00296         if (bytes_read<=1)                                              \
00297         {                                                               \
00298                 vector=NULL;                                            \
00299                 len=-1;                                                 \
00300                 SG_RESET_LOCALE;                                        \
00301                 return;                                                 \
00302         }                                                               \
00303                                                                         \
00304         SG_DEBUG("Line read from the file:\n%s\n", buffer)              \
00305         /* Remove the terminating \n */                                 \
00306         if (buffer[bytes_read-1]=='\n')                                 \
00307         {                                                               \
00308                 len=bytes_read-1;                                       \
00309                 buffer[bytes_read-1]='\0';                              \
00310         }                                                               \
00311         else                                                            \
00312                 len=bytes_read;                                         \
00313         vector=(sg_type *) buffer;                                      \
00314         SG_RESET_LOCALE;                                                \
00315 }
00316 
00317 GET_STRING(get_bool_string, str_to_bool, bool)
00318 GET_STRING(get_byte_string, atoi, uint8_t)
00319 GET_STRING(get_char_string, atoi, char)
00320 GET_STRING(get_int_string, atoi, int32_t)
00321 GET_STRING(get_shortreal_string, atof, float32_t)
00322 GET_STRING(get_real_string, atof, float64_t)
00323 GET_STRING(get_short_string, atoi, int16_t)
00324 GET_STRING(get_word_string, atoi, uint16_t)
00325 GET_STRING(get_int8_string, atoi, int8_t)
00326 GET_STRING(get_uint_string, atoi, uint32_t)
00327 GET_STRING(get_long_string, atoi, int64_t)
00328 GET_STRING(get_ulong_string, atoi, uint64_t)
00329 GET_STRING(get_longreal_string, atoi, floatmax_t)
00330 #undef GET_STRING
00331 
00332 /* Methods for reading a string vector and a label from an ascii file */
00333 
00334 #define GET_STRING_AND_LABEL(fname, conv, sg_type)                      \
00335 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \
00336 {                                                                       \
00337         char* buffer = NULL;                                            \
00338         ssize_t bytes_read;                                             \
00339                                                                         \
00340         SG_SET_LOCALE_C;                                                \
00341         bytes_read = buf->read_line(buffer);                            \
00342                                                                         \
00343         if (bytes_read<=1)                                              \
00344         {                                                               \
00345                 vector=NULL;                                            \
00346                 len=-1;                                                 \
00347                 SG_RESET_LOCALE;                                        \
00348                 return;                                                 \
00349         }                                                               \
00350                                                                         \
00351         int32_t str_start_pos=-1;                                       \
00352                                                                         \
00353         for (int32_t i=0; i<bytes_read; i++)                            \
00354         {                                                               \
00355                 if (buffer[i] == ' ')                                   \
00356                 {                                                       \
00357                         buffer[i]='\0';                                 \
00358                         label=atoi(buffer);                             \
00359                         buffer[i]=' ';                                  \
00360                         str_start_pos=i+1;                              \
00361                         break;                                          \
00362                 }                                                       \
00363         }                                                               \
00364         /* If no label found, set vector=NULL and length=-1 */          \
00365         if (str_start_pos == -1)                                        \
00366         {                                                               \
00367                 vector=NULL;                                            \
00368                 len=-1;                                                 \
00369                 return;                                                 \
00370         }                                                               \
00371         /* Remove terminating \n */                                     \
00372         if (buffer[bytes_read-1]=='\n')                                 \
00373         {                                                               \
00374                 buffer[bytes_read-1]='\0';                              \
00375                 len=bytes_read-str_start_pos-1;                         \
00376         }                                                               \
00377         else                                                            \
00378                 len=bytes_read-str_start_pos;                           \
00379                                                                         \
00380         vector=(sg_type*) &buffer[str_start_pos];                       \
00381         SG_RESET_LOCALE;                                                \
00382 }
00383 
00384 GET_STRING_AND_LABEL(get_bool_string_and_label, str_to_bool, bool)
00385 GET_STRING_AND_LABEL(get_byte_string_and_label, atoi, uint8_t)
00386 GET_STRING_AND_LABEL(get_char_string_and_label, atoi, char)
00387 GET_STRING_AND_LABEL(get_int_string_and_label, atoi, int32_t)
00388 GET_STRING_AND_LABEL(get_shortreal_string_and_label, atof, float32_t)
00389 GET_STRING_AND_LABEL(get_real_string_and_label, atof, float64_t)
00390 GET_STRING_AND_LABEL(get_short_string_and_label, atoi, int16_t)
00391 GET_STRING_AND_LABEL(get_word_string_and_label, atoi, uint16_t)
00392 GET_STRING_AND_LABEL(get_int8_string_and_label, atoi, int8_t)
00393 GET_STRING_AND_LABEL(get_uint_string_and_label, atoi, uint32_t)
00394 GET_STRING_AND_LABEL(get_long_string_and_label, atoi, int64_t)
00395 GET_STRING_AND_LABEL(get_ulong_string_and_label, atoi, uint64_t)
00396 GET_STRING_AND_LABEL(get_longreal_string_and_label, atoi, floatmax_t)
00397 #undef GET_STRING_AND_LABEL
00398 
00399 /* Methods for reading a sparse vector from an ascii file */
00400 
00401 #define GET_SPARSE_VECTOR(fname, conv, sg_type)                         \
00402 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \
00403 {                                                                       \
00404         char* buffer = NULL;                                            \
00405         ssize_t bytes_read;                                             \
00406         SG_SET_LOCALE_C;                                                \
00407                                                                         \
00408         bytes_read = buf->read_line(buffer);                            \
00409                                                                         \
00410         if (bytes_read<=1)                                              \
00411         {                                                               \
00412                 vector=NULL;                                            \
00413                 len=-1;                                                 \
00414                 SG_RESET_LOCALE;                                        \
00415                 return;                                                 \
00416         }                                                               \
00417                                                                         \
00418         /* Remove terminating \n */                                     \
00419         int32_t num_chars;                                              \
00420         if (buffer[bytes_read-1]=='\n')                                 \
00421           {                                                             \
00422             num_chars=bytes_read-1;                                     \
00423             buffer[num_chars]='\0';                                     \
00424           }                                                             \
00425         else                                                            \
00426           num_chars=bytes_read;                                         \
00427                                                                         \
00428         int32_t num_dims=0;                                             \
00429         for (int32_t i=0; i<num_chars; i++)                             \
00430         {                                                               \
00431                 if (buffer[i]==':')                                     \
00432                 {                                                       \
00433                         num_dims++;                                     \
00434                 }                                                       \
00435         }                                                               \
00436                                                                         \
00437         int32_t index_start_pos=-1;                                     \
00438         int32_t feature_start_pos;                                      \
00439         int32_t current_feat=0;                                         \
00440         if (len < num_dims)                                             \
00441             vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
00442         for (int32_t i=0; i<num_chars; i++)                             \
00443         {                                                               \
00444                 if (buffer[i]==':')                                     \
00445                 {                                                       \
00446                         buffer[i]='\0';                                 \
00447                         vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
00448                         /* Unset index_start_pos */                     \
00449                         index_start_pos=-1;                             \
00450                                                                         \
00451                         feature_start_pos=i+1;                          \
00452                         while ((buffer[i]!=' ') && (i<num_chars))       \
00453                         {                                               \
00454                                 i++;                                    \
00455                         }                                               \
00456                                                                         \
00457                         buffer[i]='\0';                                 \
00458                         vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
00459                                                                         \
00460                         current_feat++;                                 \
00461                 }                                                       \
00462                 else if (buffer[i]==' ')                                \
00463                   i++;                                                  \
00464                 else                                                    \
00465                   {                                                     \
00466                     /* Set index_start_pos if not set already */        \
00467                     /* if already set, it means the index is  */        \
00468                     /* more than one digit long.              */        \
00469                     if (index_start_pos == -1)                          \
00470                         index_start_pos=i;                              \
00471                   }                                                     \
00472         }                                                               \
00473                                                                         \
00474         len=current_feat;                                               \
00475         SG_RESET_LOCALE;                                                \
00476 }
00477 
00478 GET_SPARSE_VECTOR(get_bool_sparse_vector, str_to_bool, bool)
00479 GET_SPARSE_VECTOR(get_byte_sparse_vector, atoi, uint8_t)
00480 GET_SPARSE_VECTOR(get_char_sparse_vector, atoi, char)
00481 GET_SPARSE_VECTOR(get_int_sparse_vector, atoi, int32_t)
00482 GET_SPARSE_VECTOR(get_shortreal_sparse_vector, atof, float32_t)
00483 GET_SPARSE_VECTOR(get_real_sparse_vector, atof, float64_t)
00484 GET_SPARSE_VECTOR(get_short_sparse_vector, atoi, int16_t)
00485 GET_SPARSE_VECTOR(get_word_sparse_vector, atoi, uint16_t)
00486 GET_SPARSE_VECTOR(get_int8_sparse_vector, atoi, int8_t)
00487 GET_SPARSE_VECTOR(get_uint_sparse_vector, atoi, uint32_t)
00488 GET_SPARSE_VECTOR(get_long_sparse_vector, atoi, int64_t)
00489 GET_SPARSE_VECTOR(get_ulong_sparse_vector, atoi, uint64_t)
00490 GET_SPARSE_VECTOR(get_longreal_sparse_vector, atoi, floatmax_t)
00491 #undef GET_SPARSE_VECTOR
00492 
00493 /* Methods for reading a sparse vector and a label from an ascii file */
00494 
/*
 * Defines one overload of CStreamingAsciiFile::get_sparse_vector_and_label().
 *
 * The generated method reads one line through buf->read_line() and parses it
 * as
 *     <label> <index>:<value> <index>:<value> ...
 * The label is converted with atof. Each index is converted with atoi and
 * stored decremented by one in feat_index; each value is converted with
 * conv and stored in entry.
 *
 * fname   - not referenced by the macro body; every expansion defines an
 *           overload named get_sparse_vector_and_label
 * conv    - routine applied to the NUL-terminated text of each value
 * sg_type - element type of the sparse vector entries
 *
 * Outputs of the generated method:
 *   vector - grown with SG_REALLOC when len is smaller than the number of
 *            ':' characters in the line, then filled with the entries
 *   len    - number of entries parsed
 *   label  - parsed label
 * If read_line() returns at most one byte, vector is set to NULL, len to -1
 * and nothing is parsed. A line without a label, or an entry whose ':' has
 * no index in front of it, is reported through SG_ERROR.
 */
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)               \
void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \
{                                                                       \
        char* buffer = NULL;                                            \
        ssize_t bytes_read;                                             \
        SG_SET_LOCALE_C;                                                \
                                                                        \
        bytes_read = buf->read_line(buffer);                            \
                                                                        \
        if (bytes_read<=1)                                              \
        {                                                               \
                vector=NULL;                                            \
                len=-1;                                                 \
                SG_RESET_LOCALE;                                        \
                return;                                                 \
        }                                                               \
                                                                        \
        /* Strip the terminating newline, if there is one */            \
        int32_t num_chars;                                              \
        if (buffer[bytes_read-1]=='\n')                                 \
        {                                                               \
                num_chars=bytes_read-1;                                 \
                buffer[num_chars]='\0';                                 \
        }                                                               \
        else                                                            \
                num_chars=bytes_read;                                   \
                                                                        \
        /* The number of ':' bounds the number of entries in the line */ \
        int32_t num_dims=0;                                             \
        for (int32_t i=0; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                {                                                       \
                        num_dims++;                                     \
                }                                                       \
        }                                                               \
                                                                        \
        int32_t index_start_pos=-1;                                     \
        int32_t feature_start_pos;                                      \
        int32_t current_feat=0;                                         \
        int32_t label_pos=-1;                                           \
        if (len < num_dims)                                             \
            vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \
                                                                        \
        /* The label is the text before the first space that follows */ \
        /* a non-space character; a ':' seen earlier means there is  */ \
        /* no label on this line.                                    */ \
        for (int32_t i=1; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                {                                                       \
                        break;                                          \
                }                                                       \
                if ( (buffer[i]==' ') && (buffer[i-1]!=' ') )           \
                {                                                       \
                        buffer[i]='\0';                                 \
                        label_pos=i;                                    \
                        label=atof(buffer);                             \
                        break;                                          \
                }                                                       \
        }                                                               \
                                                                        \
        if (label_pos==-1)                                              \
                SG_ERROR("No label found!\n")                           \
                                                                        \
        /* Continue parsing right behind the label */                   \
        buffer+=label_pos+1;                                            \
        num_chars-=label_pos+1;                                         \
        for (int32_t i=0; i<num_chars; i++)                             \
        {                                                               \
                if (buffer[i]==':')                                     \
                {                                                       \
                        int32_t index_end_pos=i;                        \
                        feature_start_pos=i+1;                          \
                                                                        \
                        /* Advance to the space that ends the value; */ \
                        /* the bound is tested before buffer[i] is   */ \
                        /* read.                                     */ \
                        while ((i<num_chars) && (buffer[i]!=' '))       \
                        {                                               \
                                i++;                                    \
                        }                                               \
                                                                        \
                        if (index_start_pos==-1)                        \
                        {                                               \
                                /* A ':' with no index before it:    */ \
                                /* report it and skip the token      */ \
                                /* instead of reading buffer[-1].    */ \
                                SG_ERROR("No feature index found before ':'!\n") \
                                continue;                               \
                        }                                               \
                                                                        \
                        buffer[index_end_pos]='\0';                     \
                        vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \
                        /* Unset index_start_pos */                     \
                        index_start_pos=-1;                             \
                                                                        \
                        /* NOTE(review): for the last value on a line */ \
                        /* without a trailing newline this writes at  */ \
                        /* buffer[num_chars]; confirm read_line()     */ \
                        /* leaves room for the terminator.            */ \
                        buffer[i]='\0';                                 \
                        vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \
                                                                        \
                        current_feat++;                                 \
                }                                                       \
                else if (buffer[i]!=' ')                                \
                {                                                       \
                        /* First character of an index; later digits */ \
                        /* of the same index leave the mark alone.   */ \
                        /* Spaces are passed over by the loop's own  */ \
                        /* increment, so that the character after a  */ \
                        /* run of spaces is not skipped.             */ \
                        if (index_start_pos == -1)                      \
                                index_start_pos=i;                      \
                }                                                       \
        }                                                               \
                                                                        \
        len=current_feat;                                               \
        SG_RESET_LOCALE;                                                \
}
00593 
00594 GET_SPARSE_VECTOR_AND_LABEL(get_bool_sparse_vector_and_label, str_to_bool, bool)
00595 GET_SPARSE_VECTOR_AND_LABEL(get_byte_sparse_vector_and_label, atoi, uint8_t)
00596 GET_SPARSE_VECTOR_AND_LABEL(get_char_sparse_vector_and_label, atoi, char)
00597 GET_SPARSE_VECTOR_AND_LABEL(get_int_sparse_vector_and_label, atoi, int32_t)
00598 GET_SPARSE_VECTOR_AND_LABEL(get_shortreal_sparse_vector_and_label, atof, float32_t)
00599 GET_SPARSE_VECTOR_AND_LABEL(get_real_sparse_vector_and_label, atof, float64_t)
00600 GET_SPARSE_VECTOR_AND_LABEL(get_short_sparse_vector_and_label, atoi, int16_t)
00601 GET_SPARSE_VECTOR_AND_LABEL(get_word_sparse_vector_and_label, atoi, uint16_t)
00602 GET_SPARSE_VECTOR_AND_LABEL(get_int8_sparse_vector_and_label, atoi, int8_t)
00603 GET_SPARSE_VECTOR_AND_LABEL(get_uint_sparse_vector_and_label, atoi, uint32_t)
00604 GET_SPARSE_VECTOR_AND_LABEL(get_long_sparse_vector_and_label, atoi, int64_t)
00605 GET_SPARSE_VECTOR_AND_LABEL(get_ulong_sparse_vector_and_label, atoi, uint64_t)
00606 GET_SPARSE_VECTOR_AND_LABEL(get_longreal_sparse_vector_and_label, atoi, floatmax_t)
00607 #undef GET_SPARSE_VECTOR_AND_LABEL
00608 
00609 template <class T>
00610 void CStreamingAsciiFile::append_item(
00611         DynArray<T>* items, char* ptr_data, char* ptr_item)
00612 {
00613         REQUIRE(ptr_data && ptr_item, "Data and Item to append should not be NULL\n");
00614 
00615         size_t len=(ptr_data-ptr_item)/sizeof(char);
00616         char* item=SG_MALLOC(char, len+1);
00617         memset(item, 0, sizeof(char)*(len+1));
00618         item=strncpy(item, ptr_item, len);
00619 
00620         SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item)
00621         items->append_element(item);
00622 }
00623 
00624 void CStreamingAsciiFile::set_delimiter(char delimiter)
00625 {
00626     m_delimiter = delimiter;
00627 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation