SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ProtobufFile.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evgeniy Andreev (gsomix)
00008  */
00009 #ifdef HAVE_PROTOBUF
00010 
00011 #include <shogun/io/ProtobufFile.h>
00012 
00013 #include <shogun/lib/SGVector.h>
00014 #include <shogun/lib/SGMatrix.h>
00015 #include <shogun/lib/SGSparseVector.h>
00016 #include <shogun/lib/SGString.h>
00017 
00018 using namespace shogun;
00019 
00020 CProtobufFile::CProtobufFile()
00021 {
00022     init();
00023 }
00024 
00025 CProtobufFile::CProtobufFile(FILE* f, const char* name) :
00026     CFile(f, name)
00027 {
00028     init();
00029 }
00030 
00031 CProtobufFile::CProtobufFile(const char* fname, char rw, const char* name) :
00032     CFile(fname, rw, name)
00033 {
00034     init();
00035 }
00036 
00037 CProtobufFile::~CProtobufFile()
00038 {
00039     SG_FREE(buffer);
00040 }
00041 
00042 void CProtobufFile::init()
00043 {
00044     version=1;
00045     message_size=1024*1024;
00046 
00047     buffer=SG_MALLOC(uint8_t, message_size*sizeof(uint32_t));
00048 }
00049 
00050 #define GET_VECTOR(sg_type) \
00051 void CProtobufFile::get_vector(sg_type*& vector, int32_t& len) \
00052 { \
00053     read_and_validate_global_header(ShogunVersion::VECTOR); \
00054     VectorHeader data_header=read_vector_header(); \
00055     len=data_header.len(); \
00056     read_memory_block(vector, len, data_header.num_messages()); \
00057 }
00058 
00059 GET_VECTOR(int8_t)
00060 GET_VECTOR(uint8_t)
00061 GET_VECTOR(char)
00062 GET_VECTOR(int32_t)
00063 GET_VECTOR(uint32_t)
00064 GET_VECTOR(float32_t)
00065 GET_VECTOR(float64_t)
00066 GET_VECTOR(floatmax_t)
00067 GET_VECTOR(int16_t)
00068 GET_VECTOR(uint16_t)
00069 GET_VECTOR(int64_t)
00070 GET_VECTOR(uint64_t)
00071 #undef GET_VECTOR
00072 
00073 #define GET_MATRIX(read_func, sg_type) \
00074 void CProtobufFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
00075 { \
00076     read_and_validate_global_header(ShogunVersion::MATRIX); \
00077     MatrixHeader data_header=read_matrix_header(); \
00078     num_feat=data_header.num_cols(); \
00079     num_vec=data_header.num_rows(); \
00080     read_memory_block(matrix, num_feat*num_vec, data_header.num_messages()); \
00081 }
00082 
00083 GET_MATRIX(read_char, int8_t)
00084 GET_MATRIX(read_byte, uint8_t)
00085 GET_MATRIX(read_char, char)
00086 GET_MATRIX(read_int, int32_t)
00087 GET_MATRIX(read_uint, uint32_t)
00088 GET_MATRIX(read_short_real, float32_t)
00089 GET_MATRIX(read_real, float64_t)
00090 GET_MATRIX(read_long_real, floatmax_t)
00091 GET_MATRIX(read_short, int16_t)
00092 GET_MATRIX(read_word, uint16_t)
00093 GET_MATRIX(read_long, int64_t)
00094 GET_MATRIX(read_ulong, uint64_t)
00095 #undef GET_MATRIX
00096 
00097 #define GET_NDARRAY(read_func, sg_type) \
00098 void CProtobufFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \
00099 { \
00100     SG_NOTIMPLEMENTED \
00101 }
00102 
00103 GET_NDARRAY(read_byte, uint8_t)
00104 GET_NDARRAY(read_char, char)
00105 GET_NDARRAY(read_int, int32_t)
00106 GET_NDARRAY(read_short_real, float32_t)
00107 GET_NDARRAY(read_real, float64_t)
00108 GET_NDARRAY(read_short, int16_t)
00109 GET_NDARRAY(read_word, uint16_t)
00110 #undef GET_NDARRAY
00111 
00112 #define GET_SPARSE_MATRIX(sg_type) \
00113 void CProtobufFile::get_sparse_matrix( \
00114             SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
00115 { \
00116     read_and_validate_global_header(ShogunVersion::SPARSE_MATRIX); \
00117     SparseMatrixHeader data_header=read_sparse_matrix_header(); \
00118     num_feat=data_header.num_features(); \
00119     num_vec=data_header.num_vectors(); \
00120     read_sparse_matrix(matrix, data_header); \
00121 }
00122 
00123 GET_SPARSE_MATRIX(bool)
00124 GET_SPARSE_MATRIX(int8_t)
00125 GET_SPARSE_MATRIX(uint8_t)
00126 GET_SPARSE_MATRIX(char)
00127 GET_SPARSE_MATRIX(int32_t)
00128 GET_SPARSE_MATRIX(uint32_t)
00129 GET_SPARSE_MATRIX(float32_t)
00130 GET_SPARSE_MATRIX(float64_t)
00131 GET_SPARSE_MATRIX(floatmax_t)
00132 GET_SPARSE_MATRIX(int16_t)
00133 GET_SPARSE_MATRIX(uint16_t)
00134 GET_SPARSE_MATRIX(int64_t)
00135 GET_SPARSE_MATRIX(uint64_t)
00136 #undef GET_SPARSE_MATRIX
00137 
00138 #define SET_VECTOR(sg_type) \
00139 void CProtobufFile::set_vector(const sg_type* vector, int32_t len) \
00140 { \
00141     int32_t num_messages=compute_num_messages(len, sizeof(sg_type)); \
00142     write_global_header(ShogunVersion::VECTOR); \
00143     write_vector_header(len, num_messages); \
00144     write_memory_block(vector, len, num_messages); \
00145 }
00146 
00147 SET_VECTOR(int8_t)
00148 SET_VECTOR(uint8_t)
00149 SET_VECTOR(char)
00150 SET_VECTOR(int32_t)
00151 SET_VECTOR(uint32_t)
00152 SET_VECTOR(int64_t)
00153 SET_VECTOR(uint64_t)
00154 SET_VECTOR(float32_t)
00155 SET_VECTOR(float64_t)
00156 SET_VECTOR(floatmax_t)
00157 SET_VECTOR(int16_t)
00158 SET_VECTOR(uint16_t)
00159 #undef SET_VECTOR
00160 
00161 #define SET_MATRIX(sg_type) \
00162 void CProtobufFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
00163 { \
00164     int32_t num_messages=compute_num_messages(num_feat*num_vec, sizeof(sg_type)); \
00165     write_global_header(ShogunVersion::MATRIX); \
00166     write_matrix_header(num_feat, num_vec, num_messages); \
00167     write_memory_block(matrix, num_feat*num_vec, num_messages); \
00168 }
00169 
00170 SET_MATRIX(int8_t)
00171 SET_MATRIX(uint8_t)
00172 SET_MATRIX(char)
00173 SET_MATRIX(int32_t)
00174 SET_MATRIX(uint32_t)
00175 SET_MATRIX(int64_t)
00176 SET_MATRIX(uint64_t)
00177 SET_MATRIX(float32_t)
00178 SET_MATRIX(float64_t)
00179 SET_MATRIX(floatmax_t)
00180 SET_MATRIX(int16_t)
00181 SET_MATRIX(uint16_t)
00182 #undef SET_MATRIX
00183 
00184 #define SET_SPARSE_MATRIX(sg_type) \
00185 void CProtobufFile::set_sparse_matrix( \
00186             const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
00187 { \
00188     write_global_header(ShogunVersion::SPARSE_MATRIX); \
00189     write_sparse_matrix_header(matrix, num_feat, num_vec); \
00190     write_sparse_matrix(matrix, num_vec); \
00191 }
00192 
00193 SET_SPARSE_MATRIX(bool)
00194 SET_SPARSE_MATRIX(int8_t)
00195 SET_SPARSE_MATRIX(uint8_t)
00196 SET_SPARSE_MATRIX(char)
00197 SET_SPARSE_MATRIX(int32_t)
00198 SET_SPARSE_MATRIX(uint32_t)
00199 SET_SPARSE_MATRIX(int64_t)
00200 SET_SPARSE_MATRIX(uint64_t)
00201 SET_SPARSE_MATRIX(float32_t)
00202 SET_SPARSE_MATRIX(float64_t)
00203 SET_SPARSE_MATRIX(floatmax_t)
00204 SET_SPARSE_MATRIX(int16_t)
00205 SET_SPARSE_MATRIX(uint16_t)
00206 #undef SET_SPARSE_MATRIX
00207 
00208 #define GET_STRING_LIST(sg_type) \
00209 void CProtobufFile::get_string_list( \
00210             SGString<sg_type>*& strings, int32_t& num_str, \
00211             int32_t& max_string_len) \
00212 { \
00213     read_and_validate_global_header(ShogunVersion::STRING_LIST); \
00214     StringListHeader data_header=read_string_list_header(); \
00215     num_str=data_header.num_str(); \
00216     max_string_len=data_header.max_string_len(); \
00217     read_string_list(strings, data_header); \
00218 }
00219 
00220 GET_STRING_LIST(int8_t)
00221 GET_STRING_LIST(uint8_t)
00222 GET_STRING_LIST(char)
00223 GET_STRING_LIST(int32_t)
00224 GET_STRING_LIST(uint32_t)
00225 GET_STRING_LIST(int64_t)
00226 GET_STRING_LIST(uint64_t)
00227 GET_STRING_LIST(float32_t)
00228 GET_STRING_LIST(float64_t)
00229 GET_STRING_LIST(floatmax_t)
00230 GET_STRING_LIST(int16_t)
00231 GET_STRING_LIST(uint16_t)
00232 #undef GET_STRING_LIST
00233 
00234 #define SET_STRING_LIST(sg_type) \
00235 void CProtobufFile::set_string_list( \
00236             const SGString<sg_type>* strings, int32_t num_str) \
00237 { \
00238     write_global_header(ShogunVersion::STRING_LIST); \
00239     write_string_list_header(strings, num_str); \
00240     write_string_list(strings, num_str); \
00241 }
00242 
00243 SET_STRING_LIST(int8_t)
00244 SET_STRING_LIST(uint8_t)
00245 SET_STRING_LIST(char)
00246 SET_STRING_LIST(int32_t)
00247 SET_STRING_LIST(uint32_t)
00248 SET_STRING_LIST(int64_t)
00249 SET_STRING_LIST(uint64_t)
00250 SET_STRING_LIST(float32_t)
00251 SET_STRING_LIST(float64_t)
00252 SET_STRING_LIST(floatmax_t)
00253 SET_STRING_LIST(int16_t)
00254 SET_STRING_LIST(uint16_t)
00255 #undef SET_STRING_LIST
00256 
00257 void CProtobufFile::write_big_endian_uint(uint32_t number, uint8_t* array, uint32_t size)
00258 {
00259     if (size<4)
00260         SG_ERROR("array is too small to write\n");
00261 
00262     array[0]=(number>>24)&0xffu;
00263     array[1]=(number>>16)&0xffu;
00264     array[2]=(number>>8)&0xffu;
00265     array[3]=number&0xffu;
00266 }
00267 
00268 uint32_t CProtobufFile::read_big_endian_uint(uint8_t* array, uint32_t size)
00269 {
00270     if (size<4)
00271         SG_ERROR("array is too small to read\n");
00272 
00273     return (array[0]<<24) | (array[1]<<16) | (array[2]<<8) | array[3];
00274 }
00275 
00276 int32_t CProtobufFile::compute_num_messages(uint64_t len, int32_t sizeof_type) const
00277 {
00278     uint32_t elements_in_message=message_size/sizeof_type;
00279     uint32_t num_messages=len/elements_in_message;
00280     if (len % elements_in_message > 0)
00281         num_messages++;
00282 
00283     return num_messages;
00284 }
00285 
00286 void CProtobufFile::read_and_validate_global_header(ShogunVersion_SGDataType type)
00287 {
00288     ShogunVersion header;
00289     read_message(header);
00290     REQUIRE(header.version()==version, "wrong version\n")
00291     REQUIRE(header.data_type()==type, "wrong type\n")
00292 }
00293 
00294 void CProtobufFile::write_global_header(ShogunVersion_SGDataType type)
00295 {
00296     ShogunVersion header;
00297     header.set_version(version);
00298     header.set_data_type(type);
00299     write_message(header);
00300 }
00301 
00302 VectorHeader CProtobufFile::read_vector_header()
00303 {
00304     VectorHeader data_header;
00305     read_message(data_header);
00306 
00307     return data_header;
00308 }
00309 
00310 SparseMatrixHeader CProtobufFile::read_sparse_matrix_header()
00311 {
00312     SparseMatrixHeader data_header;
00313     read_message(data_header);
00314 
00315     return data_header;
00316 }
00317 
00318 MatrixHeader CProtobufFile::read_matrix_header()
00319 {
00320     MatrixHeader data_header;
00321     read_message(data_header);
00322 
00323     return data_header;
00324 }
00325 
00326 StringListHeader CProtobufFile::read_string_list_header()
00327 {
00328     StringListHeader data_header;
00329     read_message(data_header);
00330 
00331     return data_header;
00332 }
00333 
00334 void CProtobufFile::write_vector_header(int32_t len, int32_t num_messages)
00335 {
00336     VectorHeader data_header;
00337     data_header.set_len(len);
00338     data_header.set_num_messages(num_messages);
00339     write_message(data_header);
00340 }
00341 
00342 void CProtobufFile::write_matrix_header(int32_t num_feat, int32_t num_vec, int32_t num_messages)
00343 {
00344     MatrixHeader data_header;
00345     data_header.set_num_cols(num_feat);
00346     data_header.set_num_rows(num_vec);
00347     data_header.set_num_messages(num_messages);
00348     write_message(data_header);
00349 }
00350 
00351 #define WRITE_SPARSE_MATRIX_HEADER(sg_type) \
00352 void CProtobufFile::write_sparse_matrix_header( \
00353     const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
00354 { \
00355     SparseMatrixHeader data_header; \
00356     data_header.set_num_features(num_feat); \
00357     data_header.set_num_vectors(num_vec); \
00358     for (int32_t i=0; i<num_vec; i++) \
00359     { \
00360         data_header.add_num_feat_entries(matrix[i].num_feat_entries); \
00361     } \
00362     \
00363     write_message(data_header); \
00364 }
00365 
00366 WRITE_SPARSE_MATRIX_HEADER(bool)
00367 WRITE_SPARSE_MATRIX_HEADER(int8_t)
00368 WRITE_SPARSE_MATRIX_HEADER(uint8_t)
00369 WRITE_SPARSE_MATRIX_HEADER(char)
00370 WRITE_SPARSE_MATRIX_HEADER(int32_t)
00371 WRITE_SPARSE_MATRIX_HEADER(uint32_t)
00372 WRITE_SPARSE_MATRIX_HEADER(int64_t)
00373 WRITE_SPARSE_MATRIX_HEADER(uint64_t)
00374 WRITE_SPARSE_MATRIX_HEADER(float32_t)
00375 WRITE_SPARSE_MATRIX_HEADER(float64_t)
00376 WRITE_SPARSE_MATRIX_HEADER(floatmax_t)
00377 WRITE_SPARSE_MATRIX_HEADER(int16_t)
00378 WRITE_SPARSE_MATRIX_HEADER(uint16_t)
00379 #undef WRITE_SPARSE_MATRIX_HEADER
00380 
00381 #define WRITE_STRING_LIST_HEADER(sg_type) \
00382 void CProtobufFile::write_string_list_header(const SGString<sg_type>* strings, int32_t num_str) \
00383 { \
00384     int32_t max_string_len=0; \
00385     StringListHeader data_header; \
00386     data_header.set_num_str(num_str); \
00387     for (int32_t i=0; i<num_str; i++) \
00388     { \
00389         data_header.add_str_len(strings[i].slen); \
00390         if (strings[i].slen>max_string_len) \
00391             max_string_len=strings[i].slen; \
00392     } \
00393     data_header.set_max_string_len(max_string_len); \
00394     write_message(data_header); \
00395 }
00396 
00397 WRITE_STRING_LIST_HEADER(int8_t)
00398 WRITE_STRING_LIST_HEADER(uint8_t)
00399 WRITE_STRING_LIST_HEADER(char)
00400 WRITE_STRING_LIST_HEADER(int32_t)
00401 WRITE_STRING_LIST_HEADER(uint32_t)
00402 WRITE_STRING_LIST_HEADER(int64_t)
00403 WRITE_STRING_LIST_HEADER(uint64_t)
00404 WRITE_STRING_LIST_HEADER(float32_t)
00405 WRITE_STRING_LIST_HEADER(float64_t)
00406 WRITE_STRING_LIST_HEADER(floatmax_t)
00407 WRITE_STRING_LIST_HEADER(int16_t)
00408 WRITE_STRING_LIST_HEADER(uint16_t)
00409 #undef WRITE_STRING_LIST_HEADER
00410 
00411 void CProtobufFile::read_message(google::protobuf::Message& message)
00412 {
00413     uint32_t bytes_read=0;
00414     uint32_t msg_size=0;
00415 
00416     // read size of message
00417     bytes_read=fread(uint_buffer, sizeof(char), sizeof(uint32_t), file);
00418     REQUIRE(bytes_read==sizeof(uint32_t), "IO error\n");
00419     msg_size=read_big_endian_uint(uint_buffer, sizeof(uint32_t));
00420     REQUIRE(msg_size>0, "message size should be more than zero\n");
00421 
00422     // read message
00423     bytes_read=fread(buffer, sizeof(char), msg_size, file);
00424     REQUIRE(bytes_read==msg_size, "IO error\n");
00425 
00426     // try to parse message from read data
00427     REQUIRE(message.ParseFromArray(buffer, msg_size), "cannot parse header\n");
00428 }
00429 
00430 void CProtobufFile::write_message(const google::protobuf::Message& message)
00431 {
00432     uint32_t bytes_write=0;
00433     uint32_t msg_size=message.ByteSize();
00434 
00435     // write size of message
00436     write_big_endian_uint(msg_size, uint_buffer, sizeof(uint32_t));
00437     bytes_write=fwrite(uint_buffer, sizeof(char), sizeof(uint32_t), file);
00438     REQUIRE(bytes_write==sizeof(uint32_t), "IO error\n");
00439 
00440     // write serialized message
00441     message.SerializeToArray(buffer, msg_size);
00442     bytes_write=fwrite(buffer, sizeof(char), msg_size, file);
00443     REQUIRE(bytes_write==msg_size, "IO error\n");
00444 }
00445 
00446 #define READ_MEMORY_BLOCK(chunk_type, sg_type) \
00447 void CProtobufFile::read_memory_block(sg_type*& vector, uint64_t len, int32_t num_messages) \
00448 { \
00449     vector=SG_MALLOC(sg_type, len); \
00450     \
00451     chunk_type chunk; \
00452     int32_t elements_in_message=message_size/sizeof(sg_type); \
00453     for (int32_t i=0; i<num_messages; i++) \
00454     { \
00455         read_message(chunk); \
00456         \
00457         int32_t num_elements_to_read=0; \
00458         if ((len-(i+1)*elements_in_message)<=0) \
00459             num_elements_to_read=len-i*elements_in_message; \
00460         else \
00461             num_elements_to_read=elements_in_message; \
00462         \
00463         for (int32_t j=0; j<num_elements_to_read; j++) \
00464             vector[j+i*elements_in_message]=chunk.data(j); \
00465     } \
00466 }
00467 
00468 READ_MEMORY_BLOCK(Int32Chunk, int8_t)
00469 READ_MEMORY_BLOCK(UInt32Chunk, uint8_t)
00470 READ_MEMORY_BLOCK(UInt32Chunk, char)
00471 READ_MEMORY_BLOCK(Int32Chunk, int32_t)
00472 READ_MEMORY_BLOCK(UInt32Chunk, uint32_t)
00473 READ_MEMORY_BLOCK(Float32Chunk, float32_t)
00474 READ_MEMORY_BLOCK(Float64Chunk, float64_t)
00475 READ_MEMORY_BLOCK(Float64Chunk, floatmax_t)
00476 READ_MEMORY_BLOCK(Int32Chunk, int16_t)
00477 READ_MEMORY_BLOCK(UInt32Chunk, uint16_t)
00478 READ_MEMORY_BLOCK(Int64Chunk, int64_t)
00479 READ_MEMORY_BLOCK(UInt64Chunk, uint64_t)
00480 #undef READ_MEMORY_BLOCK
00481 
00482 #define WRITE_MEMORY_BLOCK(chunk_type, sg_type) \
00483 void CProtobufFile::write_memory_block(const sg_type* vector, uint64_t len, int32_t num_messages) \
00484 { \
00485     chunk_type chunk; \
00486     int32_t elements_in_message=message_size/sizeof(sg_type); \
00487     for (int32_t i=0; i<num_messages; i++) \
00488     { \
00489         \
00490         int32_t num_elements_to_write=0; \
00491         if ((len-(i+1)*elements_in_message)<=0) \
00492             num_elements_to_write=len-i*elements_in_message; \
00493         else \
00494             num_elements_to_write=elements_in_message; \
00495         \
00496         for (int32_t j=0; j<num_elements_to_write; j++) \
00497             chunk.add_data(vector[j+i*elements_in_message]); \
00498         \
00499         write_message(chunk); \
00500         chunk.Clear(); \
00501     } \
00502 }
00503 
00504 WRITE_MEMORY_BLOCK(Int32Chunk, int8_t)
00505 WRITE_MEMORY_BLOCK(UInt32Chunk, uint8_t)
00506 WRITE_MEMORY_BLOCK(UInt32Chunk, char)
00507 WRITE_MEMORY_BLOCK(Int32Chunk, int32_t)
00508 WRITE_MEMORY_BLOCK(UInt64Chunk, uint32_t)
00509 WRITE_MEMORY_BLOCK(Int64Chunk, int64_t)
00510 WRITE_MEMORY_BLOCK(UInt64Chunk, uint64_t)
00511 WRITE_MEMORY_BLOCK(Float32Chunk, float32_t)
00512 WRITE_MEMORY_BLOCK(Float64Chunk, float64_t)
00513 WRITE_MEMORY_BLOCK(Float64Chunk, floatmax_t)
00514 WRITE_MEMORY_BLOCK(Int32Chunk, int16_t)
00515 WRITE_MEMORY_BLOCK(UInt32Chunk, uint16_t)
00516 #undef WRITE_MEMORY_BLOCK
00517 
00518 #define READ_SPARSE_MATRIX(chunk_type, sg_type) \
00519 void CProtobufFile::read_sparse_matrix( \
00520             SGSparseVector<sg_type>*& matrix, const SparseMatrixHeader& data_header) \
00521 { \
00522     matrix=SG_MALLOC(SGSparseVector<sg_type>, data_header.num_vectors()); \
00523     \
00524     UInt64Chunk feat_index_chunk; \
00525     chunk_type entry_chunk; \
00526     read_message(feat_index_chunk); \
00527     read_message(entry_chunk); \
00528     \
00529     int32_t elements_in_message=message_size/sizeof(sg_type); \
00530     int32_t buffer_counter=0; \
00531     for (uint32_t i=0; i<data_header.num_vectors(); i++) \
00532     { \
00533         matrix[i]=SGSparseVector<sg_type>(data_header.num_feat_entries(i)); \
00534         for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \
00535         { \
00536             matrix[i].features[j].feat_index=feat_index_chunk.data(buffer_counter); \
00537             matrix[i].features[j].entry=entry_chunk.data(buffer_counter); \
00538             buffer_counter++; \
00539             \
00540             if (buffer_counter==elements_in_message) \
00541             { \
00542                 read_message(feat_index_chunk); \
00543                 read_message(entry_chunk); \
00544                 buffer_counter=0; \
00545             } \
00546         } \
00547     } \
00548 }
00549 
00550 READ_SPARSE_MATRIX(BoolChunk, bool)
00551 READ_SPARSE_MATRIX(Int32Chunk, int8_t)
00552 READ_SPARSE_MATRIX(UInt32Chunk, uint8_t)
00553 READ_SPARSE_MATRIX(UInt32Chunk, char)
00554 READ_SPARSE_MATRIX(Int32Chunk, int32_t)
00555 READ_SPARSE_MATRIX(UInt32Chunk, uint32_t)
00556 READ_SPARSE_MATRIX(Float32Chunk, float32_t)
00557 READ_SPARSE_MATRIX(Float64Chunk, float64_t)
00558 READ_SPARSE_MATRIX(Float64Chunk, floatmax_t)
00559 READ_SPARSE_MATRIX(Int32Chunk, int16_t)
00560 READ_SPARSE_MATRIX(UInt32Chunk, uint16_t)
00561 READ_SPARSE_MATRIX(Int64Chunk, int64_t)
00562 READ_SPARSE_MATRIX(UInt64Chunk, uint64_t)
00563 #undef READ_SPARSE_MATRIX
00564 
00565 #define WRITE_SPARSE_MATRIX(chunk_type, sg_type) \
00566 void CProtobufFile::write_sparse_matrix( \
00567             const SGSparseVector<sg_type>* matrix, int32_t num_vec) \
00568 { \
00569     UInt64Chunk feat_index_chunk; \
00570     chunk_type entry_chunk; \
00571     int32_t elements_in_message=message_size/sizeof(sg_type); \
00572     int32_t buffer_counter=0; \
00573     for (int32_t i=0; i<num_vec; i++) \
00574     { \
00575         for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \
00576         { \
00577             feat_index_chunk.add_data(matrix[i].features[j].feat_index); \
00578             entry_chunk.add_data(matrix[i].features[j].entry); \
00579             buffer_counter++; \
00580             \
00581             if (buffer_counter==elements_in_message) \
00582             { \
00583                 write_message(feat_index_chunk); \
00584                 write_message(entry_chunk); \
00585                 feat_index_chunk.Clear(); \
00586                 entry_chunk.Clear(); \
00587                 buffer_counter=0; \
00588             } \
00589         } \
00590     } \
00591     \
00592     if (buffer_counter!=0) \
00593     { \
00594         write_message(feat_index_chunk); \
00595         write_message(entry_chunk); \
00596     } \
00597 }
00598 
00599 WRITE_SPARSE_MATRIX(BoolChunk, bool)
00600 WRITE_SPARSE_MATRIX(Int32Chunk, int8_t)
00601 WRITE_SPARSE_MATRIX(UInt32Chunk, uint8_t)
00602 WRITE_SPARSE_MATRIX(UInt32Chunk, char)
00603 WRITE_SPARSE_MATRIX(Int32Chunk, int32_t)
00604 WRITE_SPARSE_MATRIX(UInt64Chunk, uint32_t)
00605 WRITE_SPARSE_MATRIX(Int64Chunk, int64_t)
00606 WRITE_SPARSE_MATRIX(UInt64Chunk, uint64_t)
00607 WRITE_SPARSE_MATRIX(Float32Chunk, float32_t)
00608 WRITE_SPARSE_MATRIX(Float64Chunk, float64_t)
00609 WRITE_SPARSE_MATRIX(Float64Chunk, floatmax_t)
00610 WRITE_SPARSE_MATRIX(Int32Chunk, int16_t)
00611 WRITE_SPARSE_MATRIX(UInt32Chunk, uint16_t)
00612 #undef WRITE_SPARSE_MATRIX
00613 
00614 #define READ_STRING_LIST(chunk_type, sg_type) \
00615 void CProtobufFile::read_string_list( \
00616             SGString<sg_type>*& strings, const StringListHeader& data_header) \
00617 { \
00618     strings=SG_MALLOC(SGString<sg_type>, data_header.num_str()); \
00619     \
00620     chunk_type chunk; \
00621     read_message(chunk); \
00622     int32_t elements_in_message=message_size/sizeof(sg_type); \
00623     int32_t buffer_counter=0; \
00624     for (uint32_t i=0; i<data_header.num_str(); i++) \
00625     { \
00626         strings[i]=SGString<sg_type>(data_header.str_len(i)); \
00627         for (int32_t j=0; j<strings[i].slen; j++) \
00628         { \
00629             strings[i].string[j]=chunk.data(buffer_counter); \
00630             buffer_counter++; \
00631             \
00632             if (buffer_counter==elements_in_message) \
00633             { \
00634                 read_message(chunk); \
00635                 buffer_counter=0; \
00636             } \
00637         } \
00638     } \
00639 }
00640 
00641 READ_STRING_LIST(Int32Chunk, int8_t)
00642 READ_STRING_LIST(UInt32Chunk, uint8_t)
00643 READ_STRING_LIST(UInt32Chunk, char)
00644 READ_STRING_LIST(Int32Chunk, int32_t)
00645 READ_STRING_LIST(UInt32Chunk, uint32_t)
00646 READ_STRING_LIST(Float32Chunk, float32_t)
00647 READ_STRING_LIST(Float64Chunk, float64_t)
00648 READ_STRING_LIST(Float64Chunk, floatmax_t)
00649 READ_STRING_LIST(Int32Chunk, int16_t)
00650 READ_STRING_LIST(UInt32Chunk, uint16_t)
00651 READ_STRING_LIST(Int64Chunk, int64_t)
00652 READ_STRING_LIST(UInt64Chunk, uint64_t)
00653 #undef READ_STRING_LIST
00654 
00655 #define WRITE_STRING_LIST(chunk_type, sg_type) \
00656 void CProtobufFile::write_string_list( \
00657             const SGString<sg_type>* strings, int32_t num_str) \
00658 { \
00659     chunk_type chunk; \
00660     int32_t elements_in_message=message_size/sizeof(sg_type); \
00661     int32_t buffer_counter=0; \
00662     for (int32_t i=0; i<num_str; i++) \
00663     { \
00664         for (int32_t j=0; j<strings[i].slen; j++) \
00665         { \
00666             chunk.add_data(strings[i].string[j]); \
00667             buffer_counter++; \
00668             \
00669             if (buffer_counter==elements_in_message) \
00670             { \
00671                 write_message(chunk); \
00672                 chunk.Clear(); \
00673                 buffer_counter=0; \
00674             } \
00675         } \
00676     } \
00677     \
00678     if (buffer_counter!=0) \
00679         write_message(chunk); \
00680 }
00681 
00682 WRITE_STRING_LIST(Int32Chunk, int8_t)
00683 WRITE_STRING_LIST(UInt32Chunk, uint8_t)
00684 WRITE_STRING_LIST(UInt32Chunk, char)
00685 WRITE_STRING_LIST(Int32Chunk, int32_t)
00686 WRITE_STRING_LIST(UInt64Chunk, uint32_t)
00687 WRITE_STRING_LIST(Int64Chunk, int64_t)
00688 WRITE_STRING_LIST(UInt64Chunk, uint64_t)
00689 WRITE_STRING_LIST(Float32Chunk, float32_t)
00690 WRITE_STRING_LIST(Float64Chunk, float64_t)
00691 WRITE_STRING_LIST(Float64Chunk, floatmax_t)
00692 WRITE_STRING_LIST(Int32Chunk, int16_t)
00693 WRITE_STRING_LIST(UInt32Chunk, uint16_t)
00694 #undef WRITE_STRING_LIST
00695 
00696 #endif /* HAVE_PROTOBUF */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation