SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evgeniy Andreev (gsomix) 00008 */ 00009 #ifdef HAVE_PROTOBUF 00010 00011 #include <shogun/io/ProtobufFile.h> 00012 00013 #include <shogun/lib/SGVector.h> 00014 #include <shogun/lib/SGMatrix.h> 00015 #include <shogun/lib/SGSparseVector.h> 00016 #include <shogun/lib/SGString.h> 00017 00018 using namespace shogun; 00019 00020 CProtobufFile::CProtobufFile() 00021 { 00022 init(); 00023 } 00024 00025 CProtobufFile::CProtobufFile(FILE* f, const char* name) : 00026 CFile(f, name) 00027 { 00028 init(); 00029 } 00030 00031 CProtobufFile::CProtobufFile(const char* fname, char rw, const char* name) : 00032 CFile(fname, rw, name) 00033 { 00034 init(); 00035 } 00036 00037 CProtobufFile::~CProtobufFile() 00038 { 00039 SG_FREE(buffer); 00040 } 00041 00042 void CProtobufFile::init() 00043 { 00044 version=1; 00045 message_size=1024*1024; 00046 00047 buffer=SG_MALLOC(uint8_t, message_size*sizeof(uint32_t)); 00048 } 00049 00050 #define GET_VECTOR(sg_type) \ 00051 void CProtobufFile::get_vector(sg_type*& vector, int32_t& len) \ 00052 { \ 00053 read_and_validate_global_header(ShogunVersion::VECTOR); \ 00054 VectorHeader data_header=read_vector_header(); \ 00055 len=data_header.len(); \ 00056 read_memory_block(vector, len, data_header.num_messages()); \ 00057 } 00058 00059 GET_VECTOR(int8_t) 00060 GET_VECTOR(uint8_t) 00061 GET_VECTOR(char) 00062 GET_VECTOR(int32_t) 00063 GET_VECTOR(uint32_t) 00064 GET_VECTOR(float32_t) 00065 GET_VECTOR(float64_t) 00066 GET_VECTOR(floatmax_t) 00067 GET_VECTOR(int16_t) 00068 GET_VECTOR(uint16_t) 00069 GET_VECTOR(int64_t) 00070 GET_VECTOR(uint64_t) 00071 #undef GET_VECTOR 00072 00073 #define GET_MATRIX(read_func, sg_type) \ 00074 void CProtobufFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00075 { \ 00076 read_and_validate_global_header(ShogunVersion::MATRIX); \ 00077 MatrixHeader data_header=read_matrix_header(); \ 00078 num_feat=data_header.num_cols(); \ 00079 num_vec=data_header.num_rows(); \ 00080 read_memory_block(matrix, num_feat*num_vec, data_header.num_messages()); \ 00081 } 00082 00083 GET_MATRIX(read_char, int8_t) 00084 GET_MATRIX(read_byte, uint8_t) 00085 GET_MATRIX(read_char, char) 00086 GET_MATRIX(read_int, int32_t) 00087 GET_MATRIX(read_uint, uint32_t) 00088 GET_MATRIX(read_short_real, float32_t) 00089 GET_MATRIX(read_real, float64_t) 00090 GET_MATRIX(read_long_real, floatmax_t) 00091 GET_MATRIX(read_short, int16_t) 00092 GET_MATRIX(read_word, uint16_t) 00093 GET_MATRIX(read_long, int64_t) 00094 GET_MATRIX(read_ulong, uint64_t) 00095 #undef GET_MATRIX 00096 00097 #define GET_NDARRAY(read_func, sg_type) \ 00098 void CProtobufFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \ 00099 { \ 00100 SG_NOTIMPLEMENTED \ 00101 } 00102 00103 GET_NDARRAY(read_byte, uint8_t) 00104 GET_NDARRAY(read_char, char) 00105 GET_NDARRAY(read_int, int32_t) 00106 GET_NDARRAY(read_short_real, float32_t) 00107 GET_NDARRAY(read_real, float64_t) 00108 GET_NDARRAY(read_short, int16_t) 00109 GET_NDARRAY(read_word, uint16_t) 00110 #undef GET_NDARRAY 00111 00112 #define GET_SPARSE_MATRIX(sg_type) \ 00113 void CProtobufFile::get_sparse_matrix( \ 00114 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \ 00115 { \ 00116 read_and_validate_global_header(ShogunVersion::SPARSE_MATRIX); \ 00117 SparseMatrixHeader data_header=read_sparse_matrix_header(); \ 00118 num_feat=data_header.num_features(); \ 00119 num_vec=data_header.num_vectors(); \ 00120 read_sparse_matrix(matrix, data_header); \ 00121 } 00122 00123 GET_SPARSE_MATRIX(bool) 00124 GET_SPARSE_MATRIX(int8_t) 00125 GET_SPARSE_MATRIX(uint8_t) 00126 GET_SPARSE_MATRIX(char) 00127 GET_SPARSE_MATRIX(int32_t) 00128 GET_SPARSE_MATRIX(uint32_t) 00129 GET_SPARSE_MATRIX(float32_t) 00130 GET_SPARSE_MATRIX(float64_t) 00131 GET_SPARSE_MATRIX(floatmax_t) 00132 GET_SPARSE_MATRIX(int16_t) 00133 GET_SPARSE_MATRIX(uint16_t) 00134 GET_SPARSE_MATRIX(int64_t) 00135 GET_SPARSE_MATRIX(uint64_t) 00136 #undef GET_SPARSE_MATRIX 00137 00138 #define SET_VECTOR(sg_type) \ 00139 void CProtobufFile::set_vector(const sg_type* vector, int32_t len) \ 00140 { \ 00141 int32_t num_messages=compute_num_messages(len, sizeof(sg_type)); \ 00142 write_global_header(ShogunVersion::VECTOR); \ 00143 write_vector_header(len, num_messages); \ 00144 write_memory_block(vector, len, num_messages); \ 00145 } 00146 00147 SET_VECTOR(int8_t) 00148 SET_VECTOR(uint8_t) 00149 SET_VECTOR(char) 00150 SET_VECTOR(int32_t) 00151 SET_VECTOR(uint32_t) 00152 SET_VECTOR(int64_t) 00153 SET_VECTOR(uint64_t) 00154 SET_VECTOR(float32_t) 00155 SET_VECTOR(float64_t) 00156 SET_VECTOR(floatmax_t) 00157 SET_VECTOR(int16_t) 00158 SET_VECTOR(uint16_t) 00159 #undef SET_VECTOR 00160 00161 #define SET_MATRIX(sg_type) \ 00162 void CProtobufFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \ 00163 { \ 00164 int32_t num_messages=compute_num_messages(num_feat*num_vec, sizeof(sg_type)); \ 00165 write_global_header(ShogunVersion::MATRIX); \ 00166 write_matrix_header(num_feat, num_vec, num_messages); \ 00167 write_memory_block(matrix, num_feat*num_vec, num_messages); \ 00168 } 00169 00170 SET_MATRIX(int8_t) 00171 SET_MATRIX(uint8_t) 00172 SET_MATRIX(char) 00173 SET_MATRIX(int32_t) 00174 SET_MATRIX(uint32_t) 00175 SET_MATRIX(int64_t) 00176 SET_MATRIX(uint64_t) 00177 SET_MATRIX(float32_t) 00178 SET_MATRIX(float64_t) 00179 SET_MATRIX(floatmax_t) 00180 SET_MATRIX(int16_t) 00181 SET_MATRIX(uint16_t) 00182 #undef SET_MATRIX 00183 00184 #define SET_SPARSE_MATRIX(sg_type) \ 00185 void CProtobufFile::set_sparse_matrix( \ 00186 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 00187 { \ 00188 write_global_header(ShogunVersion::SPARSE_MATRIX); \ 00189 write_sparse_matrix_header(matrix, num_feat, num_vec); \ 00190 write_sparse_matrix(matrix, num_vec); \ 00191 } 00192 00193 SET_SPARSE_MATRIX(bool) 00194 SET_SPARSE_MATRIX(int8_t) 00195 SET_SPARSE_MATRIX(uint8_t) 00196 SET_SPARSE_MATRIX(char) 00197 SET_SPARSE_MATRIX(int32_t) 00198 SET_SPARSE_MATRIX(uint32_t) 00199 SET_SPARSE_MATRIX(int64_t) 00200 SET_SPARSE_MATRIX(uint64_t) 00201 SET_SPARSE_MATRIX(float32_t) 00202 SET_SPARSE_MATRIX(float64_t) 00203 SET_SPARSE_MATRIX(floatmax_t) 00204 SET_SPARSE_MATRIX(int16_t) 00205 SET_SPARSE_MATRIX(uint16_t) 00206 #undef SET_SPARSE_MATRIX 00207 00208 #define GET_STRING_LIST(sg_type) \ 00209 void CProtobufFile::get_string_list( \ 00210 SGString<sg_type>*& strings, int32_t& num_str, \ 00211 int32_t& max_string_len) \ 00212 { \ 00213 read_and_validate_global_header(ShogunVersion::STRING_LIST); \ 00214 StringListHeader data_header=read_string_list_header(); \ 00215 num_str=data_header.num_str(); \ 00216 max_string_len=data_header.max_string_len(); \ 00217 read_string_list(strings, data_header); \ 00218 } 00219 00220 GET_STRING_LIST(int8_t) 00221 GET_STRING_LIST(uint8_t) 00222 GET_STRING_LIST(char) 00223 GET_STRING_LIST(int32_t) 00224 GET_STRING_LIST(uint32_t) 00225 GET_STRING_LIST(int64_t) 00226 GET_STRING_LIST(uint64_t) 00227 GET_STRING_LIST(float32_t) 00228 GET_STRING_LIST(float64_t) 00229 GET_STRING_LIST(floatmax_t) 00230 GET_STRING_LIST(int16_t) 00231 GET_STRING_LIST(uint16_t) 00232 #undef GET_STRING_LIST 00233 00234 #define SET_STRING_LIST(sg_type) \ 00235 void CProtobufFile::set_string_list( \ 00236 const SGString<sg_type>* strings, int32_t num_str) \ 00237 { \ 00238 write_global_header(ShogunVersion::STRING_LIST); \ 00239 write_string_list_header(strings, num_str); \ 00240 write_string_list(strings, num_str); \ 00241 } 00242 00243 SET_STRING_LIST(int8_t) 00244 SET_STRING_LIST(uint8_t) 00245 SET_STRING_LIST(char) 00246 SET_STRING_LIST(int32_t) 00247 SET_STRING_LIST(uint32_t) 00248 SET_STRING_LIST(int64_t) 00249 SET_STRING_LIST(uint64_t) 00250 SET_STRING_LIST(float32_t) 00251 SET_STRING_LIST(float64_t) 00252 SET_STRING_LIST(floatmax_t) 00253 SET_STRING_LIST(int16_t) 00254 SET_STRING_LIST(uint16_t) 00255 #undef SET_STRING_LIST 00256 00257 void CProtobufFile::write_big_endian_uint(uint32_t number, uint8_t* array, uint32_t size) 00258 { 00259 if (size<4) 00260 SG_ERROR("array is too small to write\n"); 00261 00262 array[0]=(number>>24)&0xffu; 00263 array[1]=(number>>16)&0xffu; 00264 array[2]=(number>>8)&0xffu; 00265 array[3]=number&0xffu; 00266 } 00267 00268 uint32_t CProtobufFile::read_big_endian_uint(uint8_t* array, uint32_t size) 00269 { 00270 if (size<4) 00271 SG_ERROR("array is too small to read\n"); 00272 00273 return (array[0]<<24) | (array[1]<<16) | (array[2]<<8) | array[3]; 00274 } 00275 00276 int32_t CProtobufFile::compute_num_messages(uint64_t len, int32_t sizeof_type) const 00277 { 00278 uint32_t elements_in_message=message_size/sizeof_type; 00279 uint32_t num_messages=len/elements_in_message; 00280 if (len % elements_in_message > 0) 00281 num_messages++; 00282 00283 return num_messages; 00284 } 00285 00286 void CProtobufFile::read_and_validate_global_header(ShogunVersion_SGDataType type) 00287 { 00288 ShogunVersion header; 00289 read_message(header); 00290 REQUIRE(header.version()==version, "wrong version\n") 00291 REQUIRE(header.data_type()==type, "wrong type\n") 00292 } 00293 00294 void CProtobufFile::write_global_header(ShogunVersion_SGDataType type) 00295 { 00296 ShogunVersion header; 00297 header.set_version(version); 00298 header.set_data_type(type); 00299 write_message(header); 00300 } 00301 00302 VectorHeader CProtobufFile::read_vector_header() 00303 { 00304 VectorHeader data_header; 00305 read_message(data_header); 00306 00307 return data_header; 00308 } 00309 00310 SparseMatrixHeader CProtobufFile::read_sparse_matrix_header() 00311 { 00312 SparseMatrixHeader data_header; 00313 read_message(data_header); 00314 00315 return data_header; 00316 } 00317 00318 MatrixHeader CProtobufFile::read_matrix_header() 00319 { 00320 MatrixHeader data_header; 00321 read_message(data_header); 00322 00323 return data_header; 00324 } 00325 00326 StringListHeader CProtobufFile::read_string_list_header() 00327 { 00328 StringListHeader data_header; 00329 read_message(data_header); 00330 00331 return data_header; 00332 } 00333 00334 void CProtobufFile::write_vector_header(int32_t len, int32_t num_messages) 00335 { 00336 VectorHeader data_header; 00337 data_header.set_len(len); 00338 data_header.set_num_messages(num_messages); 00339 write_message(data_header); 00340 } 00341 00342 void CProtobufFile::write_matrix_header(int32_t num_feat, int32_t num_vec, int32_t num_messages) 00343 { 00344 MatrixHeader data_header; 00345 data_header.set_num_cols(num_feat); 00346 data_header.set_num_rows(num_vec); 00347 data_header.set_num_messages(num_messages); 00348 write_message(data_header); 00349 } 00350 00351 #define WRITE_SPARSE_MATRIX_HEADER(sg_type) \ 00352 void CProtobufFile::write_sparse_matrix_header( \ 00353 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \ 00354 { \ 00355 SparseMatrixHeader data_header; \ 00356 data_header.set_num_features(num_feat); \ 00357 data_header.set_num_vectors(num_vec); \ 00358 for (int32_t i=0; i<num_vec; i++) \ 00359 { \ 00360 data_header.add_num_feat_entries(matrix[i].num_feat_entries); \ 00361 } \ 00362 \ 00363 write_message(data_header); \ 00364 } 00365 00366 WRITE_SPARSE_MATRIX_HEADER(bool) 00367 WRITE_SPARSE_MATRIX_HEADER(int8_t) 00368 WRITE_SPARSE_MATRIX_HEADER(uint8_t) 00369 WRITE_SPARSE_MATRIX_HEADER(char) 00370 WRITE_SPARSE_MATRIX_HEADER(int32_t) 00371 WRITE_SPARSE_MATRIX_HEADER(uint32_t) 00372 WRITE_SPARSE_MATRIX_HEADER(int64_t) 00373 WRITE_SPARSE_MATRIX_HEADER(uint64_t) 00374 WRITE_SPARSE_MATRIX_HEADER(float32_t) 00375 WRITE_SPARSE_MATRIX_HEADER(float64_t) 00376 WRITE_SPARSE_MATRIX_HEADER(floatmax_t) 00377 WRITE_SPARSE_MATRIX_HEADER(int16_t) 00378 WRITE_SPARSE_MATRIX_HEADER(uint16_t) 00379 #undef WRITE_SPARSE_MATRIX_HEADER 00380 00381 #define WRITE_STRING_LIST_HEADER(sg_type) \ 00382 void CProtobufFile::write_string_list_header(const SGString<sg_type>* strings, int32_t num_str) \ 00383 { \ 00384 int32_t max_string_len=0; \ 00385 StringListHeader data_header; \ 00386 data_header.set_num_str(num_str); \ 00387 for (int32_t i=0; i<num_str; i++) \ 00388 { \ 00389 data_header.add_str_len(strings[i].slen); \ 00390 if (strings[i].slen>max_string_len) \ 00391 max_string_len=strings[i].slen; \ 00392 } \ 00393 data_header.set_max_string_len(max_string_len); \ 00394 write_message(data_header); \ 00395 } 00396 00397 WRITE_STRING_LIST_HEADER(int8_t) 00398 WRITE_STRING_LIST_HEADER(uint8_t) 00399 WRITE_STRING_LIST_HEADER(char) 00400 WRITE_STRING_LIST_HEADER(int32_t) 00401 WRITE_STRING_LIST_HEADER(uint32_t) 00402 WRITE_STRING_LIST_HEADER(int64_t) 00403 WRITE_STRING_LIST_HEADER(uint64_t) 00404 WRITE_STRING_LIST_HEADER(float32_t) 00405 WRITE_STRING_LIST_HEADER(float64_t) 00406 WRITE_STRING_LIST_HEADER(floatmax_t) 00407 WRITE_STRING_LIST_HEADER(int16_t) 00408 WRITE_STRING_LIST_HEADER(uint16_t) 00409 #undef WRITE_STRING_LIST_HEADER 00410 00411 void CProtobufFile::read_message(google::protobuf::Message& message) 00412 { 00413 uint32_t bytes_read=0; 00414 uint32_t msg_size=0; 00415 00416 // read size of message 00417 bytes_read=fread(uint_buffer, sizeof(char), sizeof(uint32_t), file); 00418 REQUIRE(bytes_read==sizeof(uint32_t), "IO error\n"); 00419 msg_size=read_big_endian_uint(uint_buffer, sizeof(uint32_t)); 00420 REQUIRE(msg_size>0, "message size should be more than zero\n"); 00421 00422 // read message 00423 bytes_read=fread(buffer, sizeof(char), msg_size, file); 00424 REQUIRE(bytes_read==msg_size, "IO error\n"); 00425 00426 // try to parse message from read data 00427 REQUIRE(message.ParseFromArray(buffer, msg_size), "cannot parse header\n"); 00428 } 00429 00430 void CProtobufFile::write_message(const google::protobuf::Message& message) 00431 { 00432 uint32_t bytes_write=0; 00433 uint32_t msg_size=message.ByteSize(); 00434 00435 // write size of message 00436 write_big_endian_uint(msg_size, uint_buffer, sizeof(uint32_t)); 00437 bytes_write=fwrite(uint_buffer, sizeof(char), sizeof(uint32_t), file); 00438 REQUIRE(bytes_write==sizeof(uint32_t), "IO error\n"); 00439 00440 // write serialized message 00441 message.SerializeToArray(buffer, msg_size); 00442 bytes_write=fwrite(buffer, sizeof(char), msg_size, file); 00443 REQUIRE(bytes_write==msg_size, "IO error\n"); 00444 } 00445 00446 #define READ_MEMORY_BLOCK(chunk_type, sg_type) \ 00447 void CProtobufFile::read_memory_block(sg_type*& vector, uint64_t len, int32_t num_messages) \ 00448 { \ 00449 vector=SG_MALLOC(sg_type, len); \ 00450 \ 00451 chunk_type chunk; \ 00452 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00453 for (int32_t i=0; i<num_messages; i++) \ 00454 { \ 00455 read_message(chunk); \ 00456 \ 00457 int32_t num_elements_to_read=0; \ 00458 if ((len-(i+1)*elements_in_message)<=0) \ 00459 num_elements_to_read=len-i*elements_in_message; \ 00460 else \ 00461 num_elements_to_read=elements_in_message; \ 00462 \ 00463 for (int32_t j=0; j<num_elements_to_read; j++) \ 00464 vector[j+i*elements_in_message]=chunk.data(j); \ 00465 } \ 00466 } 00467 00468 READ_MEMORY_BLOCK(Int32Chunk, int8_t) 00469 READ_MEMORY_BLOCK(UInt32Chunk, uint8_t) 00470 READ_MEMORY_BLOCK(UInt32Chunk, char) 00471 READ_MEMORY_BLOCK(Int32Chunk, int32_t) 00472 READ_MEMORY_BLOCK(UInt32Chunk, uint32_t) 00473 READ_MEMORY_BLOCK(Float32Chunk, float32_t) 00474 READ_MEMORY_BLOCK(Float64Chunk, float64_t) 00475 READ_MEMORY_BLOCK(Float64Chunk, floatmax_t) 00476 READ_MEMORY_BLOCK(Int32Chunk, int16_t) 00477 READ_MEMORY_BLOCK(UInt32Chunk, uint16_t) 00478 READ_MEMORY_BLOCK(Int64Chunk, int64_t) 00479 READ_MEMORY_BLOCK(UInt64Chunk, uint64_t) 00480 #undef READ_MEMORY_BLOCK 00481 00482 #define WRITE_MEMORY_BLOCK(chunk_type, sg_type) \ 00483 void CProtobufFile::write_memory_block(const sg_type* vector, uint64_t len, int32_t num_messages) \ 00484 { \ 00485 chunk_type chunk; \ 00486 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00487 for (int32_t i=0; i<num_messages; i++) \ 00488 { \ 00489 \ 00490 int32_t num_elements_to_write=0; \ 00491 if ((len-(i+1)*elements_in_message)<=0) \ 00492 num_elements_to_write=len-i*elements_in_message; \ 00493 else \ 00494 num_elements_to_write=elements_in_message; \ 00495 \ 00496 for (int32_t j=0; j<num_elements_to_write; j++) \ 00497 chunk.add_data(vector[j+i*elements_in_message]); \ 00498 \ 00499 write_message(chunk); \ 00500 chunk.Clear(); \ 00501 } \ 00502 } 00503 00504 WRITE_MEMORY_BLOCK(Int32Chunk, int8_t) 00505 WRITE_MEMORY_BLOCK(UInt32Chunk, uint8_t) 00506 WRITE_MEMORY_BLOCK(UInt32Chunk, char) 00507 WRITE_MEMORY_BLOCK(Int32Chunk, int32_t) 00508 WRITE_MEMORY_BLOCK(UInt64Chunk, uint32_t) 00509 WRITE_MEMORY_BLOCK(Int64Chunk, int64_t) 00510 WRITE_MEMORY_BLOCK(UInt64Chunk, uint64_t) 00511 WRITE_MEMORY_BLOCK(Float32Chunk, float32_t) 00512 WRITE_MEMORY_BLOCK(Float64Chunk, float64_t) 00513 WRITE_MEMORY_BLOCK(Float64Chunk, floatmax_t) 00514 WRITE_MEMORY_BLOCK(Int32Chunk, int16_t) 00515 WRITE_MEMORY_BLOCK(UInt32Chunk, uint16_t) 00516 #undef WRITE_MEMORY_BLOCK 00517 00518 #define READ_SPARSE_MATRIX(chunk_type, sg_type) \ 00519 void CProtobufFile::read_sparse_matrix( \ 00520 SGSparseVector<sg_type>*& matrix, const SparseMatrixHeader& data_header) \ 00521 { \ 00522 matrix=SG_MALLOC(SGSparseVector<sg_type>, data_header.num_vectors()); \ 00523 \ 00524 UInt64Chunk feat_index_chunk; \ 00525 chunk_type entry_chunk; \ 00526 read_message(feat_index_chunk); \ 00527 read_message(entry_chunk); \ 00528 \ 00529 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00530 int32_t buffer_counter=0; \ 00531 for (uint32_t i=0; i<data_header.num_vectors(); i++) \ 00532 { \ 00533 matrix[i]=SGSparseVector<sg_type>(data_header.num_feat_entries(i)); \ 00534 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \ 00535 { \ 00536 matrix[i].features[j].feat_index=feat_index_chunk.data(buffer_counter); \ 00537 matrix[i].features[j].entry=entry_chunk.data(buffer_counter); \ 00538 buffer_counter++; \ 00539 \ 00540 if (buffer_counter==elements_in_message) \ 00541 { \ 00542 read_message(feat_index_chunk); \ 00543 read_message(entry_chunk); \ 00544 buffer_counter=0; \ 00545 } \ 00546 } \ 00547 } \ 00548 } 00549 00550 READ_SPARSE_MATRIX(BoolChunk, bool) 00551 READ_SPARSE_MATRIX(Int32Chunk, int8_t) 00552 READ_SPARSE_MATRIX(UInt32Chunk, uint8_t) 00553 READ_SPARSE_MATRIX(UInt32Chunk, char) 00554 READ_SPARSE_MATRIX(Int32Chunk, int32_t) 00555 READ_SPARSE_MATRIX(UInt32Chunk, uint32_t) 00556 READ_SPARSE_MATRIX(Float32Chunk, float32_t) 00557 READ_SPARSE_MATRIX(Float64Chunk, float64_t) 00558 READ_SPARSE_MATRIX(Float64Chunk, floatmax_t) 00559 READ_SPARSE_MATRIX(Int32Chunk, int16_t) 00560 READ_SPARSE_MATRIX(UInt32Chunk, uint16_t) 00561 READ_SPARSE_MATRIX(Int64Chunk, int64_t) 00562 READ_SPARSE_MATRIX(UInt64Chunk, uint64_t) 00563 #undef READ_SPARSE_MATRIX 00564 00565 #define WRITE_SPARSE_MATRIX(chunk_type, sg_type) \ 00566 void CProtobufFile::write_sparse_matrix( \ 00567 const SGSparseVector<sg_type>* matrix, int32_t num_vec) \ 00568 { \ 00569 UInt64Chunk feat_index_chunk; \ 00570 chunk_type entry_chunk; \ 00571 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00572 int32_t buffer_counter=0; \ 00573 for (int32_t i=0; i<num_vec; i++) \ 00574 { \ 00575 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \ 00576 { \ 00577 feat_index_chunk.add_data(matrix[i].features[j].feat_index); \ 00578 entry_chunk.add_data(matrix[i].features[j].entry); \ 00579 buffer_counter++; \ 00580 \ 00581 if (buffer_counter==elements_in_message) \ 00582 { \ 00583 write_message(feat_index_chunk); \ 00584 write_message(entry_chunk); \ 00585 feat_index_chunk.Clear(); \ 00586 entry_chunk.Clear(); \ 00587 buffer_counter=0; \ 00588 } \ 00589 } \ 00590 } \ 00591 \ 00592 if (buffer_counter!=0) \ 00593 { \ 00594 write_message(feat_index_chunk); \ 00595 write_message(entry_chunk); \ 00596 } \ 00597 } 00598 00599 WRITE_SPARSE_MATRIX(BoolChunk, bool) 00600 WRITE_SPARSE_MATRIX(Int32Chunk, int8_t) 00601 WRITE_SPARSE_MATRIX(UInt32Chunk, uint8_t) 00602 WRITE_SPARSE_MATRIX(UInt32Chunk, char) 00603 WRITE_SPARSE_MATRIX(Int32Chunk, int32_t) 00604 WRITE_SPARSE_MATRIX(UInt64Chunk, uint32_t) 00605 WRITE_SPARSE_MATRIX(Int64Chunk, int64_t) 00606 WRITE_SPARSE_MATRIX(UInt64Chunk, uint64_t) 00607 WRITE_SPARSE_MATRIX(Float32Chunk, float32_t) 00608 WRITE_SPARSE_MATRIX(Float64Chunk, float64_t) 00609 WRITE_SPARSE_MATRIX(Float64Chunk, floatmax_t) 00610 WRITE_SPARSE_MATRIX(Int32Chunk, int16_t) 00611 WRITE_SPARSE_MATRIX(UInt32Chunk, uint16_t) 00612 #undef WRITE_SPARSE_MATRIX 00613 00614 #define READ_STRING_LIST(chunk_type, sg_type) \ 00615 void CProtobufFile::read_string_list( \ 00616 SGString<sg_type>*& strings, const StringListHeader& data_header) \ 00617 { \ 00618 strings=SG_MALLOC(SGString<sg_type>, data_header.num_str()); \ 00619 \ 00620 chunk_type chunk; \ 00621 read_message(chunk); \ 00622 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00623 int32_t buffer_counter=0; \ 00624 for (uint32_t i=0; i<data_header.num_str(); i++) \ 00625 { \ 00626 strings[i]=SGString<sg_type>(data_header.str_len(i)); \ 00627 for (int32_t j=0; j<strings[i].slen; j++) \ 00628 { \ 00629 strings[i].string[j]=chunk.data(buffer_counter); \ 00630 buffer_counter++; \ 00631 \ 00632 if (buffer_counter==elements_in_message) \ 00633 { \ 00634 read_message(chunk); \ 00635 buffer_counter=0; \ 00636 } \ 00637 } \ 00638 } \ 00639 } 00640 00641 READ_STRING_LIST(Int32Chunk, int8_t) 00642 READ_STRING_LIST(UInt32Chunk, uint8_t) 00643 READ_STRING_LIST(UInt32Chunk, char) 00644 READ_STRING_LIST(Int32Chunk, int32_t) 00645 READ_STRING_LIST(UInt32Chunk, uint32_t) 00646 READ_STRING_LIST(Float32Chunk, float32_t) 00647 READ_STRING_LIST(Float64Chunk, float64_t) 00648 READ_STRING_LIST(Float64Chunk, floatmax_t) 00649 READ_STRING_LIST(Int32Chunk, int16_t) 00650 READ_STRING_LIST(UInt32Chunk, uint16_t) 00651 READ_STRING_LIST(Int64Chunk, int64_t) 00652 READ_STRING_LIST(UInt64Chunk, uint64_t) 00653 #undef READ_STRING_LIST 00654 00655 #define WRITE_STRING_LIST(chunk_type, sg_type) \ 00656 void CProtobufFile::write_string_list( \ 00657 const SGString<sg_type>* strings, int32_t num_str) \ 00658 { \ 00659 chunk_type chunk; \ 00660 int32_t elements_in_message=message_size/sizeof(sg_type); \ 00661 int32_t buffer_counter=0; \ 00662 for (int32_t i=0; i<num_str; i++) \ 00663 { \ 00664 for (int32_t j=0; j<strings[i].slen; j++) \ 00665 { \ 00666 chunk.add_data(strings[i].string[j]); \ 00667 buffer_counter++; \ 00668 \ 00669 if (buffer_counter==elements_in_message) \ 00670 { \ 00671 write_message(chunk); \ 00672 chunk.Clear(); \ 00673 buffer_counter=0; \ 00674 } \ 00675 } \ 00676 } \ 00677 \ 00678 if (buffer_counter!=0) \ 00679 write_message(chunk); \ 00680 } 00681 00682 WRITE_STRING_LIST(Int32Chunk, int8_t) 00683 WRITE_STRING_LIST(UInt32Chunk, uint8_t) 00684 WRITE_STRING_LIST(UInt32Chunk, char) 00685 WRITE_STRING_LIST(Int32Chunk, int32_t) 00686 WRITE_STRING_LIST(UInt64Chunk, uint32_t) 00687 WRITE_STRING_LIST(Int64Chunk, int64_t) 00688 WRITE_STRING_LIST(UInt64Chunk, uint64_t) 00689 WRITE_STRING_LIST(Float32Chunk, float32_t) 00690 WRITE_STRING_LIST(Float64Chunk, float64_t) 00691 WRITE_STRING_LIST(Float64Chunk, floatmax_t) 00692 WRITE_STRING_LIST(Int32Chunk, int16_t) 00693 WRITE_STRING_LIST(UInt32Chunk, uint16_t) 00694 #undef WRITE_STRING_LIST 00695 00696 #endif /* HAVE_PROTOBUF */