SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Shashwat Lal Das 00008 * Written (W) 2012 Heiko Strathmann 00009 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/mathematics/Math.h> 00013 #include <shogun/features/streaming/StreamingDenseFeatures.h> 00014 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h> 00015 00016 namespace shogun 00017 { 00018 template<class T> 00019 CStreamingDenseFeatures<T>::CStreamingDenseFeatures() : 00020 CStreamingDotFeatures() 00021 { 00022 set_read_functions(); 00023 init(); 00024 parser.set_free_vector_after_release(false); 00025 } 00026 00027 template<class T> 00028 CStreamingDenseFeatures<T>::CStreamingDenseFeatures(CStreamingFile* file, 00029 bool is_labelled, int32_t size) : 00030 CStreamingDotFeatures() 00031 { 00032 init(file, is_labelled, size); 00033 set_read_functions(); 00034 parser.set_free_vector_after_release(false); 00035 } 00036 00037 template<class T> CStreamingDenseFeatures<T>::CStreamingDenseFeatures( 00038 CDenseFeatures<T>* dense_features, float64_t* lab) : 00039 CStreamingDotFeatures() 00040 { 00041 REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n") 00042 00043 CStreamingFileFromDenseFeatures<T>* file; 00044 bool is_labelled; 00045 int32_t size=1024; 00046 00047 is_labelled=lab; 00048 file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab); 00049 init(file, is_labelled, size); 00050 set_read_functions(); 00051 parser.set_free_vector_after_release(false); 00052 parser.set_free_vectors_on_destruct(false); 00053 seekable=true; 00054 } 00055 00056 template<class T> CStreamingDenseFeatures<T>::~CStreamingDenseFeatures() 00057 { 00058 SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name()) 00059 /* needed to prevent double free memory errors */ 00060 current_vector.vector=NULL; 00061 current_vector.vlen=0; 00062 SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name()) 00063 } 00064 00065 template<class T> void CStreamingDenseFeatures<T>::reset_stream() 00066 { 00067 if (seekable) 00068 { 00069 ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream(); 00070 parser.exit_parser(); 00071 parser.init(working_file, has_labels, 1); 00072 parser.set_free_vector_after_release(false); 00073 parser.start_parser(); 00074 } 00075 } 00076 00077 template<class T> float32_t CStreamingDenseFeatures<T>::dense_dot( 00078 const float32_t* vec2, int32_t vec2_len) 00079 { 00080 ASSERT(vec2_len==current_vector.vlen) 00081 float32_t result=0; 00082 00083 for (int32_t i=0; i<current_vector.vlen; i++) 00084 result+=current_vector[i]*vec2[i]; 00085 00086 return result; 00087 } 00088 00089 template<class T> float64_t CStreamingDenseFeatures<T>::dense_dot( 00090 const float64_t* vec2, int32_t vec2_len) 00091 { 00092 ASSERT(vec2_len==current_vector.vlen) 00093 float64_t result=0; 00094 00095 for (int32_t i=0; i<current_vector.vlen; i++) 00096 result+=current_vector[i]*vec2[i]; 00097 00098 return result; 00099 } 00100 00101 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec( 00102 float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val) 00103 { 00104 ASSERT(vec2_len==current_vector.vlen) 00105 00106 if (abs_val) 00107 { 00108 for (int32_t i=0; i<current_vector.vlen; i++) 00109 vec2[i]+=alpha*CMath::abs(current_vector[i]); 00110 } 00111 else 00112 { 00113 for (int32_t i=0; i<current_vector.vlen; i++) 00114 vec2[i]+=alpha*current_vector[i]; 00115 } 00116 } 00117 00118 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec( 00119 float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val) 00120 { 00121 ASSERT(vec2_len==current_vector.vlen) 00122 00123 if (abs_val) 00124 { 00125 for (int32_t i=0; i<current_vector.vlen; i++) 00126 vec2[i]+=alpha*CMath::abs(current_vector[i]); 00127 } 00128 else 00129 { 00130 for (int32_t i=0; i<current_vector.vlen; i++) 00131 vec2[i]+=alpha*current_vector[i]; 00132 } 00133 } 00134 00135 template<class T> int32_t CStreamingDenseFeatures<T>::get_nnz_features_for_vector() 00136 { 00137 return current_vector.vlen; 00138 } 00139 00140 template<class T> CFeatures* CStreamingDenseFeatures<T>::duplicate() const 00141 { 00142 return new CStreamingDenseFeatures<T>(*this); 00143 } 00144 00145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const 00146 { 00147 return 1; 00148 } 00149 00150 template<class T> 00151 void CStreamingDenseFeatures<T>::set_vector_reader() 00152 { 00153 parser.set_read_vector(&CStreamingFile::get_vector); 00154 } 00155 00156 template<class T> 00157 void CStreamingDenseFeatures<T>::set_vector_and_label_reader() 00158 { 00159 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label); 00160 } 00161 00162 #define GET_FEATURE_TYPE(f_type, sg_type) \ 00163 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \ 00164 { \ 00165 return f_type; \ 00166 } 00167 00168 GET_FEATURE_TYPE(F_BOOL, bool) 00169 GET_FEATURE_TYPE(F_CHAR, char) 00170 GET_FEATURE_TYPE(F_BYTE, uint8_t) 00171 GET_FEATURE_TYPE(F_BYTE, int8_t) 00172 GET_FEATURE_TYPE(F_SHORT, int16_t) 00173 GET_FEATURE_TYPE(F_WORD, uint16_t) 00174 GET_FEATURE_TYPE(F_INT, int32_t) 00175 GET_FEATURE_TYPE(F_UINT, uint32_t) 00176 GET_FEATURE_TYPE(F_LONG, int64_t) 00177 GET_FEATURE_TYPE(F_ULONG, uint64_t) 00178 GET_FEATURE_TYPE(F_SHORTREAL, float32_t) 00179 GET_FEATURE_TYPE(F_DREAL, float64_t) 00180 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t) 00181 #undef GET_FEATURE_TYPE 00182 00183 template<class T> 00184 void CStreamingDenseFeatures<T>::init() 00185 { 00186 working_file=NULL; 00187 seekable=false; 00188 00189 /* needed to prevent double free memory errors */ 00190 current_vector.vector=NULL; 00191 current_vector.vlen=-1; 00192 00193 set_generic<T>(); 00194 } 00195 00196 template<class T> 00197 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled, 00198 int32_t size) 00199 { 00200 init(); 00201 has_labels=is_labelled; 00202 working_file=file; 00203 SG_REF(working_file); 00204 parser.init(file, is_labelled, size); 00205 seekable=false; 00206 } 00207 00208 template<class T> 00209 void CStreamingDenseFeatures<T>::start_parser() 00210 { 00211 if (!parser.is_running()) 00212 parser.start_parser(); 00213 } 00214 00215 template<class T> 00216 void CStreamingDenseFeatures<T>::end_parser() 00217 { 00218 parser.end_parser(); 00219 } 00220 00221 template<class T> 00222 bool CStreamingDenseFeatures<T>::get_next_example() 00223 { 00224 bool ret_value; 00225 ret_value=(bool)parser.get_next_example(current_vector.vector, 00226 current_vector.vlen, current_label); 00227 00228 return ret_value; 00229 } 00230 00231 template<class T> 00232 SGVector<T> CStreamingDenseFeatures<T>::get_vector() 00233 { 00234 return current_vector; 00235 } 00236 00237 template<class T> 00238 float64_t CStreamingDenseFeatures<T>::get_label() 00239 { 00240 ASSERT(has_labels) 00241 00242 return current_label; 00243 } 00244 00245 template<class T> 00246 void CStreamingDenseFeatures<T>::release_example() 00247 { 00248 parser.finalize_example(); 00249 } 00250 00251 template<class T> 00252 int32_t CStreamingDenseFeatures<T>::get_dim_feature_space() const 00253 { 00254 return current_vector.vlen; 00255 } 00256 00257 template<class T> 00258 float32_t CStreamingDenseFeatures<T>::dot(CStreamingDotFeatures* df) 00259 { 00260 ASSERT(df) 00261 ASSERT(df->get_feature_type() == get_feature_type()) 00262 ASSERT(df->get_feature_class() == get_feature_class()) 00263 CStreamingDenseFeatures<T>* sf=(CStreamingDenseFeatures<T>*)df; 00264 00265 SGVector<T> other_vector=sf->get_vector(); 00266 00267 return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen); 00268 } 00269 00270 template<class T> 00271 float32_t CStreamingDenseFeatures<T>::dot(SGVector<T> sgvec1) 00272 { 00273 int32_t len1; 00274 len1=sgvec1.vlen; 00275 00276 if (len1!=current_vector.vlen) 00277 SG_ERROR( 00278 "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen); 00279 00280 return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1); 00281 } 00282 00283 template<class T> 00284 int32_t CStreamingDenseFeatures<T>::get_num_features() 00285 { 00286 return current_vector.vlen; 00287 } 00288 00289 template<class T> 00290 EFeatureClass CStreamingDenseFeatures<T>::get_feature_class() const 00291 { 00292 return C_STREAMING_DENSE; 00293 } 00294 00295 template<class T> 00296 CFeatures* CStreamingDenseFeatures<T>::get_streamed_features( 00297 index_t num_elements) 00298 { 00299 SG_DEBUG("entering %s(%p)::get_streamed_features(%d)\n", get_name(), this, 00300 num_elements); 00301 00302 /* init matrix empty since num_rows is not yet known */ 00303 SGMatrix<T> matrix; 00304 00305 for (index_t i=0; i<num_elements; ++i) 00306 { 00307 /* check if we run out of data */ 00308 if (!get_next_example()) 00309 { 00310 SG_WARNING("%s::get_streamed_features(): ran out of streaming " 00311 "data, reallocating matrix and returning!\n", get_name()); 00312 00313 /* allocating space for data so far */ 00314 SGMatrix<T> so_far(matrix.num_rows, i); 00315 00316 /* copy */ 00317 memcpy(so_far.matrix, matrix.matrix, 00318 so_far.num_rows*so_far.num_cols*sizeof(T)); 00319 00320 matrix=so_far; 00321 break; 00322 } 00323 else 00324 { 00325 /* allocate matrix memory during first run */ 00326 if (!matrix.matrix) 00327 { 00328 SG_DEBUG("%s::get_streamed_features(): allocating %dx%d matrix\n", 00329 get_name(), current_vector.vlen, num_elements); 00330 matrix=SGMatrix<T>(current_vector.vlen, num_elements); 00331 } 00332 00333 /* get an example from stream and copy to feature matrix */ 00334 SGVector<T> vec=get_vector(); 00335 00336 /* check for inconsistent dimensions */ 00337 if (vec.vlen!=matrix.num_rows) 00338 { 00339 SG_ERROR("%s::get_streamed_features(): streamed vectors have " 00340 "different dimensions. This is not allowed!\n", 00341 get_name()); 00342 } 00343 00344 /* copy vector into matrix */ 00345 memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector, 00346 vec.vlen*sizeof(T)); 00347 00348 /* evtl output vector */ 00349 if (sg_io->get_loglevel()==MSG_DEBUG) 00350 { 00351 SG_DEBUG("%d. ", i) 00352 vec.display_vector("streamed vector"); 00353 } 00354 00355 /* clean up */ 00356 release_example(); 00357 } 00358 00359 } 00360 00361 /* create new feature object from collected data */ 00362 CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix); 00363 00364 SG_DEBUG("leaving %s(%p)::get_streamed_features(%d) and returning %dx%d " 00365 "matrix\n", get_name(), this, num_elements, matrix.num_rows, 00366 matrix.num_cols); 00367 00368 return result; 00369 } 00370 00371 template class CStreamingDenseFeatures<bool> ; 00372 template class CStreamingDenseFeatures<char> ; 00373 template class CStreamingDenseFeatures<int8_t> ; 00374 template class CStreamingDenseFeatures<uint8_t> ; 00375 template class CStreamingDenseFeatures<int16_t> ; 00376 template class CStreamingDenseFeatures<uint16_t> ; 00377 template class CStreamingDenseFeatures<int32_t> ; 00378 template class CStreamingDenseFeatures<uint32_t> ; 00379 template class CStreamingDenseFeatures<int64_t> ; 00380 template class CStreamingDenseFeatures<uint64_t> ; 00381 template class CStreamingDenseFeatures<float32_t> ; 00382 template class CStreamingDenseFeatures<float64_t> ; 00383 template class CStreamingDenseFeatures<floatmax_t> ; 00384 }