SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingDenseFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Written (W) 2012 Heiko Strathmann
00009  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/mathematics/Math.h>
00013 #include <shogun/features/streaming/StreamingDenseFeatures.h>
00014 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
00015 
00016 namespace shogun
00017 {
00018 template<class T>
00019 CStreamingDenseFeatures<T>::CStreamingDenseFeatures() :
00020         CStreamingDotFeatures()
00021 {
00022     set_read_functions();
00023     init();
00024     parser.set_free_vector_after_release(false);
00025 }
00026 
00027 template<class T>
00028 CStreamingDenseFeatures<T>::CStreamingDenseFeatures(CStreamingFile* file,
00029         bool is_labelled, int32_t size) :
00030         CStreamingDotFeatures()
00031 {
00032     init(file, is_labelled, size);
00033     set_read_functions();
00034     parser.set_free_vector_after_release(false);
00035 }
00036 
00037 template<class T> CStreamingDenseFeatures<T>::CStreamingDenseFeatures(
00038         CDenseFeatures<T>* dense_features, float64_t* lab) :
00039         CStreamingDotFeatures()
00040 {
00041     REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n")
00042 
00043     CStreamingFileFromDenseFeatures<T>* file;
00044     bool is_labelled;
00045     int32_t size=1024;
00046 
00047     is_labelled=lab;
00048     file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
00049     init(file, is_labelled, size);
00050     set_read_functions();
00051     parser.set_free_vector_after_release(false);
00052     parser.set_free_vectors_on_destruct(false);
00053     seekable=true;
00054 }
00055 
00056 template<class T> CStreamingDenseFeatures<T>::~CStreamingDenseFeatures()
00057 {
00058     SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name())
00059     /* needed to prevent double free memory errors */
00060     current_vector.vector=NULL;
00061     current_vector.vlen=0;
00062     SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name())
00063 }
00064 
00065 template<class T> void CStreamingDenseFeatures<T>::reset_stream()
00066 {
00067     if (seekable)
00068     {
00069         ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
00070         parser.exit_parser();
00071         parser.init(working_file, has_labels, 1);
00072         parser.set_free_vector_after_release(false);
00073         parser.start_parser();
00074     }
00075 }
00076 
00077 template<class T> float32_t CStreamingDenseFeatures<T>::dense_dot(
00078         const float32_t* vec2, int32_t vec2_len)
00079 {
00080     ASSERT(vec2_len==current_vector.vlen)
00081     float32_t result=0;
00082 
00083     for (int32_t i=0; i<current_vector.vlen; i++)
00084         result+=current_vector[i]*vec2[i];
00085 
00086     return result;
00087 }
00088 
00089 template<class T> float64_t CStreamingDenseFeatures<T>::dense_dot(
00090         const float64_t* vec2, int32_t vec2_len)
00091 {
00092     ASSERT(vec2_len==current_vector.vlen)
00093     float64_t result=0;
00094 
00095     for (int32_t i=0; i<current_vector.vlen; i++)
00096         result+=current_vector[i]*vec2[i];
00097 
00098     return result;
00099 }
00100 
00101 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00102         float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
00103 {
00104     ASSERT(vec2_len==current_vector.vlen)
00105 
00106     if (abs_val)
00107     {
00108         for (int32_t i=0; i<current_vector.vlen; i++)
00109             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00110     }
00111     else
00112     {
00113         for (int32_t i=0; i<current_vector.vlen; i++)
00114             vec2[i]+=alpha*current_vector[i];
00115     }
00116 }
00117 
00118 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00119         float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
00120 {
00121     ASSERT(vec2_len==current_vector.vlen)
00122 
00123     if (abs_val)
00124     {
00125         for (int32_t i=0; i<current_vector.vlen; i++)
00126             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00127     }
00128     else
00129     {
00130         for (int32_t i=0; i<current_vector.vlen; i++)
00131             vec2[i]+=alpha*current_vector[i];
00132     }
00133 }
00134 
00135 template<class T> int32_t CStreamingDenseFeatures<T>::get_nnz_features_for_vector()
00136 {
00137     return current_vector.vlen;
00138 }
00139 
00140 template<class T> CFeatures* CStreamingDenseFeatures<T>::duplicate() const
00141 {
00142     return new CStreamingDenseFeatures<T>(*this);
00143 }
00144 
00145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
00146 {
00147     return 1;
00148 }
00149 
00150 template<class T>
00151 void CStreamingDenseFeatures<T>::set_vector_reader()
00152 {
00153     parser.set_read_vector(&CStreamingFile::get_vector);
00154 }
00155 
00156 template<class T>
00157 void CStreamingDenseFeatures<T>::set_vector_and_label_reader()
00158 {
00159     parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00160 }
00161 
00162 #define GET_FEATURE_TYPE(f_type, sg_type)               \
00163 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
00164 {                                   \
00165     return f_type;                          \
00166 }
00167 
00168 GET_FEATURE_TYPE(F_BOOL, bool)
00169 GET_FEATURE_TYPE(F_CHAR, char)
00170 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00171 GET_FEATURE_TYPE(F_BYTE, int8_t)
00172 GET_FEATURE_TYPE(F_SHORT, int16_t)
00173 GET_FEATURE_TYPE(F_WORD, uint16_t)
00174 GET_FEATURE_TYPE(F_INT, int32_t)
00175 GET_FEATURE_TYPE(F_UINT, uint32_t)
00176 GET_FEATURE_TYPE(F_LONG, int64_t)
00177 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00178 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00179 GET_FEATURE_TYPE(F_DREAL, float64_t)
00180 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00181 #undef GET_FEATURE_TYPE
00182 
00183 template<class T>
00184 void CStreamingDenseFeatures<T>::init()
00185 {
00186     working_file=NULL;
00187     seekable=false;
00188 
00189     /* needed to prevent double free memory errors */
00190     current_vector.vector=NULL;
00191     current_vector.vlen=-1;
00192 
00193     set_generic<T>();
00194 }
00195 
00196 template<class T>
00197 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
00198         int32_t size)
00199 {
00200     init();
00201     has_labels=is_labelled;
00202     working_file=file;
00203     SG_REF(working_file);
00204     parser.init(file, is_labelled, size);
00205     seekable=false;
00206 }
00207 
00208 template<class T>
00209 void CStreamingDenseFeatures<T>::start_parser()
00210 {
00211     if (!parser.is_running())
00212         parser.start_parser();
00213 }
00214 
00215 template<class T>
00216 void CStreamingDenseFeatures<T>::end_parser()
00217 {
00218     parser.end_parser();
00219 }
00220 
00221 template<class T>
00222 bool CStreamingDenseFeatures<T>::get_next_example()
00223 {
00224     bool ret_value;
00225     ret_value=(bool)parser.get_next_example(current_vector.vector,
00226             current_vector.vlen, current_label);
00227 
00228     return ret_value;
00229 }
00230 
00231 template<class T>
00232 SGVector<T> CStreamingDenseFeatures<T>::get_vector()
00233 {
00234     return current_vector;
00235 }
00236 
00237 template<class T>
00238 float64_t CStreamingDenseFeatures<T>::get_label()
00239 {
00240     ASSERT(has_labels)
00241 
00242     return current_label;
00243 }
00244 
00245 template<class T>
00246 void CStreamingDenseFeatures<T>::release_example()
00247 {
00248     parser.finalize_example();
00249 }
00250 
00251 template<class T>
00252 int32_t CStreamingDenseFeatures<T>::get_dim_feature_space() const
00253 {
00254     return current_vector.vlen;
00255 }
00256 
00257 template<class T>
00258 float32_t CStreamingDenseFeatures<T>::dot(CStreamingDotFeatures* df)
00259 {
00260     ASSERT(df)
00261     ASSERT(df->get_feature_type() == get_feature_type())
00262     ASSERT(df->get_feature_class() == get_feature_class())
00263     CStreamingDenseFeatures<T>* sf=(CStreamingDenseFeatures<T>*)df;
00264 
00265     SGVector<T> other_vector=sf->get_vector();
00266 
00267     return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
00268 }
00269 
00270 template<class T>
00271 float32_t CStreamingDenseFeatures<T>::dot(SGVector<T> sgvec1)
00272 {
00273     int32_t len1;
00274     len1=sgvec1.vlen;
00275 
00276     if (len1!=current_vector.vlen)
00277         SG_ERROR(
00278                 "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
00279 
00280     return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1);
00281 }
00282 
00283 template<class T>
00284 int32_t CStreamingDenseFeatures<T>::get_num_features()
00285 {
00286     return current_vector.vlen;
00287 }
00288 
00289 template<class T>
00290 EFeatureClass CStreamingDenseFeatures<T>::get_feature_class() const
00291 {
00292     return C_STREAMING_DENSE;
00293 }
00294 
00295 template<class T>
00296 CFeatures* CStreamingDenseFeatures<T>::get_streamed_features(
00297         index_t num_elements)
00298 {
00299     SG_DEBUG("entering %s(%p)::get_streamed_features(%d)\n", get_name(), this,
00300             num_elements);
00301 
00302     /* init matrix empty since num_rows is not yet known */
00303     SGMatrix<T> matrix;
00304 
00305     for (index_t i=0; i<num_elements; ++i)
00306     {
00307         /* check if we run out of data */
00308         if (!get_next_example())
00309         {
00310             SG_WARNING("%s::get_streamed_features(): ran out of streaming "
00311                     "data, reallocating matrix and returning!\n", get_name());
00312 
00313             /* allocating space for data so far */
00314             SGMatrix<T> so_far(matrix.num_rows, i);
00315 
00316             /* copy */
00317             memcpy(so_far.matrix, matrix.matrix,
00318                     so_far.num_rows*so_far.num_cols*sizeof(T));
00319 
00320             matrix=so_far;
00321             break;
00322         }
00323         else
00324         {
00325             /* allocate matrix memory during first run */
00326             if (!matrix.matrix)
00327             {
00328                 SG_DEBUG("%s::get_streamed_features(): allocating %dx%d matrix\n",
00329                         get_name(), current_vector.vlen, num_elements);
00330                 matrix=SGMatrix<T>(current_vector.vlen, num_elements);
00331             }
00332 
00333             /* get an example from stream and copy to feature matrix */
00334             SGVector<T> vec=get_vector();
00335 
00336             /* check for inconsistent dimensions */
00337             if (vec.vlen!=matrix.num_rows)
00338             {
00339                 SG_ERROR("%s::get_streamed_features(): streamed vectors have "
00340                         "different dimensions. This is not allowed!\n",
00341                         get_name());
00342             }
00343 
00344             /* copy vector into matrix */
00345             memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
00346                     vec.vlen*sizeof(T));
00347 
00348             /* evtl output vector */
00349             if (sg_io->get_loglevel()==MSG_DEBUG)
00350             {
00351                 SG_DEBUG("%d. ", i)
00352                 vec.display_vector("streamed vector");
00353             }
00354 
00355             /* clean up */
00356             release_example();
00357         }
00358 
00359     }
00360 
00361     /* create new feature object from collected data */
00362     CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
00363 
00364     SG_DEBUG("leaving %s(%p)::get_streamed_features(%d) and returning %dx%d "
00365             "matrix\n", get_name(), this, num_elements, matrix.num_rows,
00366             matrix.num_cols);
00367 
00368     return result;
00369 }
00370 
00371 template class CStreamingDenseFeatures<bool> ;
00372 template class CStreamingDenseFeatures<char> ;
00373 template class CStreamingDenseFeatures<int8_t> ;
00374 template class CStreamingDenseFeatures<uint8_t> ;
00375 template class CStreamingDenseFeatures<int16_t> ;
00376 template class CStreamingDenseFeatures<uint16_t> ;
00377 template class CStreamingDenseFeatures<int32_t> ;
00378 template class CStreamingDenseFeatures<uint32_t> ;
00379 template class CStreamingDenseFeatures<int64_t> ;
00380 template class CStreamingDenseFeatures<uint64_t> ;
00381 template class CStreamingDenseFeatures<float32_t> ;
00382 template class CStreamingDenseFeatures<float64_t> ;
00383 template class CStreamingDenseFeatures<floatmax_t> ;
00384 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation