SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingHashedSparseFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evangelos Anagnostopoulos
00008  * Copyright (C) 2013 Evangelos Anagnostopoulos
00009  */
00010 
00011 #include <shogun/features/streaming/StreamingHashedSparseFeatures.h>
00012 #include <shogun/features/HashedSparseFeatures.h>
00013 #include <shogun/io/streaming/StreamingFileFromSparseFeatures.h>
00014 
00015 namespace shogun
00016 {
00017 
00018 template <class ST>
00019 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures()
00020 {
00021     init(NULL, false, 0, 0, false, true);
00022 }
00023 
00024 template <class ST>
00025 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures(CStreamingFile* file,
00026     bool is_labelled, int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
00027 {
00028     init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
00029 }
00030 
00031 template <class ST>
00032 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures(CSparseFeatures<ST>* dot_features,
00033     int32_t d, bool use_quadr, bool keep_lin_terms, float64_t* lab)
00034 {
00035     ASSERT(dot_features);
00036 
00037     CStreamingFileFromSparseFeatures<ST>* file =
00038             new CStreamingFileFromSparseFeatures<ST>(dot_features, lab);
00039     bool is_labelled = (lab != NULL);
00040     int32_t size = 1024;
00041 
00042     init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
00043 
00044     parser.set_free_vectors_on_destruct(false);
00045     seekable=true;
00046 }
00047 
00048 template <class ST>
00049 CStreamingHashedSparseFeatures<ST>::~CStreamingHashedSparseFeatures()
00050 {
00051 }
00052 
00053 template <class ST>
00054 void CStreamingHashedSparseFeatures<ST>::init(CStreamingFile* file, bool is_labelled,
00055     int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
00056 {
00057     dim = d;
00058     SG_ADD(&dim, "dim", "Size of target dimension", MS_NOT_AVAILABLE);
00059 
00060     use_quadratic = use_quadr;
00061     keep_linear_terms = keep_lin_terms;
00062 
00063     SG_ADD(&use_quadratic, "use_quadratic", "Whether to use quadratic features",
00064         MS_NOT_AVAILABLE);
00065     SG_ADD(&keep_linear_terms, "keep_linear_terms", "Whether to keep the linear terms or not",
00066         MS_NOT_AVAILABLE);
00067 
00068     has_labels = is_labelled;
00069     if (file)
00070     {
00071         working_file = file;
00072         SG_REF(working_file);
00073         parser.init(file, is_labelled, size);
00074         seekable = false;
00075     }
00076     else
00077         file = NULL;
00078 
00079     set_read_functions();
00080     parser.set_free_vector_after_release(false);
00081 
00082     set_generic<ST>();
00083 }
00084 
00085 template <class ST>
00086 float32_t CStreamingHashedSparseFeatures<ST>::dot(CStreamingDotFeatures* df)
00087 {
00088     ASSERT(df);
00089     ASSERT(df->get_feature_type() == get_feature_type())
00090     ASSERT(strcmp(df->get_name(),get_name())==0)
00091 
00092     CStreamingHashedSparseFeatures<ST>* hdf = (CStreamingHashedSparseFeatures<ST>* ) df;
00093     return current_vector.sparse_dot(hdf->current_vector);
00094 }
00095 
00096 template <class ST>
00097 float32_t CStreamingHashedSparseFeatures<ST>::dense_dot(const float32_t* vec2, int32_t vec2_len)
00098 {
00099     ASSERT(vec2_len == dim);
00100 
00101     float32_t result = 0;
00102     for (index_t i=0; i<current_vector.num_feat_entries; i++)
00103         result += vec2[current_vector.features[i].feat_index] * current_vector.features[i].entry;
00104 
00105     return result;
00106 }
00107 
00108 template <class ST>
00109 void CStreamingHashedSparseFeatures<ST>::add_to_dense_vec(float32_t alpha, float32_t* vec2,
00110     int32_t vec2_len, bool abs_val)
00111 {
00112     ASSERT(vec2_len == dim);
00113 
00114     if (abs_val)
00115         alpha = CMath::abs(alpha);
00116 
00117     for (index_t i=0; i<current_vector.num_feat_entries; i++)
00118         vec2[current_vector.features[i].feat_index] += alpha * current_vector.features[i].entry;
00119 }
00120 
00121 template <class ST>
00122 int32_t CStreamingHashedSparseFeatures<ST>::get_dim_feature_space() const
00123 {
00124     return dim;
00125 }
00126 
00127 template <class ST>
00128 const char* CStreamingHashedSparseFeatures<ST>::get_name() const
00129 {
00130     return "StreamingHashedSparseFeatures";
00131 }
00132 
00133 template <class ST>
00134 int32_t CStreamingHashedSparseFeatures<ST>::get_num_vectors() const
00135 {
00136     return 1;
00137 }
00138 
00139 template <class ST>
00140 CFeatures* CStreamingHashedSparseFeatures<ST>::duplicate() const
00141 {
00142     return new CStreamingHashedSparseFeatures<ST>(*this);
00143 }
00144 
00145 template <class ST>
00146 void CStreamingHashedSparseFeatures<ST>::set_vector_reader()
00147 {
00148     SG_DEBUG("called inside set_vector_reader\n");
00149     parser.set_read_vector(&CStreamingFile::get_sparse_vector);
00150 }
00151 
00152 template <class ST>
00153 void CStreamingHashedSparseFeatures<ST>::set_vector_and_label_reader()
00154 {
00155     parser.set_read_vector_and_label(&CStreamingFile::get_sparse_vector_and_label);
00156 }
00157 
00158 template <class ST>
00159 EFeatureType CStreamingHashedSparseFeatures<ST>::get_feature_type() const
00160 {
00161     return F_UINT;
00162 }
00163 
00164 template <class ST>
00165 EFeatureClass CStreamingHashedSparseFeatures<ST>::get_feature_class() const
00166 {
00167     return C_STREAMING_SPARSE;
00168 }
00169 
00170 template <class ST>
00171 void CStreamingHashedSparseFeatures<ST>::start_parser()
00172 {
00173     if (!parser.is_running())
00174         parser.start_parser();
00175 }
00176 
00177 template <class ST>
00178 void CStreamingHashedSparseFeatures<ST>::end_parser()
00179 {
00180     parser.end_parser();
00181 }
00182 
00183 template <class ST>
00184 float64_t CStreamingHashedSparseFeatures<ST>::get_label()
00185 {
00186     return current_label;
00187 }
00188 
00189 template <class ST>
00190 bool CStreamingHashedSparseFeatures<ST>::get_next_example()
00191 {
00192     SGSparseVector<ST> tmp;
00193     if (parser.get_next_example(tmp.features,
00194         tmp.num_feat_entries, current_label))
00195     {
00196         current_vector = CHashedSparseFeatures<ST>::hash_vector(tmp, dim,
00197                 use_quadratic, keep_linear_terms);
00198         tmp.features = NULL;
00199         tmp.num_feat_entries = -1;
00200         return true;
00201     }
00202     return false;
00203 }
00204 
00205 template <class ST>
00206 void CStreamingHashedSparseFeatures<ST>::release_example()
00207 {
00208     parser.finalize_example();
00209 }
00210 
00211 template <class ST>
00212 int32_t CStreamingHashedSparseFeatures<ST>::get_num_features()
00213 {
00214     return dim;
00215 }
00216 
00217 template <class ST>
00218 SGSparseVector<ST> CStreamingHashedSparseFeatures<ST>::get_vector()
00219 {
00220     return current_vector;
00221 }
00222 
00223 template class CStreamingHashedSparseFeatures<bool>;
00224 template class CStreamingHashedSparseFeatures<char>;
00225 template class CStreamingHashedSparseFeatures<int8_t>;
00226 template class CStreamingHashedSparseFeatures<uint8_t>;
00227 template class CStreamingHashedSparseFeatures<int16_t>;
00228 template class CStreamingHashedSparseFeatures<uint16_t>;
00229 template class CStreamingHashedSparseFeatures<int32_t>;
00230 template class CStreamingHashedSparseFeatures<uint32_t>;
00231 template class CStreamingHashedSparseFeatures<int64_t>;
00232 template class CStreamingHashedSparseFeatures<uint64_t>;
00233 template class CStreamingHashedSparseFeatures<float32_t>;
00234 template class CStreamingHashedSparseFeatures<float64_t>;
00235 template class CStreamingHashedSparseFeatures<floatmax_t>;
00236 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation