SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StreamingHashedDenseFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evangelos Anagnostopoulos
00008  * Copyright (C) 2013 Evangelos Anagnostopoulos
00009  */
00010 
00011 #include <shogun/features/streaming/StreamingHashedDenseFeatures.h>
00012 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
00013 #include <shogun/features/HashedDenseFeatures.h>
00014 
00015 namespace shogun
00016 {
00017 template <class ST>
00018 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures()
00019 {
00020     init(NULL, false, 0, 0, false, true);
00021 }
00022 
00023 template <class ST>
00024 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures(CStreamingFile* file,
00025     bool is_labelled, int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
00026 {
00027     init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
00028 }
00029 
00030 template <class ST>
00031 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures(CDenseFeatures<ST>* dot_features,
00032     int32_t d, bool use_quadr, bool keep_lin_terms, float64_t* lab)
00033 {
00034     ASSERT(dot_features);
00035 
00036     CStreamingFileFromDenseFeatures<ST>* file =
00037             new CStreamingFileFromDenseFeatures<ST>(dot_features, lab);
00038     bool is_labelled = (lab != NULL);
00039     int32_t size = 1024;
00040 
00041     init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
00042 
00043     parser.set_free_vectors_on_destruct(false);
00044     seekable=true;
00045 }
00046 
00047 template <class ST>
00048 CStreamingHashedDenseFeatures<ST>::~CStreamingHashedDenseFeatures()
00049 {
00050 }
00051 
00052 template <class ST>
00053 void CStreamingHashedDenseFeatures<ST>::init(CStreamingFile* file, bool is_labelled,
00054     int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
00055 {
00056     dim = d;
00057     use_quadratic = use_quadr;
00058     keep_linear_terms = keep_lin_terms;
00059 
00060     SG_ADD(&use_quadratic, "use_quadratic", "Whether to use quadratic features",
00061         MS_NOT_AVAILABLE);
00062     SG_ADD(&keep_linear_terms, "keep_linear_terms", "Whether to keep the linear terms or not",
00063         MS_NOT_AVAILABLE);
00064     SG_ADD(&dim, "dim", "Size of target dimension", MS_NOT_AVAILABLE);
00065 
00066     has_labels = is_labelled;
00067     if (file)
00068     {
00069         working_file = file;
00070         SG_REF(working_file);
00071         parser.init(file, is_labelled, size);
00072         seekable = false;
00073     }
00074     else
00075         file = NULL;
00076 
00077     set_read_functions();
00078     parser.set_free_vector_after_release(false);
00079 
00080     set_generic<ST>();
00081 }
00082 
00083 template <class ST>
00084 float32_t CStreamingHashedDenseFeatures<ST>::dot(CStreamingDotFeatures* df)
00085 {
00086     ASSERT(df);
00087     ASSERT(df->get_feature_type() == get_feature_type())
00088     ASSERT(strcmp(df->get_name(),get_name())==0)
00089 
00090     CStreamingHashedDenseFeatures<ST>* hdf = (CStreamingHashedDenseFeatures<ST>* ) df;
00091     return current_vector.sparse_dot(hdf->current_vector);
00092 }
00093 
00094 template <class ST>
00095 float32_t CStreamingHashedDenseFeatures<ST>::dense_dot(const float32_t* vec2, int32_t vec2_len)
00096 {
00097     ASSERT(vec2_len == dim);
00098 
00099     float32_t result = 0;
00100     for (index_t i=0; i<current_vector.num_feat_entries; i++)
00101         result += vec2[current_vector.features[i].feat_index] * current_vector.features[i].entry;
00102 
00103     return result;
00104 }
00105 
00106 template <class ST>
00107 void CStreamingHashedDenseFeatures<ST>::add_to_dense_vec(float32_t alpha, float32_t* vec2,
00108     int32_t vec2_len, bool abs_val)
00109 {
00110     ASSERT(vec2_len == dim);
00111 
00112     if (abs_val)
00113         alpha = CMath::abs(alpha);
00114 
00115     for (index_t i=0; i<current_vector.num_feat_entries; i++)
00116         vec2[current_vector.features[i].feat_index] += alpha * current_vector.features[i].entry;
00117 }
00118 
00119 template <class ST>
00120 int32_t CStreamingHashedDenseFeatures<ST>::get_dim_feature_space() const
00121 {
00122     return dim;
00123 }
00124 
00125 template <class ST>
00126 const char* CStreamingHashedDenseFeatures<ST>::get_name() const
00127 {
00128     return "StreamingHashedDenseFeatures";
00129 }
00130 
00131 template <class ST>
00132 int32_t CStreamingHashedDenseFeatures<ST>::get_num_vectors() const
00133 {
00134     return 1;
00135 }
00136 
00137 template <class ST>
00138 CFeatures* CStreamingHashedDenseFeatures<ST>::duplicate() const
00139 {
00140     return new CStreamingHashedDenseFeatures<ST>(*this);
00141 }
00142 
00143 template <class ST>
00144 void CStreamingHashedDenseFeatures<ST>::set_vector_reader()
00145 {
00146     parser.set_read_vector(&CStreamingFile::get_vector);
00147 }
00148 
00149 template <class ST>
00150 void CStreamingHashedDenseFeatures<ST>::set_vector_and_label_reader()
00151 {
00152     parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00153 }
00154 
00155 template <class ST>
00156 EFeatureType CStreamingHashedDenseFeatures<ST>::get_feature_type() const
00157 {
00158     return F_UINT;
00159 }
00160 
00161 template <class ST>
00162 EFeatureClass CStreamingHashedDenseFeatures<ST>::get_feature_class() const
00163 {
00164     return C_STREAMING_SPARSE;
00165 }
00166 
00167 template <class ST>
00168 void CStreamingHashedDenseFeatures<ST>::start_parser()
00169 {
00170     if (!parser.is_running())
00171         parser.start_parser();
00172 }
00173 
00174 template <class ST>
00175 void CStreamingHashedDenseFeatures<ST>::end_parser()
00176 {
00177     parser.end_parser();
00178 }
00179 
00180 template <class ST>
00181 float64_t CStreamingHashedDenseFeatures<ST>::get_label()
00182 {
00183     return current_label;
00184 }
00185 
00186 template <class ST>
00187 bool CStreamingHashedDenseFeatures<ST>::get_next_example()
00188 {
00189     SGVector<ST> tmp;
00190     if (parser.get_next_example(tmp.vector,
00191         tmp.vlen, current_label))
00192     {
00193         current_vector = CHashedDenseFeatures<ST>::hash_vector(tmp, dim, use_quadratic,
00194                 keep_linear_terms);
00195         tmp.vector = NULL;
00196         tmp.vlen = -1;
00197         return true;
00198     }
00199     return false;
00200 }
00201 
00202 template <class ST>
00203 void CStreamingHashedDenseFeatures<ST>::release_example()
00204 {
00205     parser.finalize_example();
00206 }
00207 
00208 template <class ST>
00209 int32_t CStreamingHashedDenseFeatures<ST>::get_num_features()
00210 {
00211     return dim;
00212 }
00213 
00214 template <class ST>
00215 SGSparseVector<ST> CStreamingHashedDenseFeatures<ST>::get_vector()
00216 {
00217     return current_vector;
00218 }
00219 
00220 template class CStreamingHashedDenseFeatures<bool>;
00221 template class CStreamingHashedDenseFeatures<char>;
00222 template class CStreamingHashedDenseFeatures<int8_t>;
00223 template class CStreamingHashedDenseFeatures<uint8_t>;
00224 template class CStreamingHashedDenseFeatures<int16_t>;
00225 template class CStreamingHashedDenseFeatures<uint16_t>;
00226 template class CStreamingHashedDenseFeatures<int32_t>;
00227 template class CStreamingHashedDenseFeatures<uint32_t>;
00228 template class CStreamingHashedDenseFeatures<int64_t>;
00229 template class CStreamingHashedDenseFeatures<uint64_t>;
00230 template class CStreamingHashedDenseFeatures<float32_t>;
00231 template class CStreamingHashedDenseFeatures<float64_t>;
00232 template class CStreamingHashedDenseFeatures<floatmax_t>;
00233 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation