SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evangelos Anagnostopoulos 00008 * Copyright (C) 2013 Evangelos Anagnostopoulos 00009 */ 00010 00011 #include <shogun/features/streaming/StreamingHashedSparseFeatures.h> 00012 #include <shogun/features/HashedSparseFeatures.h> 00013 #include <shogun/io/streaming/StreamingFileFromSparseFeatures.h> 00014 00015 namespace shogun 00016 { 00017 00018 template <class ST> 00019 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures() 00020 { 00021 init(NULL, false, 0, 0, false, true); 00022 } 00023 00024 template <class ST> 00025 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures(CStreamingFile* file, 00026 bool is_labelled, int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms) 00027 { 00028 init(file, is_labelled, size, d, use_quadr, keep_lin_terms); 00029 } 00030 00031 template <class ST> 00032 CStreamingHashedSparseFeatures<ST>::CStreamingHashedSparseFeatures(CSparseFeatures<ST>* dot_features, 00033 int32_t d, bool use_quadr, bool keep_lin_terms, float64_t* lab) 00034 { 00035 ASSERT(dot_features); 00036 00037 CStreamingFileFromSparseFeatures<ST>* file = 00038 new CStreamingFileFromSparseFeatures<ST>(dot_features, lab); 00039 bool is_labelled = (lab != NULL); 00040 int32_t size = 1024; 00041 00042 init(file, is_labelled, size, d, use_quadr, keep_lin_terms); 00043 00044 parser.set_free_vectors_on_destruct(false); 00045 seekable=true; 00046 } 00047 00048 template <class ST> 00049 CStreamingHashedSparseFeatures<ST>::~CStreamingHashedSparseFeatures() 00050 { 00051 } 00052 00053 template <class ST> 00054 void CStreamingHashedSparseFeatures<ST>::init(CStreamingFile* file, bool is_labelled, 00055 int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms) 00056 { 00057 dim = d; 00058 SG_ADD(&dim, "dim", "Size of target dimension", MS_NOT_AVAILABLE); 00059 00060 use_quadratic = use_quadr; 00061 keep_linear_terms = keep_lin_terms; 00062 00063 SG_ADD(&use_quadratic, "use_quadratic", "Whether to use quadratic features", 00064 MS_NOT_AVAILABLE); 00065 SG_ADD(&keep_linear_terms, "keep_linear_terms", "Whether to keep the linear terms or not", 00066 MS_NOT_AVAILABLE); 00067 00068 has_labels = is_labelled; 00069 if (file) 00070 { 00071 working_file = file; 00072 SG_REF(working_file); 00073 parser.init(file, is_labelled, size); 00074 seekable = false; 00075 } 00076 else 00077 file = NULL; 00078 00079 set_read_functions(); 00080 parser.set_free_vector_after_release(false); 00081 00082 set_generic<ST>(); 00083 } 00084 00085 template <class ST> 00086 float32_t CStreamingHashedSparseFeatures<ST>::dot(CStreamingDotFeatures* df) 00087 { 00088 ASSERT(df); 00089 ASSERT(df->get_feature_type() == get_feature_type()) 00090 ASSERT(strcmp(df->get_name(),get_name())==0) 00091 00092 CStreamingHashedSparseFeatures<ST>* hdf = (CStreamingHashedSparseFeatures<ST>* ) df; 00093 return current_vector.sparse_dot(hdf->current_vector); 00094 } 00095 00096 template <class ST> 00097 float32_t CStreamingHashedSparseFeatures<ST>::dense_dot(const float32_t* vec2, int32_t vec2_len) 00098 { 00099 ASSERT(vec2_len == dim); 00100 00101 float32_t result = 0; 00102 for (index_t i=0; i<current_vector.num_feat_entries; i++) 00103 result += vec2[current_vector.features[i].feat_index] * current_vector.features[i].entry; 00104 00105 return result; 00106 } 00107 00108 template <class ST> 00109 void CStreamingHashedSparseFeatures<ST>::add_to_dense_vec(float32_t alpha, float32_t* vec2, 00110 int32_t vec2_len, bool abs_val) 00111 { 00112 ASSERT(vec2_len == dim); 00113 00114 if (abs_val) 00115 alpha = CMath::abs(alpha); 00116 00117 for (index_t i=0; i<current_vector.num_feat_entries; i++) 00118 vec2[current_vector.features[i].feat_index] += alpha * current_vector.features[i].entry; 00119 } 00120 00121 template <class ST> 00122 int32_t CStreamingHashedSparseFeatures<ST>::get_dim_feature_space() const 00123 { 00124 return dim; 00125 } 00126 00127 template <class ST> 00128 const char* CStreamingHashedSparseFeatures<ST>::get_name() const 00129 { 00130 return "StreamingHashedSparseFeatures"; 00131 } 00132 00133 template <class ST> 00134 int32_t CStreamingHashedSparseFeatures<ST>::get_num_vectors() const 00135 { 00136 return 1; 00137 } 00138 00139 template <class ST> 00140 CFeatures* CStreamingHashedSparseFeatures<ST>::duplicate() const 00141 { 00142 return new CStreamingHashedSparseFeatures<ST>(*this); 00143 } 00144 00145 template <class ST> 00146 void CStreamingHashedSparseFeatures<ST>::set_vector_reader() 00147 { 00148 SG_DEBUG("called inside set_vector_reader\n"); 00149 parser.set_read_vector(&CStreamingFile::get_sparse_vector); 00150 } 00151 00152 template <class ST> 00153 void CStreamingHashedSparseFeatures<ST>::set_vector_and_label_reader() 00154 { 00155 parser.set_read_vector_and_label(&CStreamingFile::get_sparse_vector_and_label); 00156 } 00157 00158 template <class ST> 00159 EFeatureType CStreamingHashedSparseFeatures<ST>::get_feature_type() const 00160 { 00161 return F_UINT; 00162 } 00163 00164 template <class ST> 00165 EFeatureClass CStreamingHashedSparseFeatures<ST>::get_feature_class() const 00166 { 00167 return C_STREAMING_SPARSE; 00168 } 00169 00170 template <class ST> 00171 void CStreamingHashedSparseFeatures<ST>::start_parser() 00172 { 00173 if (!parser.is_running()) 00174 parser.start_parser(); 00175 } 00176 00177 template <class ST> 00178 void CStreamingHashedSparseFeatures<ST>::end_parser() 00179 { 00180 parser.end_parser(); 00181 } 00182 00183 template <class ST> 00184 float64_t CStreamingHashedSparseFeatures<ST>::get_label() 00185 { 00186 return current_label; 00187 } 00188 00189 template <class ST> 00190 bool CStreamingHashedSparseFeatures<ST>::get_next_example() 00191 { 00192 SGSparseVector<ST> tmp; 00193 if (parser.get_next_example(tmp.features, 00194 tmp.num_feat_entries, current_label)) 00195 { 00196 current_vector = CHashedSparseFeatures<ST>::hash_vector(tmp, dim, 00197 use_quadratic, keep_linear_terms); 00198 tmp.features = NULL; 00199 tmp.num_feat_entries = -1; 00200 return true; 00201 } 00202 return false; 00203 } 00204 00205 template <class ST> 00206 void CStreamingHashedSparseFeatures<ST>::release_example() 00207 { 00208 parser.finalize_example(); 00209 } 00210 00211 template <class ST> 00212 int32_t CStreamingHashedSparseFeatures<ST>::get_num_features() 00213 { 00214 return dim; 00215 } 00216 00217 template <class ST> 00218 SGSparseVector<ST> CStreamingHashedSparseFeatures<ST>::get_vector() 00219 { 00220 return current_vector; 00221 } 00222 00223 template class CStreamingHashedSparseFeatures<bool>; 00224 template class CStreamingHashedSparseFeatures<char>; 00225 template class CStreamingHashedSparseFeatures<int8_t>; 00226 template class CStreamingHashedSparseFeatures<uint8_t>; 00227 template class CStreamingHashedSparseFeatures<int16_t>; 00228 template class CStreamingHashedSparseFeatures<uint16_t>; 00229 template class CStreamingHashedSparseFeatures<int32_t>; 00230 template class CStreamingHashedSparseFeatures<uint32_t>; 00231 template class CStreamingHashedSparseFeatures<int64_t>; 00232 template class CStreamingHashedSparseFeatures<uint64_t>; 00233 template class CStreamingHashedSparseFeatures<float32_t>; 00234 template class CStreamingHashedSparseFeatures<float64_t>; 00235 template class CStreamingHashedSparseFeatures<floatmax_t>; 00236 }