SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evangelos Anagnostopoulos 00008 * Copyright (C) 2013 Evangelos Anagnostopoulos 00009 */ 00010 00011 #include <shogun/features/streaming/StreamingHashedDenseFeatures.h> 00012 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h> 00013 #include <shogun/features/HashedDenseFeatures.h> 00014 00015 namespace shogun 00016 { 00017 template <class ST> 00018 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures() 00019 { 00020 init(NULL, false, 0, 0, false, true); 00021 } 00022 00023 template <class ST> 00024 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures(CStreamingFile* file, 00025 bool is_labelled, int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms) 00026 { 00027 init(file, is_labelled, size, d, use_quadr, keep_lin_terms); 00028 } 00029 00030 template <class ST> 00031 CStreamingHashedDenseFeatures<ST>::CStreamingHashedDenseFeatures(CDenseFeatures<ST>* dot_features, 00032 int32_t d, bool use_quadr, bool keep_lin_terms, float64_t* lab) 00033 { 00034 ASSERT(dot_features); 00035 00036 CStreamingFileFromDenseFeatures<ST>* file = 00037 new CStreamingFileFromDenseFeatures<ST>(dot_features, lab); 00038 bool is_labelled = (lab != NULL); 00039 int32_t size = 1024; 00040 00041 init(file, is_labelled, size, d, use_quadr, keep_lin_terms); 00042 00043 parser.set_free_vectors_on_destruct(false); 00044 seekable=true; 00045 } 00046 00047 template <class ST> 00048 CStreamingHashedDenseFeatures<ST>::~CStreamingHashedDenseFeatures() 00049 { 00050 } 00051 00052 template <class ST> 00053 void CStreamingHashedDenseFeatures<ST>::init(CStreamingFile* file, bool is_labelled, 00054 int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms) 00055 { 00056 dim = d; 00057 use_quadratic = use_quadr; 00058 keep_linear_terms = keep_lin_terms; 00059 00060 SG_ADD(&use_quadratic, "use_quadratic", "Whether to use quadratic features", 00061 MS_NOT_AVAILABLE); 00062 SG_ADD(&keep_linear_terms, "keep_linear_terms", "Whether to keep the linear terms or not", 00063 MS_NOT_AVAILABLE); 00064 SG_ADD(&dim, "dim", "Size of target dimension", MS_NOT_AVAILABLE); 00065 00066 has_labels = is_labelled; 00067 if (file) 00068 { 00069 working_file = file; 00070 SG_REF(working_file); 00071 parser.init(file, is_labelled, size); 00072 seekable = false; 00073 } 00074 else 00075 file = NULL; 00076 00077 set_read_functions(); 00078 parser.set_free_vector_after_release(false); 00079 00080 set_generic<ST>(); 00081 } 00082 00083 template <class ST> 00084 float32_t CStreamingHashedDenseFeatures<ST>::dot(CStreamingDotFeatures* df) 00085 { 00086 ASSERT(df); 00087 ASSERT(df->get_feature_type() == get_feature_type()) 00088 ASSERT(strcmp(df->get_name(),get_name())==0) 00089 00090 CStreamingHashedDenseFeatures<ST>* hdf = (CStreamingHashedDenseFeatures<ST>* ) df; 00091 return current_vector.sparse_dot(hdf->current_vector); 00092 } 00093 00094 template <class ST> 00095 float32_t CStreamingHashedDenseFeatures<ST>::dense_dot(const float32_t* vec2, int32_t vec2_len) 00096 { 00097 ASSERT(vec2_len == dim); 00098 00099 float32_t result = 0; 00100 for (index_t i=0; i<current_vector.num_feat_entries; i++) 00101 result += vec2[current_vector.features[i].feat_index] * current_vector.features[i].entry; 00102 00103 return result; 00104 } 00105 00106 template <class ST> 00107 void CStreamingHashedDenseFeatures<ST>::add_to_dense_vec(float32_t alpha, float32_t* vec2, 00108 int32_t vec2_len, bool abs_val) 00109 { 00110 ASSERT(vec2_len == dim); 00111 00112 if (abs_val) 00113 alpha = CMath::abs(alpha); 00114 00115 for (index_t i=0; i<current_vector.num_feat_entries; i++) 00116 vec2[current_vector.features[i].feat_index] += alpha * current_vector.features[i].entry; 00117 } 00118 00119 template <class ST> 00120 int32_t CStreamingHashedDenseFeatures<ST>::get_dim_feature_space() const 00121 { 00122 return dim; 00123 } 00124 00125 template <class ST> 00126 const char* CStreamingHashedDenseFeatures<ST>::get_name() const 00127 { 00128 return "StreamingHashedDenseFeatures"; 00129 } 00130 00131 template <class ST> 00132 int32_t CStreamingHashedDenseFeatures<ST>::get_num_vectors() const 00133 { 00134 return 1; 00135 } 00136 00137 template <class ST> 00138 CFeatures* CStreamingHashedDenseFeatures<ST>::duplicate() const 00139 { 00140 return new CStreamingHashedDenseFeatures<ST>(*this); 00141 } 00142 00143 template <class ST> 00144 void CStreamingHashedDenseFeatures<ST>::set_vector_reader() 00145 { 00146 parser.set_read_vector(&CStreamingFile::get_vector); 00147 } 00148 00149 template <class ST> 00150 void CStreamingHashedDenseFeatures<ST>::set_vector_and_label_reader() 00151 { 00152 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label); 00153 } 00154 00155 template <class ST> 00156 EFeatureType CStreamingHashedDenseFeatures<ST>::get_feature_type() const 00157 { 00158 return F_UINT; 00159 } 00160 00161 template <class ST> 00162 EFeatureClass CStreamingHashedDenseFeatures<ST>::get_feature_class() const 00163 { 00164 return C_STREAMING_SPARSE; 00165 } 00166 00167 template <class ST> 00168 void CStreamingHashedDenseFeatures<ST>::start_parser() 00169 { 00170 if (!parser.is_running()) 00171 parser.start_parser(); 00172 } 00173 00174 template <class ST> 00175 void CStreamingHashedDenseFeatures<ST>::end_parser() 00176 { 00177 parser.end_parser(); 00178 } 00179 00180 template <class ST> 00181 float64_t CStreamingHashedDenseFeatures<ST>::get_label() 00182 { 00183 return current_label; 00184 } 00185 00186 template <class ST> 00187 bool CStreamingHashedDenseFeatures<ST>::get_next_example() 00188 { 00189 SGVector<ST> tmp; 00190 if (parser.get_next_example(tmp.vector, 00191 tmp.vlen, current_label)) 00192 { 00193 current_vector = CHashedDenseFeatures<ST>::hash_vector(tmp, dim, use_quadratic, 00194 keep_linear_terms); 00195 tmp.vector = NULL; 00196 tmp.vlen = -1; 00197 return true; 00198 } 00199 return false; 00200 } 00201 00202 template <class ST> 00203 void CStreamingHashedDenseFeatures<ST>::release_example() 00204 { 00205 parser.finalize_example(); 00206 } 00207 00208 template <class ST> 00209 int32_t CStreamingHashedDenseFeatures<ST>::get_num_features() 00210 { 00211 return dim; 00212 } 00213 00214 template <class ST> 00215 SGSparseVector<ST> CStreamingHashedDenseFeatures<ST>::get_vector() 00216 { 00217 return current_vector; 00218 } 00219 00220 template class CStreamingHashedDenseFeatures<bool>; 00221 template class CStreamingHashedDenseFeatures<char>; 00222 template class CStreamingHashedDenseFeatures<int8_t>; 00223 template class CStreamingHashedDenseFeatures<uint8_t>; 00224 template class CStreamingHashedDenseFeatures<int16_t>; 00225 template class CStreamingHashedDenseFeatures<uint16_t>; 00226 template class CStreamingHashedDenseFeatures<int32_t>; 00227 template class CStreamingHashedDenseFeatures<uint32_t>; 00228 template class CStreamingHashedDenseFeatures<int64_t>; 00229 template class CStreamingHashedDenseFeatures<uint64_t>; 00230 template class CStreamingHashedDenseFeatures<float32_t>; 00231 template class CStreamingHashedDenseFeatures<float64_t>; 00232 template class CStreamingHashedDenseFeatures<floatmax_t>; 00233 }