SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Copyright (C) 2012 Soeren Sonnenburg 00008 */ 00009 00010 #include <shogun/features/BinnedDotFeatures.h> 00011 #include <shogun/base/Parameter.h> 00012 00013 using namespace shogun; 00014 00015 CBinnedDotFeatures::CBinnedDotFeatures(int32_t size) 00016 : CDotFeatures(size) 00017 { 00018 init(); 00019 } 00020 00021 00022 CBinnedDotFeatures::CBinnedDotFeatures(const CBinnedDotFeatures & orig) 00023 : CDotFeatures(orig), m_bins(orig.m_bins), m_fill(orig.m_fill), 00024 m_norm_one(orig.m_norm_one) 00025 { 00026 init(); 00027 } 00028 00029 CBinnedDotFeatures::CBinnedDotFeatures(CDenseFeatures<float64_t>* sf, SGMatrix<float64_t> bins) 00030 { 00031 init(); 00032 set_simple_features(sf); 00033 set_bins(bins); 00034 00035 } 00036 00037 CBinnedDotFeatures::~CBinnedDotFeatures() 00038 { 00039 SG_UNREF(m_features); 00040 } 00041 00042 int32_t CBinnedDotFeatures::get_dim_feature_space() const 00043 { 00044 return m_bins.num_rows*m_bins.num_cols; 00045 } 00046 00047 float64_t CBinnedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00048 { 00049 ASSERT(df) 00050 ASSERT(df->get_feature_type() == get_feature_type()) 00051 ASSERT(df->get_feature_class() == get_feature_class()) 00052 00053 float64_t result=0; 00054 double sum1=0; 00055 double sum2=0; 00056 00057 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1); 00058 SGVector<float64_t> vec2=((CBinnedDotFeatures*) df)->m_features->get_feature_vector(vec_idx2); 00059 00060 for (int32_t i=0; i<m_bins.num_cols; i++) 00061 { 00062 float64_t v1=vec1.vector[i]; 00063 float64_t v2=vec2.vector[i]; 00064 float64_t* col=m_bins.get_column_vector(i); 00065 00066 for (int32_t j=0; j<m_bins.num_rows; j++) 00067 { 00068 if (m_fill) 00069 { 00070 if (col[j]<=v1) 00071 { 00072 sum1+=1.0; 00073 00074 if (col[j]<=v2) 00075 { 00076 sum2+=1.0; 00077 result+=1.0; 00078 } 00079 } 00080 else 00081 { 00082 if (col[j]<=v2) 00083 sum2+=1.0; 00084 else 00085 break; 00086 } 00087 00088 /* the above is the fast version of 00089 if (col[j]<=v1 && col[j]<=v2) 00090 result+=1.0; 00091 00092 if (col[j]<=v1) 00093 sum1+=1.0; 00094 00095 if (col[j]<=v2) 00096 sum2+=1.0; 00097 */ 00098 } 00099 else 00100 { 00101 if (col[j]<=v1 && (j+1)<m_bins.num_rows && col[j+1]>v1 && 00102 col[j]<=v2 && (j+1)<m_bins.num_rows && col[j+1]>v2) 00103 { 00104 result+=1; 00105 break; 00106 } 00107 } 00108 } 00109 } 00110 m_features->free_feature_vector(vec1, vec_idx1); 00111 ((CBinnedDotFeatures*) df)->m_features->free_feature_vector(vec2, vec_idx2); 00112 00113 if (m_fill && m_norm_one && sum1!=0 && sum2!=0) 00114 result/=CMath::sqrt(sum1*sum2); 00115 00116 return result; 00117 00118 } 00119 00120 float64_t CBinnedDotFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00121 { 00122 assert_shape(vec2_len); 00123 00124 float64_t result=0; 00125 double sum=0; 00126 00127 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1); 00128 00129 00130 for (int32_t i=0; i<m_bins.num_cols; i++) 00131 { 00132 float64_t v=vec1.vector[i]; 00133 float64_t* col=m_bins.get_column_vector(i); 00134 int32_t offs=i*m_bins.num_rows; 00135 00136 for (int32_t j=0; j<m_bins.num_rows; j++) 00137 { 00138 if (m_fill) 00139 { 00140 if (col[j]<=v) 00141 { 00142 result+=vec2[offs+j]; 00143 sum+=1.0; 00144 } 00145 } 00146 else 00147 { 00148 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v) 00149 { 00150 result+=vec2[offs+j]; 00151 break; 00152 } 00153 } 00154 } 00155 } 00156 m_features->free_feature_vector(vec1, vec_idx1); 00157 00158 if (m_fill && m_norm_one && sum!=0) 00159 result/=CMath::sqrt(sum); 00160 00161 return result; 00162 } 00163 00164 void CBinnedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00165 { 00166 assert_shape(vec2_len); 00167 SGVector<float64_t> vec1=m_features->get_feature_vector(vec_idx1); 00168 00169 if (m_fill && m_norm_one) 00170 { 00171 float64_t alpha_correction=0; 00172 for (int32_t i=0; i<m_bins.num_cols; i++) 00173 { 00174 float64_t v=vec1.vector[i]; 00175 float64_t* col=m_bins.get_column_vector(i); 00176 00177 for (int32_t j=0; j<m_bins.num_rows; j++) 00178 { 00179 if (col[j]<=v) 00180 alpha_correction+=1.0; 00181 } 00182 } 00183 00184 if (alpha_correction==0.0) 00185 return; 00186 00187 alpha/=CMath::sqrt(alpha_correction); 00188 } 00189 00190 for (int32_t i=0; i<m_bins.num_cols; i++) 00191 { 00192 float64_t v=vec1.vector[i]; 00193 float64_t* col=m_bins.get_column_vector(i); 00194 int32_t offs=i*m_bins.num_rows; 00195 00196 for (int32_t j=0; j<m_bins.num_rows; j++) 00197 { 00198 if (m_fill) 00199 { 00200 if (col[j]<=v) 00201 vec2[offs+j]+=alpha; 00202 } 00203 else 00204 { 00205 if (col[j]<=v && (j+1)<m_bins.num_rows && col[j+1]>v) 00206 { 00207 vec2[offs+j]+=alpha; 00208 break; 00209 } 00210 } 00211 } 00212 } 00213 m_features->free_feature_vector(vec1, vec_idx1); 00214 } 00215 00216 void CBinnedDotFeatures::assert_shape(int32_t vec2_len) 00217 { 00218 if (m_bins.num_cols*m_bins.num_rows != vec2_len) 00219 { 00220 SG_ERROR("Bin matrix has shape (%d,%d) = %d entries, not matching vector" 00221 " length %d\n", m_bins.num_cols,m_bins.num_rows, 00222 m_bins.num_cols*m_bins.num_rows,vec2_len); 00223 } 00224 00225 if (m_features && m_bins.num_cols != m_features->get_num_features()) 00226 { 00227 SG_ERROR("Number of colums (%d) doesn't match number of features " 00228 "(%d)\n", m_bins.num_cols, m_features->get_num_features()); 00229 } 00230 00231 } 00232 00233 int32_t CBinnedDotFeatures::get_nnz_features_for_vector(int32_t num) 00234 { 00235 if (m_fill) 00236 return m_bins.num_rows; 00237 else 00238 return 1; 00239 } 00240 00241 void* CBinnedDotFeatures::get_feature_iterator(int32_t vector_index) 00242 { 00243 SG_NOTIMPLEMENTED 00244 return NULL; 00245 } 00246 00247 bool CBinnedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator) 00248 { 00249 SG_NOTIMPLEMENTED 00250 return false; 00251 } 00252 00253 void CBinnedDotFeatures::free_feature_iterator(void* iterator) 00254 { 00255 SG_NOTIMPLEMENTED 00256 } 00257 00258 bool CBinnedDotFeatures::get_fill() 00259 { 00260 return m_fill; 00261 } 00262 00263 void CBinnedDotFeatures::set_fill(bool fill) 00264 { 00265 m_fill=fill; 00266 } 00267 00268 bool CBinnedDotFeatures::get_norm_one() 00269 { 00270 return m_fill; 00271 } 00272 00273 void CBinnedDotFeatures::set_norm_one(bool norm_one) 00274 { 00275 m_norm_one=norm_one; 00276 } 00277 00278 void CBinnedDotFeatures::set_bins(SGMatrix<float64_t> bins) 00279 { 00280 m_bins=bins; 00281 } 00282 00283 SGMatrix<float64_t> CBinnedDotFeatures::get_bins() 00284 { 00285 return m_bins; 00286 } 00287 00288 void CBinnedDotFeatures::set_simple_features(CDenseFeatures<float64_t>* features) 00289 { 00290 SG_REF(features); 00291 m_features=features; 00292 } 00293 00294 CDenseFeatures<float64_t>* CBinnedDotFeatures::get_simple_features() 00295 { 00296 SG_REF(m_features); 00297 return m_features; 00298 } 00299 00300 void CBinnedDotFeatures::init() 00301 { 00302 m_features=NULL; 00303 m_fill=true; 00304 m_norm_one=false; 00305 } 00306 00307 const char* CBinnedDotFeatures::get_name() const 00308 { 00309 return "BinnedDotFeatures"; 00310 } 00311 00312 CFeatures* CBinnedDotFeatures::duplicate() const 00313 { 00314 return new CBinnedDotFeatures(*this); 00315 } 00316 00317 EFeatureType CBinnedDotFeatures::get_feature_type() const 00318 { 00319 return F_DREAL; 00320 } 00321 00322 00323 EFeatureClass CBinnedDotFeatures::get_feature_class() const 00324 { 00325 return C_BINNED_DOT; 00326 } 00327 00328 int32_t CBinnedDotFeatures::get_num_vectors() const 00329 { 00330 ASSERT(m_features) 00331 return m_features->get_num_vectors(); 00332 }