SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009-2010 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #include <shogun/features/CombinedDotFeatures.h> 00013 #include <shogun/io/SGIO.h> 00014 #include <shogun/mathematics/Math.h> 00015 00016 using namespace shogun; 00017 00018 CCombinedDotFeatures::CCombinedDotFeatures() : CDotFeatures() 00019 { 00020 init(); 00021 00022 feature_array=new CDynamicObjectArray(); 00023 update_dim_feature_space_and_num_vec(); 00024 } 00025 00026 CCombinedDotFeatures::CCombinedDotFeatures(const CCombinedDotFeatures & orig) 00027 : CDotFeatures(orig), num_vectors(orig.num_vectors), 00028 num_dimensions(orig.num_dimensions) 00029 { 00030 init(); 00031 00032 feature_array=new CDynamicObjectArray(); 00033 } 00034 00035 CFeatures* CCombinedDotFeatures::duplicate() const 00036 { 00037 return new CCombinedDotFeatures(*this); 00038 } 00039 00040 CCombinedDotFeatures::~CCombinedDotFeatures() 00041 { 00042 SG_UNREF(feature_array); 00043 } 00044 00045 void CCombinedDotFeatures::list_feature_objs() 00046 { 00047 SG_INFO("BEGIN COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions) 00048 this->list_feature_obj(); 00049 00050 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00051 { 00052 CDotFeatures* f = get_feature_obj(f_idx); 00053 f->list_feature_obj(); 00054 SG_UNREF(f); 00055 } 00056 00057 SG_INFO("END COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions) 00058 this->list_feature_obj(); 00059 } 00060 00061 void CCombinedDotFeatures::update_dim_feature_space_and_num_vec() 00062 { 00063 int32_t dim=0; 00064 int32_t vec=-1; 00065 00066 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00067 { 00068 CDotFeatures* f = get_feature_obj(f_idx); 00069 dim+= f->get_dim_feature_space(); 00070 if (vec==-1) 00071 vec=f->get_num_vectors(); 00072 else if (vec != f->get_num_vectors()) 00073 { 00074 f->list_feature_obj(); 00075 SG_ERROR("Number of vectors (%d) mismatches in above feature obj (%d)\n", vec, f->get_num_vectors()) 00076 } 00077 00078 SG_UNREF(f); 00079 } 00080 00081 num_dimensions=dim; 00082 num_vectors=vec; 00083 SG_DEBUG("vecs=%d, dims=%d\n", num_vectors, num_dimensions) 00084 } 00085 00086 float64_t CCombinedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00087 { 00088 float64_t result=0; 00089 00090 ASSERT(df) 00091 ASSERT(df->get_feature_type() == get_feature_type()) 00092 ASSERT(df->get_feature_class() == get_feature_class()) 00093 CCombinedDotFeatures* cf = (CCombinedDotFeatures*) df; 00094 00095 // check that both have same number of feature objects inside 00096 ASSERT(get_num_feature_obj()==cf->get_num_feature_obj()) 00097 00098 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00099 { 00100 CDotFeatures* f1 = get_feature_obj(f_idx); 00101 CDotFeatures* f2 = cf->get_feature_obj(f_idx); 00102 00103 ASSERT(f1) 00104 ASSERT(f2) 00105 00106 result += f1->dot(vec_idx1, f2,vec_idx2) * 00107 f1->get_combined_feature_weight() * 00108 f2->get_combined_feature_weight(); 00109 00110 SG_UNREF(f1); 00111 SG_UNREF(f2); 00112 } 00113 00114 return result; 00115 } 00116 00117 float64_t CCombinedDotFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00118 { 00119 float64_t result=0; 00120 00121 uint32_t offs=0; 00122 00123 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00124 { 00125 CDotFeatures* f = get_feature_obj(f_idx); 00126 int32_t dim = f->get_dim_feature_space(); 00127 result += f->dense_dot(vec_idx1, vec2+offs, dim)*f->get_combined_feature_weight(); 00128 offs += dim; 00129 00130 SG_UNREF(f); 00131 } 00132 00133 return result; 00134 } 00135 00136 void CCombinedDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00137 { 00138 if (stop<=start) 00139 return; 00140 ASSERT(dim==num_dimensions) 00141 00142 uint32_t offs=0; 00143 bool first=true; 00144 int32_t num=stop-start; 00145 float64_t* tmp=SG_MALLOC(float64_t, num); 00146 00147 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00148 { 00149 CDotFeatures* f = get_feature_obj(f_idx); 00150 int32_t f_dim = f->get_dim_feature_space(); 00151 if (first) 00152 { 00153 f->dense_dot_range(output, start, stop, alphas, vec+offs, f_dim, b); 00154 first=false; 00155 } 00156 else 00157 { 00158 f->dense_dot_range(tmp, start, stop, alphas, vec+offs, f_dim, b); 00159 for (int32_t i=0; i<num; i++) 00160 output[i]+=tmp[i]; 00161 } 00162 offs += f_dim; 00163 00164 SG_UNREF(f); 00165 } 00166 SG_FREE(tmp); 00167 } 00168 00169 void CCombinedDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b) 00170 { 00171 if (num<=0) 00172 return; 00173 ASSERT(dim==num_dimensions) 00174 00175 uint32_t offs=0; 00176 bool first=true; 00177 float64_t* tmp=SG_MALLOC(float64_t, num); 00178 00179 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00180 { 00181 CDotFeatures* f = get_feature_obj(f_idx); 00182 int32_t f_dim = f->get_dim_feature_space(); 00183 if (first) 00184 { 00185 f->dense_dot_range_subset(sub_index, num, output, alphas, vec+offs, f_dim, b); 00186 first=false; 00187 } 00188 else 00189 { 00190 f->dense_dot_range_subset(sub_index, num, tmp, alphas, vec+offs, f_dim, b); 00191 for (int32_t i=0; i<num; i++) 00192 output[i]+=tmp[i]; 00193 } 00194 offs += f_dim; 00195 00196 SG_UNREF(f); 00197 } 00198 SG_FREE(tmp); 00199 } 00200 00201 void CCombinedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00202 { 00203 uint32_t offs=0; 00204 00205 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00206 { 00207 CDotFeatures* f = get_feature_obj(f_idx); 00208 int32_t dim = f->get_dim_feature_space(); 00209 f->add_to_dense_vec(alpha*f->get_combined_feature_weight(), vec_idx1, vec2+offs, dim, abs_val); 00210 offs += dim; 00211 00212 SG_UNREF(f); 00213 } 00214 } 00215 00216 void* CCombinedDotFeatures::get_feature_iterator(int32_t vector_index) 00217 { 00218 combined_feature_iterator* it=SG_MALLOC(combined_feature_iterator, 1); 00219 00220 it->f=get_feature_obj(0); 00221 iterator_idx=0; 00222 it->iterator=it->f->get_feature_iterator(vector_index); 00223 it->vector_index=vector_index; 00224 return it; 00225 } 00226 00227 bool CCombinedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator) 00228 { 00229 ASSERT(iterator) 00230 combined_feature_iterator* it = (combined_feature_iterator*) iterator; 00231 00232 while (it->f) 00233 { 00234 if (it->f->get_next_feature(index, value, it->iterator)) 00235 { 00236 value*=get_combined_feature_weight(); 00237 return true; 00238 } 00239 00240 if (++iterator_idx == get_num_feature_obj()) 00241 { 00242 index = -1; 00243 break; 00244 } 00245 00246 it->f->free_feature_iterator(it->iterator); 00247 SG_UNREF(it->f); 00248 it->f=get_feature_obj(iterator_idx); 00249 if (it->f) 00250 it->iterator=it->f->get_feature_iterator(it->vector_index); 00251 else 00252 it->iterator=NULL; 00253 } 00254 return false; 00255 } 00256 00257 void CCombinedDotFeatures::free_feature_iterator(void* iterator) 00258 { 00259 if (iterator) 00260 { 00261 combined_feature_iterator* it = (combined_feature_iterator*) iterator; 00262 if (it->iterator && it->f) 00263 it->f->free_feature_iterator(it->iterator); 00264 SG_UNREF(it->f); 00265 SG_FREE(it); 00266 } 00267 } 00268 00269 CDotFeatures* CCombinedDotFeatures::get_feature_obj(int32_t idx) 00270 { 00271 return (CDotFeatures*) feature_array->get_element(idx); 00272 } 00273 00274 bool CCombinedDotFeatures::insert_feature_obj(CDotFeatures* obj, int32_t idx) 00275 { 00276 ASSERT(obj) 00277 bool result=feature_array->insert_element(obj, idx); 00278 update_dim_feature_space_and_num_vec(); 00279 return result; 00280 } 00281 00282 bool CCombinedDotFeatures::append_feature_obj(CDotFeatures* obj) 00283 { 00284 ASSERT(obj) 00285 int n = get_num_feature_obj(); 00286 feature_array->push_back(obj); 00287 update_dim_feature_space_and_num_vec(); 00288 return n+1==get_num_feature_obj(); 00289 } 00290 00291 bool CCombinedDotFeatures::delete_feature_obj(int32_t idx) 00292 { 00293 bool succesful_deletion = feature_array->delete_element(idx); 00294 if (succesful_deletion) 00295 update_dim_feature_space_and_num_vec(); 00296 return succesful_deletion; 00297 } 00298 00299 int32_t CCombinedDotFeatures::get_num_feature_obj() 00300 { 00301 return feature_array->get_num_elements(); 00302 } 00303 00304 int32_t CCombinedDotFeatures::get_nnz_features_for_vector(int32_t num) 00305 { 00306 int32_t result=0; 00307 00308 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00309 { 00310 CDotFeatures* f = get_feature_obj(f_idx); 00311 result+=f->get_nnz_features_for_vector(num); 00312 SG_UNREF(f); 00313 } 00314 00315 return result; 00316 } 00317 00318 SGVector<float64_t> CCombinedDotFeatures::get_subfeature_weights() 00319 { 00320 int32_t num_weights = get_num_feature_obj(); 00321 ASSERT(num_weights > 0) 00322 00323 float64_t* weights=SG_MALLOC(float64_t, num_weights); 00324 00325 for (index_t f_idx=0; f_idx<num_weights; f_idx++) 00326 { 00327 CDotFeatures* f = get_feature_obj(f_idx); 00328 weights[f_idx] = f->get_combined_feature_weight(); 00329 SG_UNREF(f); 00330 } 00331 return SGVector<float64_t>(weights,num_weights); 00332 } 00333 00334 void CCombinedDotFeatures::set_subfeature_weights(SGVector<float64_t> weights) 00335 { 00336 ASSERT(weights.vlen==get_num_feature_obj()) 00337 00338 for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++) 00339 { 00340 CDotFeatures* f = get_feature_obj(f_idx); 00341 f->set_combined_feature_weight(weights[f_idx]); 00342 SG_UNREF(f); 00343 } 00344 } 00345 00346 void CCombinedDotFeatures::init() 00347 { 00348 m_parameters->add(&num_dimensions, "num_dimensions", 00349 "Total number of dimensions."); 00350 m_parameters->add(&num_vectors, "num_vectors", 00351 "Total number of vectors."); 00352 m_parameters->add((CSGObject**) &feature_array, 00353 "feature_array", "Feature array."); 00354 } 00355