SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
CombinedDotFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009-2010 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  * Copyright (C) 2010 Berlin Institute of Technology
00010  */
00011 
00012 #include <shogun/features/CombinedDotFeatures.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/mathematics/Math.h>
00015 
00016 using namespace shogun;
00017 
/**
 * Default constructor.
 *
 * Registers the members with the parameter framework (init()), creates an
 * empty feature array and then derives num_dimensions / num_vectors from
 * that array.
 */
00018 CCombinedDotFeatures::CCombinedDotFeatures() : CDotFeatures()
00019 {
00020     init();
00021 
00022     feature_array=new CDynamicObjectArray();
// With no sub-feature objects in the array the update below leaves
// num_dimensions at 0 and num_vectors at -1 (its start values).
00023     update_dim_feature_space_and_num_vec();
00024 }
00025 
/**
 * Copy constructor.
 *
 * Copies the base-class state and the cached num_vectors / num_dimensions
 * of orig, registers the members via init() and creates a new, empty
 * feature array.
 *
 * @param orig object to copy from
 */
00026 CCombinedDotFeatures::CCombinedDotFeatures(const CCombinedDotFeatures & orig)
00027 : CDotFeatures(orig), num_vectors(orig.num_vectors),
00028     num_dimensions(orig.num_dimensions)
00029 {
00030     init();
00031 
// NOTE(review): the sub-feature objects of orig are not transferred here, so
// the copied num_vectors / num_dimensions describe orig's array rather than
// this (empty) one until a sub-object is added or removed - confirm intended.
00032     feature_array=new CDynamicObjectArray();
00033 }
00034 
/**
 * Create a new CCombinedDotFeatures via the copy constructor.
 *
 * @return newly allocated object, returned as CFeatures*
 */
00035 CFeatures* CCombinedDotFeatures::duplicate() const
00036 {
00037     return new CCombinedDotFeatures(*this);
00038 }
00039 
/** Destructor: drops this object's reference to the feature array. */
00040 CCombinedDotFeatures::~CCombinedDotFeatures()
00041 {
00042     SG_UNREF(feature_array);
00043 }
00044 
00045 void CCombinedDotFeatures::list_feature_objs()
00046 {
00047     SG_INFO("BEGIN COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions)
00048     this->list_feature_obj();
00049 
00050     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00051     {
00052         CDotFeatures* f = get_feature_obj(f_idx);
00053         f->list_feature_obj();
00054         SG_UNREF(f);
00055     }
00056 
00057     SG_INFO("END COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions)
00058     this->list_feature_obj();
00059 }
00060 
00061 void CCombinedDotFeatures::update_dim_feature_space_and_num_vec()
00062 {
00063     int32_t dim=0;
00064     int32_t vec=-1;
00065 
00066     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00067     {
00068         CDotFeatures* f = get_feature_obj(f_idx);
00069         dim+= f->get_dim_feature_space();
00070         if (vec==-1)
00071             vec=f->get_num_vectors();
00072         else if (vec != f->get_num_vectors())
00073         {
00074             f->list_feature_obj();
00075             SG_ERROR("Number of vectors (%d) mismatches in above feature obj (%d)\n", vec, f->get_num_vectors())
00076         }
00077 
00078         SG_UNREF(f);
00079     }
00080 
00081     num_dimensions=dim;
00082     num_vectors=vec;
00083     SG_DEBUG("vecs=%d, dims=%d\n", num_vectors, num_dimensions)
00084 }
00085 
00086 float64_t CCombinedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00087 {
00088     float64_t result=0;
00089 
00090     ASSERT(df)
00091     ASSERT(df->get_feature_type() == get_feature_type())
00092     ASSERT(df->get_feature_class() == get_feature_class())
00093     CCombinedDotFeatures* cf = (CCombinedDotFeatures*) df;
00094 
00095     // check that both have same number of feature objects inside
00096     ASSERT(get_num_feature_obj()==cf->get_num_feature_obj())
00097 
00098     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00099     {
00100         CDotFeatures* f1 = get_feature_obj(f_idx);
00101         CDotFeatures* f2 = cf->get_feature_obj(f_idx);
00102 
00103         ASSERT(f1)
00104         ASSERT(f2)
00105 
00106         result += f1->dot(vec_idx1, f2,vec_idx2) *
00107             f1->get_combined_feature_weight() *
00108             f2->get_combined_feature_weight();
00109 
00110         SG_UNREF(f1);
00111         SG_UNREF(f2);
00112     }
00113 
00114     return result;
00115 }
00116 
00117 float64_t CCombinedDotFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00118 {
00119     float64_t result=0;
00120 
00121     uint32_t offs=0;
00122 
00123     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00124     {
00125         CDotFeatures* f = get_feature_obj(f_idx);
00126         int32_t dim = f->get_dim_feature_space();
00127         result += f->dense_dot(vec_idx1, vec2+offs, dim)*f->get_combined_feature_weight();
00128         offs += dim;
00129 
00130         SG_UNREF(f);
00131     }
00132 
00133     return result;
00134 }
00135 
00136 void CCombinedDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00137 {
00138     if (stop<=start)
00139         return;
00140     ASSERT(dim==num_dimensions)
00141 
00142     uint32_t offs=0;
00143     bool first=true;
00144     int32_t num=stop-start;
00145     float64_t* tmp=SG_MALLOC(float64_t, num);
00146 
00147     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00148     {
00149         CDotFeatures* f = get_feature_obj(f_idx);
00150         int32_t f_dim = f->get_dim_feature_space();
00151         if (first)
00152         {
00153             f->dense_dot_range(output, start, stop, alphas, vec+offs, f_dim, b);
00154             first=false;
00155         }
00156         else
00157         {
00158             f->dense_dot_range(tmp, start, stop, alphas, vec+offs, f_dim, b);
00159             for (int32_t i=0; i<num; i++)
00160                 output[i]+=tmp[i];
00161         }
00162         offs += f_dim;
00163 
00164         SG_UNREF(f);
00165     }
00166     SG_FREE(tmp);
00167 }
00168 
00169 void CCombinedDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00170 {
00171     if (num<=0)
00172         return;
00173     ASSERT(dim==num_dimensions)
00174 
00175     uint32_t offs=0;
00176     bool first=true;
00177     float64_t* tmp=SG_MALLOC(float64_t, num);
00178 
00179     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00180     {
00181         CDotFeatures* f = get_feature_obj(f_idx);
00182         int32_t f_dim = f->get_dim_feature_space();
00183         if (first)
00184         {
00185             f->dense_dot_range_subset(sub_index, num, output, alphas, vec+offs, f_dim, b);
00186             first=false;
00187         }
00188         else
00189         {
00190             f->dense_dot_range_subset(sub_index, num, tmp, alphas, vec+offs, f_dim, b);
00191             for (int32_t i=0; i<num; i++)
00192                 output[i]+=tmp[i];
00193         }
00194         offs += f_dim;
00195 
00196         SG_UNREF(f);
00197     }
00198     SG_FREE(tmp);
00199 }
00200 
00201 void CCombinedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00202 {
00203     uint32_t offs=0;
00204 
00205     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00206     {
00207         CDotFeatures* f = get_feature_obj(f_idx);
00208         int32_t dim = f->get_dim_feature_space();
00209         f->add_to_dense_vec(alpha*f->get_combined_feature_weight(), vec_idx1, vec2+offs, dim, abs_val);
00210         offs += dim;
00211 
00212         SG_UNREF(f);
00213     }
00214 }
00215 
00216 void* CCombinedDotFeatures::get_feature_iterator(int32_t vector_index)
00217 {
00218     combined_feature_iterator* it=SG_MALLOC(combined_feature_iterator, 1);
00219 
00220     it->f=get_feature_obj(0);
00221     iterator_idx=0;
00222     it->iterator=it->f->get_feature_iterator(vector_index);
00223     it->vector_index=vector_index;
00224     return it;
00225 }
00226 
00227 bool CCombinedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00228 {
00229     ASSERT(iterator)
00230     combined_feature_iterator* it = (combined_feature_iterator*) iterator;
00231 
00232     while (it->f)
00233     {
00234         if (it->f->get_next_feature(index, value, it->iterator))
00235         {
00236             value*=get_combined_feature_weight();
00237             return true;
00238         }
00239 
00240         if (++iterator_idx == get_num_feature_obj())
00241         {
00242             index = -1;
00243             break;
00244         }
00245 
00246         it->f->free_feature_iterator(it->iterator);
00247         SG_UNREF(it->f);
00248         it->f=get_feature_obj(iterator_idx);
00249         if (it->f)
00250             it->iterator=it->f->get_feature_iterator(it->vector_index);
00251         else
00252             it->iterator=NULL;
00253     }
00254     return false;
00255 }
00256 
00257 void CCombinedDotFeatures::free_feature_iterator(void* iterator)
00258 {
00259     if (iterator)
00260     {
00261         combined_feature_iterator* it = (combined_feature_iterator*) iterator;
00262         if (it->iterator && it->f)
00263             it->f->free_feature_iterator(it->iterator);
00264         SG_UNREF(it->f);
00265         SG_FREE(it);
00266     }
00267 }
00268 
00269 CDotFeatures* CCombinedDotFeatures::get_feature_obj(int32_t idx)
00270 {
00271     return (CDotFeatures*) feature_array->get_element(idx);
00272 }
00273 
00274 bool CCombinedDotFeatures::insert_feature_obj(CDotFeatures* obj, int32_t idx)
00275 {
00276     ASSERT(obj)
00277     bool result=feature_array->insert_element(obj, idx);
00278     update_dim_feature_space_and_num_vec();
00279     return result;
00280 }
00281 
00282 bool CCombinedDotFeatures::append_feature_obj(CDotFeatures* obj)
00283 {
00284     ASSERT(obj)
00285     int n = get_num_feature_obj();
00286     feature_array->push_back(obj);
00287     update_dim_feature_space_and_num_vec();
00288     return n+1==get_num_feature_obj();
00289 }
00290 
00291 bool CCombinedDotFeatures::delete_feature_obj(int32_t idx)
00292 {
00293     bool succesful_deletion = feature_array->delete_element(idx);
00294     if (succesful_deletion)
00295         update_dim_feature_space_and_num_vec();
00296     return succesful_deletion;
00297 }
00298 
/**
 * Number of sub-feature objects currently held.
 *
 * @return element count reported by the feature array
 */
00299 int32_t CCombinedDotFeatures::get_num_feature_obj()
00300 {
00301     return feature_array->get_num_elements();
00302 }
00303 
00304 int32_t CCombinedDotFeatures::get_nnz_features_for_vector(int32_t num)
00305 {
00306     int32_t result=0;
00307 
00308     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00309     {
00310         CDotFeatures* f = get_feature_obj(f_idx);
00311         result+=f->get_nnz_features_for_vector(num);
00312         SG_UNREF(f);
00313     }
00314 
00315     return result;
00316 }
00317 
00318 SGVector<float64_t> CCombinedDotFeatures::get_subfeature_weights()
00319 {
00320     int32_t num_weights = get_num_feature_obj();
00321     ASSERT(num_weights > 0)
00322 
00323     float64_t* weights=SG_MALLOC(float64_t, num_weights);
00324 
00325     for (index_t f_idx=0; f_idx<num_weights; f_idx++)
00326     {
00327         CDotFeatures* f = get_feature_obj(f_idx);
00328         weights[f_idx] = f->get_combined_feature_weight();
00329         SG_UNREF(f);
00330     }
00331     return SGVector<float64_t>(weights,num_weights);
00332 }
00333 
00334 void CCombinedDotFeatures::set_subfeature_weights(SGVector<float64_t> weights)
00335 {
00336     ASSERT(weights.vlen==get_num_feature_obj())
00337 
00338     for (index_t f_idx=0; f_idx<get_num_feature_obj(); f_idx++)
00339     {
00340         CDotFeatures* f = get_feature_obj(f_idx);
00341         f->set_combined_feature_weight(weights[f_idx]);
00342         SG_UNREF(f);
00343     }
00344 }
00345 
/**
 * Register num_dimensions, num_vectors and feature_array with m_parameters
 * under the names and descriptions given below. Called by both constructors
 * before the feature array is created.
 */
00346 void CCombinedDotFeatures::init()
00347 {
00348     m_parameters->add(&num_dimensions, "num_dimensions",
00349                       "Total number of dimensions.");
00350     m_parameters->add(&num_vectors, "num_vectors",
00351                       "Total number of vectors.");
// The address of the pointer member is registered, not the array it
// currently points to.
00352     m_parameters->add((CSGObject**) &feature_array,
00353                       "feature_array", "Feature array.");
00354 }
00355 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation