SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
PolyFeatures.cpp
Go to the documentation of this file.
00001 #include <shogun/features/PolyFeatures.h>
00002 
00003 using namespace shogun;
00004 
00005 CPolyFeatures::CPolyFeatures() :CDotFeatures()
00006 {
00007     m_feat=NULL;
00008     m_degree=0;
00009     m_normalize=false;
00010     m_input_dimensions=0;
00011     m_multi_index=NULL;
00012     m_multinomial_coefficients=NULL;
00013     m_normalization_values=NULL;
00014     m_output_dimensions=0;
00015 
00016     register_parameters();
00017 }
00018 
00019 CPolyFeatures::CPolyFeatures(CDenseFeatures<float64_t>* feat, int32_t degree, bool normalize)
00020     : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00021         m_normalization_values(NULL)
00022 {
00023     ASSERT(feat)
00024 
00025     m_feat = feat;
00026     SG_REF(m_feat);
00027     m_degree=degree;
00028     m_normalize=normalize;
00029     m_input_dimensions=feat->get_num_features();
00030     m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00031 
00032     store_multi_index();
00033     store_multinomial_coefficients();
00034     if (m_normalize)
00035         store_normalization_values();
00036 
00037     register_parameters();
00038 }
00039 
00040 
00041 CPolyFeatures::~CPolyFeatures()
00042 {
00043     SG_FREE(m_multi_index);
00044     SG_FREE(m_multinomial_coefficients);
00045     SG_FREE(m_normalization_values);
00046     SG_UNREF(m_feat);
00047 }
00048 
00049 CPolyFeatures::CPolyFeatures(const CPolyFeatures & orig)
00050 {
00051     SG_PRINT("CPolyFeatures:\n")
00052     SG_NOTIMPLEMENTED
00053 };
00054 
00055 int32_t CPolyFeatures::get_dim_feature_space() const
00056 {
00057     return m_output_dimensions;
00058 }
00059 
00060 int32_t CPolyFeatures::get_nnz_features_for_vector(int32_t num)
00061 {
00062     return m_output_dimensions;
00063 }
00064 
00065 EFeatureType CPolyFeatures::get_feature_type() const
00066 {
00067     return F_UNKNOWN;
00068 }
00069 
00070 EFeatureClass CPolyFeatures::get_feature_class() const
00071 {
00072     return C_POLY;
00073 }
00074 
00075 int32_t CPolyFeatures::get_num_vectors() const
00076 {
00077     if (m_feat)
00078         return m_feat->get_num_vectors();
00079     else
00080         return 0;
00081 
00082 }
00083 
00084 void* CPolyFeatures::get_feature_iterator(int32_t vector_index)
00085 {
00086     SG_NOTIMPLEMENTED
00087     return NULL;
00088 }
00089 
00090 bool CPolyFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00091 {
00092     SG_NOTIMPLEMENTED
00093     return false;
00094 }
00095 
00096 void CPolyFeatures::free_feature_iterator(void* iterator)
00097 {
00098     SG_NOTIMPLEMENTED
00099 }
00100 
00101 
00102 
00103 float64_t CPolyFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00104 {
00105     ASSERT(df)
00106     ASSERT(df->get_feature_type() == get_feature_type())
00107     ASSERT(df->get_feature_class() == get_feature_class())
00108 
00109     CPolyFeatures* pf=(CPolyFeatures*) df;
00110 
00111     int32_t len1;
00112     bool do_free1;
00113     float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00114 
00115     int32_t len2;
00116     bool do_free2;
00117     float64_t* vec2 = pf->m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00118 
00119     float64_t sum=0;
00120     int cnt=0;
00121     for (int j=0; j<m_output_dimensions; j++)
00122     {
00123         float64_t out1=m_multinomial_coefficients[j];
00124         float64_t out2=m_multinomial_coefficients[j];
00125         for (int k=0; k<m_degree; k++)
00126         {
00127             out1*=vec1[m_multi_index[cnt]];
00128             out2*=vec2[m_multi_index[cnt]];
00129             cnt++;
00130         }
00131         sum+=out1*out2;
00132     }
00133     m_feat->free_feature_vector(vec1, len1, do_free1);
00134     pf->m_feat->free_feature_vector(vec2, len2, do_free2);
00135 
00136     return sum;
00137 }
00138 
00139 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00140 {
00141     if (vec2_len != m_output_dimensions)
00142         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
00143 
00144     int32_t len;
00145     bool do_free;
00146     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00147 
00148 
00149     int cnt=0;
00150     float64_t sum=0;
00151     for (int j=0; j<vec2_len; j++)
00152     {
00153         float64_t output=m_multinomial_coefficients[j];
00154         for (int k=0; k<m_degree; k++)
00155         {
00156             output*=vec[m_multi_index[cnt]];
00157             cnt++;
00158         }
00159         sum+=output*vec2[j];
00160     }
00161     if (m_normalize)
00162         sum = sum/m_normalization_values[vec_idx1];
00163 
00164     m_feat->free_feature_vector(vec, len, do_free);
00165     return sum;
00166 }
00167 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00168 {
00169     if (vec2_len != m_output_dimensions)
00170         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions)
00171 
00172     int32_t len;
00173     bool do_free;
00174     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00175 
00176 
00177     int cnt=0;
00178     float32_t norm_val=1;
00179     if (m_normalize)
00180         norm_val = m_normalization_values[vec_idx1];
00181     alpha/=norm_val;
00182     for (int j=0; j<vec2_len; j++)
00183     {
00184         float64_t output=m_multinomial_coefficients[j];
00185         for (int k=0; k<m_degree; k++)
00186         {
00187             output*=vec[m_multi_index[cnt]];
00188             cnt++;
00189         }
00190         if (abs_val)
00191             output=CMath::abs(output);
00192 
00193         vec2[j]+=alpha*output;
00194     }
00195     m_feat->free_feature_vector(vec, len, do_free);
00196 }
00197 void CPolyFeatures::store_normalization_values()
00198 {
00199     SG_FREE(m_normalization_values);
00200 
00201     int32_t num_vec = this->get_num_vectors();
00202 
00203     m_normalization_values=SG_MALLOC(float32_t, num_vec);
00204     for (int i=0; i<num_vec; i++)
00205     {
00206         float64_t tmp = CMath::sqrt(dot(i, this,i));
00207         if (tmp==0)
00208             // trap division by zero
00209             m_normalization_values[i]=1;
00210         else
00211             m_normalization_values[i]=tmp;
00212     }
00213 
00214 }
00215 
00216 void CPolyFeatures::store_multi_index()
00217 {
00218     SG_FREE(m_multi_index);
00219 
00220         m_multi_index=SG_MALLOC(uint16_t, m_output_dimensions*m_degree);
00221 
00222         uint16_t* exponents = SG_MALLOC(uint16_t, m_input_dimensions);
00223         if (!exponents)
00224         SG_ERROR("Error allocating mem \n")
00225     /*copy adress: otherwise it will be overwritten in recursion*/
00226         uint16_t* index = m_multi_index;
00227         enumerate_multi_index(0, &index, exponents, m_degree);
00228 
00229     SG_FREE(exponents);
00230 }
00231 
00232 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00233 {
00234     if (feat_idx==m_input_dimensions-1 || degree==0)
00235     {
00236         if (feat_idx==m_input_dimensions-1)
00237             exponents[feat_idx] = degree;
00238         if (degree==0)
00239             exponents[feat_idx] = 0;
00240         int32_t i, j;
00241         for (j=0; j<feat_idx+1; j++)
00242             for (i=0; i<exponents[j]; i++)
00243             {
00244                 **index = j;
00245                 (*index)++;
00246             }
00247         exponents[feat_idx] = 0;
00248         return;
00249     }
00250     int32_t k;
00251     for (k=0; k<=degree; k++)
00252     {
00253         exponents[feat_idx] =  k;
00254         enumerate_multi_index(feat_idx+1, index,  exponents, degree-k);
00255     }
00256     return;
00257 
00258 }
00259 
00260 void CPolyFeatures::store_multinomial_coefficients()
00261 {
00262     SG_FREE(m_multinomial_coefficients);
00263 
00264     m_multinomial_coefficients = SG_MALLOC(float64_t, m_output_dimensions);
00265     int32_t* exponents = SG_MALLOC(int32_t, m_input_dimensions);
00266     if (!exponents)
00267         SG_ERROR("Error allocating mem \n")
00268     int32_t j=0;
00269     for (j=0; j<m_input_dimensions; j++)
00270         exponents[j] = 0;
00271     int32_t k, cnt=0;
00272     for (j=0; j<m_output_dimensions; j++)
00273     {
00274         for (k=0; k<m_degree; k++)
00275         {
00276             exponents[m_multi_index[cnt]] ++;
00277             cnt++;
00278         }
00279         m_multinomial_coefficients[j] =  sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00280         for (k=0; k<m_input_dimensions; k++)
00281         {
00282             exponents[k]=0;
00283         }
00284     }
00285     SG_FREE(exponents);
00286 }
00287 
00288 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00289 {
00290 
00291     /* for this problem k is usually small (<=degree),
00292      * thus it is efficient to
00293      * to use recursion and prune end recursions*/
00294     if (n<k)
00295         return 0;
00296     if (k>n/2)
00297         k = n-k;
00298     if (k<0)
00299         return 0;
00300     if (k==0)
00301         return 1;
00302     if (k==1)
00303         return n;
00304     if (k<4)
00305         return bico2(n-1, k-1)+bico2(n-1, k);
00306 
00307     /* call function as implemented in numerical recipies:
00308      * much more efficient for large binomial coefficients*/
00309     return bico(n, k);
00310 
00311 }
00312 
00313 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00314 {
00315     if (N==1)
00316         return 1;
00317     if (D==0)
00318         return 1;
00319     int32_t d;
00320     int32_t ret = 0;
00321     for (d=0; d<=D; d++)
00322         ret += calc_feature_space_dimensions(N-1, d);
00323 
00324     return ret;
00325 }
00326 
00327 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00328 {
00329     int32_t ret = 1, i;
00330     int32_t n = 0;
00331     for (i=0; i<len; i++)
00332     {
00333         n += exps[i];
00334         ret *= bico2(n, exps[i]);
00335     }
00336     return ret;
00337 }
00338 
00339 /* gammln as implemented in the
00340  * second edition of Numerical Recipes in C */
00341 float64_t CPolyFeatures::gammln(float64_t xx)
00342 {
00343     float64_t x,y,tmp,ser;
00344     static float64_t cof[6]={76.18009172947146,    -86.50532032941677,
00345                           24.01409824083091,    -1.231739572450155,
00346                           0.1208650973866179e-2,-0.5395239384953e-5};
00347     int32_t j;
00348 
00349     y=x=xx;
00350     tmp=x+5.5;
00351     tmp -= (x+0.5)*log(tmp);
00352     ser=1.000000000190015;
00353     for (j=0;j<=5;j++) ser += cof[j]/++y;
00354     return -tmp+log(2.5066282746310005*ser/x);
00355 }
00356 
00357 float64_t CPolyFeatures::factln(int32_t n)
00358 {
00359     static float64_t a[101];
00360 
00361     if (n < 0) SG_ERROR("Negative factorial in routine factln\n")
00362     if (n <= 1) return 0.0;
00363     if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00364     else return gammln(n+1.0);
00365 }
00366 
00367 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00368 {
00369     /* use floor to clean roundoff errors*/
00370     return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00371 }
00372 CFeatures* CPolyFeatures::duplicate() const
00373 {
00374     return new CPolyFeatures(*this);
00375 }
00376 
00377 void CPolyFeatures::register_parameters()
00378 {
00379     m_parameters->add((CSGObject**) &m_feat, "features",
00380                 "Features in original space.");
00381     m_parameters->add(&m_degree, "degree", "Degree of the polynomial kernel.");
00382     m_parameters->add(&m_normalize, "normalize", "Normalize?");
00383     m_parameters->add(&m_input_dimensions, "input_dimensions",
00384             "Dimensions of the input space.");
00385     m_parameters->add(&m_output_dimensions, "output_dimensions",
00386             "Dimensions of the feature space of the polynomial kernel.");
00387 
00388     multi_index_length=m_output_dimensions*m_degree;
00389     m_parameters->add_vector(
00390             &m_multi_index,
00391             &multi_index_length,
00392             "multi_index",
00393             "Flattened matrix of all multi indices that sum do the"
00394             " degree of the polynomial kernel.");
00395 
00396     multinomial_coefficients_length=m_output_dimensions;
00397     m_parameters->add_vector(&m_multinomial_coefficients,
00398             &multinomial_coefficients_length, "multinomial_coefficients",
00399             "Multinomial coefficients for all multi-indices.");
00400 
00401     normalization_values_length=get_num_vectors();
00402     m_parameters->add_vector(&m_normalization_values,
00403             &normalization_values_length, "normalization_values",
00404             "Norm of each training example.");
00405 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation