SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
DenseFeatures.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2013 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  * Copyright (C) 2010 Berlin Institute of Technology
00012  */
00013 
00014 #include <shogun/features/DenseFeatures.h>
00015 #include <shogun/preprocessor/DensePreprocessor.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/base/Parameter.h>
00018 #include <shogun/mathematics/Math.h>
00019 
00020 #include <string.h>
00021 
00022 namespace shogun {
00023 
00024 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size)
00025 {
00026     init();
00027 }
00028 
00029 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) :
00030         CDotFeatures(orig)
00031 {
00032     init();
00033     set_feature_matrix(orig.feature_matrix);
00034     initialize_cache();
00035 
00036     if (orig.m_subset_stack != NULL)
00037     {
00038         SG_UNREF(m_subset_stack);
00039         m_subset_stack=new CSubsetStack(*orig.m_subset_stack);
00040         SG_REF(m_subset_stack);
00041     }
00042 }
00043 
00044 template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) :
00045         CDotFeatures()
00046 {
00047     init();
00048     set_feature_matrix(matrix);
00049 }
00050 
00051 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) :
00052         CDotFeatures()
00053 {
00054     init();
00055     set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec));
00056 }
00057 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) :
00058         CDotFeatures()
00059 {
00060     init();
00061     load(loader);
00062 }
00063 
00064 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const
00065 {
00066     return new CDenseFeatures<ST>(*this);
00067 }
00068 
00069 template<class ST> CDenseFeatures<ST>::~CDenseFeatures()
00070 {
00071     free_features();
00072 }
00073 
00074 template<class ST> void CDenseFeatures<ST>::free_features()
00075 {
00076     m_subset_stack->remove_all_subsets();
00077     free_feature_matrix();
00078     SG_UNREF(feature_cache);
00079 }
00080 
00081 template<class ST> void CDenseFeatures<ST>::free_feature_matrix()
00082 {
00083     m_subset_stack->remove_all_subsets();
00084     feature_matrix=SGMatrix<ST>();
00085     num_vectors = 0;
00086     num_features = 0;
00087 }
00088 
00089 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree)
00090 {
00091     /* index conversion for subset, only for array access */
00092     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00093 
00094     len = num_features;
00095 
00096     if (feature_matrix.matrix)
00097     {
00098         dofree = false;
00099         return &feature_matrix.matrix[real_num * int64_t(num_features)];
00100     }
00101 
00102     ST* feat = NULL;
00103     dofree = false;
00104 
00105     if (feature_cache)
00106     {
00107         feat = feature_cache->lock_entry(real_num);
00108 
00109         if (feat)
00110             return feat;
00111         else
00112             feat = feature_cache->set_entry(real_num);
00113     }
00114 
00115     if (!feat)
00116         dofree = true;
00117     feat = compute_feature_vector(num, len, feat);
00118 
00119     if (get_num_preprocessors())
00120     {
00121         int32_t tmp_len = len;
00122         ST* tmp_feat_before = feat;
00123         ST* tmp_feat_after = NULL;
00124 
00125         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00126         {
00127             CDensePreprocessor<ST>* p =
00128                     (CDensePreprocessor<ST>*) get_preprocessor(i);
00129             // temporary hack
00130             SGVector<ST> applied = p->apply_to_feature_vector(
00131                     SGVector<ST>(tmp_feat_before, tmp_len));
00132             tmp_feat_after = applied.vector;
00133             SG_UNREF(p);
00134 
00135             if (i != 0) // delete feature vector, except for the the first one, i.e., feat
00136                 SG_FREE(tmp_feat_before);
00137             tmp_feat_before = tmp_feat_after;
00138         }
00139 
00140         // note: tmp_feat_after should be checked as it is used by memcpy
00141         if (tmp_feat_after)
00142         {
00143             memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len);
00144             SG_FREE(tmp_feat_after);
00145 
00146             len = tmp_len;
00147         }
00148     }
00149     return feat;
00150 }
00151 
00152 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num)
00153 {
00154     /* index conversion for subset, only for array access */
00155     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00156 
00157     if (num>=get_num_vectors())
00158     {
00159         SG_ERROR("Index out of bounds (number of vectors %d, you "
00160         "requested %d)\n", get_num_vectors(), num);
00161     }
00162 
00163     if (!feature_matrix.matrix)
00164         SG_ERROR("Requires a in-memory feature matrix\n")
00165 
00166     if (vector.vlen != num_features)
00167         SG_ERROR(
00168                 "Vector not of length %d (has %d)\n", num_features, vector.vlen);
00169 
00170     memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector,
00171             int64_t(num_features) * sizeof(ST));
00172 }
00173 
00174 template<class ST> SGVector<ST> CDenseFeatures<ST>::get_feature_vector(int32_t num)
00175 {
00176     /* index conversion for subset, only for array access */
00177     int32_t real_num=m_subset_stack->subset_idx_conversion(num);
00178 
00179     if (num >= get_num_vectors())
00180     {
00181         SG_ERROR("Index out of bounds (number of vectors %d, you "
00182         "requested %d)\n", get_num_vectors(), real_num);
00183     }
00184 
00185     int32_t vlen;
00186     bool do_free;
00187     ST* vector= get_feature_vector(num, vlen, do_free);
00188     return SGVector<ST>(vector, vlen, do_free);
00189 }
00190 
00191 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree)
00192 {
00193     if (feature_cache)
00194         feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num));
00195 
00196     if (dofree)
00197         SG_FREE(feat_vec);
00198 }
00199 
00200 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num)
00201 {
00202     free_feature_vector(vec.vector, num, false);
00203     vec=SGVector<ST>();
00204 }
00205 
00206 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len)
00207 {
00208     if (m_subset_stack->has_subsets())
00209         SG_ERROR("A subset is set, cannot call vector_subset\n")
00210 
00211     ASSERT(feature_matrix.matrix)
00212     ASSERT(idx_len<=num_vectors)
00213 
00214     int32_t num_vec = num_vectors;
00215     num_vectors = idx_len;
00216 
00217     int32_t old_ii = -1;
00218 
00219     for (int32_t i = 0; i < idx_len; i++)
00220     {
00221         int32_t ii = idx[i];
00222         ASSERT(old_ii<ii)
00223 
00224         if (ii < 0 || ii >= num_vec)
00225             SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec)
00226 
00227         if (i == ii)
00228             continue;
00229 
00230         memcpy(&feature_matrix.matrix[int64_t(num_features) * i],
00231                 &feature_matrix.matrix[int64_t(num_features) * ii],
00232                 num_features * sizeof(ST));
00233         old_ii = ii;
00234     }
00235 }
00236 
00237 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len)
00238 {
00239     if (m_subset_stack->has_subsets())
00240         SG_ERROR("A subset is set, cannot call feature_subset\n")
00241 
00242     ASSERT(feature_matrix.matrix)
00243     ASSERT(idx_len<=num_features)
00244     int32_t num_feat = num_features;
00245     num_features = idx_len;
00246 
00247     for (int32_t i = 0; i < num_vectors; i++)
00248     {
00249         ST* src = &feature_matrix.matrix[int64_t(num_feat) * i];
00250         ST* dst = &feature_matrix.matrix[int64_t(num_features) * i];
00251 
00252         int32_t old_jj = -1;
00253         for (int32_t j = 0; j < idx_len; j++)
00254         {
00255             int32_t jj = idx[j];
00256             ASSERT(old_jj<jj)
00257             if (jj < 0 || jj >= num_feat)
00258                 SG_ERROR(
00259                         "Index out of range: should be 0<%d<%d\n", jj, num_feat);
00260 
00261             dst[j] = src[jj];
00262             old_jj = jj;
00263         }
00264     }
00265 }
00266 
00267 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix()
00268 {
00269     if (!m_subset_stack->has_subsets())
00270         return feature_matrix;
00271 
00272     SGMatrix<ST> submatrix(num_features, get_num_vectors());
00273 
00274     /* copy a subset vector wise */
00275     for (int32_t i=0; i<submatrix.num_cols; ++i)
00276     {
00277         int32_t real_i = m_subset_stack->subset_idx_conversion(i);
00278         memcpy(&submatrix.matrix[i*int64_t(num_features)],
00279                 &feature_matrix.matrix[real_i * int64_t(num_features)],
00280                 num_features * sizeof(ST));
00281     }
00282 
00283     return submatrix;
00284 }
00285 
00286 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix()
00287 {
00288     SGMatrix<ST> st_feature_matrix=feature_matrix;
00289     m_subset_stack->remove_all_subsets();
00290     SG_UNREF(feature_cache);
00291     clean_preprocessors();
00292     free_feature_matrix();
00293     return st_feature_matrix;
00294 }
00295 
00296 template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix)
00297 {
00298     m_subset_stack->remove_all_subsets();
00299     free_feature_matrix();
00300     feature_matrix = matrix;
00301     num_features = matrix.num_rows;
00302     num_vectors = matrix.num_cols;
00303 }
00304 
00305 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec)
00306 {
00307     num_feat = num_features;
00308     num_vec = num_vectors;
00309     return feature_matrix.matrix;
00310 }
00311 
00312 template<class ST> CDenseFeatures<ST>* CDenseFeatures<ST>::get_transposed()
00313 {
00314     int32_t num_feat;
00315     int32_t num_vec;
00316     ST* fm = get_transposed(num_feat, num_vec);
00317 
00318     return new CDenseFeatures<ST>(fm, num_feat, num_vec);
00319 }
00320 
00321 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec)
00322 {
00323     num_feat = get_num_vectors();
00324     num_vec = num_features;
00325 
00326     int32_t old_num_vec=get_num_vectors();
00327 
00328     ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec);
00329 
00330     for (int32_t i=0; i<old_num_vec; i++)
00331     {
00332         SGVector<ST> vec=get_feature_vector(i);
00333 
00334         for (int32_t j=0; j<vec.vlen; j++)
00335             fm[j*int64_t(old_num_vec)+i]=vec.vector[j];
00336 
00337         free_feature_vector(vec, i);
00338     }
00339 
00340     return fm;
00341 }
00342 
00343 template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src)
00344 {
00345     if (m_subset_stack->has_subsets())
00346         SG_ERROR("A subset is set, cannot call copy_feature_matrix\n")
00347 
00348     free_feature_matrix();
00349     feature_matrix = src.clone();
00350     num_features = src.num_rows;
00351     num_vectors = src.num_cols;
00352     initialize_cache();
00353 }
00354 
00355 template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df)
00356 {
00357     m_subset_stack->remove_all_subsets();
00358 
00359     int32_t num_feat = df->get_dim_feature_space();
00360     int32_t num_vec = df->get_num_vectors();
00361 
00362     ASSERT(num_feat>0 && num_vec>0)
00363 
00364     free_feature_matrix();
00365     feature_matrix = SGMatrix<ST>(num_feat, num_vec);
00366 
00367     for (int32_t i = 0; i < num_vec; i++)
00368     {
00369         SGVector<float64_t> v = df->get_computed_dot_feature_vector(i);
00370         ASSERT(num_feat==v.vlen)
00371 
00372         for (int32_t j = 0; j < num_feat; j++)
00373             feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j];
00374     }
00375     num_features = num_feat;
00376     num_vectors = num_vec;
00377 }
00378 
00379 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing)
00380 {
00381     if (m_subset_stack->has_subsets())
00382         SG_ERROR("A subset is set, cannot call apply_preproc\n")
00383 
00384     SG_DEBUG("force: %d\n", force_preprocessing)
00385 
00386     if (feature_matrix.matrix && get_num_preprocessors())
00387     {
00388         for (int32_t i = 0; i < get_num_preprocessors(); i++)
00389         {
00390             if ((!is_preprocessed(i) || force_preprocessing))
00391             {
00392                 set_preprocessed(i);
00393                 CDensePreprocessor<ST>* p =
00394                         (CDensePreprocessor<ST>*) get_preprocessor(i);
00395                 SG_INFO("preprocessing using preproc %s\n", p->get_name())
00396 
00397                 if (p->apply_to_feature_matrix(this).matrix == NULL)
00398                 {
00399                     SG_UNREF(p);
00400                     return false;
00401                 }
00402                 SG_UNREF(p);
00403 
00404             }
00405         }
00406 
00407         return true;
00408     }
00409     else
00410     {
00411         if (!feature_matrix.matrix)
00412             SG_ERROR("no feature matrix\n")
00413 
00414         if (!get_num_preprocessors())
00415             SG_ERROR("no preprocessors available\n")
00416 
00417         return false;
00418     }
00419 }
00420 
00421 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const
00422 {
00423     return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors;
00424 }
00425 
00426 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; }
00427 
00428 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num)
00429 {
00430     num_features = num;
00431     initialize_cache();
00432 }
00433 
00434 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num)
00435 {
00436     if (m_subset_stack->has_subsets())
00437         SG_ERROR("A subset is set, cannot call set_num_vectors\n")
00438 
00439     num_vectors = num;
00440     initialize_cache();
00441 }
00442 
00443 template<class ST> void CDenseFeatures<ST>::initialize_cache()
00444 {
00445     if (m_subset_stack->has_subsets())
00446         SG_ERROR("A subset is set, cannot call initialize_cache\n")
00447 
00448     if (num_features && num_vectors)
00449     {
00450         SG_UNREF(feature_cache);
00451         feature_cache = new CCache<ST>(get_cache_size(), num_features,
00452                 num_vectors);
00453         SG_REF(feature_cache);
00454     }
00455 }
00456 
00457 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const  { return C_DENSE; }
00458 
00459 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors)
00460 {
00461     if (m_subset_stack->has_subsets())
00462         SG_ERROR("A subset is set, cannot call reshape\n")
00463 
00464     if (p_num_features * p_num_vectors
00465             == this->num_features * this->num_vectors)
00466     {
00467         num_features = p_num_features;
00468         num_vectors = p_num_vectors;
00469         return true;
00470     } else
00471         return false;
00472 }
00473 
00474 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; }
00475 
00476 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df,
00477         int32_t vec_idx2)
00478 {
00479     ASSERT(df)
00480     ASSERT(df->get_feature_type() == get_feature_type())
00481     ASSERT(df->get_feature_class() == get_feature_class())
00482     CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df;
00483 
00484     int32_t len1, len2;
00485     bool free1, free2;
00486 
00487     ST* vec1 = get_feature_vector(vec_idx1, len1, free1);
00488     ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2);
00489 
00490     float64_t result = SGVector<ST>::dot(vec1, vec2, len1);
00491 
00492     free_feature_vector(vec1, vec_idx1, free1);
00493     sf->free_feature_vector(vec2, vec_idx2, free2);
00494 
00495     return result;
00496 }
00497 
00498 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00499         float64_t* vec2, int32_t vec2_len, bool abs_val)
00500 {
00501     ASSERT(vec2_len == num_features)
00502 
00503     int32_t vlen;
00504     bool vfree;
00505     ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00506 
00507     ASSERT(vlen == num_features)
00508 
00509     if (abs_val)
00510     {
00511         for (int32_t i = 0; i < num_features; i++)
00512             vec2[i] += alpha * CMath::abs(vec1[i]);
00513     }
00514     else
00515     {
00516         for (int32_t i = 0; i < num_features; i++)
00517             vec2[i] += alpha * vec1[i];
00518     }
00519 
00520     free_feature_vector(vec1, vec_idx1, vfree);
00521 }
00522 
00523 template<>
00524 void CDenseFeatures<float64_t>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00525         float64_t* vec2, int32_t vec2_len, bool abs_val)
00526 {
00527     ASSERT(vec2_len == num_features)
00528 
00529     int32_t vlen;
00530     bool vfree;
00531     float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00532 
00533     ASSERT(vlen == num_features)
00534 
00535     if (abs_val)
00536     {
00537         for (int32_t i = 0; i < num_features; i++)
00538             vec2[i] += alpha * CMath::abs(vec1[i]);
00539     }
00540     else
00541     {
00542         SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features);
00543     }
00544 
00545     free_feature_vector(vec1, vec_idx1, vfree);
00546 }
00547 
00548 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num)
00549 {
00550     return num_features;
00551 }
00552 
00553 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index)
00554 {
00555     if (vector_index>=get_num_vectors())
00556     {
00557         SG_ERROR("Index out of bounds (number of vectors %d, you "
00558         "requested %d)\n", get_num_vectors(), vector_index);
00559     }
00560 
00561     dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1);
00562     iterator->vec = get_feature_vector(vector_index, iterator->vlen,
00563             iterator->vfree);
00564     iterator->vidx = vector_index;
00565     iterator->index = 0;
00566     return iterator;
00567 }
00568 
00569 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value,
00570         void* iterator)
00571 {
00572     dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00573     if (!it || it->index >= it->vlen)
00574         return false;
00575 
00576     index = it->index++;
00577     value = (float64_t) it->vec[index];
00578 
00579     return true;
00580 }
00581 
00582 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator)
00583 {
00584     if (!iterator)
00585         return;
00586 
00587     dense_feature_iterator* it = (dense_feature_iterator*) iterator;
00588     free_feature_vector(it->vec, it->vidx, it->vfree);
00589     SG_FREE(it);
00590 }
00591 
00592 template<class ST> CFeatures* CDenseFeatures<ST>::copy_subset(SGVector<index_t> indices)
00593 {
00594     SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen);
00595 
00596     for (index_t i=0; i<indices.vlen; ++i)
00597     {
00598         index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]);
00599         memcpy(&feature_matrix_copy.matrix[i*num_features],
00600                 &feature_matrix.matrix[real_idx*num_features],
00601                 num_features*sizeof(ST));
00602     }
00603 
00604     CFeatures* result=new CDenseFeatures(feature_matrix_copy);
00605     SG_REF(result);
00606     return result;
00607 }
00608 
00609 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len,
00610         ST* target)
00611 {
00612     SG_NOTIMPLEMENTED
00613     len = 0;
00614     return NULL;
00615 }
00616 
00617 template<class ST> void CDenseFeatures<ST>::init()
00618 {
00619     num_vectors = 0;
00620     num_features = 0;
00621 
00622     feature_matrix = SGMatrix<ST>();
00623     feature_cache = NULL;
00624 
00625     set_generic<ST>();
00626 
00627     /* not store number of vectors in subset */
00628     SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE);
00629     SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE);
00630     SG_ADD(&feature_matrix, "feature_matrix",
00631             "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE);
00632 }
00633 
00634 #define GET_FEATURE_TYPE(f_type, sg_type)   \
00635 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \
00636 {                                                                           \
00637     return f_type;                                                          \
00638 }
00639 
00640 GET_FEATURE_TYPE(F_BOOL, bool)
00641 GET_FEATURE_TYPE(F_CHAR, char)
00642 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00643 GET_FEATURE_TYPE(F_BYTE, int8_t)
00644 GET_FEATURE_TYPE(F_SHORT, int16_t)
00645 GET_FEATURE_TYPE(F_WORD, uint16_t)
00646 GET_FEATURE_TYPE(F_INT, int32_t)
00647 GET_FEATURE_TYPE(F_UINT, uint32_t)
00648 GET_FEATURE_TYPE(F_LONG, int64_t)
00649 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00650 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00651 GET_FEATURE_TYPE(F_DREAL, float64_t)
00652 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00653 #undef GET_FEATURE_TYPE
00654 
00655 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1,
00656         float64_t* vec2, int32_t vec2_len)
00657 {
00658     ASSERT(vec2_len == num_features)
00659 
00660     int32_t vlen;
00661     bool vfree;
00662     bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00663 
00664     ASSERT(vlen == num_features)
00665     float64_t result = 0;
00666 
00667     for (int32_t i = 0; i < num_features; i++)
00668         result += vec1[i] ? vec2[i] : 0;
00669 
00670     free_feature_vector(vec1, vec_idx1, vfree);
00671 
00672     return result;
00673 }
00674 
00675 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1,
00676         float64_t* vec2, int32_t vec2_len)
00677 {
00678     ASSERT(vec2_len == num_features)
00679 
00680     int32_t vlen;
00681     bool vfree;
00682     char* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00683 
00684     ASSERT(vlen == num_features)
00685     float64_t result = 0;
00686 
00687     for (int32_t i = 0; i < num_features; i++)
00688         result += vec1[i] * vec2[i];
00689 
00690     free_feature_vector(vec1, vec_idx1, vfree);
00691 
00692     return result;
00693 }
00694 
00695 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1,
00696         float64_t* vec2, int32_t vec2_len)
00697 {
00698     ASSERT(vec2_len == num_features)
00699 
00700     int32_t vlen;
00701     bool vfree;
00702     int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00703 
00704     ASSERT(vlen == num_features)
00705     float64_t result = 0;
00706 
00707     for (int32_t i = 0; i < num_features; i++)
00708         result += vec1[i] * vec2[i];
00709 
00710     free_feature_vector(vec1, vec_idx1, vfree);
00711 
00712     return result;
00713 }
00714 
00715 template<> float64_t CDenseFeatures<uint8_t>::dense_dot(
00716         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00717 {
00718     ASSERT(vec2_len == num_features)
00719 
00720     int32_t vlen;
00721     bool vfree;
00722     uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00723 
00724     ASSERT(vlen == num_features)
00725     float64_t result = 0;
00726 
00727     for (int32_t i = 0; i < num_features; i++)
00728         result += vec1[i] * vec2[i];
00729 
00730     free_feature_vector(vec1, vec_idx1, vfree);
00731 
00732     return result;
00733 }
00734 
00735 template<> float64_t CDenseFeatures<int16_t>::dense_dot(
00736         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00737 {
00738     ASSERT(vec2_len == num_features)
00739 
00740     int32_t vlen;
00741     bool vfree;
00742     int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00743 
00744     ASSERT(vlen == num_features)
00745     float64_t result = 0;
00746 
00747     for (int32_t i = 0; i < num_features; i++)
00748         result += vec1[i] * vec2[i];
00749 
00750     free_feature_vector(vec1, vec_idx1, vfree);
00751 
00752     return result;
00753 }
00754 
00755 template<> float64_t CDenseFeatures<uint16_t>::dense_dot(
00756         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00757 {
00758     ASSERT(vec2_len == num_features)
00759 
00760     int32_t vlen;
00761     bool vfree;
00762     uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00763 
00764     ASSERT(vlen == num_features)
00765     float64_t result = 0;
00766 
00767     for (int32_t i = 0; i < num_features; i++)
00768         result += vec1[i] * vec2[i];
00769 
00770     free_feature_vector(vec1, vec_idx1, vfree);
00771 
00772     return result;
00773 }
00774 
00775 template<> float64_t CDenseFeatures<int32_t>::dense_dot(
00776         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00777 {
00778     ASSERT(vec2_len == num_features)
00779 
00780     int32_t vlen;
00781     bool vfree;
00782     int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00783 
00784     ASSERT(vlen == num_features)
00785     float64_t result = 0;
00786 
00787     for (int32_t i = 0; i < num_features; i++)
00788         result += vec1[i] * vec2[i];
00789 
00790     free_feature_vector(vec1, vec_idx1, vfree);
00791 
00792     return result;
00793 }
00794 
00795 template<> float64_t CDenseFeatures<uint32_t>::dense_dot(
00796         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00797 {
00798     ASSERT(vec2_len == num_features)
00799 
00800     int32_t vlen;
00801     bool vfree;
00802     uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00803 
00804     ASSERT(vlen == num_features)
00805     float64_t result = 0;
00806 
00807     for (int32_t i = 0; i < num_features; i++)
00808         result += vec1[i] * vec2[i];
00809 
00810     free_feature_vector(vec1, vec_idx1, vfree);
00811 
00812     return result;
00813 }
00814 
00815 template<> float64_t CDenseFeatures<int64_t>::dense_dot(
00816         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00817 {
00818     ASSERT(vec2_len == num_features)
00819 
00820     int32_t vlen;
00821     bool vfree;
00822     int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00823 
00824     ASSERT(vlen == num_features)
00825     float64_t result = 0;
00826 
00827     for (int32_t i = 0; i < num_features; i++)
00828         result += vec1[i] * vec2[i];
00829 
00830     free_feature_vector(vec1, vec_idx1, vfree);
00831 
00832     return result;
00833 }
00834 
00835 template<> float64_t CDenseFeatures<uint64_t>::dense_dot(
00836         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00837 {
00838     ASSERT(vec2_len == num_features)
00839 
00840     int32_t vlen;
00841     bool vfree;
00842     uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00843 
00844     ASSERT(vlen == num_features)
00845     float64_t result = 0;
00846 
00847     for (int32_t i = 0; i < num_features; i++)
00848         result += vec1[i] * vec2[i];
00849 
00850     free_feature_vector(vec1, vec_idx1, vfree);
00851 
00852     return result;
00853 }
00854 
00855 template<> float64_t CDenseFeatures<float32_t>::dense_dot(
00856         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00857 {
00858     ASSERT(vec2_len == num_features)
00859 
00860     int32_t vlen;
00861     bool vfree;
00862     float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00863 
00864     ASSERT(vlen == num_features)
00865     float64_t result = 0;
00866 
00867     for (int32_t i = 0; i < num_features; i++)
00868         result += vec1[i] * vec2[i];
00869 
00870     free_feature_vector(vec1, vec_idx1, vfree);
00871 
00872     return result;
00873 }
00874 
00875 template<> float64_t CDenseFeatures<float64_t>::dense_dot(
00876         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00877 {
00878     ASSERT(vec2_len == num_features)
00879 
00880     int32_t vlen;
00881     bool vfree;
00882     float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00883 
00884     ASSERT(vlen == num_features)
00885     float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features);
00886 
00887     free_feature_vector(vec1, vec_idx1, vfree);
00888 
00889     return result;
00890 }
00891 
00892 template<> float64_t CDenseFeatures<floatmax_t>::dense_dot(
00893         int32_t vec_idx1, float64_t* vec2, int32_t vec2_len)
00894 {
00895     ASSERT(vec2_len == num_features)
00896 
00897     int32_t vlen;
00898     bool vfree;
00899     floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree);
00900 
00901     ASSERT(vlen == num_features)
00902     float64_t result = 0;
00903 
00904     for (int32_t i = 0; i < num_features; i++)
00905         result += vec1[i] * vec2[i];
00906 
00907     free_feature_vector(vec1, vec_idx1, vfree);
00908 
00909     return result;
00910 }
00911 
00912 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs)
00913 {
00914     if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors )
00915         return false;
00916 
00917     ST* vec1;
00918     ST* vec2;
00919     int32_t v1len, v2len;
00920     bool v1free, v2free, stop = false;
00921 
00922     for (int32_t i = 0; i < num_vectors; i++)
00923     {
00924         vec1 = get_feature_vector(i, v1len, v1free);
00925         vec2 = rhs->get_feature_vector(i, v2len, v2free);
00926 
00927         if (v1len!=v2len)
00928             stop = true;
00929 
00930         for (int32_t j=0; j<v1len; j++)
00931         {
00932             if (vec1[j]!=vec2[j])
00933                 stop = true;
00934         }
00935 
00936         free_feature_vector(vec1, i, v1free);
00937         free_feature_vector(vec2, i, v2free);
00938 
00939         if (stop)
00940             return false;
00941     }
00942 
00943     return true;
00944 }
00945 
00946 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
00947         CList* others)
00948 {
00949     SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
00950 
00951     if (!others)
00952         return NULL;
00953 
00954     /* first, check other features and count number of elements */
00955     CSGObject* other=others->get_first_element();
00956     index_t num_vectors_merged=num_vectors;
00957     while (other)
00958     {
00959         CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other);
00960 
00961         if (!casted)
00962         {
00963             SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to "
00964                     "same type as %s\n",get_name(), other->get_name(), get_name());
00965         }
00966 
00967         if (get_feature_type()!=casted->get_feature_type() ||
00968                 get_feature_class()!=casted->get_feature_class() ||
00969                 strcmp(get_name(), casted->get_name()))
00970         {
00971             SG_ERROR("%s::create_merged_copy(): Features are of different type!\n",
00972                     get_name());
00973         }
00974 
00975         if (num_features!=casted->num_features)
00976         {
00977             SG_ERROR("%s::create_merged_copy(): Provided feature object has "
00978                     "different dimension than this one\n");
00979         }
00980 
00981         num_vectors_merged+=casted->get_num_vectors();
00982 
00983         /* check if reference counting is used */
00984         if (others->get_delete_data())
00985             SG_UNREF(other);
00986         other=others->get_next_element();
00987     }
00988 
00989     /* create new feature matrix and copy both instances data into it */
00990     SGMatrix<ST> data(num_features, num_vectors_merged);
00991 
00992     /* copy data of this instance */
00993     SG_DEBUG("copying matrix of this instance\n")
00994     memcpy(data.matrix, feature_matrix.matrix,
00995             num_features*num_vectors*sizeof(ST));
00996 
00997     /* count number of vectors (not elements) processed so far */
00998     index_t num_processed=num_vectors;
00999 
01000     /* now copy data of other features bock wise */
01001     other=others->get_first_element();
01002     while (other)
01003     {
01004         /* cast is safe due to above check */
01005         CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other;
01006 
01007         SG_DEBUG("copying matrix of provided instance\n")
01008         memcpy(&(data.matrix[num_processed*num_features]),
01009                 casted->get_feature_matrix().matrix,
01010                 num_features*casted->get_num_vectors()*sizeof(ST));
01011 
01012         /* update counting */
01013         num_processed+=casted->get_num_vectors();
01014 
01015         /* check if reference counting is used */
01016         if (others->get_delete_data())
01017             SG_UNREF(other);
01018         other=others->get_next_element();
01019     }
01020 
01021     /* create new instance and return */
01022     CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data);
01023 
01024     SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
01025     return result;
01026 }
01027 
01028 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy(
01029         CFeatures* other)
01030 {
01031     SG_DEBUG("entering %s::create_merged_copy()\n", get_name());
01032 
01033     /* create list with one element and call general method */
01034     CList* list=new CList();
01035     list->append_element(other);
01036     CFeatures* result=create_merged_copy(list);
01037     SG_UNREF(list);
01038 
01039     SG_DEBUG("leaving %s::create_merged_copy()\n", get_name());
01040     return result;
01041 }
01042 
01043 template<class ST>
01044 void CDenseFeatures<ST>::load(CFile* loader)
01045 {
01046     SGMatrix<ST> matrix;
01047     matrix.load(loader);
01048     set_feature_matrix(matrix);
01049 }
01050 
01051 template<class ST>
01052 void CDenseFeatures<ST>::save(CFile* writer)
01053 {
01054     feature_matrix.save(writer);
01055 }
01056 
01057 template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_generic(CFeatures* const base_features)
01058 {
01059     REQUIRE(base_features->get_feature_class() == C_DENSE,
01060             "base_features must be of dynamic type CDenseFeatures\n")
01061 
01062     return (CDenseFeatures< ST >*) base_features;
01063 }
01064 
01065 template class CDenseFeatures<bool>;
01066 template class CDenseFeatures<char>;
01067 template class CDenseFeatures<int8_t>;
01068 template class CDenseFeatures<uint8_t>;
01069 template class CDenseFeatures<int16_t>;
01070 template class CDenseFeatures<uint16_t>;
01071 template class CDenseFeatures<int32_t>;
01072 template class CDenseFeatures<uint32_t>;
01073 template class CDenseFeatures<int64_t>;
01074 template class CDenseFeatures<uint64_t>;
01075 template class CDenseFeatures<float32_t>;
01076 template class CDenseFeatures<float64_t>;
01077 template class CDenseFeatures<floatmax_t>;
01078 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation