SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2011-2013 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 * Copyright (C) 2010 Berlin Institute of Technology 00012 */ 00013 00014 #include <shogun/features/DenseFeatures.h> 00015 #include <shogun/preprocessor/DensePreprocessor.h> 00016 #include <shogun/io/SGIO.h> 00017 #include <shogun/base/Parameter.h> 00018 #include <shogun/mathematics/Math.h> 00019 00020 #include <string.h> 00021 00022 namespace shogun { 00023 00024 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size) 00025 { 00026 init(); 00027 } 00028 00029 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) : 00030 CDotFeatures(orig) 00031 { 00032 init(); 00033 set_feature_matrix(orig.feature_matrix); 00034 initialize_cache(); 00035 00036 if (orig.m_subset_stack != NULL) 00037 { 00038 SG_UNREF(m_subset_stack); 00039 m_subset_stack=new CSubsetStack(*orig.m_subset_stack); 00040 SG_REF(m_subset_stack); 00041 } 00042 } 00043 00044 template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) : 00045 CDotFeatures() 00046 { 00047 init(); 00048 set_feature_matrix(matrix); 00049 } 00050 00051 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) : 00052 CDotFeatures() 00053 { 00054 init(); 00055 set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec)); 00056 } 00057 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) : 00058 CDotFeatures() 00059 { 00060 init(); 00061 load(loader); 00062 } 00063 00064 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const 00065 { 00066 return new CDenseFeatures<ST>(*this); 00067 } 00068 00069 template<class ST> CDenseFeatures<ST>::~CDenseFeatures() 00070 { 00071 free_features(); 00072 } 00073 00074 template<class ST> void CDenseFeatures<ST>::free_features() 00075 { 00076 m_subset_stack->remove_all_subsets(); 00077 free_feature_matrix(); 00078 SG_UNREF(feature_cache); 00079 } 00080 00081 template<class ST> void CDenseFeatures<ST>::free_feature_matrix() 00082 { 00083 m_subset_stack->remove_all_subsets(); 00084 feature_matrix=SGMatrix<ST>(); 00085 num_vectors = 0; 00086 num_features = 0; 00087 } 00088 00089 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree) 00090 { 00091 /* index conversion for subset, only for array access */ 00092 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00093 00094 len = num_features; 00095 00096 if (feature_matrix.matrix) 00097 { 00098 dofree = false; 00099 return &feature_matrix.matrix[real_num * int64_t(num_features)]; 00100 } 00101 00102 ST* feat = NULL; 00103 dofree = false; 00104 00105 if (feature_cache) 00106 { 00107 feat = feature_cache->lock_entry(real_num); 00108 00109 if (feat) 00110 return feat; 00111 else 00112 feat = feature_cache->set_entry(real_num); 00113 } 00114 00115 if (!feat) 00116 dofree = true; 00117 feat = compute_feature_vector(num, len, feat); 00118 00119 if (get_num_preprocessors()) 00120 { 00121 int32_t tmp_len = len; 00122 ST* tmp_feat_before = feat; 00123 ST* tmp_feat_after = NULL; 00124 00125 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00126 { 00127 CDensePreprocessor<ST>* p = 00128 (CDensePreprocessor<ST>*) get_preprocessor(i); 00129 // temporary hack 00130 SGVector<ST> applied = p->apply_to_feature_vector( 00131 SGVector<ST>(tmp_feat_before, tmp_len)); 00132 tmp_feat_after = applied.vector; 00133 SG_UNREF(p); 00134 00135 if (i != 0) // delete feature vector, except for the the first one, i.e., feat 00136 SG_FREE(tmp_feat_before); 00137 tmp_feat_before = tmp_feat_after; 00138 } 00139 00140 // note: tmp_feat_after should be checked as it is used by memcpy 00141 if (tmp_feat_after) 00142 { 00143 memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len); 00144 SG_FREE(tmp_feat_after); 00145 00146 len = tmp_len; 00147 } 00148 } 00149 return feat; 00150 } 00151 00152 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num) 00153 { 00154 /* index conversion for subset, only for array access */ 00155 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00156 00157 if (num>=get_num_vectors()) 00158 { 00159 SG_ERROR("Index out of bounds (number of vectors %d, you " 00160 "requested %d)\n", get_num_vectors(), num); 00161 } 00162 00163 if (!feature_matrix.matrix) 00164 SG_ERROR("Requires a in-memory feature matrix\n") 00165 00166 if (vector.vlen != num_features) 00167 SG_ERROR( 00168 "Vector not of length %d (has %d)\n", num_features, vector.vlen); 00169 00170 memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector, 00171 int64_t(num_features) * sizeof(ST)); 00172 } 00173 00174 template<class ST> SGVector<ST> CDenseFeatures<ST>::get_feature_vector(int32_t num) 00175 { 00176 /* index conversion for subset, only for array access */ 00177 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00178 00179 if (num >= get_num_vectors()) 00180 { 00181 SG_ERROR("Index out of bounds (number of vectors %d, you " 00182 "requested %d)\n", get_num_vectors(), real_num); 00183 } 00184 00185 int32_t vlen; 00186 bool do_free; 00187 ST* vector= get_feature_vector(num, vlen, do_free); 00188 return SGVector<ST>(vector, vlen, do_free); 00189 } 00190 00191 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree) 00192 { 00193 if (feature_cache) 00194 feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num)); 00195 00196 if (dofree) 00197 SG_FREE(feat_vec); 00198 } 00199 00200 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num) 00201 { 00202 free_feature_vector(vec.vector, num, false); 00203 vec=SGVector<ST>(); 00204 } 00205 00206 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len) 00207 { 00208 if (m_subset_stack->has_subsets()) 00209 SG_ERROR("A subset is set, cannot call vector_subset\n") 00210 00211 ASSERT(feature_matrix.matrix) 00212 ASSERT(idx_len<=num_vectors) 00213 00214 int32_t num_vec = num_vectors; 00215 num_vectors = idx_len; 00216 00217 int32_t old_ii = -1; 00218 00219 for (int32_t i = 0; i < idx_len; i++) 00220 { 00221 int32_t ii = idx[i]; 00222 ASSERT(old_ii<ii) 00223 00224 if (ii < 0 || ii >= num_vec) 00225 SG_ERROR("Index out of range: should be 0<%d<%d\n", ii, num_vec) 00226 00227 if (i == ii) 00228 continue; 00229 00230 memcpy(&feature_matrix.matrix[int64_t(num_features) * i], 00231 &feature_matrix.matrix[int64_t(num_features) * ii], 00232 num_features * sizeof(ST)); 00233 old_ii = ii; 00234 } 00235 } 00236 00237 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len) 00238 { 00239 if (m_subset_stack->has_subsets()) 00240 SG_ERROR("A subset is set, cannot call feature_subset\n") 00241 00242 ASSERT(feature_matrix.matrix) 00243 ASSERT(idx_len<=num_features) 00244 int32_t num_feat = num_features; 00245 num_features = idx_len; 00246 00247 for (int32_t i = 0; i < num_vectors; i++) 00248 { 00249 ST* src = &feature_matrix.matrix[int64_t(num_feat) * i]; 00250 ST* dst = &feature_matrix.matrix[int64_t(num_features) * i]; 00251 00252 int32_t old_jj = -1; 00253 for (int32_t j = 0; j < idx_len; j++) 00254 { 00255 int32_t jj = idx[j]; 00256 ASSERT(old_jj<jj) 00257 if (jj < 0 || jj >= num_feat) 00258 SG_ERROR( 00259 "Index out of range: should be 0<%d<%d\n", jj, num_feat); 00260 00261 dst[j] = src[jj]; 00262 old_jj = jj; 00263 } 00264 } 00265 } 00266 00267 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix() 00268 { 00269 if (!m_subset_stack->has_subsets()) 00270 return feature_matrix; 00271 00272 SGMatrix<ST> submatrix(num_features, get_num_vectors()); 00273 00274 /* copy a subset vector wise */ 00275 for (int32_t i=0; i<submatrix.num_cols; ++i) 00276 { 00277 int32_t real_i = m_subset_stack->subset_idx_conversion(i); 00278 memcpy(&submatrix.matrix[i*int64_t(num_features)], 00279 &feature_matrix.matrix[real_i * int64_t(num_features)], 00280 num_features * sizeof(ST)); 00281 } 00282 00283 return submatrix; 00284 } 00285 00286 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix() 00287 { 00288 SGMatrix<ST> st_feature_matrix=feature_matrix; 00289 m_subset_stack->remove_all_subsets(); 00290 SG_UNREF(feature_cache); 00291 clean_preprocessors(); 00292 free_feature_matrix(); 00293 return st_feature_matrix; 00294 } 00295 00296 template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix) 00297 { 00298 m_subset_stack->remove_all_subsets(); 00299 free_feature_matrix(); 00300 feature_matrix = matrix; 00301 num_features = matrix.num_rows; 00302 num_vectors = matrix.num_cols; 00303 } 00304 00305 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec) 00306 { 00307 num_feat = num_features; 00308 num_vec = num_vectors; 00309 return feature_matrix.matrix; 00310 } 00311 00312 template<class ST> CDenseFeatures<ST>* CDenseFeatures<ST>::get_transposed() 00313 { 00314 int32_t num_feat; 00315 int32_t num_vec; 00316 ST* fm = get_transposed(num_feat, num_vec); 00317 00318 return new CDenseFeatures<ST>(fm, num_feat, num_vec); 00319 } 00320 00321 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec) 00322 { 00323 num_feat = get_num_vectors(); 00324 num_vec = num_features; 00325 00326 int32_t old_num_vec=get_num_vectors(); 00327 00328 ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec); 00329 00330 for (int32_t i=0; i<old_num_vec; i++) 00331 { 00332 SGVector<ST> vec=get_feature_vector(i); 00333 00334 for (int32_t j=0; j<vec.vlen; j++) 00335 fm[j*int64_t(old_num_vec)+i]=vec.vector[j]; 00336 00337 free_feature_vector(vec, i); 00338 } 00339 00340 return fm; 00341 } 00342 00343 template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src) 00344 { 00345 if (m_subset_stack->has_subsets()) 00346 SG_ERROR("A subset is set, cannot call copy_feature_matrix\n") 00347 00348 free_feature_matrix(); 00349 feature_matrix = src.clone(); 00350 num_features = src.num_rows; 00351 num_vectors = src.num_cols; 00352 initialize_cache(); 00353 } 00354 00355 template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df) 00356 { 00357 m_subset_stack->remove_all_subsets(); 00358 00359 int32_t num_feat = df->get_dim_feature_space(); 00360 int32_t num_vec = df->get_num_vectors(); 00361 00362 ASSERT(num_feat>0 && num_vec>0) 00363 00364 free_feature_matrix(); 00365 feature_matrix = SGMatrix<ST>(num_feat, num_vec); 00366 00367 for (int32_t i = 0; i < num_vec; i++) 00368 { 00369 SGVector<float64_t> v = df->get_computed_dot_feature_vector(i); 00370 ASSERT(num_feat==v.vlen) 00371 00372 for (int32_t j = 0; j < num_feat; j++) 00373 feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j]; 00374 } 00375 num_features = num_feat; 00376 num_vectors = num_vec; 00377 } 00378 00379 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing) 00380 { 00381 if (m_subset_stack->has_subsets()) 00382 SG_ERROR("A subset is set, cannot call apply_preproc\n") 00383 00384 SG_DEBUG("force: %d\n", force_preprocessing) 00385 00386 if (feature_matrix.matrix && get_num_preprocessors()) 00387 { 00388 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00389 { 00390 if ((!is_preprocessed(i) || force_preprocessing)) 00391 { 00392 set_preprocessed(i); 00393 CDensePreprocessor<ST>* p = 00394 (CDensePreprocessor<ST>*) get_preprocessor(i); 00395 SG_INFO("preprocessing using preproc %s\n", p->get_name()) 00396 00397 if (p->apply_to_feature_matrix(this).matrix == NULL) 00398 { 00399 SG_UNREF(p); 00400 return false; 00401 } 00402 SG_UNREF(p); 00403 00404 } 00405 } 00406 00407 return true; 00408 } 00409 else 00410 { 00411 if (!feature_matrix.matrix) 00412 SG_ERROR("no feature matrix\n") 00413 00414 if (!get_num_preprocessors()) 00415 SG_ERROR("no preprocessors available\n") 00416 00417 return false; 00418 } 00419 } 00420 00421 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const 00422 { 00423 return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors; 00424 } 00425 00426 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() const { return num_features; } 00427 00428 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num) 00429 { 00430 num_features = num; 00431 initialize_cache(); 00432 } 00433 00434 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num) 00435 { 00436 if (m_subset_stack->has_subsets()) 00437 SG_ERROR("A subset is set, cannot call set_num_vectors\n") 00438 00439 num_vectors = num; 00440 initialize_cache(); 00441 } 00442 00443 template<class ST> void CDenseFeatures<ST>::initialize_cache() 00444 { 00445 if (m_subset_stack->has_subsets()) 00446 SG_ERROR("A subset is set, cannot call initialize_cache\n") 00447 00448 if (num_features && num_vectors) 00449 { 00450 SG_UNREF(feature_cache); 00451 feature_cache = new CCache<ST>(get_cache_size(), num_features, 00452 num_vectors); 00453 SG_REF(feature_cache); 00454 } 00455 } 00456 00457 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; } 00458 00459 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors) 00460 { 00461 if (m_subset_stack->has_subsets()) 00462 SG_ERROR("A subset is set, cannot call reshape\n") 00463 00464 if (p_num_features * p_num_vectors 00465 == this->num_features * this->num_vectors) 00466 { 00467 num_features = p_num_features; 00468 num_vectors = p_num_vectors; 00469 return true; 00470 } else 00471 return false; 00472 } 00473 00474 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; } 00475 00476 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df, 00477 int32_t vec_idx2) 00478 { 00479 ASSERT(df) 00480 ASSERT(df->get_feature_type() == get_feature_type()) 00481 ASSERT(df->get_feature_class() == get_feature_class()) 00482 CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df; 00483 00484 int32_t len1, len2; 00485 bool free1, free2; 00486 00487 ST* vec1 = get_feature_vector(vec_idx1, len1, free1); 00488 ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2); 00489 00490 float64_t result = SGVector<ST>::dot(vec1, vec2, len1); 00491 00492 free_feature_vector(vec1, vec_idx1, free1); 00493 sf->free_feature_vector(vec2, vec_idx2, free2); 00494 00495 return result; 00496 } 00497 00498 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, 00499 float64_t* vec2, int32_t vec2_len, bool abs_val) 00500 { 00501 ASSERT(vec2_len == num_features) 00502 00503 int32_t vlen; 00504 bool vfree; 00505 ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00506 00507 ASSERT(vlen == num_features) 00508 00509 if (abs_val) 00510 { 00511 for (int32_t i = 0; i < num_features; i++) 00512 vec2[i] += alpha * CMath::abs(vec1[i]); 00513 } 00514 else 00515 { 00516 for (int32_t i = 0; i < num_features; i++) 00517 vec2[i] += alpha * vec1[i]; 00518 } 00519 00520 free_feature_vector(vec1, vec_idx1, vfree); 00521 } 00522 00523 template<> 00524 void CDenseFeatures<float64_t>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, 00525 float64_t* vec2, int32_t vec2_len, bool abs_val) 00526 { 00527 ASSERT(vec2_len == num_features) 00528 00529 int32_t vlen; 00530 bool vfree; 00531 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00532 00533 ASSERT(vlen == num_features) 00534 00535 if (abs_val) 00536 { 00537 for (int32_t i = 0; i < num_features; i++) 00538 vec2[i] += alpha * CMath::abs(vec1[i]); 00539 } 00540 else 00541 { 00542 SGVector<float64_t>::vec1_plus_scalar_times_vec2(vec2, alpha, vec1, num_features); 00543 } 00544 00545 free_feature_vector(vec1, vec_idx1, vfree); 00546 } 00547 00548 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num) 00549 { 00550 return num_features; 00551 } 00552 00553 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index) 00554 { 00555 if (vector_index>=get_num_vectors()) 00556 { 00557 SG_ERROR("Index out of bounds (number of vectors %d, you " 00558 "requested %d)\n", get_num_vectors(), vector_index); 00559 } 00560 00561 dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1); 00562 iterator->vec = get_feature_vector(vector_index, iterator->vlen, 00563 iterator->vfree); 00564 iterator->vidx = vector_index; 00565 iterator->index = 0; 00566 return iterator; 00567 } 00568 00569 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value, 00570 void* iterator) 00571 { 00572 dense_feature_iterator* it = (dense_feature_iterator*) iterator; 00573 if (!it || it->index >= it->vlen) 00574 return false; 00575 00576 index = it->index++; 00577 value = (float64_t) it->vec[index]; 00578 00579 return true; 00580 } 00581 00582 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator) 00583 { 00584 if (!iterator) 00585 return; 00586 00587 dense_feature_iterator* it = (dense_feature_iterator*) iterator; 00588 free_feature_vector(it->vec, it->vidx, it->vfree); 00589 SG_FREE(it); 00590 } 00591 00592 template<class ST> CFeatures* CDenseFeatures<ST>::copy_subset(SGVector<index_t> indices) 00593 { 00594 SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen); 00595 00596 for (index_t i=0; i<indices.vlen; ++i) 00597 { 00598 index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]); 00599 memcpy(&feature_matrix_copy.matrix[i*num_features], 00600 &feature_matrix.matrix[real_idx*num_features], 00601 num_features*sizeof(ST)); 00602 } 00603 00604 CFeatures* result=new CDenseFeatures(feature_matrix_copy); 00605 SG_REF(result); 00606 return result; 00607 } 00608 00609 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len, 00610 ST* target) 00611 { 00612 SG_NOTIMPLEMENTED 00613 len = 0; 00614 return NULL; 00615 } 00616 00617 template<class ST> void CDenseFeatures<ST>::init() 00618 { 00619 num_vectors = 0; 00620 num_features = 0; 00621 00622 feature_matrix = SGMatrix<ST>(); 00623 feature_cache = NULL; 00624 00625 set_generic<ST>(); 00626 00627 /* not store number of vectors in subset */ 00628 SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE); 00629 SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE); 00630 SG_ADD(&feature_matrix, "feature_matrix", 00631 "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE); 00632 } 00633 00634 #define GET_FEATURE_TYPE(f_type, sg_type) \ 00635 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \ 00636 { \ 00637 return f_type; \ 00638 } 00639 00640 GET_FEATURE_TYPE(F_BOOL, bool) 00641 GET_FEATURE_TYPE(F_CHAR, char) 00642 GET_FEATURE_TYPE(F_BYTE, uint8_t) 00643 GET_FEATURE_TYPE(F_BYTE, int8_t) 00644 GET_FEATURE_TYPE(F_SHORT, int16_t) 00645 GET_FEATURE_TYPE(F_WORD, uint16_t) 00646 GET_FEATURE_TYPE(F_INT, int32_t) 00647 GET_FEATURE_TYPE(F_UINT, uint32_t) 00648 GET_FEATURE_TYPE(F_LONG, int64_t) 00649 GET_FEATURE_TYPE(F_ULONG, uint64_t) 00650 GET_FEATURE_TYPE(F_SHORTREAL, float32_t) 00651 GET_FEATURE_TYPE(F_DREAL, float64_t) 00652 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t) 00653 #undef GET_FEATURE_TYPE 00654 00655 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1, 00656 float64_t* vec2, int32_t vec2_len) 00657 { 00658 ASSERT(vec2_len == num_features) 00659 00660 int32_t vlen; 00661 bool vfree; 00662 bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00663 00664 ASSERT(vlen == num_features) 00665 float64_t result = 0; 00666 00667 for (int32_t i = 0; i < num_features; i++) 00668 result += vec1[i] ? vec2[i] : 0; 00669 00670 free_feature_vector(vec1, vec_idx1, vfree); 00671 00672 return result; 00673 } 00674 00675 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1, 00676 float64_t* vec2, int32_t vec2_len) 00677 { 00678 ASSERT(vec2_len == num_features) 00679 00680 int32_t vlen; 00681 bool vfree; 00682 char* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00683 00684 ASSERT(vlen == num_features) 00685 float64_t result = 0; 00686 00687 for (int32_t i = 0; i < num_features; i++) 00688 result += vec1[i] * vec2[i]; 00689 00690 free_feature_vector(vec1, vec_idx1, vfree); 00691 00692 return result; 00693 } 00694 00695 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1, 00696 float64_t* vec2, int32_t vec2_len) 00697 { 00698 ASSERT(vec2_len == num_features) 00699 00700 int32_t vlen; 00701 bool vfree; 00702 int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00703 00704 ASSERT(vlen == num_features) 00705 float64_t result = 0; 00706 00707 for (int32_t i = 0; i < num_features; i++) 00708 result += vec1[i] * vec2[i]; 00709 00710 free_feature_vector(vec1, vec_idx1, vfree); 00711 00712 return result; 00713 } 00714 00715 template<> float64_t CDenseFeatures<uint8_t>::dense_dot( 00716 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00717 { 00718 ASSERT(vec2_len == num_features) 00719 00720 int32_t vlen; 00721 bool vfree; 00722 uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00723 00724 ASSERT(vlen == num_features) 00725 float64_t result = 0; 00726 00727 for (int32_t i = 0; i < num_features; i++) 00728 result += vec1[i] * vec2[i]; 00729 00730 free_feature_vector(vec1, vec_idx1, vfree); 00731 00732 return result; 00733 } 00734 00735 template<> float64_t CDenseFeatures<int16_t>::dense_dot( 00736 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00737 { 00738 ASSERT(vec2_len == num_features) 00739 00740 int32_t vlen; 00741 bool vfree; 00742 int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00743 00744 ASSERT(vlen == num_features) 00745 float64_t result = 0; 00746 00747 for (int32_t i = 0; i < num_features; i++) 00748 result += vec1[i] * vec2[i]; 00749 00750 free_feature_vector(vec1, vec_idx1, vfree); 00751 00752 return result; 00753 } 00754 00755 template<> float64_t CDenseFeatures<uint16_t>::dense_dot( 00756 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00757 { 00758 ASSERT(vec2_len == num_features) 00759 00760 int32_t vlen; 00761 bool vfree; 00762 uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00763 00764 ASSERT(vlen == num_features) 00765 float64_t result = 0; 00766 00767 for (int32_t i = 0; i < num_features; i++) 00768 result += vec1[i] * vec2[i]; 00769 00770 free_feature_vector(vec1, vec_idx1, vfree); 00771 00772 return result; 00773 } 00774 00775 template<> float64_t CDenseFeatures<int32_t>::dense_dot( 00776 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00777 { 00778 ASSERT(vec2_len == num_features) 00779 00780 int32_t vlen; 00781 bool vfree; 00782 int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00783 00784 ASSERT(vlen == num_features) 00785 float64_t result = 0; 00786 00787 for (int32_t i = 0; i < num_features; i++) 00788 result += vec1[i] * vec2[i]; 00789 00790 free_feature_vector(vec1, vec_idx1, vfree); 00791 00792 return result; 00793 } 00794 00795 template<> float64_t CDenseFeatures<uint32_t>::dense_dot( 00796 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00797 { 00798 ASSERT(vec2_len == num_features) 00799 00800 int32_t vlen; 00801 bool vfree; 00802 uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00803 00804 ASSERT(vlen == num_features) 00805 float64_t result = 0; 00806 00807 for (int32_t i = 0; i < num_features; i++) 00808 result += vec1[i] * vec2[i]; 00809 00810 free_feature_vector(vec1, vec_idx1, vfree); 00811 00812 return result; 00813 } 00814 00815 template<> float64_t CDenseFeatures<int64_t>::dense_dot( 00816 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00817 { 00818 ASSERT(vec2_len == num_features) 00819 00820 int32_t vlen; 00821 bool vfree; 00822 int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00823 00824 ASSERT(vlen == num_features) 00825 float64_t result = 0; 00826 00827 for (int32_t i = 0; i < num_features; i++) 00828 result += vec1[i] * vec2[i]; 00829 00830 free_feature_vector(vec1, vec_idx1, vfree); 00831 00832 return result; 00833 } 00834 00835 template<> float64_t CDenseFeatures<uint64_t>::dense_dot( 00836 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00837 { 00838 ASSERT(vec2_len == num_features) 00839 00840 int32_t vlen; 00841 bool vfree; 00842 uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00843 00844 ASSERT(vlen == num_features) 00845 float64_t result = 0; 00846 00847 for (int32_t i = 0; i < num_features; i++) 00848 result += vec1[i] * vec2[i]; 00849 00850 free_feature_vector(vec1, vec_idx1, vfree); 00851 00852 return result; 00853 } 00854 00855 template<> float64_t CDenseFeatures<float32_t>::dense_dot( 00856 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00857 { 00858 ASSERT(vec2_len == num_features) 00859 00860 int32_t vlen; 00861 bool vfree; 00862 float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00863 00864 ASSERT(vlen == num_features) 00865 float64_t result = 0; 00866 00867 for (int32_t i = 0; i < num_features; i++) 00868 result += vec1[i] * vec2[i]; 00869 00870 free_feature_vector(vec1, vec_idx1, vfree); 00871 00872 return result; 00873 } 00874 00875 template<> float64_t CDenseFeatures<float64_t>::dense_dot( 00876 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00877 { 00878 ASSERT(vec2_len == num_features) 00879 00880 int32_t vlen; 00881 bool vfree; 00882 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00883 00884 ASSERT(vlen == num_features) 00885 float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features); 00886 00887 free_feature_vector(vec1, vec_idx1, vfree); 00888 00889 return result; 00890 } 00891 00892 template<> float64_t CDenseFeatures<floatmax_t>::dense_dot( 00893 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00894 { 00895 ASSERT(vec2_len == num_features) 00896 00897 int32_t vlen; 00898 bool vfree; 00899 floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00900 00901 ASSERT(vlen == num_features) 00902 float64_t result = 0; 00903 00904 for (int32_t i = 0; i < num_features; i++) 00905 result += vec1[i] * vec2[i]; 00906 00907 free_feature_vector(vec1, vec_idx1, vfree); 00908 00909 return result; 00910 } 00911 00912 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs) 00913 { 00914 if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors ) 00915 return false; 00916 00917 ST* vec1; 00918 ST* vec2; 00919 int32_t v1len, v2len; 00920 bool v1free, v2free, stop = false; 00921 00922 for (int32_t i = 0; i < num_vectors; i++) 00923 { 00924 vec1 = get_feature_vector(i, v1len, v1free); 00925 vec2 = rhs->get_feature_vector(i, v2len, v2free); 00926 00927 if (v1len!=v2len) 00928 stop = true; 00929 00930 for (int32_t j=0; j<v1len; j++) 00931 { 00932 if (vec1[j]!=vec2[j]) 00933 stop = true; 00934 } 00935 00936 free_feature_vector(vec1, i, v1free); 00937 free_feature_vector(vec2, i, v2free); 00938 00939 if (stop) 00940 return false; 00941 } 00942 00943 return true; 00944 } 00945 00946 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy( 00947 CList* others) 00948 { 00949 SG_DEBUG("entering %s::create_merged_copy()\n", get_name()); 00950 00951 if (!others) 00952 return NULL; 00953 00954 /* first, check other features and count number of elements */ 00955 CSGObject* other=others->get_first_element(); 00956 index_t num_vectors_merged=num_vectors; 00957 while (other) 00958 { 00959 CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other); 00960 00961 if (!casted) 00962 { 00963 SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to " 00964 "same type as %s\n",get_name(), other->get_name(), get_name()); 00965 } 00966 00967 if (get_feature_type()!=casted->get_feature_type() || 00968 get_feature_class()!=casted->get_feature_class() || 00969 strcmp(get_name(), casted->get_name())) 00970 { 00971 SG_ERROR("%s::create_merged_copy(): Features are of different type!\n", 00972 get_name()); 00973 } 00974 00975 if (num_features!=casted->num_features) 00976 { 00977 SG_ERROR("%s::create_merged_copy(): Provided feature object has " 00978 "different dimension than this one\n"); 00979 } 00980 00981 num_vectors_merged+=casted->get_num_vectors(); 00982 00983 /* check if reference counting is used */ 00984 if (others->get_delete_data()) 00985 SG_UNREF(other); 00986 other=others->get_next_element(); 00987 } 00988 00989 /* create new feature matrix and copy both instances data into it */ 00990 SGMatrix<ST> data(num_features, num_vectors_merged); 00991 00992 /* copy data of this instance */ 00993 SG_DEBUG("copying matrix of this instance\n") 00994 memcpy(data.matrix, feature_matrix.matrix, 00995 num_features*num_vectors*sizeof(ST)); 00996 00997 /* count number of vectors (not elements) processed so far */ 00998 index_t num_processed=num_vectors; 00999 01000 /* now copy data of other features bock wise */ 01001 other=others->get_first_element(); 01002 while (other) 01003 { 01004 /* cast is safe due to above check */ 01005 CDenseFeatures<ST>* casted=(CDenseFeatures<ST>*)other; 01006 01007 SG_DEBUG("copying matrix of provided instance\n") 01008 memcpy(&(data.matrix[num_processed*num_features]), 01009 casted->get_feature_matrix().matrix, 01010 num_features*casted->get_num_vectors()*sizeof(ST)); 01011 01012 /* update counting */ 01013 num_processed+=casted->get_num_vectors(); 01014 01015 /* check if reference counting is used */ 01016 if (others->get_delete_data()) 01017 SG_UNREF(other); 01018 other=others->get_next_element(); 01019 } 01020 01021 /* create new instance and return */ 01022 CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data); 01023 01024 SG_DEBUG("leaving %s::create_merged_copy()\n", get_name()); 01025 return result; 01026 } 01027 01028 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy( 01029 CFeatures* other) 01030 { 01031 SG_DEBUG("entering %s::create_merged_copy()\n", get_name()); 01032 01033 /* create list with one element and call general method */ 01034 CList* list=new CList(); 01035 list->append_element(other); 01036 CFeatures* result=create_merged_copy(list); 01037 SG_UNREF(list); 01038 01039 SG_DEBUG("leaving %s::create_merged_copy()\n", get_name()); 01040 return result; 01041 } 01042 01043 template<class ST> 01044 void CDenseFeatures<ST>::load(CFile* loader) 01045 { 01046 SGMatrix<ST> matrix; 01047 matrix.load(loader); 01048 set_feature_matrix(matrix); 01049 } 01050 01051 template<class ST> 01052 void CDenseFeatures<ST>::save(CFile* writer) 01053 { 01054 feature_matrix.save(writer); 01055 } 01056 01057 template< class ST > CDenseFeatures< ST >* CDenseFeatures< ST >::obtain_from_generic(CFeatures* const base_features) 01058 { 01059 REQUIRE(base_features->get_feature_class() == C_DENSE, 01060 "base_features must be of dynamic type CDenseFeatures\n") 01061 01062 return (CDenseFeatures< ST >*) base_features; 01063 } 01064 01065 template class CDenseFeatures<bool>; 01066 template class CDenseFeatures<char>; 01067 template class CDenseFeatures<int8_t>; 01068 template class CDenseFeatures<uint8_t>; 01069 template class CDenseFeatures<int16_t>; 01070 template class CDenseFeatures<uint16_t>; 01071 template class CDenseFeatures<int32_t>; 01072 template class CDenseFeatures<uint32_t>; 01073 template class CDenseFeatures<int64_t>; 01074 template class CDenseFeatures<uint64_t>; 01075 template class CDenseFeatures<float32_t>; 01076 template class CDenseFeatures<float64_t>; 01077 template class CDenseFeatures<floatmax_t>; 01078 }