SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/features/ExplicitSpecFeatures.h> 00012 #include <shogun/io/SGIO.h> 00013 00014 using namespace shogun; 00015 00016 CExplicitSpecFeatures::CExplicitSpecFeatures() :CDotFeatures() 00017 { 00018 SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()", 00019 "\n"); 00020 00021 use_normalization = false; 00022 num_strings = 0; 00023 alphabet_size = 0; 00024 00025 spec_size = 0; 00026 k_spectrum = NULL; 00027 } 00028 00029 00030 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures() 00031 { 00032 ASSERT(str) 00033 00034 use_normalization=normalize; 00035 num_strings = str->get_num_vectors(); 00036 spec_size = str->get_num_symbols(); 00037 00038 obtain_kmer_spectrum(str); 00039 00040 SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings) 00041 } 00042 00043 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig), 00044 num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size) 00045 { 00046 k_spectrum= SG_MALLOC(float64_t*, num_strings); 00047 for (int32_t i=0; i<num_strings; i++) 00048 k_spectrum[i]=SGVector<float64_t>::clone_vector(k_spectrum[i], spec_size); 00049 } 00050 00051 CExplicitSpecFeatures::~CExplicitSpecFeatures() 00052 { 00053 delete_kmer_spectrum(); 00054 } 00055 00056 int32_t CExplicitSpecFeatures::get_dim_feature_space() const 00057 { 00058 return spec_size; 00059 } 00060 00061 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2) 00062 { 00063 ASSERT(df) 00064 ASSERT(df->get_feature_type() == get_feature_type()) 00065 ASSERT(df->get_feature_class() == get_feature_class()) 00066 CExplicitSpecFeatures* sf = (CExplicitSpecFeatures*) df; 00067 00068 ASSERT(vec_idx1 < num_strings) 00069 ASSERT(vec_idx2 < sf->num_strings) 00070 float64_t* vec1=k_spectrum[vec_idx1]; 00071 float64_t* vec2=sf->k_spectrum[vec_idx2]; 00072 00073 return SGVector<float64_t>::dot(vec1, vec2, spec_size); 00074 } 00075 00076 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00077 { 00078 ASSERT(vec2_len == spec_size) 00079 ASSERT(vec_idx1 < num_strings) 00080 float64_t* vec1=k_spectrum[vec_idx1]; 00081 float64_t result=0; 00082 00083 for (int32_t i=0; i<spec_size; i++) 00084 result+=vec1[i]*vec2[i]; 00085 00086 return result; 00087 } 00088 00089 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val) 00090 { 00091 ASSERT(vec2_len == spec_size) 00092 ASSERT(vec_idx1 < num_strings) 00093 float64_t* vec1=k_spectrum[vec_idx1]; 00094 00095 if (abs_val) 00096 { 00097 for (int32_t i=0; i<spec_size; i++) 00098 vec2[i]+=alpha*CMath::abs(vec1[i]); 00099 } 00100 else 00101 { 00102 for (int32_t i=0; i<spec_size; i++) 00103 vec2[i]+=alpha*vec1[i]; 00104 } 00105 } 00106 00107 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str) 00108 { 00109 k_spectrum= SG_MALLOC(float64_t*, num_strings); 00110 00111 for (int32_t i=0; i<num_strings; i++) 00112 { 00113 k_spectrum[i]=SG_MALLOC(float64_t, spec_size); 00114 memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size); 00115 00116 int32_t len=0; 00117 bool free_fv; 00118 uint16_t* fv=str->get_feature_vector(i, len, free_fv); 00119 00120 for (int32_t j=0; j<len; j++) 00121 k_spectrum[i][fv[j]]++; 00122 00123 str->free_feature_vector(fv, i, free_fv); 00124 00125 if (use_normalization) 00126 { 00127 float64_t n=0; 00128 for (int32_t j=0; j<spec_size; j++) 00129 n+=CMath::sq(k_spectrum[i][j]); 00130 00131 n=CMath::sqrt(n); 00132 00133 for (int32_t j=0; j<spec_size; j++) 00134 k_spectrum[i][j]/=n; 00135 } 00136 } 00137 } 00138 00139 void CExplicitSpecFeatures::delete_kmer_spectrum() 00140 { 00141 for (int32_t i=0; i<num_strings; i++) 00142 SG_FREE(k_spectrum[i]); 00143 00144 SG_FREE(k_spectrum); 00145 k_spectrum=NULL; 00146 } 00147 00148 CFeatures* CExplicitSpecFeatures::duplicate() const 00149 { 00150 return new CExplicitSpecFeatures(*this); 00151 } 00152 00153 00154 00155 void* CExplicitSpecFeatures::get_feature_iterator(int32_t vector_index) 00156 { 00157 SG_NOTIMPLEMENTED 00158 return NULL; 00159 } 00160 00161 bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator) 00162 { 00163 SG_NOTIMPLEMENTED 00164 return false; 00165 } 00166 00167 void CExplicitSpecFeatures::free_feature_iterator(void* iterator) 00168 { 00169 SG_NOTIMPLEMENTED 00170 } 00171 00172 int32_t CExplicitSpecFeatures::get_nnz_features_for_vector(int32_t num) 00173 { 00174 SG_NOTIMPLEMENTED 00175 return 0; 00176 } 00177 00178 EFeatureType CExplicitSpecFeatures::get_feature_type() const 00179 { 00180 return F_UNKNOWN; 00181 } 00182 00183 EFeatureClass CExplicitSpecFeatures::get_feature_class() const 00184 { 00185 return C_SPEC; 00186 } 00187 00188 int32_t CExplicitSpecFeatures::get_num_vectors() const 00189 { 00190 return num_strings; 00191 }