SHOGUN
v3.2.0
|
00001 #include <shogun/lib/SGMatrix.h> 00002 #include <shogun/lib/SGSparseMatrix.h> 00003 #include <shogun/lib/SGSparseVector.h> 00004 #include <shogun/labels/RegressionLabels.h> 00005 #include <shogun/io/File.h> 00006 #include <shogun/io/SGIO.h> 00007 00008 namespace shogun { 00009 00010 template <class T> 00011 SGSparseMatrix<T>::SGSparseMatrix() : SGReferencedData() 00012 { 00013 init_data(); 00014 } 00015 00016 template <class T> 00017 SGSparseMatrix<T>::SGSparseMatrix(SGSparseVector<T>* vecs, index_t num_feat, 00018 index_t num_vec, bool ref_counting) : 00019 SGReferencedData(ref_counting), 00020 num_vectors(num_vec), num_features(num_feat), 00021 sparse_matrix(vecs) 00022 { 00023 } 00024 00025 template <class T> 00026 SGSparseMatrix<T>::SGSparseMatrix(index_t num_feat, index_t num_vec, bool ref_counting) : 00027 SGReferencedData(ref_counting), 00028 num_vectors(num_vec), num_features(num_feat) 00029 { 00030 sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors); 00031 } 00032 00033 template <class T> 00034 SGSparseMatrix<T>::SGSparseMatrix(SGMatrix<T> dense) : SGReferencedData() 00035 { 00036 from_dense(dense); 00037 } 00038 00039 template <class T> 00040 SGSparseMatrix<T>::SGSparseMatrix(const SGSparseMatrix &orig) : SGReferencedData(orig) 00041 { 00042 copy_data(orig); 00043 } 00044 00045 template <class T> 00046 SGSparseMatrix<T>::~SGSparseMatrix() 00047 { 00048 unref(); 00049 } 00050 00051 template <> template <> 00052 const SGVector<complex128_t> SGSparseMatrix<complex128_t>::operator*( 00053 SGVector<float64_t> v) const 00054 { 00055 SGVector<complex128_t> result(num_vectors); 00056 REQUIRE(v.vlen==num_features, 00057 "Dimension mismatch! %d vs %d\n", 00058 v.vlen, num_features); 00059 for (index_t i=0; i<num_vectors; ++i) 00060 result[i]=sparse_matrix[i].dense_dot(v); 00061 return result; 00062 } 00063 00064 template <> template <> 00065 const SGVector<complex128_t> SGSparseMatrix<complex128_t>::operator*( 00066 SGVector<int32_t> v) const 00067 { 00068 SGVector<complex128_t> result(num_vectors); 00069 REQUIRE(v.vlen==num_features, 00070 "Dimension mismatch! %d vs %d\n", 00071 v.vlen, num_features); 00072 for (index_t i=0; i<num_vectors; ++i) 00073 result[i]=sparse_matrix[i].dense_dot(v); 00074 return result; 00075 } 00076 00077 template <> template <> 00078 const SGVector<float64_t> SGSparseMatrix<float64_t>::operator*( 00079 SGVector<int32_t> v) const 00080 { 00081 SGVector<float64_t> result(num_vectors); 00082 REQUIRE(v.vlen==num_features, 00083 "Dimension mismatch! %d vs %d\n", 00084 v.vlen, num_features); 00085 for (index_t i=0; i<num_vectors; ++i) 00086 result[i]=sparse_matrix[i].dense_dot(v); 00087 return result; 00088 } 00089 00090 template<class T> 00091 void SGSparseMatrix<T>::load(CFile* loader) 00092 { 00093 ASSERT(loader) 00094 unref(); 00095 00096 SG_SET_LOCALE_C; 00097 loader->get_sparse_matrix(sparse_matrix, num_features, num_vectors); 00098 SG_RESET_LOCALE; 00099 } 00100 00101 template<> 00102 void SGSparseMatrix<complex128_t>::load(CFile* loader) 00103 { 00104 SG_SERROR("SGSparseMatrix::load():: Not supported for complex128_t"); 00105 } 00106 00107 template<class T> SGVector<float64_t> SGSparseMatrix<T>::load_with_labels(CLibSVMFile* file, bool do_sort_features) 00108 { 00109 ASSERT(file) 00110 00111 float64_t* raw_labels; 00112 file->get_sparse_matrix(sparse_matrix, num_features, num_vectors, 00113 raw_labels, true); 00114 00115 SGVector<float64_t> labels(raw_labels, num_vectors); 00116 00117 if (do_sort_features) 00118 sort_features(); 00119 00120 return labels; 00121 } 00122 00123 template<> SGVector<float64_t> SGSparseMatrix<complex128_t>::load_with_labels(CLibSVMFile* file, bool do_sort_features) { return SGVector<float64_t>(); } 00124 00125 00126 template<class T> 00127 void SGSparseMatrix<T>::save(CFile* saver) 00128 { 00129 ASSERT(saver) 00130 00131 SG_SET_LOCALE_C; 00132 saver->set_sparse_matrix(sparse_matrix, num_features, num_vectors); 00133 SG_RESET_LOCALE; 00134 } 00135 00136 template<> 00137 void SGSparseMatrix<complex128_t>::save(CFile* saver) 00138 { 00139 SG_SERROR("SGSparseMatrix::save():: Not supported for complex128_t"); 00140 } 00141 00142 template<class T> void SGSparseMatrix<T>::save_with_labels(CLibSVMFile* file, 00143 SGVector<float64_t> labels) 00144 { 00145 ASSERT(file) 00146 int32_t num=labels.vlen; 00147 ASSERT(num>0) 00148 ASSERT(num==num_vectors) 00149 00150 float64_t* raw_labels=labels.vector; 00151 file->set_sparse_matrix(sparse_matrix, num_features, num_vectors, 00152 raw_labels); 00153 } 00154 00155 template <> void SGSparseMatrix<complex128_t>::save_with_labels(CLibSVMFile* saver, SGVector<float64_t> labels) { } 00156 00157 00158 template <class T> 00159 void SGSparseMatrix<T>::copy_data(const SGReferencedData& orig) 00160 { 00161 sparse_matrix = ((SGSparseMatrix*)(&orig))->sparse_matrix; 00162 num_vectors = ((SGSparseMatrix*)(&orig))->num_vectors; 00163 num_features = ((SGSparseMatrix*)(&orig))->num_features; 00164 } 00165 00166 template <class T> 00167 void SGSparseMatrix<T>::init_data() 00168 { 00169 sparse_matrix = NULL; 00170 num_vectors = 0; 00171 num_features = 0; 00172 } 00173 00174 template <class T> 00175 void SGSparseMatrix<T>::free_data() 00176 { 00177 SG_FREE(sparse_matrix); 00178 num_vectors = 0; 00179 num_features = 0; 00180 } 00181 00182 template<class T> SGSparseMatrix<T> SGSparseMatrix<T>::get_transposed() 00183 { 00184 SGSparseMatrix<T> sfm(num_vectors, num_features); 00185 00186 int32_t* hist=SG_CALLOC(int32_t, num_features); 00187 00188 // count the lengths of future feature vectors 00189 for (int32_t v=0; v<num_vectors; v++) 00190 { 00191 SGSparseVector<T> sv=sparse_matrix[v]; 00192 00193 for (int32_t i=0; i<sv.num_feat_entries; i++) 00194 hist[sv.features[i].feat_index]++; 00195 } 00196 00197 for (int32_t v=0; v<num_features; v++) 00198 sfm[v]=SGSparseVector<T>(hist[v]); 00199 00200 SG_FREE(hist); 00201 00202 int32_t* index=SG_CALLOC(int32_t, num_vectors); 00203 00204 // fill future feature vectors with content 00205 for (int32_t v=0; v<num_vectors; v++) 00206 { 00207 SGSparseVector<T> sv=sparse_matrix[v]; 00208 00209 for (int32_t i=0; i<sv.num_feat_entries; i++) 00210 { 00211 int32_t vidx=sv.features[i].feat_index; 00212 int32_t fidx=v; 00213 sfm[vidx].features[index[vidx]].feat_index=fidx; 00214 sfm[vidx].features[index[vidx]].entry=sv.features[i].entry; 00215 index[vidx]++; 00216 } 00217 } 00218 00219 SG_FREE(index); 00220 return sfm; 00221 } 00222 00223 00224 template<class T> void SGSparseMatrix<T>::sort_features() 00225 { 00226 for (int32_t i=0; i<num_vectors; i++) 00227 { 00228 sparse_matrix[i].sort_features(); 00229 } 00230 } 00231 00232 template<class T> void SGSparseMatrix<T>::from_dense(SGMatrix<T> full) 00233 { 00234 T* src=full.matrix; 00235 int32_t num_feat=full.num_rows; 00236 int32_t num_vec=full.num_cols; 00237 00238 REQUIRE(num_vec>0, "Matrix should have > 0 vectors!\n"); 00239 00240 SG_SINFO("converting dense feature matrix to sparse one\n") 00241 int32_t* num_feat_entries=SG_MALLOC(int, num_vec); 00242 00243 00244 int64_t num_total_entries=0; 00245 00246 // count nr of non sparse features 00247 for (int32_t i=0; i<num_vec; i++) 00248 { 00249 num_feat_entries[i]=0; 00250 for (int32_t j=0; j<num_feat; j++) 00251 { 00252 if (src[i*((int64_t) num_feat) + j] != static_cast<T>(0)) 00253 num_feat_entries[i]++; 00254 } 00255 } 00256 00257 num_features=num_feat; 00258 num_vectors=num_vec; 00259 sparse_matrix=SG_MALLOC(SGSparseVector<T>,num_vec); 00260 00261 for (int32_t i=0; i< num_vec; i++) 00262 { 00263 sparse_matrix[i]=SGSparseVector<T>(num_feat_entries[i]); 00264 int32_t sparse_feat_idx=0; 00265 00266 for (int32_t j=0; j< num_feat; j++) 00267 { 00268 int64_t pos= i*num_feat + j; 00269 00270 if (src[pos] != static_cast<T>(0)) 00271 { 00272 sparse_matrix[i].features[sparse_feat_idx].entry=src[pos]; 00273 sparse_matrix[i].features[sparse_feat_idx].feat_index=j; 00274 sparse_feat_idx++; 00275 num_total_entries++; 00276 } 00277 } 00278 } 00279 00280 SG_SINFO("sparse feature matrix has %ld entries (full matrix had %ld, sparsity %2.2f%%)\n", 00281 num_total_entries, int64_t(num_feat)*num_vec, (100.0*num_total_entries)/(int64_t(num_feat)*num_vec)); 00282 SG_FREE(num_feat_entries); 00283 } 00284 00285 template class SGSparseMatrix<bool>; 00286 template class SGSparseMatrix<char>; 00287 template class SGSparseMatrix<int8_t>; 00288 template class SGSparseMatrix<uint8_t>; 00289 template class SGSparseMatrix<int16_t>; 00290 template class SGSparseMatrix<uint16_t>; 00291 template class SGSparseMatrix<int32_t>; 00292 template class SGSparseMatrix<uint32_t>; 00293 template class SGSparseMatrix<int64_t>; 00294 template class SGSparseMatrix<uint64_t>; 00295 template class SGSparseMatrix<float32_t>; 00296 template class SGSparseMatrix<float64_t>; 00297 template class SGSparseMatrix<floatmax_t>; 00298 template class SGSparseMatrix<complex128_t>; 00299 }