SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SGSparseMatrix.cpp
Go to the documentation of this file.
00001 #include <shogun/lib/SGMatrix.h>
00002 #include <shogun/lib/SGSparseMatrix.h>
00003 #include <shogun/lib/SGSparseVector.h>
00004 #include <shogun/labels/RegressionLabels.h>
00005 #include <shogun/io/File.h>
00006 #include <shogun/io/SGIO.h>
00007 
00008 namespace shogun {
00009 
00010 template <class T>
00011 SGSparseMatrix<T>::SGSparseMatrix() : SGReferencedData()
00012 {
00013     init_data();
00014 }
00015 
00016 template <class T>
00017 SGSparseMatrix<T>::SGSparseMatrix(SGSparseVector<T>* vecs, index_t num_feat,
00018         index_t num_vec, bool ref_counting) :
00019     SGReferencedData(ref_counting),
00020     num_vectors(num_vec), num_features(num_feat),
00021     sparse_matrix(vecs)
00022 {
00023 }
00024 
00025 template <class T>
00026 SGSparseMatrix<T>::SGSparseMatrix(index_t num_feat, index_t num_vec, bool ref_counting) :
00027     SGReferencedData(ref_counting),
00028     num_vectors(num_vec), num_features(num_feat)
00029 {
00030     sparse_matrix=SG_MALLOC(SGSparseVector<T>, num_vectors);
00031 }
00032 
00033 template <class T>
00034 SGSparseMatrix<T>::SGSparseMatrix(SGMatrix<T> dense) : SGReferencedData()
00035 {
00036     from_dense(dense);
00037 }
00038 
00039 template <class T>
00040 SGSparseMatrix<T>::SGSparseMatrix(const SGSparseMatrix &orig) : SGReferencedData(orig)
00041 {
00042     copy_data(orig);
00043 }
00044 
00045 template <class T>
00046 SGSparseMatrix<T>::~SGSparseMatrix()
00047 {
00048     unref();
00049 }
00050 
00051 template <> template <>
00052 const SGVector<complex128_t> SGSparseMatrix<complex128_t>::operator*(
00053     SGVector<float64_t> v) const
00054 {
00055     SGVector<complex128_t> result(num_vectors);
00056     REQUIRE(v.vlen==num_features,
00057         "Dimension mismatch! %d vs %d\n",
00058         v.vlen, num_features);
00059     for (index_t i=0; i<num_vectors; ++i)
00060         result[i]=sparse_matrix[i].dense_dot(v);
00061     return result;
00062 }
00063 
00064 template <> template <>
00065 const SGVector<complex128_t> SGSparseMatrix<complex128_t>::operator*(
00066     SGVector<int32_t> v) const
00067 {
00068     SGVector<complex128_t> result(num_vectors);
00069     REQUIRE(v.vlen==num_features,
00070         "Dimension mismatch! %d vs %d\n",
00071         v.vlen, num_features);
00072     for (index_t i=0; i<num_vectors; ++i)
00073         result[i]=sparse_matrix[i].dense_dot(v);
00074     return result;
00075 }
00076 
00077 template <> template <>
00078 const SGVector<float64_t> SGSparseMatrix<float64_t>::operator*(
00079     SGVector<int32_t> v) const
00080 {
00081     SGVector<float64_t> result(num_vectors);
00082     REQUIRE(v.vlen==num_features,
00083         "Dimension mismatch! %d vs %d\n",
00084         v.vlen, num_features);
00085     for (index_t i=0; i<num_vectors; ++i)
00086         result[i]=sparse_matrix[i].dense_dot(v);
00087     return result;
00088 }
00089 
00090 template<class T>
00091 void SGSparseMatrix<T>::load(CFile* loader)
00092 {
00093     ASSERT(loader)
00094     unref();
00095 
00096     SG_SET_LOCALE_C;
00097     loader->get_sparse_matrix(sparse_matrix, num_features, num_vectors);
00098     SG_RESET_LOCALE;
00099 }
00100 
00101 template<>
00102 void SGSparseMatrix<complex128_t>::load(CFile* loader)
00103 {
00104     SG_SERROR("SGSparseMatrix::load():: Not supported for complex128_t");
00105 }
00106 
00107 template<class T> SGVector<float64_t> SGSparseMatrix<T>::load_with_labels(CLibSVMFile* file, bool do_sort_features)
00108 {
00109     ASSERT(file)
00110 
00111     float64_t* raw_labels;
00112     file->get_sparse_matrix(sparse_matrix, num_features, num_vectors,
00113                     raw_labels, true);
00114 
00115     SGVector<float64_t> labels(raw_labels, num_vectors);
00116 
00117     if (do_sort_features)
00118         sort_features();
00119 
00120     return labels;
00121 }
00122 
00123 template<> SGVector<float64_t> SGSparseMatrix<complex128_t>::load_with_labels(CLibSVMFile* file, bool do_sort_features) { return SGVector<float64_t>(); }
00124 
00125 
00126 template<class T>
00127 void SGSparseMatrix<T>::save(CFile* saver)
00128 {
00129     ASSERT(saver)
00130 
00131     SG_SET_LOCALE_C;
00132     saver->set_sparse_matrix(sparse_matrix, num_features, num_vectors);
00133     SG_RESET_LOCALE;
00134 }
00135 
00136 template<>
00137 void SGSparseMatrix<complex128_t>::save(CFile* saver)
00138 {
00139     SG_SERROR("SGSparseMatrix::save():: Not supported for complex128_t");
00140 }
00141 
00142 template<class T> void SGSparseMatrix<T>::save_with_labels(CLibSVMFile* file,
00143         SGVector<float64_t> labels)
00144 {
00145     ASSERT(file)
00146     int32_t num=labels.vlen;
00147     ASSERT(num>0)
00148     ASSERT(num==num_vectors)
00149 
00150     float64_t* raw_labels=labels.vector;
00151     file->set_sparse_matrix(sparse_matrix, num_features, num_vectors,
00152             raw_labels);
00153 }
00154 
00155 template <> void SGSparseMatrix<complex128_t>::save_with_labels(CLibSVMFile* saver, SGVector<float64_t> labels) { }
00156 
00157 
00158 template <class T>
00159 void SGSparseMatrix<T>::copy_data(const SGReferencedData& orig)
00160 {
00161     sparse_matrix = ((SGSparseMatrix*)(&orig))->sparse_matrix;
00162     num_vectors = ((SGSparseMatrix*)(&orig))->num_vectors;
00163     num_features = ((SGSparseMatrix*)(&orig))->num_features;
00164 }
00165 
00166 template <class T>
00167 void SGSparseMatrix<T>::init_data()
00168 {
00169     sparse_matrix = NULL;
00170     num_vectors = 0;
00171     num_features = 0;
00172 }
00173 
00174 template <class T>
00175 void SGSparseMatrix<T>::free_data()
00176 {
00177     SG_FREE(sparse_matrix);
00178     num_vectors = 0;
00179     num_features = 0;
00180 }
00181 
00182 template<class T> SGSparseMatrix<T> SGSparseMatrix<T>::get_transposed()
00183 {
00184     SGSparseMatrix<T> sfm(num_vectors, num_features);
00185 
00186     int32_t* hist=SG_CALLOC(int32_t, num_features);
00187 
00188     // count the lengths of future feature vectors
00189     for (int32_t v=0; v<num_vectors; v++)
00190     {
00191         SGSparseVector<T> sv=sparse_matrix[v];
00192 
00193         for (int32_t i=0; i<sv.num_feat_entries; i++)
00194             hist[sv.features[i].feat_index]++;
00195     }
00196 
00197     for (int32_t v=0; v<num_features; v++)
00198         sfm[v]=SGSparseVector<T>(hist[v]);
00199 
00200     SG_FREE(hist);
00201 
00202     int32_t* index=SG_CALLOC(int32_t, num_vectors);
00203 
00204     // fill future feature vectors with content
00205     for (int32_t v=0; v<num_vectors; v++)
00206     {
00207         SGSparseVector<T> sv=sparse_matrix[v];
00208 
00209         for (int32_t i=0; i<sv.num_feat_entries; i++)
00210         {
00211             int32_t vidx=sv.features[i].feat_index;
00212             int32_t fidx=v;
00213             sfm[vidx].features[index[vidx]].feat_index=fidx;
00214             sfm[vidx].features[index[vidx]].entry=sv.features[i].entry;
00215             index[vidx]++;
00216         }
00217     }
00218 
00219     SG_FREE(index);
00220     return sfm;
00221 }
00222 
00223 
00224 template<class T> void SGSparseMatrix<T>::sort_features()
00225 {
00226     for (int32_t i=0; i<num_vectors; i++)
00227     {
00228         sparse_matrix[i].sort_features();
00229     }
00230 }
00231 
00232 template<class T> void SGSparseMatrix<T>::from_dense(SGMatrix<T> full)
00233 {
00234     T* src=full.matrix;
00235     int32_t num_feat=full.num_rows;
00236     int32_t num_vec=full.num_cols;
00237 
00238     REQUIRE(num_vec>0, "Matrix should have > 0 vectors!\n");
00239 
00240     SG_SINFO("converting dense feature matrix to sparse one\n")
00241         int32_t* num_feat_entries=SG_MALLOC(int, num_vec);
00242 
00243 
00244     int64_t num_total_entries=0;
00245 
00246     // count nr of non sparse features
00247     for (int32_t i=0; i<num_vec; i++)
00248     {
00249         num_feat_entries[i]=0;
00250         for (int32_t j=0; j<num_feat; j++)
00251         {
00252             if (src[i*((int64_t) num_feat) + j] != static_cast<T>(0))
00253                 num_feat_entries[i]++;
00254         }
00255     }
00256 
00257     num_features=num_feat;
00258     num_vectors=num_vec;
00259     sparse_matrix=SG_MALLOC(SGSparseVector<T>,num_vec);
00260 
00261     for (int32_t i=0; i< num_vec; i++)
00262     {
00263         sparse_matrix[i]=SGSparseVector<T>(num_feat_entries[i]);
00264         int32_t sparse_feat_idx=0;
00265 
00266         for (int32_t j=0; j< num_feat; j++)
00267         {
00268             int64_t pos= i*num_feat + j;
00269 
00270             if (src[pos] != static_cast<T>(0))
00271             {
00272                 sparse_matrix[i].features[sparse_feat_idx].entry=src[pos];
00273                 sparse_matrix[i].features[sparse_feat_idx].feat_index=j;
00274                 sparse_feat_idx++;
00275                 num_total_entries++;
00276             }
00277         }
00278     }
00279 
00280     SG_SINFO("sparse feature matrix has %ld entries (full matrix had %ld, sparsity %2.2f%%)\n",
00281             num_total_entries, int64_t(num_feat)*num_vec, (100.0*num_total_entries)/(int64_t(num_feat)*num_vec));
00282     SG_FREE(num_feat_entries);
00283 }
00284 
00285 template class SGSparseMatrix<bool>;
00286 template class SGSparseMatrix<char>;
00287 template class SGSparseMatrix<int8_t>;
00288 template class SGSparseMatrix<uint8_t>;
00289 template class SGSparseMatrix<int16_t>;
00290 template class SGSparseMatrix<uint16_t>;
00291 template class SGSparseMatrix<int32_t>;
00292 template class SGSparseMatrix<uint32_t>;
00293 template class SGSparseMatrix<int64_t>;
00294 template class SGSparseMatrix<uint64_t>;
00295 template class SGSparseMatrix<float32_t>;
00296 template class SGSparseMatrix<float64_t>;
00297 template class SGSparseMatrix<floatmax_t>;
00298 template class SGSparseMatrix<complex128_t>;
00299 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation