Eigen 3.3.3
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H

namespace Eigen {

namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{
  typedef typename remove_all<Lhs>::type::Scalar Scalar;

  // make sure to call innerSize/outerSize since we fake the storage order.
  Index rows = lhs.innerSize();
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  ei_declare_aligned_stack_constructed_variable(bool,   mask,    rows, 0);
  ei_declare_aligned_stack_constructed_variable(Scalar, values,  rows, 0);
  ei_declare_aligned_stack_constructed_variable(Index,  indices, rows, 0);

  std::memset(mask,0,sizeof(bool)*rows);

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  // Estimate the number of non-zero entries:
  // given an rhs column containing Y non-zeros, we assume that the corresponding Y columns
  // of the lhs differ on average by one non-zero, so the number of non-zeros of the
  // product of an rhs column with the lhs is X+Y, where X is the average number of
  // non-zeros per column of the lhs.
  // Therefore, we estimate nnz(lhs*rhs) = nnz(lhs) + nnz(rhs).
  Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate();

  res.setZero();
  res.reserve(Index(estimated_nnz_prod));
  // we compute each column of the result, one after the other
  for (Index j=0; j<cols; ++j)
  {

    res.startVec(j);
    Index nnz = 0;
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      Scalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        Scalar x = lhsIt.value();
        if(!mask[i])
        {
          mask[i] = true;
          values[i] = x * y;
          indices[nnz] = i;
          ++nnz;
        }
        else
          values[i] += x * y;
      }
    }
    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
        Index i = indices[k];
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;
      }
    }
    else
    {
      // alternative ordered insertion code:
      const Index t200 = rows/11; // 11 == (log2(200)*1.39)
      const Index t = (rows*100)/139;

      // FIXME reserve nnz non zeros
      // FIXME implement faster sorting algorithms for very small nnz
      // If the result is sparse enough, use a quick sort;
      // otherwise, loop through the entire vector.
      // To avoid computing an expensive log2 when the result is clearly
      // very sparse, we use a linear bound up to nnz == 200.
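      // In other words: sorting costs roughly nnz*log2(nnz) operations while the
      // dense fallback below scans all 'rows' mask entries, so the sorted path is
      // taken when nnz*log2(nnz)*1.39 < rows (1.39 == 139/100 acting as a weighting
      // constant between the two costs); for nnz < 200 this test is approximated by
      // the linear bound nnz*11 < rows, since log2(200)*1.39 is about 11.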
      if((nnz<200 && nnz<t200) || nnz * numext::log2(int(nnz)) < t)
      {
        if(nnz>1) std::sort(indices,indices+nnz);
        for(Index k=0; k<nnz; ++k)
        {
          Index i = indices[k];
          res.insertBackByOuterInner(j,i) = values[i];
          mask[i] = false;
        }
      }
      else
      {
        // dense path
        for(Index i=0; i<rows; ++i)
        {
          if(mask[i])
          {
            mask[i] = false;
            res.insertBackByOuterInner(j,i) = values[i];
          }
        }
      }
    }
  }
  res.finalize();
}


} // end namespace internal

namespace internal {

// Dispatch on the storage order of the two operands and of the result; each
// specialization below inserts the storage-order conversions needed so that
// conservative_sparse_sparse_product_impl is called with a consistent layout.
template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
  typedef typename remove_all<Lhs>::type LhsCleaned;
  typedef typename LhsCleaned::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrixAux;
    typedef typename sparse_eval<ColMajorMatrixAux,ResultType::RowsAtCompileTime,ResultType::ColsAtCompileTime,ColMajorMatrixAux::Flags>::type ColMajorMatrix;

    // If the result is tall and thin (in the extreme case a column vector),
    // then it is faster to sort the coefficients in place instead of transposing twice.
    // FIXME: the following heuristic is probably not very good.
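    // The first branch sorts during insertion; the second builds the product
    // unsorted into a column-major temporary and relies on the two storage-order
    // conversions (to row-major and back) to end up with sorted inner indices.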
    if(lhs.rows()>rhs.cols())
    {
      ColMajorMatrix resCol(lhs.rows(),rhs.cols());
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol.markAsRValue();
    }
    else
    {
      ColMajorMatrixAux resCol(lhs.rows(),rhs.cols());
      // resort to a transpose to sort the entries
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrixAux>(lhs, rhs, resCol, false);
      RowMajorMatrix resRow(resCol);
      res = resRow.markAsRValue();
    }
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix rhsRow = rhs;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix lhsRow = lhs;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
    res = resRow;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    res = resRow;
  }
};


template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
  typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;

  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix lhsCol = lhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix rhsCol = rhs;
    ColMajorMatrix resCol(lhs.rows(), rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
    res = resCol;
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    RowMajorMatrix resRow(lhs.rows(),rhs.cols());
    internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
    // sort the non-zeros:
    ColMajorMatrix resCol(resRow);
    res = resCol;
  }
};

} // end namespace internal


namespace internal {

template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
  typedef typename remove_all<Lhs>::type::Scalar Scalar;
  Index cols = rhs.outerSize();
  eigen_assert(lhs.outerSize() == rhs.innerSize());

  evaluator<Lhs> lhsEval(lhs);
  evaluator<Rhs> rhsEval(rhs);

  for (Index j=0; j<cols; ++j)
  {
    for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
    {
      Scalar y = rhsIt.value();
      Index k = rhsIt.index();
      for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
      {
        Index i = lhsIt.index();
        Scalar x = lhsIt.value();
        res.coeffRef(i,j) += x * y;
      }
    }
  }
}


} // end namespace internal

namespace internal {

// Same kind of storage-order dispatch, for products evaluated into a dense result.
template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct sparse_sparse_to_dense_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    internal::sparse_sparse_to_dense_product_impl<Lhs,Rhs,ResultType>(lhs, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix lhsCol(lhs);
    internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix;
    ColMajorMatrix rhsCol(rhs);
    internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res);
  }
};

template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor>
{
  static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
  {
    Transpose<ResultType> trRes(res);
    internal::sparse_sparse_to_dense_product_impl<Rhs,Lhs,Transpose<ResultType> >(rhs, lhs, trRes);
  }
};


} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
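For context, here is a minimal usage sketch (not part of this header; matrix sizes and values are made up for illustration, assuming Eigen 3.3). Assigning a sparse-sparse product to a sparse destination is the case routed through conservative_sparse_sparse_product_selector above, while assigning it to a dense destination is the case routed through sparse_sparse_to_dense_product_selector.

#include <Eigen/Sparse>
#include <Eigen/Dense>
#include <iostream>

int main()
{
  // Two small sparse operands (values chosen arbitrarily for the example).
  Eigen::SparseMatrix<double> A(3,3), B(3,3);
  A.insert(0,0) = 1.0;  A.insert(1,2) = 2.0;  A.insert(2,1) = 3.0;
  B.insert(0,1) = 4.0;  B.insert(2,2) = 5.0;
  A.makeCompressed();
  B.makeCompressed();

  // Sparse destination: the "conservative" product keeps every structurally
  // non-zero coefficient of the result, even if it is numerically zero.
  Eigen::SparseMatrix<double> C = A * B;

  // Dense destination: the product is accumulated directly into a dense matrix.
  Eigen::MatrixXd D = A * B;

  std::cout << "nnz(C) = " << C.nonZeros() << "\n" << D << "\n";
  return 0;
}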