// Eigen 3.3.3 — Eigen/src/Core/ProductEvaluators.h (documentation extract)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


#ifndef EIGEN_PRODUCTEVALUATORS_H
#define EIGEN_PRODUCTEVALUATORS_H

namespace Eigen {

namespace internal {

// Evaluator of a Product expression: simply forwards to product_evaluator,
// which is specialized further below on the product kind and operand shapes.
template<typename Lhs, typename Rhs, int Options>
struct evaluator<Product<Lhs, Rhs, Options> >
 : public product_evaluator<Product<Lhs, Rhs, Options> >
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef product_evaluator<XprType> Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};

// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
//
// The evaluator_assume_aliasing specialization below forces the assignment
// logic to treat such expressions as potentially aliasing the destination.
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                                               const Product<Lhs, Rhs, DefaultProduct> > >
{
  static const bool value = true;
};

// Evaluator for "constant * (A*B)": it is implemented as the evaluator of the
// rewritten expression "(constant*A) * B", built in the constructor below by
// multiplying the constant (taken from the nullary functor's m_other member)
// into the left factor.
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                               const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                               const Product<Lhs, Rhs, DefaultProduct> > >
 : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
{
  typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
                        const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
                        const Product<Lhs, Rhs, DefaultProduct> > XprType;
  typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
  {}
};

// Evaluator of the diagonal of a (default) product: evaluate it through the
// diagonal of the equivalent lazy product, thereby avoiding the evaluation of
// the full matrix product when only its diagonal is needed.
template<typename Lhs, typename Rhs, int DiagIndex>
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
 : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
{
  typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
  typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
        Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
        xpr.index() ))
  {}
};


// Helper class to perform a matrix product with the destination at hand.
// Depending on the sizes of the factors, there are different evaluation strategies
// as controlled by internal::product_type.
// Primary declaration of the product dispatcher. The default LhsShape/RhsShape
// and ProductType parameters select a specialization below based on the
// operand shapes (dense, triangular, selfadjoint, ...) and the product kind
// (inner, outer, gemv, coefficient-based, ...).
template< typename Lhs, typename Rhs,
          typename LhsShape = typename evaluator_traits<Lhs>::Shape,
          typename RhsShape = typename evaluator_traits<Rhs>::Shape,
          int ProductType = internal::product_type<Lhs,Rhs>::value>
struct generic_product_impl;

// A default (non-lazy) product must always be assumed to alias its destination.
template<typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
  static const bool value = true;
};

// This is the default evaluator implementation for products:
// It creates a temporary and call generic_product_impl
template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
{
  typedef Product<Lhs, Rhs, Options> XprType;
  typedef typename XprType::PlainObject PlainObject;
  typedef evaluator<PlainObject> Base;
  enum {
    // EvalBeforeNestingBit: the temporary must be computed before this
    // evaluator is nested into an enclosing expression.
    Flags = Base::Flags | EvalBeforeNestingBit
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_result(xpr.rows(), xpr.cols())
  {
    // Re-run the base-class construction now that m_result is sized;
    // the Base evaluator must reference m_result, not a default object.
    ::new (static_cast<Base*>(this)) Base(m_result);

// FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
//     typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
//     typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
//     typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
//     typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
//
//     const LhsNested lhs(xpr.lhs());
//     const RhsNested rhs(xpr.rhs());
//
//     generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);

    generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
  }

protected:
  PlainObject m_result;
};

// The following three shortcuts are enabled only if the scalar types match exactly.
// TODO: we could enable them for different scalar types when the product is not vectorized.

// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    // Resize the destination to the product's size before evaluating into it.
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
  }
};

// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
  }
};

// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
  typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    // FIXME shall we handle nested_eval here?
    generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
  }
};


// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                                            const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
{
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                        const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
  {
    // Fold the constant into the left factor: dst ?= (scalar*A) * B.
    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
  }
};

//----------------------------------------
// Catch "Dense ?= xpr + Product<>" expression to save one temporary
// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct

template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
                                               const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
  static const bool value = true;
};

// Splits "dst ?= other BINOP product" into two assignments:
// "dst Func1 other" followed by "dst Func2 product", avoiding one temporary.
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_op_product
{
  template<typename SrcXprType, typename InitialFunc>
  static EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
  {
    call_assignment_no_alias(dst, src.lhs(), Func1());
    call_assignment_no_alias(dst, src.rhs(), Func2());
  }
};

#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
  template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
  struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
                                              const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
    : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
  {}

// For a subtraction the second functor is flipped, e.g. "dst -= a - A*B"
// becomes "dst -= a; dst += A*B".
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_sum_op,add_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);

EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op,    scalar_difference_op,sub_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);

//----------------------------------------

// Inner product (1x1 result): computed as a coefficient-wise product
// followed by a sum, written directly into dst(0,0).
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
  static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};


/***********************************************************************
*  Implementation of outer dense * dense vector product
***********************************************************************/

// Column major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
  // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
  // FIXME not very good if rhs is real and lhs complex while alpha is real too
  const Index cols = dst.cols();
  for (Index j=0; j<cols; ++j)
    // Each column of the result is a scaled copy of the lhs vector.
    func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
}

// Row major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
  // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
  // FIXME not very good if lhs is real and rhs complex while alpha is real too
  const Index rows = dst.rows();
  for (Index i=0; i<rows; ++i)
    // Each row of the result is a scaled copy of the rhs vector.
    func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
}

// Outer product: dispatches to the column- or row-major selector above with a
// small functor (set/add/sub/adds) encoding the assignment operation.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{
  template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
  struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
  struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
  struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
  struct adds {
    Scalar m_scale;
    explicit adds(const Scalar& s) : m_scale(s) {}
    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
      dst.const_cast_derived() += m_scale * src;
    }
  };

  template<typename Dst>
  static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }

  template<typename Dst>
  static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }

};


// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
template<typename Lhs, typename Rhs, typename Derived>
struct generic_product_impl_base
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }

  template<typename Dst>
  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }

  template<typename Dst>
  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }

  template<typename Dst>
  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }

};

// Matrix * vector (or vector * matrix) product: forwarded to gemv_dense_selector.
// Side tells whether the matrix factor is on the left or on the right.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
{
  typedef typename nested_eval<Lhs,1>::type LhsNested;
  typedef typename nested_eval<Rhs,1>::type RhsNested;
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;
  enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
  // MatrixType is the matrix (non-vector) factor of the product.
  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;

  template<typename Dest>
  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    LhsNested actual_lhs(lhs);
    RhsNested actual_rhs(rhs);
    internal::gemv_dense_selector<Side,
                                  (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                  bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
                                 >::run(actual_lhs, actual_rhs, dst, alpha);
  }
};

// Small products: evaluate coefficient by coefficient through lazyProduct.
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
    // but easier on the compiler side
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
  }

  template<typename Dst>
  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() += lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
  }

  template<typename Dst>
  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() -= lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
  }

//   template<typename Dst>
//   static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
//   { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
};

// This specialization enforces the use of a coefficient-based evaluation strategy
template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
  : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};

// Case 2: Evaluate coeff by coeff
//
// This is mostly taken from CoeffBasedProduct.h
// The main difference is that we add an extra argument to the etor_product_*_impl::run() function
// for the inner dimension of the product, because evaluator object do not know their size.

template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;

// Lazy-product evaluator: computes each coefficient (or packet) of A*B on
// demand, without materializing the result.
template<typename Lhs, typename Rhs, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
    : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
  typedef Product<Lhs, Rhs, LazyProduct> XprType;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  explicit product_evaluator(const XprType& xpr)
    : m_lhs(xpr.lhs()),
      m_rhs(xpr.rhs()),
      m_lhsImpl(m_lhs),     // FIXME the creation of the evaluator objects should result in a no-op, but check that!
      m_rhsImpl(m_rhs),     // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
                            // or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
      m_innerDim(xpr.lhs().cols())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
#if 0
    std::cerr << "LhsOuterStrideBytes=  " << LhsOuterStrideBytes << "\n";
    std::cerr << "RhsOuterStrideBytes=  " << RhsOuterStrideBytes << "\n";
    std::cerr << "LhsAlignment=         " << LhsAlignment << "\n";
    std::cerr << "RhsAlignment=         " << RhsAlignment << "\n";
    std::cerr << "CanVectorizeLhs=      " << CanVectorizeLhs << "\n";
    std::cerr << "CanVectorizeRhs=      " << CanVectorizeRhs << "\n";
    std::cerr << "CanVectorizeInner=    " << CanVectorizeInner << "\n";
    std::cerr << "EvalToRowMajor=       " << EvalToRowMajor << "\n";
    std::cerr << "Alignment=            " << Alignment << "\n";
    std::cerr << "Flags=                " << Flags << "\n";
#endif
  }

  // Everything below here is taken from CoeffBasedProduct.h

  typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
  typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;

  typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
  typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;

  typedef evaluator<LhsNestedCleaned> LhsEtorType;
  typedef evaluator<RhsNestedCleaned> RhsEtorType;

  enum {
    RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
    ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
    MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
  };

  typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
  typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;

  enum {

    LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
    RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
    // Cost of one coefficient: InnerSize multiplies + operand reads + InnerSize-1 adds.
    CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
                  : InnerSize == Dynamic ? HugeCost
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,

    LhsFlags = LhsEtorType::Flags,
    RhsFlags = RhsEtorType::Flags,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
    RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,

    // Here, we don't care about alignment larger than the usable packet size.
    LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
    RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),

    SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,

    CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),

    EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
                   : (bool(RhsRowMajor) && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
          | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),

    LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
    RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),

    Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
              : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
              : 0,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
  {
    // Dot product of lhs row `row` with rhs column `col`.
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
   * which is why we don't set the LinearAccessBit.
   * TODO: this seems possible when the result is a vector
   */
  EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
  {
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
    return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
  }

  template<int LoadMode, typename PacketType>
  const PacketType packet(Index row, Index col) const
  {
    PacketType res;
    // Dispatch on storage order and (possibly full) unrolling of the inner loop.
    typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
                                     Unroll ? int(InnerSize) : Dynamic,
                                     LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
    PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
    return res;
  }

  template<int LoadMode, typename PacketType>
  const PacketType packet(Index index) const
  {
    const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
    const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
    return packet<LoadMode,PacketType>(row,col);
  }

protected:
  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;

  LhsEtorType m_lhsImpl;
  RhsEtorType m_rhsImpl;

  // TODO: Get rid of m_innerDim if known at compile time
  Index m_innerDim;
};

// Default product evaluated through the lazy (coefficient-based) path,
// with EvalBeforeNestingBit added so it is not recomputed when nested.
template<typename Lhs, typename Rhs>
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
  : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
{
  typedef Product<Lhs, Rhs, DefaultProduct> XprType;
  typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
  typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
  enum {
    Flags = Base::Flags | EvalBeforeNestingBit
  };
  EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
    : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
  {}
};

/****************************************
*** Coeff based product, Packet path  ***
****************************************/

// Unrolled step: recurse on the previous inner index, then fuse-in the
// contribution of inner index UnrollingIndex-1 with a pmadd.
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res =  pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
  {
    etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
    res =  pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
  }
};

// Base cases of the unrolled recursion: inner size 1 (single product) ...
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
  }
};

// ... and inner size 0 (empty product: result is zero).
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
  }
};

// Dynamic inner size: plain runtime loop over innerDim.
template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
    for(Index i = 0; i < innerDim; ++i)
      res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
  {
    res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
    for(Index i = 0; i < innerDim; ++i)
      res =  pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};


/***************************************************************************
* Triangular products
***************************************************************************/
template<int Mode, bool LhsIsTriangular,
         typename Lhs, bool LhsIsVector,
         typename Rhs, bool RhsIsVector>
struct triangular_product_impl;

// triangular * dense: unwrap the triangular view and forward to triangular_product_impl.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
        ::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};

// dense * triangular
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};


/***************************************************************************
* SelfAdjoint products
***************************************************************************/
template <typename Lhs, int LhsMode, bool LhsIsVector,
          typename Rhs, int RhsMode, bool RhsIsVector>
struct selfadjoint_product_impl;

// selfadjoint * dense: unwrap the selfadjoint view and forward to selfadjoint_product_impl.
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
  }
};

// dense * selfadjoint
template<typename Lhs, typename Rhs, int ProductTag>
struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dest>
  static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
  }
};


/***************************************************************************
* Diagonal products
***************************************************************************/

// NOTE(review): this definition is truncated by the end of the visible
// extract; the remainder of diagonal_product_evaluator_base is not shown here.
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
  : evaluator_base<Derived>
{
   typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public:
  enum {
    CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost
+ evaluator<DiagonalType>::CoeffReadCost, 00770 00771 MatrixFlags = evaluator<MatrixType>::Flags, 00772 DiagFlags = evaluator<DiagonalType>::Flags, 00773 _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, 00774 _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) 00775 ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), 00776 _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value, 00777 // FIXME currently we need same types, but in the future the next rule should be the one 00778 //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), 00779 _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), 00780 _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, 00781 Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? 
PacketAccessBit : 0), 00782 Alignment = evaluator<MatrixType>::Alignment 00783 }; 00784 00785 diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) 00786 : m_diagImpl(diag), m_matImpl(mat) 00787 { 00788 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); 00789 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); 00790 } 00791 00792 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const 00793 { 00794 return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); 00795 } 00796 00797 protected: 00798 template<int LoadMode,typename PacketType> 00799 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const 00800 { 00801 return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), 00802 internal::pset1<PacketType>(m_diagImpl.coeff(id))); 00803 } 00804 00805 template<int LoadMode,typename PacketType> 00806 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const 00807 { 00808 enum { 00809 InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, 00810 DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!! 
00811 }; 00812 return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), 00813 m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id)); 00814 } 00815 00816 evaluator<DiagonalType> m_diagImpl; 00817 evaluator<MatrixType> m_matImpl; 00818 }; 00819 00820 // diagonal * dense 00821 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> 00822 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape> 00823 : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> 00824 { 00825 typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base; 00826 using Base::m_diagImpl; 00827 using Base::m_matImpl; 00828 using Base::coeff; 00829 typedef typename Base::Scalar Scalar; 00830 00831 typedef Product<Lhs, Rhs, ProductKind> XprType; 00832 typedef typename XprType::PlainObject PlainObject; 00833 00834 enum { 00835 StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor 00836 }; 00837 00838 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) 00839 : Base(xpr.rhs(), xpr.lhs().diagonal()) 00840 { 00841 } 00842 00843 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const 00844 { 00845 return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); 00846 } 00847 00848 #ifndef __CUDACC__ 00849 template<int LoadMode,typename PacketType> 00850 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const 00851 { 00852 // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. 00853 // See also similar calls below. 
00854 return this->template packet_impl<LoadMode,PacketType>(row,col, row, 00855 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type()); 00856 } 00857 00858 template<int LoadMode,typename PacketType> 00859 EIGEN_STRONG_INLINE PacketType packet(Index idx) const 00860 { 00861 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); 00862 } 00863 #endif 00864 }; 00865 00866 // dense * diagonal 00867 template<typename Lhs, typename Rhs, int ProductKind, int ProductTag> 00868 struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape> 00869 : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> 00870 { 00871 typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base; 00872 using Base::m_diagImpl; 00873 using Base::m_matImpl; 00874 using Base::coeff; 00875 typedef typename Base::Scalar Scalar; 00876 00877 typedef Product<Lhs, Rhs, ProductKind> XprType; 00878 typedef typename XprType::PlainObject PlainObject; 00879 00880 enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor }; 00881 00882 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) 00883 : Base(xpr.lhs(), xpr.rhs().diagonal()) 00884 { 00885 } 00886 00887 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const 00888 { 00889 return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); 00890 } 00891 00892 #ifndef __CUDACC__ 00893 template<int LoadMode,typename PacketType> 00894 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const 00895 { 00896 return this->template packet_impl<LoadMode,PacketType>(row,col, col, 00897 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type()); 00898 } 00899 00900 template<int LoadMode,typename PacketType> 00901 EIGEN_STRONG_INLINE PacketType packet(Index idx) const 00902 { 00903 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); 00904 } 00905 #endif 00906 }; 00907 00908 /*************************************************************************** 00909 * Products with permutation matrices 00910 ***************************************************************************/ 00911 00917 template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape> 00918 struct permutation_matrix_product; 00919 00920 template<typename ExpressionType, int Side, bool Transposed> 00921 struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape> 00922 { 00923 typedef typename nested_eval<ExpressionType, 1>::type MatrixType; 00924 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; 00925 00926 template<typename Dest, typename PermutationType> 00927 static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) 00928 { 00929 MatrixType mat(xpr); 00930 const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); 00931 // FIXME we need an is_same for expression that is not sensitive to constness. 
For instance 00932 // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true. 00933 //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat)) 00934 if(is_same_dense(dst, mat)) 00935 { 00936 // apply the permutation inplace 00937 Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size()); 00938 mask.fill(false); 00939 Index r = 0; 00940 while(r < perm.size()) 00941 { 00942 // search for the next seed 00943 while(r<perm.size() && mask[r]) r++; 00944 if(r>=perm.size()) 00945 break; 00946 // we got one, let's follow it until we are back to the seed 00947 Index k0 = r++; 00948 Index kPrev = k0; 00949 mask.coeffRef(k0) = true; 00950 for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) 00951 { 00952 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k) 00953 .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> 00954 (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); 00955 00956 mask.coeffRef(k) = true; 00957 kPrev = k; 00958 } 00959 } 00960 } 00961 else 00962 { 00963 for(Index i = 0; i < n; ++i) 00964 { 00965 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime> 00966 (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) 00967 00968 = 00969 00970 Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime> 00971 (mat, ((Side==OnTheRight) ^ Transposed) ? 
perm.indices().coeff(i) : i); 00972 } 00973 } 00974 } 00975 }; 00976 00977 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 00978 struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> 00979 { 00980 template<typename Dest> 00981 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) 00982 { 00983 permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); 00984 } 00985 }; 00986 00987 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 00988 struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> 00989 { 00990 template<typename Dest> 00991 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) 00992 { 00993 permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); 00994 } 00995 }; 00996 00997 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 00998 struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> 00999 { 01000 template<typename Dest> 01001 static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) 01002 { 01003 permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); 01004 } 01005 }; 01006 01007 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 01008 struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> 01009 { 01010 template<typename Dest> 01011 static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) 01012 { 01013 permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); 01014 } 01015 }; 01016 01017 01018 /*************************************************************************** 01019 * Products with transpositions matrices 01020 ***************************************************************************/ 01021 01022 // FIXME could we unify Transpositions and 
Permutation into a single "shape"?? 01023 01028 template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape> 01029 struct transposition_matrix_product 01030 { 01031 typedef typename nested_eval<ExpressionType, 1>::type MatrixType; 01032 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; 01033 01034 template<typename Dest, typename TranspositionType> 01035 static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) 01036 { 01037 MatrixType mat(xpr); 01038 typedef typename TranspositionType::StorageIndex StorageIndex; 01039 const Index size = tr.size(); 01040 StorageIndex j = 0; 01041 01042 if(!is_same_dense(dst,mat)) 01043 dst = mat; 01044 01045 for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k) 01046 if(Index(j=tr.coeff(k))!=k) 01047 { 01048 if(Side==OnTheLeft) dst.row(k).swap(dst.row(j)); 01049 else if(Side==OnTheRight) dst.col(k).swap(dst.col(j)); 01050 } 01051 } 01052 }; 01053 01054 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 01055 struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> 01056 { 01057 template<typename Dest> 01058 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) 01059 { 01060 transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); 01061 } 01062 }; 01063 01064 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 01065 struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> 01066 { 01067 template<typename Dest> 01068 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) 01069 { 01070 transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); 01071 } 01072 }; 01073 01074 01075 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 01076 struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> 01077 { 01078 
template<typename Dest> 01079 static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) 01080 { 01081 transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); 01082 } 01083 }; 01084 01085 template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape> 01086 struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> 01087 { 01088 template<typename Dest> 01089 static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) 01090 { 01091 transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); 01092 } 01093 }; 01094 01095 } // end namespace internal 01096 01097 } // end namespace Eigen 01098 01099 #endif // EIGEN_PRODUCT_EVALUATORS_H