Eigen 3.3.3 — source listing of Eigen/src/Core/Redux.h (coefficient-wise reductions: redux, sum, prod, minCoeff, maxCoeff, mean, trace)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
// * implement other kind of vectorization
// * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

// Compile-time policy for a reduction of expression `Derived` with functor
// `Func`: selects the best packet type, a traversal strategy
// (linear-vectorized / slice-vectorized / default scalar), and whether the
// reduction loop should be completely unrolled.
template<typename Func, typename Derived>
struct redux_traits
{
public:
    typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Inner dimension upper bound: columns for row-major, rows for col-major.
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    // Vectorization requires packet access on both the expression and the functor.
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear vectorization additionally requires linear (1-D index) access.
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    // Per-slice vectorization only pays off when each inner slice can hold
    // at least a few packets (3*PacketSize is a heuristic threshold).
    MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated total cost of the reduction; HugeCost for dynamic sizes
    // disables complete unrolling below.
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Unrolling budget is scaled by PacketSize for vectorized traversals.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to stderr; debugging aid only.
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

// Scalar reduction unroller: recursively splits [Start, Start+Length) in
// half and combines the two sub-reductions with `func`, yielding a balanced
// compile-time reduction tree.
template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};

// Base case: a single coefficient, addressed by (outer, inner) derived from
// the flat index `Start`.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};

/*** vectorization ***/

// Packet reduction unroller: same balanced-tree recursion as above, but
// each leaf loads a whole packet and the combine step is func.packetOp.
// Start/Length are counted in packets, not coefficients.
template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

// Base case: load one packet at packet-index `Start`, converted to an
// (outer, inner) coefficient address.
template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// Primary template; specialized below for each (Traversal, Unrolling) pair
// selected by redux_traits.
template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

// Scalar traversal, runtime loops: seed with coeff(0,0), then fold the rest
// of the first inner slice and every following slice.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};

// Scalar traversal, fully unrolled: delegate to the compile-time tree.
template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};

// Linear vectorized traversal with runtime loops: peel to the first aligned
// coefficient, accumulate two packet streams to expose instruction-level
// parallelism, then handle the unaligned head and the scalar tail.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // Aligned loads are only possible with direct, scalar-alignable storage;
      // otherwise fall back to the expression's own static alignment.
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // One leftover packet when (size-alignedStart)/packetSize is odd.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      // Scalar head: coefficients before the first aligned one.
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      // Scalar tail: coefficients that do not fill a whole packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Each inner slice is reduced packet-wise up to the largest packet multiple;
// leftovers of every slice are folded in scalar-wise afterwards.
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      // First slice starts at packet index 1: packet (0,0) seeded packet_res.
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};

// Linear vectorized traversal, fully unrolled: packet tree for the
// vectorizable prefix, scalar tree for the remainder.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};

// evaluator adaptor
// Wraps evaluator<XprType> with the size queries and the outer/inner
// addressing helpers the redux_impl specializations above rely on.
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  // (outer, inner) addressing maps to (row, col) or (col, row) depending on
  // the expression's storage order.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * \a func must be an associative operator (callable on two Scalars and, when
  * vectorized, on two packets).
  */
template<typename Derived>
template<typename Func>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}

/** \returns the minimum of all coefficients of \c *this. */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}

/** \returns the maximum of all coefficients of \c *this. */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}

/** \returns the sum of all coefficients of \c *this; Scalar(0) for an empty
  * (size 0) matrix. */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

/** \returns the mean of all coefficients of \c *this (sum divided by size). */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  // Silence ICC remark 2259 (non-pointer conversion may lose significant bits)
  // triggered by the Scalar(this->size()) conversion below.
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

/** \returns the product of all coefficients of \c *this; Scalar(1) (the
  * empty-product convention) for an empty (size 0) matrix. */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the diagonal coefficients. */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H