// Source listing extracted from the Eigen 3.3.3 documentation
// (Eigen/src/Core/AssignEvaluator.h).
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling       *
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits

// Compile-time policy selector for a dense assignment dst = src.
// Given the two evaluators and the assignment functor, it decides:
//   - Traversal: how the coefficients are visited (default inner/outer loops,
//     linear indexing, inner-vectorized, linear-vectorized, or slice-vectorized),
//   - Unrolling: whether the chosen loop is meta-unrolled (none/inner/complete),
//   - PacketType: the SIMD packet type used on vectorized paths.
// All decisions are made from compile-time enum arithmetic only.
template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = DstFlags & DirectAccessBit,
    // The alignment both sides can guarantee simultaneously.
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    // Size of the inner (contiguous) dimension of the destination:
    // the whole size for vectors, otherwise cols for row-major / rows for col-major.
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
              : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
              : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
              : int(Dst::MaxRowsAtCompileTime),
    OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime
  };

  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
  typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;

  enum {
    LinearPacketSize = unpacket_traits<LinearPacketType>::size,
    InnerPacketSize = unpacket_traits<InnerPacketType>::size
  };

public:
  enum {
    LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
    InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
  };

private:
  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    // Vectorization requires matching storage order, packet access on both
    // sides, and a functor that supports packet-wise application.
    MightVectorize = bool(StorageOrdersAgree)
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && bool(functor_traits<AssignFunc>::PacketAccess),
    // Inner vectorization needs the inner size and outer stride to be
    // compile-time multiples of the packet size (no per-row prologue/epilogue).
    MayInnerVectorize = MightVectorize
                     && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
                     && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
                     && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
    MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
                      && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix
         However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
  };

public:
  enum {
    // Preference order: the linear-vectorized path wins first when its packet
    // is strictly wider, then inner-vectorized, then linear-vectorized,
    // then slice-vectorized, then plain linear, then the default double loop.
    Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
              : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
              : int(MayLinearize)        ? int(LinearTraversal)
                                         : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

  typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;

private:
  enum {
    ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
                     : Vectorized ? InnerPacketSize
                     : 1,
    // The unrolling budget scales with the packet size: one packet op per
    // "iteration" is as cheap as one scalar op per iteration.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
    MayUnrollInner = int(InnerSize) != Dynamic
                  && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
                  : int(MayUnrollInner)      ? int(InnerUnrolling)
                                             : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
                ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
                          ? int(CompleteUnrolling)
                          : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                              : int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
              : int(Traversal) == int(SliceVectorizedTraversal)
                ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
                                         : int(NoUnrolling) )
#endif
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps every decision variable above to stderr; compiled in only when
  // EIGEN_DEBUG_ASSIGN is defined.
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
    std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(LinearRequiredAlignment)
    EIGEN_DEBUG_VAR(InnerRequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(LinearPacketSize)
    EIGEN_DEBUG_VAR(InnerPacketSize)
    EIGEN_DEBUG_VAR(ActualPacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/

/************
*** Default traversal ***
************/

// Fully unrolled outer/inner traversal: linearizes the flat coefficient
// index [Index, Stop) into (outer, inner) pairs at compile time and emits
// one assignCoeffByOuterInner call per coefficient.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

// Recursion terminator (Index == Stop): nothing left to assign.
template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

// Unrolls the inner loop only; the outer index is a runtime parameter.
template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};

/***********************
*** Linear traversal ***
***********************/

// Fully unrolled traversal using flat (linear) coefficient indices.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

/**************************
*** Inner vectorization ***
**************************/

// Fully unrolled packet-wise traversal: advances the flat index by the
// packet size each step and assigns one packet per step.
template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

// Packet-wise unrolling of the inner loop only; the outer index and the
// alignments to use are passed as template/runtime parameters.
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// dense_assignment_loop is based on assign_impl

// Primary template: specialized below for every (Traversal, Unrolling)
// combination selected by copy_using_evaluator_traits.
template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
*** Default traversal ***
************************/

// Plain runtime double loop over (outer, inner).
template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/


// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
// of the non vectorizable beginning and ending parts

template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned = true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

// Unaligned case: assign the [start, end) range coefficient by coefficient.
template <>
struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this functions. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};

// Linear-vectorized: scalar prologue up to the first aligned index, packet
// loop over the aligned middle, scalar epilogue for the remainder.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           alignedSize = (size/packetSize)*packetSize };

    // Packet-unrolled head, then scalar-unrolled tail for the remainder.
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};

/**************************
*** Inner vectorization ***
**************************/

// Inner dimension is a compile-time multiple of the packet size, so the
// whole inner loop is packet ops with no scalar prologue/epilogue.
template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
  typedef typename Kernel::PacketType PacketType;
  enum {
    SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
    DstAlignment = Kernel::AssignmentTraits::DstAlignment
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::AssignmentTraits Traits;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
                                                   Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

// Scalar loop over the flat coefficient index.
template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

/**************************
*** Slice vectorization ***
***************************/

// Per-outer-slice vectorization: each inner slice gets its own scalar
// prologue / packet middle / scalar epilogue, with the aligned start
// re-derived for every outer iteration.
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = kernel.dstDataPtr();
    if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligend-on scalar, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};

#if EIGEN_UNALIGNED_VECTORIZE
// With unaligned vectorization enabled, the inner loop can be unrolled as
// an unaligned packet head plus a scalar tail (alignments forced to 0).
template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    typedef typename Kernel::PacketType PacketType;

    enum { size = DstXprType::InnerSizeAtCompileTime,
           packetSize =unpacket_traits<PacketType>::size,
           vectorizableSize = (size/packetSize)*packetSize };

    for(Index outer = 0; outer < kernel.outerSize(); ++outer)
    {
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
    }
  }
};
#endif


/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/

// This class generalize the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators, and the actual assignment functor.
// This abstraction level permits to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, by-passing a functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:

  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;


  // NOTE(review): only references are stored; all four arguments must
  // outlive the kernel (they do in call_dense_assignment_loop below).
  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
    #ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
    #endif
  }

  // Sizes and strides are forwarded from the destination expression.
  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }

  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }

  /// Assign src(row,col) to dst(row,col) through the functor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  /// Linear-index variant of assignCoeff(row,col).
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  /// (outer,inner) variant: translated to (row,col) per storage order.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignCoeff(row, col);
  }


  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

  // Maps (outer,inner) to a row index, resolving vector shape and storage
  // order at compile time so the ternaries fold to a single expression.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
      : int(Traits::ColsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
      : inner;
  }

  // Column counterpart of rowIndexByOuterInner.
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
      : int(Traits::RowsAtCompileTime) == 1 ? inner
      : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
      : outer;
  }

  // Raw pointer to the destination's storage (used for runtime alignment).
  EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
  {
    return m_dstExpr.data();
  }

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid the needs of the original expression
  DstXprType& m_dstExpr;
};

/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

// Generic functor case: resizing is not allowed, only check that the
// shapes already match.
template<typename DstXprType,typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
{
  EIGEN_ONLY_USED_FOR_DEBUG(dst);
  EIGEN_ONLY_USED_FOR_DEBUG(src);
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
}

// Plain assignment (assign_op): the destination may be resized to match
// the source.
template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
{
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
    dst.resize(dstRows, dstCols);
  eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
}

00724 template<typename DstXprType, typename SrcXprType, typename Functor> 00725 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) 00726 { 00727 typedef evaluator<DstXprType> DstEvaluatorType; 00728 typedef evaluator<SrcXprType> SrcEvaluatorType; 00729 00730 SrcEvaluatorType srcEvaluator(src); 00731 00732 // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, 00733 // we need to resize the destination after the source evaluator has been created. 00734 resize_if_allowed(dst, src, func); 00735 00736 DstEvaluatorType dstEvaluator(dst); 00737 00738 typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel; 00739 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); 00740 00741 dense_assignment_loop<Kernel>::run(kernel); 00742 } 00743 00744 template<typename DstXprType, typename SrcXprType> 00745 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) 00746 { 00747 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>()); 00748 } 00749 00750 /*************************************************************************** 00751 * Part 6 : Generic assignment 00752 ***************************************************************************/ 00753 00754 // Based on the respective shapes of the destination and source, 00755 // the class AssignmentKind determine the kind of assignment mechanism. 00756 // AssignmentKind must define a Kind typedef. 
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

// Primary template: the generic fallback goes through evalTo (EigenBase2EigenBase);
// the DenseShape/DenseShape specialization selects the dense assignment loops.
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };

// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename EnableIf = void>
struct Assignment;


// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to "assume-aliasing" such that Assignment
// does not have to bother about these annoying details.
// Entry point with the default functor: plain assignment (operator=).
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// NOTE(review): const-qualified destination overload — presumably needed so
// that writable proxy expressions returned by value (which bind to const&)
// can still be used as destinations; confirm against callers.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

// Deal with "assume-aliasing": when the source is flagged as possibly aliasing
// the destination, evaluate it into a temporary first, then assign the temporary.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
}

// No assume-aliasing: forward directly to the no-alias path.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}

// by-pass "assume-aliasing"
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}


template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  enum {
    // Automatic transposition: assigning a compile-time column vector to a
    // compile-time row vector (or vice versa) goes through Transpose<Dst>.
    // 1x1 expressions are excluded (SizeAtCompileTime != 1): no transposition needed.
    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                        || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
                      ) && int(Dst::SizeAtCompileTime) != 1
  };

  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
  // NOTE(review): 'COMPATIBILIY' is the macro's actual (misspelled) name as
  // declared elsewhere in Eigen; do not "fix" the spelling here.
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
// Same as above with the default assign_op functor.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

// Like call_assignment_no_alias, but without the automatic row/column-vector
// transposition handling.
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
// Default-functor convenience overload.
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}

// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

// Generic Dense to Dense assignment
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
{
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
#ifndef EIGEN_NO_DEBUG
    // Debug builds detect destructive aliasing between dst and src.
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};

// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
// Note that the last template argument "Weak" is needed to make it possible to perform
// both partial specialization+SFINAE without ambiguous specialization
template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
{
  // Plain assignment: resize the destination, then delegate to src.evalTo(dst).
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);

    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.evalTo(dst);
  }

  // NOTE The following two functions are templated to avoid their instantiation if not needed
  // This is needed because some expressions support evalTo only and/or have 'void' as scalar type.

  // Compound add-assignment: delegates to src.addTo(dst).
  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);

    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.addTo(dst);
  }

  // Compound sub-assignment: delegates to src.subTo(dst).
  template<typename SrcScalarType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
  {
    Index dstRows = src.rows();
    Index dstCols = src.cols();
    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
      dst.resize(dstRows, dstCols);

    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.subTo(dst);
  }
};

} // namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_EVALUATOR_H