Eigen  3.3.3
AssignEvaluator.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
00005 // Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
00006 // Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
00007 //
00008 // This Source Code Form is subject to the terms of the Mozilla
00009 // Public License v. 2.0. If a copy of the MPL was not distributed
00010 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00011 
00012 #ifndef EIGEN_ASSIGN_EVALUATOR_H
00013 #define EIGEN_ASSIGN_EVALUATOR_H
00014 
00015 namespace Eigen {
00016 
00017 // This implementation is based on Assign.h
00018 
00019 namespace internal {
00020   
00021 /***************************************************************************
00022 * Part 1 : the logic deciding a strategy for traversal and unrolling       *
00023 ***************************************************************************/
00024 
00025 // copy_using_evaluator_traits is based on assign_traits
00026 
// Compile-time traits selecting how a dense assignment from SrcEvaluator into
// DstEvaluator through AssignFunc is traversed and unrolled.
// The results are exposed through the public enums Traversal, Vectorized and
// Unrolling and through the PacketType typedef; the alignment-related values
// (DstAlignment, SrcAlignment, JointAlignment, LinearRequiredAlignment,
// InnerRequiredAlignment) are public as well. The private enums only hold
// intermediate decisions.
00027 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
00028 struct copy_using_evaluator_traits
00029 {
00030   typedef typename DstEvaluator::XprType Dst;
00031   typedef typename Dst::Scalar DstScalar;
00032   
00033   enum {
00034     DstFlags = DstEvaluator::Flags,
00035     SrcFlags = SrcEvaluator::Flags
00036   };
00037   
00038 public:
00039   enum {
00040     DstAlignment = DstEvaluator::Alignment,
00041     SrcAlignment = SrcEvaluator::Alignment,
00042     DstHasDirectAccess = DstFlags & DirectAccessBit,
        // Alignment that both sides are known to satisfy (the smaller of the two).
00043     JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
00044   };
00045 
00046 private:
00047   enum {
        // Inner dimension of the destination: the whole size for vectors, otherwise
        // the column count for row-major and the row count for column-major expressions.
00048     InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
00049               : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
00050               : int(Dst::RowsAtCompileTime),
        // Same selection as InnerSize, but using the compile-time maximum sizes.
00051     InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
00052               : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
00053               : int(Dst::MaxRowsAtCompileTime),
00054     OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
        // NOTE(review): despite its name this is initialized from Dst::SizeAtCompileTime,
        // not from Dst::MaxSizeAtCompileTime.
00055     MaxSizeAtCompileTime = Dst::SizeAtCompileTime
00056   };
00057 
00058   // TODO distinguish between linear traversal and inner-traversals
      // Packet types are chosen separately for a traversal over the whole size
      // (linear) and for a traversal over one inner slice.
00059   typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
00060   typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
00061 
00062   enum {
00063     LinearPacketSize = unpacket_traits<LinearPacketType>::size,
00064     InnerPacketSize = unpacket_traits<InnerPacketType>::size
00065   };
00066 
00067 public:
00068   enum {
00069     LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
00070     InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
00071   };
00072 
00073 private:
00074   enum {
00075     DstIsRowMajor = DstFlags&RowMajorBit,
00076     SrcIsRowMajor = SrcFlags&RowMajorBit,
00077     StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
        // Any vectorized traversal needs matching storage orders, ActualPacketAccessBit
        // set on both sides, and a functor that reports PacketAccess.
00078     MightVectorize = bool(StorageOrdersAgree)
00079                   && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
00080                   && bool(functor_traits<AssignFunc>::PacketAccess),
        // Inner vectorization additionally needs a fixed inner size and outer stride
        // that are both multiples of the inner packet size, plus sufficient joint
        // alignment unless EIGEN_UNALIGNED_VECTORIZE is enabled.
00081     MayInnerVectorize  = MightVectorize
00082                        && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
00083                        && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
00084                        && (EIGEN_UNALIGNED_VECTORIZE  || int(JointAlignment)>=int(InnerRequiredAlignment)),
        // Linear (single-index) access needs matching storage orders and
        // LinearAccessBit set on both sides.
00085     MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
00086     MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
00087                        && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
00088       /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
00089          so it's only good for large enough sizes. */
00090     MaySliceVectorize  = bool(MightVectorize) && bool(DstHasDirectAccess)
00091                        && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
00092       /* slice vectorization can be slow, so we only want it if the slices are big, which is
00093          indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
00094          in a fixed-size matrix
00095          However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
00096   };
00097 
00098 public:
00099   enum {
        // Candidates are tested in priority order. Linear vectorization is preferred
        // over inner vectorization only when its packet is larger; otherwise it is
        // tried after inner vectorization.
00100     Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
00101               : int(MayInnerVectorize)   ? int(InnerVectorizedTraversal)
00102               : int(MayLinearVectorize)  ? int(LinearVectorizedTraversal)
00103               : int(MaySliceVectorize)   ? int(SliceVectorizedTraversal)
00104               : int(MayLinearize)        ? int(LinearTraversal)
00105                                          : int(DefaultTraversal),
        // True when the selected traversal is one of the three packet-based ones.
00106     Vectorized = int(Traversal) == InnerVectorizedTraversal
00107               || int(Traversal) == LinearVectorizedTraversal
00108               || int(Traversal) == SliceVectorizedTraversal
00109   };
00110 
      // Packet type matching the selected traversal: the linear packet for
      // LinearVectorizedTraversal, the inner packet in every other case.
00111   typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
00112 
00113 private:
00114   enum {
00115     ActualPacketSize    = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
00116                         : Vectorized ? InnerPacketSize
00117                         : 1,
        // The unrolling budget scales with the packet size actually used.
00118     UnrollingLimit      = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
        // Unrolling is allowed when the (fixed) number of coefficients times the summed
        // per-coefficient read cost of both evaluators fits in the budget.
00119     MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
00120                        && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
00121     MayUnrollInner      = int(InnerSize) != Dynamic
00122                        && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
00123   };
00124 
00125 public:
00126   enum {
        // Unrolling mode per traversal:
        //  - inner-vectorized / default: complete, else inner, else none;
        //  - linear-vectorized: complete only if the destination alignment is statically
        //    sufficient (or EIGEN_UNALIGNED_VECTORIZE is enabled), else none;
        //  - linear: complete or none;
        //  - slice-vectorized: inner unrolling only when EIGEN_UNALIGNED_VECTORIZE is
        //    enabled, otherwise it falls through to NoUnrolling.
00127     Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
00128                 ? (
00129                     int(MayUnrollCompletely) ? int(CompleteUnrolling)
00130                   : int(MayUnrollInner)      ? int(InnerUnrolling)
00131                                              : int(NoUnrolling)
00132                   )
00133               : int(Traversal) == int(LinearVectorizedTraversal)
00134                 ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
00135                           ? int(CompleteUnrolling)
00136                           : int(NoUnrolling) )
00137               : int(Traversal) == int(LinearTraversal)
00138                 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) 
00139                                               : int(NoUnrolling) )
00140 #if EIGEN_UNALIGNED_VECTORIZE
00141               : int(Traversal) == int(SliceVectorizedTraversal)
00142                 ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
00143                                          : int(NoUnrolling) )
00144 #endif
00145               : int(NoUnrolling)
00146   };
00147 
00148 #ifdef EIGEN_DEBUG_ASSIGN
      // Debug helper, compiled only when EIGEN_DEBUG_ASSIGN is defined: writes the
      // expression types, the flags (in hexadecimal), the intermediate decisions and the
      // selected traversal/unrolling to std::cerr. EIGEN_DEBUG_VAR is defined elsewhere
      // (presumably it prints the name and value of its argument - confirm).
00149   static void debug()
00150   {
00151     std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
00152     std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
00153     std::cerr.setf(std::ios::hex, std::ios::basefield);
00154     std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
00155     std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
00156     std::cerr.unsetf(std::ios::hex);
00157     EIGEN_DEBUG_VAR(DstAlignment)
00158     EIGEN_DEBUG_VAR(SrcAlignment)
00159     EIGEN_DEBUG_VAR(LinearRequiredAlignment)
00160     EIGEN_DEBUG_VAR(InnerRequiredAlignment)
00161     EIGEN_DEBUG_VAR(JointAlignment)
00162     EIGEN_DEBUG_VAR(InnerSize)
00163     EIGEN_DEBUG_VAR(InnerMaxSize)
00164     EIGEN_DEBUG_VAR(LinearPacketSize)
00165     EIGEN_DEBUG_VAR(InnerPacketSize)
00166     EIGEN_DEBUG_VAR(ActualPacketSize)
00167     EIGEN_DEBUG_VAR(StorageOrdersAgree)
00168     EIGEN_DEBUG_VAR(MightVectorize)
00169     EIGEN_DEBUG_VAR(MayLinearize)
00170     EIGEN_DEBUG_VAR(MayInnerVectorize)
00171     EIGEN_DEBUG_VAR(MayLinearVectorize)
00172     EIGEN_DEBUG_VAR(MaySliceVectorize)
00173     std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
00174     EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
00175     EIGEN_DEBUG_VAR(UnrollingLimit)
00176     EIGEN_DEBUG_VAR(MayUnrollCompletely)
00177     EIGEN_DEBUG_VAR(MayUnrollInner)
00178     std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
00179     std::cerr << std::endl;
00180   }
00181 #endif
00182 };
00183 
00184 /***************************************************************************
00185 * Part 2 : meta-unrollers
00186 ***************************************************************************/
00187 
00188 /************************
00189 *** Default traversal ***
00190 ************************/
00191 
// Recursive meta-unroller for DefaultTraversal with complete unrolling: run()
// assigns the coefficient at flat position Index and then instantiates the same
// template with Index+1. The <Kernel, Stop, Stop> specialization ends the recursion.
00192 template<typename Kernel, int Index, int Stop>
00193 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
00194 {
00195   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
00196   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
00197   typedef typename DstEvaluatorType::XprType DstXprType;
00198   
      // Split the flat compile-time Index into (outer, inner) coordinates using the
      // destination's compile-time inner size.
00199   enum {
00200     outer = Index / DstXprType::InnerSizeAtCompileTime,
00201     inner = Index % DstXprType::InnerSizeAtCompileTime
00202   };
00203 
00204   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00205   {
00206     kernel.assignCoeffByOuterInner(outer, inner);
00207     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
00208   }
00209 };
00210 
// Recursion terminator: selected when Index has reached Stop, so run() does nothing.
00211 template<typename Kernel, int Stop>
00212 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
00213 {
00214   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00215 };
00216 
// Recursive meta-unroller over the inner dimension for a given runtime outer index:
// run() assigns coefficient (outer, Index_) and then instantiates the same template
// with Index_+1, until the <Kernel, Stop, Stop> specialization ends the recursion.
// The template parameter is spelled Index_ because Index is used as a type in run().
00217 template<typename Kernel, int Index_, int Stop>
00218 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
00219 {
00220   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
00221   {
00222     kernel.assignCoeffByOuterInner(outer, Index_);
00223     copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
00224   }
00225 };
00226 
// Recursion terminator: the inner index has reached Stop, so run() does nothing.
00227 template<typename Kernel, int Stop>
00228 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
00229 {
00230   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
00231 };
00232 
00233 /***********************
00234 *** Linear traversal ***
00235 ***********************/
00236 
// Recursive meta-unroller for LinearTraversal: run() assigns the coefficient at
// linear position Index through kernel.assignCoeff(Index) and then instantiates the
// same template with Index+1, until the <Kernel, Stop, Stop> specialization is reached.
00237 template<typename Kernel, int Index, int Stop>
00238 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
00239 {
00240   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
00241   {
00242     kernel.assignCoeff(Index);
00243     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
00244   }
00245 };
00246 
// Recursion terminator: Index has reached Stop, so run() does nothing.
00247 template<typename Kernel, int Stop>
00248 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
00249 {
00250   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00251 };
00252 
00253 /**************************
00254 *** Inner vectorization ***
00255 **************************/
00256 
// Recursive meta-unroller for packet assignment with complete unrolling: run()
// assigns one packet at the (outer, inner) position derived from the flat Index and
// then recurses with Index advanced by the packet size.
// NOTE: the terminating specialization only matches Index == Stop exactly, so the
// recursion ends only if Stop - Index is a multiple of the packet size.
00257 template<typename Kernel, int Index, int Stop>
00258 struct copy_using_evaluator_innervec_CompleteUnrolling
00259 {
00260   // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
00261   typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
00262   typedef typename DstEvaluatorType::XprType DstXprType;
00263   typedef typename Kernel::PacketType PacketType;
00264   
      // (outer, inner) coordinates of the packet's first coefficient, plus the
      // alignments reported by the kernel's assignment traits.
00265   enum {
00266     outer = Index / DstXprType::InnerSizeAtCompileTime,
00267     inner = Index % DstXprType::InnerSizeAtCompileTime,
00268     SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
00269     DstAlignment = Kernel::AssignmentTraits::DstAlignment
00270   };
00271 
00272   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00273   {
00274     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
00275     enum { NextIndex = Index + unpacket_traits<PacketType>::size };
00276     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
00277   }
00278 };
00279 
// Recursion terminator: Index has reached Stop, so run() does nothing.
00280 template<typename Kernel, int Stop>
00281 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
00282 {
00283   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
00284 };
00285 
// Recursive meta-unroller for packet assignment along the inner dimension of a given
// runtime outer index: run() assigns one packet starting at (outer, Index_) and then
// recurses with Index_ advanced by the packet size. Unlike the complete-unrolling
// variant, the source and destination alignments are passed as template parameters.
// NOTE: the terminating specialization only matches Index_ == Stop exactly, so the
// recursion ends only if Stop - Index_ is a multiple of the packet size.
00286 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
00287 struct copy_using_evaluator_innervec_InnerUnrolling
00288 {
00289   typedef typename Kernel::PacketType PacketType;
00290   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
00291   {
00292     kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
00293     enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
00294     copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
00295   }
00296 };
00297 
// Recursion terminator: the inner index has reached Stop, so run() does nothing.
00298 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
00299 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
00300 {
00301   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
00302 };
00303 
00304 /***************************************************************************
00305 * Part 3 : implementation of all cases
00306 ***************************************************************************/
00307 
00308 // dense_assignment_loop is based on assign_impl
00309 
// Primary template of the assignment loop; it is only declared here. The actual
// loops are provided by specializations on (Traversal, Unrolling), and both
// parameters default to the values computed by Kernel::AssignmentTraits.
00310 template<typename Kernel,
00311          int Traversal = Kernel::AssignmentTraits::Traversal,
00312          int Unrolling = Kernel::AssignmentTraits::Unrolling>
00313 struct dense_assignment_loop;
00314 
00315 /************************
00316 *** Default traversal ***
00317 ************************/
00318 
// Default traversal without unrolling: a plain runtime double loop that assigns
// every coefficient by (outer, inner) index. The sizes are re-queried from the
// kernel on each loop test.
00319 template<typename Kernel>
00320 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
00321 {
00322   EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
00323   {
00324     for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
00325       for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
00326         kernel.assignCoeffByOuterInner(outer, inner);
00327       }
00328     }
00329   }
00330 };
00331 
// Default traversal, completely unrolled: delegates to the compile-time unroller
// over the flat index range [0, SizeAtCompileTime) of the destination expression.
00332 template<typename Kernel>
00333 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
00334 {
00335   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00336   {
00337     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00338     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00339   }
00340 };
00341 
// Default traversal with inner unrolling: a runtime loop over the outer dimension
// whose body is the compile-time unrolled assignment of one whole inner slice
// (inner indices [0, InnerSizeAtCompileTime)).
00342 template<typename Kernel>
00343 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
00344 {
00345   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00346   {
00347     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00348 
00349     const Index outerSize = kernel.outerSize();
00350     for(Index outer = 0; outer < outerSize; ++outer)
00351       copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
00352   }
00353 };
00354 
00355 /***************************
00356 *** Linear vectorization ***
00357 ***************************/
00358 
00359 
00360 // The goal of unaligned_dense_assignment_loop is simply to factor out the handling
00361 // of the non-vectorizable beginning and ending parts.
00362 
// Primary template: its run() is a no-op. Because the <false> case is explicitly
// specialized with a real scalar loop, this empty version is only used when
// IsAligned == true. The default argument is false, so the spelling
// unaligned_dense_assignment_loop<> selects the scalar-loop specialization.
00363 template <bool IsAligned = false>
00364 struct unaligned_dense_assignment_loop
00365 {
00366   // if IsAligned = true, then do nothing
00367   template <typename Kernel>
00368   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
00369 };
00370 
// Scalar fallback: run() assigns the coefficients at linear indices [start, end)
// one at a time through kernel.assignCoeff(). The two preprocessor branches only
// differ in the function's decoration (inlining / device attributes); the body is shared.
00371 template <>
00372 struct unaligned_dense_assignment_loop<false>
00373 {
00374   // MSVC must not inline this function. If it does, it fails to optimize the
00375   // packet access path.
00376   // FIXME check which version exhibits this issue
00377 #if EIGEN_COMP_MSVC
00378   template <typename Kernel>
00379   static EIGEN_DONT_INLINE void run(Kernel &kernel,
00380                                     Index start,
00381                                     Index end)
00382 #else
00383   template <typename Kernel>
00384   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
00385                                       Index start,
00386                                       Index end)
00387 #endif
00388   {
00389     for (Index index = start; index < end; ++index)
00390       kernel.assignCoeff(index);
00391   }
00392 };
00393 
// Linear vectorized traversal without unrolling. The linear index range [0, size)
// is processed in three phases:
//   1. [0, alignedStart)          scalar assignments (skipped when the destination is
//                                 statically aligned),
//   2. [alignedStart, alignedEnd) packet assignments, packetSize coefficients at a time,
//   3. [alignedEnd, size)         scalar assignments for the remainder.
00394 template<typename Kernel>
00395 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
00396 {
00397   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00398   {
00399     const Index size = kernel.size();
00400     typedef typename Kernel::Scalar Scalar;
00401     typedef typename Kernel::PacketType PacketType;
00402     enum {
00403       requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
00404       packetSize = unpacket_traits<PacketType>::size,
          // Non-zero when the destination alignment is statically sufficient.
00405       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
          // Store alignment used for packets: the requested alignment when
          // packet_traits<Scalar>::AlignedOnScalar is set, else the static one.
00406       dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
00407                                                             : int(Kernel::AssignmentTraits::DstAlignment),
00408       srcAlignment = Kernel::AssignmentTraits::JointAlignment
00409     };
        // first_aligned is defined elsewhere; presumably it returns the first index at
        // which the destination pointer satisfies requestedAlignment (capped by size) - confirm.
00410     const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
        // Round the number of remaining coefficients down to a multiple of packetSize.
00411     const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
00412 
        // Leading scalars: a no-op instantiation when dstIsAligned is non-zero.
00413     unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
00414 
00415     for(Index index = alignedStart; index < alignedEnd; index += packetSize)
00416       kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
00417 
        // Trailing scalars: <> resolves to the <false> (scalar loop) specialization.
00418     unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
00419   }
00420 };
00421 
// Linear vectorized traversal, completely unrolled: the flat range [0, alignedSize)
// is assigned with unrolled packet operations and the remaining range
// [alignedSize, size) with unrolled scalar assignments, where alignedSize is the
// compile-time size rounded down to a multiple of the packet size.
00422 template<typename Kernel>
00423 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
00424 {
00425   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00426   {
00427     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00428     typedef typename Kernel::PacketType PacketType;
00429     
00430     enum { size = DstXprType::SizeAtCompileTime,
00431            packetSize =unpacket_traits<PacketType>::size,
00432            alignedSize = (size/packetSize)*packetSize };
00433 
00434     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
00435     copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
00436   }
00437 };
00438 
00439 /**************************
00440 *** Inner vectorization ***
00441 **************************/
00442 
// Inner vectorized traversal without unrolling: a runtime double loop that assigns
// one packet per step along the inner dimension.
// NOTE: there is no scalar remainder loop here, so the inner size is expected to be
// a multiple of the packet size (copy_using_evaluator_traits only allows inner
// vectorization when InnerSize % InnerPacketSize == 0).
00443 template<typename Kernel>
00444 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
00445 {
00446   typedef typename Kernel::PacketType PacketType;
00447   enum {
00448     SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
00449     DstAlignment = Kernel::AssignmentTraits::DstAlignment
00450   };
00451   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00452   {
00453     const Index innerSize = kernel.innerSize();
00454     const Index outerSize = kernel.outerSize();
00455     const Index packetSize = unpacket_traits<PacketType>::size;
00456     for(Index outer = 0; outer < outerSize; ++outer)
00457       for(Index inner = 0; inner < innerSize; inner+=packetSize)
00458         kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
00459   }
00460 };
00461 
// Inner vectorized traversal, completely unrolled: delegates to the compile-time
// packet unroller over the flat index range [0, SizeAtCompileTime).
00462 template<typename Kernel>
00463 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
00464 {
00465   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00466   {
00467     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00468     copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00469   }
00470 };
00471 
// Inner vectorized traversal with inner unrolling: a runtime loop over the outer
// dimension whose body is the compile-time unrolled packet assignment of one inner
// slice, using the source/destination alignments from the assignment traits.
00472 template<typename Kernel>
00473 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
00474 {
00475   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00476   {
00477     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00478     typedef typename Kernel::AssignmentTraits Traits;
00479     const Index outerSize = kernel.outerSize();
00480     for(Index outer = 0; outer < outerSize; ++outer)
00481       copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
00482                                                    Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
00483   }
00484 };
00485 
00486 /***********************
00487 *** Linear traversal ***
00488 ***********************/
00489 
// Linear traversal without unrolling: a single runtime loop assigning every
// coefficient by its linear index in [0, kernel.size()).
00490 template<typename Kernel>
00491 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
00492 {
00493   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00494   {
00495     const Index size = kernel.size();
00496     for(Index i = 0; i < size; ++i)
00497       kernel.assignCoeff(i);
00498   }
00499 };
00500 
// Linear traversal, completely unrolled: delegates to the compile-time unroller over
// the linear index range [0, SizeAtCompileTime).
00501 template<typename Kernel>
00502 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
00503 {
00504   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00505   {
00506     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00507     copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
00508   }
00509 };
00510 
00511 /**************************
00512 *** Slice vectorization ***
00513 ***************************/
00514 
// Slice vectorized traversal without unrolling. Each inner slice (one per outer
// index) is processed as: leading scalars [0, alignedStart), packets
// [alignedStart, alignedEnd), trailing scalars [alignedEnd, innerSize).
// alignedStart is recomputed for every slice from the outer stride. If the
// destination is not statically aligned and its pointer is not even a multiple of
// sizeof(Scalar), the whole assignment falls back to the default scalar loop.
00515 template<typename Kernel>
00516 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
00517 {
00518   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00519   {
00520     typedef typename Kernel::Scalar Scalar;
00521     typedef typename Kernel::PacketType PacketType;
00522     enum {
00523       packetSize = unpacket_traits<PacketType>::size,
00524       requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
          // NOTE(review): this compares an int with sizeof(Scalar) (an unsigned type),
          // which may trigger sign-compare warnings.
00525       alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
00526       dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
00527       dstAlignment = alignable ? int(requestedAlignment)
00528                                : int(Kernel::AssignmentTraits::DstAlignment)
00529     };
00530     const Scalar *dst_ptr = kernel.dstDataPtr();
00531     if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
00532     {
00533       // the pointer is not aligned on scalar, so alignment is not possible
00534       return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
00535     }
        // The mask arithmetic below assumes packetSize is a power of two - TODO confirm.
00536     const Index packetAlignedMask = packetSize - 1;
00537     const Index innerSize = kernel.innerSize();
00538     const Index outerSize = kernel.outerSize();
        // Amount by which the aligned start position shifts from one slice to the
        // next, derived from the outer stride modulo the packet size.
00539     const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
        // first_aligned is defined elsewhere; presumably it returns the first aligned
        // index within the first slice (capped by innerSize) - confirm.
00540     Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
00541 
00542     for(Index outer = 0; outer < outerSize; ++outer)
00543     {
          // Round the count of coefficients after alignedStart down to a multiple of packetSize.
00544       const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
00545       // do the leading non-vectorizable part of the assignment
00546       for(Index inner = 0; inner<alignedStart ; ++inner)
00547         kernel.assignCoeffByOuterInner(outer, inner);
00548 
00549       // do the vectorizable part of the assignment
00550       for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
00551         kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
00552 
00553       // do the trailing non-vectorizable part of the assignment
00554       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
00555         kernel.assignCoeffByOuterInner(outer, inner);
00556 
          // Aligned start for the next slice, never exceeding innerSize.
00557       alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
00558     }
00559   }
00560 };
00561 
00562 #if EIGEN_UNALIGNED_VECTORIZE
// Slice vectorized traversal with inner unrolling; only compiled when
// EIGEN_UNALIGNED_VECTORIZE is enabled. For every outer index, the inner range
// [0, vectorizableSize) is assigned with unrolled packet operations and the
// remainder [vectorizableSize, size) with unrolled scalar assignments.
00563 template<typename Kernel>
00564 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
00565 {
00566   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
00567   {
00568     typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
00569     typedef typename Kernel::PacketType PacketType;
00570 
        // vectorizableSize is the compile-time inner size rounded down to a multiple
        // of the packet size.
00571     enum { size = DstXprType::InnerSizeAtCompileTime,
00572            packetSize =unpacket_traits<PacketType>::size,
00573            vectorizableSize = (size/packetSize)*packetSize };
00574 
00575     for(Index outer = 0; outer < kernel.outerSize(); ++outer)
00576     {
          // Both alignment template arguments are 0 (presumably meaning unaligned
          // loads and stores - confirm against the alignment enum).
00577       copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
00578       copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
00579     }
00580   }
00581 };
00582 #endif
00583 
00584 
00585 /***************************************************************************
00586 * Part 4 : Generic dense assignment kernel
00587 ***************************************************************************/
00588 
00589 // This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
00590 // to another dense writable evaluator.
00591 // It is parametrized by the two evaluators, and the actual assignment functor.
00592 // This abstraction level allows the evaluation loops to be kept as simple and as generic as possible.
00593 // One can customize the assignment using this generic dense_assignment_kernel with different
00594 // functors, or by completely overloading it, bypassing a functor.
00595 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
00596 class generic_dense_assignment_kernel
00597 {
00598 protected:
00599   typedef typename DstEvaluatorTypeT::XprType DstXprType;
00600   typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
00601 public:
00602   
00603   typedef DstEvaluatorTypeT DstEvaluatorType;
00604   typedef SrcEvaluatorTypeT SrcEvaluatorType;
00605   typedef typename DstEvaluatorType::Scalar Scalar;
00606   typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
00607   typedef typename AssignmentTraits::PacketType PacketType;
00608   
00609   
00610   EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
00611     : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
00612   {
00613     #ifdef EIGEN_DEBUG_ASSIGN
00614     AssignmentTraits::debug();
00615     #endif
00616   }
00617   
00618   EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
00619   EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
00620   EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
00621   EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
00622   EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
00623   EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
00624   
00625   EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
00626   EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
00627   
00629   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
00630   {
00631     m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
00632   }
00633   
00635   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
00636   {
00637     m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
00638   }
00639   
00641   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
00642   {
00643     Index row = rowIndexByOuterInner(outer, inner); 
00644     Index col = colIndexByOuterInner(outer, inner); 
00645     assignCoeff(row, col);
00646   }
00647   
00648   
00649   template<int StoreMode, int LoadMode, typename PacketType>
00650   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
00651   {
00652     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
00653   }
00654   
00655   template<int StoreMode, int LoadMode, typename PacketType>
00656   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
00657   {
00658     m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
00659   }
00660   
00661   template<int StoreMode, int LoadMode, typename PacketType>
00662   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
00663   {
00664     Index row = rowIndexByOuterInner(outer, inner); 
00665     Index col = colIndexByOuterInner(outer, inner);
00666     assignPacket<StoreMode,LoadMode,PacketType>(row, col);
00667   }
00668   
00669   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
00670   {
00671     typedef typename DstEvaluatorType::ExpressionTraits Traits;
00672     return int(Traits::RowsAtCompileTime) == 1 ? 0
00673       : int(Traits::ColsAtCompileTime) == 1 ? inner
00674       : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
00675       : inner;
00676   }
00677 
00678   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
00679   {
00680     typedef typename DstEvaluatorType::ExpressionTraits Traits;
00681     return int(Traits::ColsAtCompileTime) == 1 ? 0
00682       : int(Traits::RowsAtCompileTime) == 1 ? inner
00683       : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
00684       : outer;
00685   }
00686 
00687   EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
00688   {
00689     return m_dstExpr.data();
00690   }
00691   
00692 protected:
00693   DstEvaluatorType& m_dst;
00694   const SrcEvaluatorType& m_src;
00695   const Functor &m_functor;
00696   // TODO: find a way to avoid needing the original expression
00697   DstXprType& m_dstExpr;
00698 };
00699 
00700 /***************************************************************************
00701 * Part 5 : Entry point for dense rectangular assignment
00702 ***************************************************************************/
00703 
00704 template<typename DstXprType,typename SrcXprType, typename Functor>
00705 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00706 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/)
00707 {
00708   EIGEN_ONLY_USED_FOR_DEBUG(dst);
00709   EIGEN_ONLY_USED_FOR_DEBUG(src);
00710   eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00711 }
00712 
00713 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
00714 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00715 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &/*func*/)
00716 {
00717   Index dstRows = src.rows();
00718   Index dstCols = src.cols();
00719   if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
00720     dst.resize(dstRows, dstCols);
00721   eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
00722 }
00723 
00724 template<typename DstXprType, typename SrcXprType, typename Functor>
00725 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
00726 {
00727   typedef evaluator<DstXprType> DstEvaluatorType;
00728   typedef evaluator<SrcXprType> SrcEvaluatorType;
00729 
00730   SrcEvaluatorType srcEvaluator(src);
00731 
00732   // NOTE To properly handle A = (A*A.transpose())/s with A rectangular,
00733   // we need to resize the destination after the source evaluator has been created.
00734   resize_if_allowed(dst, src, func);
00735 
00736   DstEvaluatorType dstEvaluator(dst);
00737     
00738   typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
00739   Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
00740 
00741   dense_assignment_loop<Kernel>::run(kernel);
00742 }
00743 
00744 template<typename DstXprType, typename SrcXprType>
00745 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
00746 {
00747   call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
00748 }
00749 
00750 /***************************************************************************
00751 * Part 6 : Generic assignment
00752 ***************************************************************************/
00753 
00754 // Based on the respective shapes of the destination and source,
00755 // the class AssignmentKind determine the kind of assignment mechanism.
00756 // AssignmentKind must define a Kind typedef.
00757 template<typename DstShape, typename SrcShape> struct AssignmentKind;
00758 
00759 // Assignement kind defined in this file:
00760 struct Dense2Dense {};
00761 struct EigenBase2EigenBase {};
00762 
00763 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
00764 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
00765     
00766 // This is the main assignment class
00767 template< typename DstXprType, typename SrcXprType, typename Functor,
00768           typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
00769           typename EnableIf = void>
00770 struct Assignment;
00771 
00772 
00773 // The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition.
00774 // Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated.
00775 // So this intermediate function removes everything related to "assume-aliasing" such that Assignment
00776 // does not have to bother about these annoying details.
00777 
00778 template<typename Dst, typename Src>
00779 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00780 void call_assignment(Dst& dst, const Src& src)
00781 {
00782   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
00783 }
00784 template<typename Dst, typename Src>
00785 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00786 void call_assignment(const Dst& dst, const Src& src)
00787 {
00788   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
00789 }
00790                      
00791 // Deal with "assume-aliasing"
00792 template<typename Dst, typename Src, typename Func>
00793 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00794 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
00795 {
00796   typename plain_matrix_type<Src>::type tmp(src);
00797   call_assignment_no_alias(dst, tmp, func);
00798 }
00799 
00800 template<typename Dst, typename Src, typename Func>
00801 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00802 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
00803 {
00804   call_assignment_no_alias(dst, src, func);
00805 }
00806 
00807 // by-pass "assume-aliasing"
00808 // When there is no aliasing, we require that 'dst' has been properly resized
00809 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
00810 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00811 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
00812 {
00813   call_assignment_no_alias(dst.expression(), src, func);
00814 }
00815 
00816 
00817 template<typename Dst, typename Src, typename Func>
00818 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00819 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
00820 {
00821   enum {
00822     NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
00823                         || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
00824                       ) && int(Dst::SizeAtCompileTime) != 1
00825   };
00826 
00827   typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
00828   typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
00829   ActualDstType actualDst(dst);
00830   
00831   // TODO check whether this is the right place to perform these checks:
00832   EIGEN_STATIC_ASSERT_LVALUE(Dst)
00833   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
00834   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
00835   
00836   Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
00837 }
00838 template<typename Dst, typename Src>
00839 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00840 void call_assignment_no_alias(Dst& dst, const Src& src)
00841 {
00842   call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
00843 }
00844 
00845 template<typename Dst, typename Src, typename Func>
00846 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00847 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
00848 {
00849   // TODO check whether this is the right place to perform these checks:
00850   EIGEN_STATIC_ASSERT_LVALUE(Dst)
00851   EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
00852   EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
00853 
00854   Assignment<Dst,Src,Func>::run(dst, src, func);
00855 }
00856 template<typename Dst, typename Src>
00857 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00858 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
00859 {
00860   call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
00861 }
00862 
00863 // forward declaration
00864 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
00865 
00866 // Generic Dense to Dense assignment
00867 // Note that the last template argument "Weak" is needed to make it possible to perform
00868 // both partial specialization+SFINAE without ambiguous specialization
00869 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
00870 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
00871 {
00872   EIGEN_DEVICE_FUNC
00873   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
00874   {
00875 #ifndef EIGEN_NO_DEBUG
00876     internal::check_for_aliasing(dst, src);
00877 #endif
00878     
00879     call_dense_assignment_loop(dst, src, func);
00880   }
00881 };
00882 
00883 // Generic assignment through evalTo.
00884 // TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
00885 // Note that the last template argument "Weak" is needed to make it possible to perform
00886 // both partial specialization+SFINAE without ambiguous specialization
00887 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
00888 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
00889 {
00890   EIGEN_DEVICE_FUNC
00891   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
00892   {
00893     Index dstRows = src.rows();
00894     Index dstCols = src.cols();
00895     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
00896       dst.resize(dstRows, dstCols);
00897 
00898     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00899     src.evalTo(dst);
00900   }
00901 
00902   // NOTE The following two functions are templated to avoid their instanciation if not needed
00903   //      This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
00904   template<typename SrcScalarType>
00905   EIGEN_DEVICE_FUNC
00906   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
00907   {
00908     Index dstRows = src.rows();
00909     Index dstCols = src.cols();
00910     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
00911       dst.resize(dstRows, dstCols);
00912 
00913     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00914     src.addTo(dst);
00915   }
00916 
00917   template<typename SrcScalarType>
00918   EIGEN_DEVICE_FUNC
00919   static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &/*func*/)
00920   {
00921     Index dstRows = src.rows();
00922     Index dstCols = src.cols();
00923     if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
00924       dst.resize(dstRows, dstCols);
00925 
00926     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
00927     src.subTo(dst);
00928   }
00929 };
00930 
00931 } // namespace internal
00932 
00933 } // end namespace Eigen
00934 
00935 #endif // EIGEN_ASSIGN_EVALUATOR_H
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends