TensorMorphing.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal


template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
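
// Illustrative usage (not part of this header): TensorReshapingOp is normally built
// through TensorBase::reshape() rather than constructed directly. A minimal sketch,
// assuming a rank-2 float tensor:
//
//   Eigen::Tensor<float, 2> input(2, 6);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> new_dims{{3, 4}};
//   Eigen::Tensor<float, 2> output = input.reshape(new_dims);  // same 12 coefficients, new shape
//   input.reshape(new_dims) = output * 2.0f;                   // a reshaped lhs is also writable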


// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};


namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
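
// Illustrative usage (not part of this header): TensorSlicingOp is normally built
// through TensorBase::slice(offsets, extents). A minimal sketch, assuming a rank-2
// float tensor:
//
//   Eigen::Tensor<float, 2> t(4, 5);
//   t.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::DenseIndex, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> patch = t.slice(offsets, extents);  // rows 1-2, columns 2-4
//   t.slice(offsets, extents) = patch * 2.0f;                   // a slice of a tensor is also assignable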


// FIXME: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice>  {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}  // anonymous namespace

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }
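
  // Worked example of the fast path above (assuming a ColMajor layout): slicing a
  // 100x100 tensor down to 50x50 at offsets (10,10) stops the loop at i == 0 because
  // the first dimension already differs, so contiguous_values == 50. Provided the
  // MemcpyTriggerForSlicing threshold is exceeded, the copy loop then issues
  // 2500 / 50 == 50 memcpy calls, each copying one 50-coefficient column starting at
  // srcCoeff(i). If instead only the outermost dimension is cropped (100x100 -> 100x50),
  // contiguous_values == 5000 and the whole slice is copied with a single memcpy.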

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }
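
  // Worked example (assuming ColMajor): for a 4x5 input sliced to 2x3 at offsets (1,2),
  // m_outputStrides = {1, 2} and m_inputStrides = {1, 4}. For output index 5, i.e.
  // output coordinate (1,2):
  //   i == 1: idx = 5 / 2 = 2, inputIndex += (2 + 2) * 4 = 16, index -= 2 * 2 -> 1
  //   tail:   inputIndex += (1 + 1) = 2, so srcCoeff(5) == 18,
  // which is the column-major offset of input coordinate (2,4) in the 4x5 tensor.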

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};



namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
      const XprType& expr, const StartIndices& startIndices,
      const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
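
// Illustrative usage (not part of this header): TensorStridingSlicingOp is normally
// built through TensorBase::stridedSlice(start, stop, strides), which follows the
// start:stop:step slicing convention. A minimal sketch, assuming a rank-1 float tensor:
//
//   Eigen::Tensor<float, 1> t(10);
//   t.setRandom();
//   Eigen::array<Eigen::DenseIndex, 1> start{{7}}, stop{{2}}, strides{{-2}};
//   Eigen::Tensor<float, 1> picked = t.stridedSlice(start, stop, strides);
//   // picked has 3 coefficients: t(7), t(5), t(3), i.e. the NumPy slice t[7:2:-2].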

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero.
    DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by the assert above */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
    bool degenerate = false;
    for (int i = 0; i < NumDims; ++i) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        m_dimensions[i] = interval / m_strides[i]
                          + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if the tensor is degenerate, we send 1 to prevent a TensorIntDivisor constructor crash.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if the tensor is degenerate, we send 1 to prevent a TensorIntDivisor constructor crash.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }
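
  // Worked example of the dimension computation above: for an input dimension of size 10
  // with startIndex 7, stopIndex 2 and stride -2, the clamped interval is 2 - 7 = -5 and
  // the output dimension becomes (-5) / (-2) + 1 = 3, selecting input indices 7, 5 and 3.
  // If startIndex equals stopIndex, the interval is 0, the dimension is set to 0 and the
  // whole slice is treated as degenerate (empty).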

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max,value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H