// TensorStriding.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
00011 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
00012 
00013 namespace Eigen {
00014 
00022 namespace internal {
00023 template<typename Strides, typename XprType>
00024 struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
00025 {
00026   typedef typename XprType::Scalar Scalar;
00027   typedef traits<XprType> XprTraits;
00028   typedef typename XprTraits::StorageKind StorageKind;
00029   typedef typename XprTraits::Index Index;
00030   typedef typename XprType::Nested Nested;
00031   typedef typename remove_reference<Nested>::type _Nested;
00032   static const int NumDimensions = XprTraits::NumDimensions;
00033   static const int Layout = XprTraits::Layout;
00034 };
00035 
00036 template<typename Strides, typename XprType>
00037 struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
00038 {
00039   typedef const TensorStridingOp<Strides, XprType>& type;
00040 };
00041 
00042 template<typename Strides, typename XprType>
00043 struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
00044 {
00045   typedef TensorStridingOp<Strides, XprType> type;
00046 };
00047 
00048 }  // end namespace internal
00049 
00050 
00051 
00052 template<typename Strides, typename XprType>
00053 class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
00054 {
00055   public:
00056   typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
00057   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
00058   typedef typename XprType::CoeffReturnType CoeffReturnType;
00059   typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
00060   typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
00061   typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;
00062 
00063   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
00064       : m_xpr(expr), m_dims(dims) {}
00065 
00066     EIGEN_DEVICE_FUNC
00067     const Strides& strides() const { return m_dims; }
00068 
00069     EIGEN_DEVICE_FUNC
00070     const typename internal::remove_all<typename XprType::Nested>::type&
00071     expression() const { return m_xpr; }
00072 
00073     EIGEN_DEVICE_FUNC
00074     EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other)
00075     {
00076       typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
00077       Assign assign(*this, other);
00078       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
00079       return *this;
00080     }
00081 
00082     template<typename OtherDerived>
00083     EIGEN_DEVICE_FUNC
00084     EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other)
00085     {
00086       typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
00087       Assign assign(*this, other);
00088       internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
00089       return *this;
00090     }
00091 
00092   protected:
00093     typename XprType::Nested m_xpr;
00094     const Strides m_dims;
00095 };
00096 
00097 
00098 // Eval as rvalue
00099 template<typename Strides, typename ArgType, typename Device>
00100 struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
00101 {
00102   typedef TensorStridingOp<Strides, ArgType> XprType;
00103   typedef typename XprType::Index Index;
00104   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
00105   typedef DSizes<Index, NumDims> Dimensions;
00106   typedef typename XprType::Scalar Scalar;
00107   typedef typename XprType::CoeffReturnType CoeffReturnType;
00108   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
00109   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
00110 
00111   enum {
00112     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
00113     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
00114     Layout = TensorEvaluator<ArgType, Device>::Layout,
00115     CoordAccess = false,  // to be implemented
00116     RawAccess = false
00117   };
00118 
00119   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
00120       : m_impl(op.expression(), device)
00121   {
00122     m_dimensions = m_impl.dimensions();
00123     for (int i = 0; i < NumDims; ++i) {
00124       m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
00125     }
00126 
00127     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
00128     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00129       m_outputStrides[0] = 1;
00130       m_inputStrides[0] = 1;
00131       for (int i = 1; i < NumDims; ++i) {
00132         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
00133         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
00134         m_inputStrides[i-1] *= op.strides()[i-1];
00135       }
00136       m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
00137     } else {  // RowMajor
00138       m_outputStrides[NumDims-1] = 1;
00139       m_inputStrides[NumDims-1] = 1;
00140       for (int i = NumDims - 2; i >= 0; --i) {
00141         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
00142         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
00143         m_inputStrides[i+1] *= op.strides()[i+1];
00144       }
00145       m_inputStrides[0] *= op.strides()[0];
00146     }
00147   }
00148 
00149   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
00150 
00151   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
00152     m_impl.evalSubExprsIfNeeded(NULL);
00153     return true;
00154   }
00155   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
00156     m_impl.cleanup();
00157   }
00158 
00159   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
00160   {
00161     return m_impl.coeff(srcCoeff(index));
00162   }
00163 
00164   template<int LoadMode>
00165   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
00166   {
00167     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
00168     eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
00169 
00170     Index inputIndices[] = {0, 0};
00171     Index indices[] = {index, index + PacketSize - 1};
00172     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00173       for (int i = NumDims - 1; i > 0; --i) {
00174         const Index idx0 = indices[0] / m_outputStrides[i];
00175         const Index idx1 = indices[1] / m_outputStrides[i];
00176         inputIndices[0] += idx0 * m_inputStrides[i];
00177         inputIndices[1] += idx1 * m_inputStrides[i];
00178         indices[0] -= idx0 * m_outputStrides[i];
00179         indices[1] -= idx1 * m_outputStrides[i];
00180       }
00181       inputIndices[0] += indices[0] * m_inputStrides[0];
00182       inputIndices[1] += indices[1] * m_inputStrides[0];
00183     } else {  // RowMajor
00184       for (int i = 0; i < NumDims - 1; ++i) {
00185         const Index idx0 = indices[0] / m_outputStrides[i];
00186         const Index idx1 = indices[1] / m_outputStrides[i];
00187         inputIndices[0] += idx0 * m_inputStrides[i];
00188         inputIndices[1] += idx1 * m_inputStrides[i];
00189         indices[0] -= idx0 * m_outputStrides[i];
00190         indices[1] -= idx1 * m_outputStrides[i];
00191       }
00192       inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
00193       inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
00194     }
00195     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
00196       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
00197       return rslt;
00198     }
00199     else {
00200       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
00201       values[0] = m_impl.coeff(inputIndices[0]);
00202       values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
00203       for (int i = 1; i < PacketSize-1; ++i) {
00204         values[i] = coeff(index+i);
00205       }
00206       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
00207       return rslt;
00208     }
00209   }
00210 
00211   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
00212     double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
00213                                            TensorOpCost::MulCost<Index>() +
00214                                            TensorOpCost::DivCost<Index>()) +
00215         TensorOpCost::MulCost<Index>();
00216     if (vectorized) {
00217       compute_cost *= 2;  // packet() computes two indices
00218     }
00219     const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
00220     return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
00221         // Computation is not vectorized per se, but it is done once per packet.
00222         TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
00223   }
00224 
00225   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
00226 
00227  protected:
00228   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
00229   {
00230     Index inputIndex = 0;
00231     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00232       for (int i = NumDims - 1; i > 0; --i) {
00233         const Index idx = index / m_outputStrides[i];
00234         inputIndex += idx * m_inputStrides[i];
00235         index -= idx * m_outputStrides[i];
00236       }
00237       inputIndex += index * m_inputStrides[0];
00238     } else {  // RowMajor
00239       for (int i = 0; i < NumDims - 1; ++i) {
00240         const Index idx = index / m_outputStrides[i];
00241         inputIndex += idx * m_inputStrides[i];
00242         index -= idx * m_outputStrides[i];
00243       }
00244       inputIndex += index * m_inputStrides[NumDims-1];
00245     }
00246     return inputIndex;
00247   }
00248 
00249   Dimensions m_dimensions;
00250   array<Index, NumDims> m_outputStrides;
00251   array<Index, NumDims> m_inputStrides;
00252   TensorEvaluator<ArgType, Device> m_impl;
00253 };
00254 
00255 
00256 // Eval as lvalue
00257 template<typename Strides, typename ArgType, typename Device>
00258 struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
00259     : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
00260 {
00261   typedef TensorStridingOp<Strides, ArgType> XprType;
00262   typedef TensorEvaluator<const XprType, Device> Base;
00263   //  typedef typename XprType::Index Index;
00264   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
00265   //  typedef DSizes<Index, NumDims> Dimensions;
00266 
00267   enum {
00268     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
00269     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
00270     Layout = TensorEvaluator<ArgType, Device>::Layout,
00271     CoordAccess = false,  // to be implemented
00272     RawAccess = false
00273   };
00274 
00275   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
00276       : Base(op, device) { }
00277 
00278   typedef typename XprType::Index Index;
00279   typedef typename XprType::Scalar Scalar;
00280   typedef typename XprType::CoeffReturnType CoeffReturnType;
00281   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
00282   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
00283 
00284   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
00285   {
00286     return this->m_impl.coeffRef(this->srcCoeff(index));
00287   }
00288 
00289   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00290   void writePacket(Index index, const PacketReturnType& x)
00291   {
00292     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
00293     eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());
00294 
00295     Index inputIndices[] = {0, 0};
00296     Index indices[] = {index, index + PacketSize - 1};
00297     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00298       for (int i = NumDims - 1; i > 0; --i) {
00299         const Index idx0 = indices[0] / this->m_outputStrides[i];
00300         const Index idx1 = indices[1] / this->m_outputStrides[i];
00301         inputIndices[0] += idx0 * this->m_inputStrides[i];
00302         inputIndices[1] += idx1 * this->m_inputStrides[i];
00303         indices[0] -= idx0 * this->m_outputStrides[i];
00304         indices[1] -= idx1 * this->m_outputStrides[i];
00305       }
00306       inputIndices[0] += indices[0] * this->m_inputStrides[0];
00307       inputIndices[1] += indices[1] * this->m_inputStrides[0];
00308     } else {  // RowMajor
00309       for (int i = 0; i < NumDims - 1; ++i) {
00310         const Index idx0 = indices[0] / this->m_outputStrides[i];
00311         const Index idx1 = indices[1] / this->m_outputStrides[i];
00312         inputIndices[0] += idx0 * this->m_inputStrides[i];
00313         inputIndices[1] += idx1 * this->m_inputStrides[i];
00314         indices[0] -= idx0 * this->m_outputStrides[i];
00315         indices[1] -= idx1 * this->m_outputStrides[i];
00316       }
00317       inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
00318       inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
00319     }
00320     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
00321       this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
00322     }
00323     else {
00324       EIGEN_ALIGN_MAX Scalar values[PacketSize];
00325       internal::pstore<Scalar, PacketReturnType>(values, x);
00326       this->m_impl.coeffRef(inputIndices[0]) = values[0];
00327       this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
00328       for (int i = 1; i < PacketSize-1; ++i) {
00329         this->coeffRef(index+i) = values[i];
00330       }
00331     }
00332   }
00333 };
00334 
00335 
00336 } // end namespace Eigen
00337 
00338 #endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H