// TensorConversion.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
00011 #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
00012 
00013 namespace Eigen {
00014 
00022 namespace internal {
// Traits for TensorConversionOp: the expression's Scalar becomes the cast
// target type, while storage kind, index type, rank and layout are all
// inherited unchanged from the nested expression.
template<typename TargetType, typename XprType>
struct traits<TensorConversionOp<TargetType, XprType> >
{
  // Type promotion to handle the case where the types of the lhs and the rhs are different.
  typedef TargetType Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
  enum { Flags = 0 };
};
00036 
// Conversion expressions are evaluated by const reference: no temporary is
// materialized when the op appears inside a larger expression.
template<typename TargetType, typename XprType>
struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
{
  typedef const TensorConversionOp<TargetType, XprType>& type;
};
00042 
// When nested inside another tensor expression, the conversion op is stored
// by value (the op itself only holds a nested reference to its argument).
template<typename TargetType, typename XprType>
struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
{
  typedef TensorConversionOp<TargetType, XprType> type;
};
00048 
00049 }  // end namespace internal
00050 
00051 
00052 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
00053 struct PacketConverter {
00054   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00055   PacketConverter(const TensorEvaluator& impl)
00056       : m_impl(impl) {}
00057 
00058   template<int LoadMode, typename Index>
00059   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
00060     return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
00061   }
00062 
00063  private:
00064   const TensorEvaluator& m_impl;
00065 };
00066 
00067 
00068 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
00069 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
00070   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00071   PacketConverter(const TensorEvaluator& impl)
00072       : m_impl(impl) {}
00073 
00074   template<int LoadMode, typename Index>
00075   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
00076     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
00077 
00078     SrcPacket src1 = m_impl.template packet<LoadMode>(index);
00079     SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
00080     TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
00081     return result;
00082   }
00083 
00084  private:
00085   const TensorEvaluator& m_impl;
00086 };
00087 
00088 template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
00089 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
00090   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00091   PacketConverter(const TensorEvaluator& impl)
00092       : m_impl(impl) {}
00093 
00094   template<int LoadMode, typename Index>
00095   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
00096     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
00097 
00098     SrcPacket src1 = m_impl.template packet<LoadMode>(index);
00099     SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
00100     SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
00101     SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
00102     TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
00103     return result;
00104   }
00105 
00106  private:
00107   const TensorEvaluator& m_impl;
00108 };
00109 
// Specialization for a 1:2 coefficient ratio (e.g. float -> double on 4-wide
// SIMD): one source packet load covers two target packets, so each call only
// consumes part of the loaded source packet and consecutive calls re-read
// overlapping source data.
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    // NOTE(review): the strict `<` is conservative — it diverts to the scalar
    // path one packet earlier than strictly necessary, which is safe.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {
      // Scalar fallback near the end of the buffer (or when the subexpression
      // has no materialized data): cast TgtPacketSize coefficients one by one
      // into an aligned temporary, then load them as a single packet.
      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
      typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
      typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
      internal::scalar_cast_op<SrcType, TgtType> converter;
      EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
      for (int i = 0; i < TgtPacketSize; ++i) {
        values[i] = converter(m_impl.coeff(index+i));
      }
      TgtPacket rslt = internal::pload<TgtPacket>(values);
      return rslt;
    }
  }

 private:
  const TensorEvaluator& m_impl;
  // Total number of coefficients in the expression; keeps the speculative
  // full-source-packet load above within bounds.
  const typename TensorEvaluator::Index m_maxIndex;
};
00143 
/** \class TensorConversionOp
  *
  * \brief Tensor conversion expression: lazily casts every coefficient of the
  * nested expression to \p TargetType. Read-only (rvalue) expression; the
  * actual cast is performed by the corresponding TensorEvaluator.
  */
template<typename TargetType, typename XprType>
class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorConversionOp>::Index Index;
    typedef typename internal::nested<TensorConversionOp>::type Nested;
    typedef Scalar CoeffReturnType;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
        : m_xpr(xpr) {}

    /// Returns the wrapped (source) expression.
    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};
00165 
00166 template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
00167   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
00168     impl.evalSubExprsIfNeeded(NULL);
00169     return true;
00170   }
00171 };
00172 
00173 template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
00174   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
00175     return impl.evalSubExprsIfNeeded(data);
00176   }
00177 };
00178 
00179 
// Eval as rvalue
// Evaluator for TensorConversionOp: evaluates the nested expression and casts
// every coefficient from SrcType to TargetType, either one coefficient at a
// time (scalar_cast_op) or packet-wise through PacketConverter/pcast.
template<typename TargetType, typename ArgType, typename Device>
struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
  typedef TensorConversionOp<TargetType, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef TargetType Scalar;
  typedef TargetType CoeffReturnType;
  // Scalar type of the nested expression, stripped of const/ref qualifiers.
  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_impl(op.expression(), device)
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }

  // Dispatches on whether TargetType == SrcType: if so, the nested expression
  // may evaluate directly into `data`; otherwise it evaluates into its own
  // storage and the cast happens lazily in coeff()/packet().
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
  {
    return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
  {
    m_impl.cleanup();
  }

  // Loads one source coefficient and casts it to the target type.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    return converter(m_impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    // Bitwise & is fine here: both operands are compile-time integral
    // constants, so no short-circuiting is needed.
    const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
        internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
    // Compile-time dispatch: vectorized pcast when supported, otherwise the
    // scalar gather/cast fallback (see PacketConv below).
    return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
  }

  // Cost model: the cast cost is amortized per coefficient; in the vectorized
  // case the source/target packet-size mismatch is accounted for through the
  // Src/TgtCoeffRatio of the cast.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
    if (vectorized) {
      const double SrcCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const double TgtCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
          TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
    } else {
      return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
    }
  }

  // The conversion is computed on the fly: there is no raw buffer to expose.
  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

  protected:
  // Scalar fallback: cast PacketSize coefficients one at a time into an
  // aligned temporary, then load them as a single target packet.
  template <int LoadMode, bool ActuallyVectorize>
  struct PacketConv {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      internal::scalar_cast_op<SrcType, TargetType> converter;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = converter(impl.coeff(index+i));
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  // Vectorized path: delegate to the PacketConverter specialization matching
  // the cast's source/target coefficient ratios.
  template <int LoadMode>
  struct PacketConv<LoadMode, true> {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
                      SrcCoeffRatio, TgtCoeffRatio> converter(impl);
      return converter.template packet<LoadMode>(index);
    }
  };

  // Evaluator of the wrapped (source-typed) expression.
  TensorEvaluator<ArgType, Device> m_impl;
};
00276 
00277 } // end namespace Eigen
00278 
00279 #endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H