Eigen  3.3.3
Assign_MKL.h
00001 /*
00002  Copyright (c) 2011, Intel Corporation. All rights reserved.
00003  Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
00004  
00005  Redistribution and use in source and binary forms, with or without modification,
00006  are permitted provided that the following conditions are met:
00007 
00008  * Redistributions of source code must retain the above copyright notice, this
00009    list of conditions and the following disclaimer.
00010  * Redistributions in binary form must reproduce the above copyright notice,
00011    this list of conditions and the following disclaimer in the documentation
00012    and/or other materials provided with the distribution.
00013  * Neither the name of Intel Corporation nor the names of its contributors may
00014    be used to endorse or promote products derived from this software without
00015    specific prior written permission.
00016 
00017  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00018  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00019  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00020  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
00021  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00022  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00023  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
00024  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00025  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00026  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00027 
00028  ********************************************************************************
00029  *   Content : Eigen bindings to Intel(R) MKL
00030  *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
00031  ********************************************************************************
00032 */
00033 
00034 #ifndef EIGEN_ASSIGN_VML_H
00035 #define EIGEN_ASSIGN_VML_H
00036 
00037 namespace Eigen { 
00038 
00039 namespace internal {
00040 
00041 template<typename Dst, typename Src>
00042 class vml_assign_traits
00043 {
00044   private:
00045     enum {
00046       DstHasDirectAccess = Dst::Flags & DirectAccessBit,
00047       SrcHasDirectAccess = Src::Flags & DirectAccessBit,
00048       StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
00049       InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
00050                 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
00051                 : int(Dst::RowsAtCompileTime),
00052       InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
00053                     : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
00054                     : int(Dst::MaxRowsAtCompileTime),
00055       MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
00056 
00057       MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
00058       MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
00059       VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
00060       LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
00061     };
00062   public:
00063     enum {
00064       EnableVml = MightEnableVml && LargeEnough,
00065       Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
00066     };
00067 };
00068 
// Expands to its argument unchanged. The call macros below wrap the pasted
// EIGEN_VMLMODE_EXPAND_<mode> name in it.
#define EIGEN_PP_EXPAND(ARG) ARG

// Trailing argument appended to VML calls declared with mode "LA":
// VML_LA only when EIGEN_FAST_MATH is defined and equal to 1, VML_HA otherwise.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
#else
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
#endif

// Mode "_": no trailing argument is appended.
#define EIGEN_VMLMODE_EXPAND__

// Routine-name prefix associated with each mode.
#define EIGEN_VMLMODE_PREFIX_LA vm
#define EIGEN_VMLMODE_PREFIX__  v
// Selects one of the two prefixes above from the mode token (LA or _).
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
00081 
/** \internal
  * Declares the partial specialization of Assignment that handles
  *   dst = CwiseUnaryOp<scalar_<EIGENOP>_op<EIGENTYPE>, SrcXprNested>
  * whenever vml_assign_traits<DstXprType,SrcXprNested>::EnableVml is set, and
  * evaluates it by calling \a VMLOP.
  *
  * \param EIGENOP    functor stem; the specialization matches scalar_<EIGENOP>_op
  * \param VMLOP      name of the routine to call
  * \param EIGENTYPE  scalar type of the Eigen expressions
  * \param VMLTYPE    element type the operand pointers are cast to for the call
  * \param VMLMODE    token pasted onto EIGEN_VMLMODE_EXPAND_ to produce the
  *                   trailing argument (if any) of the call
  *
  * When the traits report LinearTraversal, one call processes dst.size()
  * coefficients. Otherwise one call of dst.innerSize() coefficients is issued
  * per outer index.
  */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                      \
  template<typename DstXprType, typename SrcXprNested>                                                     \
  struct Assignment<                                                                                       \
      DstXprType,                                                                                          \
      CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>,                                        \
      assign_op<EIGENTYPE,EIGENTYPE>,                                                                      \
      Dense2Dense,                                                                                         \
      typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type>                     \
  {                                                                                                        \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType;                       \
                                                                                                           \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &)        \
    {                                                                                                      \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                  \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal != LinearTraversal) {                       \
        /* Process the inner vectors one at a time. */                                                     \
        const Index outerCount = dst.outerSize();                                                          \
        for(Index k = 0; k < outerCount; ++k) {                                                            \
          /* First coefficient of inner vector k: (k,0) if row-major, (0,k) otherwise. */                  \
          const Index srcRow = src.IsRowMajor ? k : 0;                                                     \
          const Index srcCol = src.IsRowMajor ? 0 : k;                                                     \
          const Index dstRow = dst.IsRowMajor ? k : 0;                                                     \
          const Index dstCol = dst.IsRowMajor ? 0 : k;                                                     \
          const EIGENTYPE *in = &(src.nestedExpression().coeffRef(srcRow, srcCol));                        \
          EIGENTYPE *out = &(dst.coeffRef(dstRow, dstCol));                                                \
          VMLOP( dst.innerSize(), (const VMLTYPE*)in,                                                      \
                 (VMLTYPE*)out EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                          \
        }                                                                                                  \
      } else {                                                                                             \
        /* Both operands are treated as one contiguous run: a single call. */                              \
        VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(),                                  \
               (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                     \
      }                                                                                                    \
    }                                                                                                      \
  };
00104 
00105 
/** \internal
  * Declares the VML-backed Assignment specializations of \a EIGENOP for float
  * and double. The routine name handed to EIGEN_MKL_VML_DECLARE_UNARY_CALL is
  * EIGEN_CAT(prefix-for-VMLMODE, s<VMLOP>) resp. EIGEN_CAT(prefix-for-VMLMODE, d<VMLOP>).
  */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP,                                                    \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP),          \
                                   float, float, VMLMODE)                                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP,                                                    \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP),          \
                                   double, double, VMLMODE)

/** \internal
  * Same as the _REAL variant for the complex scalar types: scomplex is paired
  * with MKL_Complex8 (type letter c) and dcomplex with MKL_Complex16 (type letter z).
  */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP,                                                    \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP),          \
                                   scomplex, MKL_Complex8, VMLMODE)                            \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP,                                                    \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP),          \
                                   dcomplex, MKL_Complex16, VMLMODE)

/** \internal
  * Declares the specializations of \a EIGENOP for all four scalar types
  * (float, double, scomplex, dcomplex).
  */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)                             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)                              \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
00117 
00118   
00119 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)
00120 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)
00121 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)
00122 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)
00123 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)
00124 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)
00125 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)
00126 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)
00127 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)
00128 // EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _)
00129 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)
00130 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)
00131 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
00132 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)
00133 
00134 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)
00135 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg,      _)
00136 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round,  _)
00137 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor,  _)
00138 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,  Ceil,   _)
00139 
/** \internal
  * Declares the partial specialization of Assignment that handles
  *   dst = CwiseBinaryOp<scalar_<EIGENOP>_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, constant>
  * where the right-hand operand is a CwiseNullaryOp built on scalar_constant_op,
  * whenever vml_assign_traits<DstXprType,SrcXprNested>::EnableVml is set.
  * The expression is evaluated by calling \a VMLOP with the constant as its
  * scalar argument.
  *
  * \param EIGENOP    functor stem; the specialization matches scalar_<EIGENOP>_op
  * \param VMLOP      name of the routine to call
  * \param EIGENTYPE  scalar type of the Eigen expressions
  * \param VMLTYPE    type the operand pointers and the constant are reinterpreted as
  * \param VMLMODE    token pasted onto EIGEN_VMLMODE_EXPAND_ to produce the
  *                   trailing argument (if any) of the call
  *
  * When the traits report LinearTraversal, one call processes dst.size()
  * coefficients. Otherwise one call of dst.innerSize() coefficients is issued
  * per outer index.
  */
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)                        \
  template<typename DstXprType, typename SrcXprNested, typename Plain>                                     \
  struct Assignment<                                                                                       \
      DstXprType,                                                                                          \
      CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                              \
                    const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >,                 \
      assign_op<EIGENTYPE,EIGENTYPE>,                                                                      \
      Dense2Dense,                                                                                         \
      typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type>                     \
  {                                                                                                        \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested,                        \
                          const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType;\
                                                                                                           \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &)        \
    {                                                                                                      \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());                                  \
      /* The constant stored in the right-hand functor, viewed as VMLTYPE. */                              \
      const VMLTYPE power = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other);                 \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal != LinearTraversal) {                       \
        /* Process the inner vectors one at a time. */                                                     \
        const Index outerCount = dst.outerSize();                                                          \
        for(Index k = 0; k < outerCount; ++k) {                                                            \
          /* First coefficient of inner vector k: (k,0) if row-major, (0,k) otherwise. */                  \
          const Index srcRow = src.IsRowMajor ? k : 0;                                                     \
          const Index srcCol = src.IsRowMajor ? 0 : k;                                                     \
          const Index dstRow = dst.IsRowMajor ? k : 0;                                                     \
          const Index dstCol = dst.IsRowMajor ? 0 : k;                                                     \
          const EIGENTYPE *in = &(src.lhs().coeffRef(srcRow, srcCol));                                     \
          EIGENTYPE *out = &(dst.coeffRef(dstRow, dstCol));                                                \
          VMLOP( dst.innerSize(), (const VMLTYPE*)in, power,                                               \
                 (VMLTYPE*)out EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                          \
        }                                                                                                  \
      } else {                                                                                             \
        /* Both operands are treated as one contiguous run: a single call. */                              \
        VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), power,                                        \
               (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) );                     \
      }                                                                                                    \
    }                                                                                                      \
  };
00166   
00167 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)
00168 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)
00169 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)
00170 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
00171 
00172 } // end namespace internal
00173 
00174 } // end namespace Eigen
00175 
00176 #endif // EIGEN_ASSIGN_VML_H
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends