Eigen  3.3.3
GenericPacketMath.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00006 //
00007 // This Source Code Form is subject to the terms of the Mozilla
00008 // Public License v. 2.0. If a copy of the MPL was not distributed
00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00010 
00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H
00012 #define EIGEN_GENERIC_PACKET_MATH_H
00013 
00014 namespace Eigen {
00015 
00016 namespace internal {
00017 
00026 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
00027 #define EIGEN_DEBUG_ALIGNED_LOAD
00028 #endif
00029 
00030 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
00031 #define EIGEN_DEBUG_UNALIGNED_LOAD
00032 #endif
00033 
00034 #ifndef EIGEN_DEBUG_ALIGNED_STORE
00035 #define EIGEN_DEBUG_ALIGNED_STORE
00036 #endif
00037 
00038 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
00039 #define EIGEN_DEBUG_UNALIGNED_STORE
00040 #endif
00041 
00042 struct default_packet_traits
00043 {
00044   enum {
00045     HasHalfPacket = 0,
00046 
00047     HasAdd    = 1,
00048     HasSub    = 1,
00049     HasMul    = 1,
00050     HasNegate = 1,
00051     HasAbs    = 1,
00052     HasArg    = 0,
00053     HasAbs2   = 1,
00054     HasMin    = 1,
00055     HasMax    = 1,
00056     HasConj   = 1,
00057     HasSetLinear = 1,
00058     HasBlend  = 0,
00059 
00060     HasDiv    = 0,
00061     HasSqrt   = 0,
00062     HasRsqrt  = 0,
00063     HasExp    = 0,
00064     HasLog    = 0,
00065     HasLog1p  = 0,
00066     HasLog10  = 0,
00067     HasPow    = 0,
00068 
00069     HasSin    = 0,
00070     HasCos    = 0,
00071     HasTan    = 0,
00072     HasASin   = 0,
00073     HasACos   = 0,
00074     HasATan   = 0,
00075     HasSinh   = 0,
00076     HasCosh   = 0,
00077     HasTanh   = 0,
00078     HasLGamma = 0,
00079     HasDiGamma = 0,
00080     HasZeta = 0,
00081     HasPolygamma = 0,
00082     HasErf = 0,
00083     HasErfc = 0,
00084     HasIGamma = 0,
00085     HasIGammac = 0,
00086     HasBetaInc = 0,
00087 
00088     HasRound  = 0,
00089     HasFloor  = 0,
00090     HasCeil   = 0,
00091 
00092     HasSign   = 0
00093   };
00094 };
00095 
00096 template<typename T> struct packet_traits : default_packet_traits
00097 {
00098   typedef T type;
00099   typedef T half;
00100   enum {
00101     Vectorizable = 0,
00102     size = 1,
00103     AlignedOnScalar = 0,
00104     HasHalfPacket = 0
00105   };
00106   enum {
00107     HasAdd    = 0,
00108     HasSub    = 0,
00109     HasMul    = 0,
00110     HasNegate = 0,
00111     HasAbs    = 0,
00112     HasAbs2   = 0,
00113     HasMin    = 0,
00114     HasMax    = 0,
00115     HasConj   = 0,
00116     HasSetLinear = 0
00117   };
00118 };
00119 
00120 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
00121 
00122 template <typename Src, typename Tgt> struct type_casting_traits {
00123   enum {
00124     VectorizedCast = 0,
00125     SrcCoeffRatio = 1,
00126     TgtCoeffRatio = 1
00127   };
00128 };
00129 
00130 
00132 template <typename SrcPacket, typename TgtPacket>
00133 EIGEN_DEVICE_FUNC inline TgtPacket
00134 pcast(const SrcPacket& a) {
00135   return static_cast<TgtPacket>(a);
00136 }
00137 template <typename SrcPacket, typename TgtPacket>
00138 EIGEN_DEVICE_FUNC inline TgtPacket
00139 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
00140   return static_cast<TgtPacket>(a);
00141 }
00142 
00143 template <typename SrcPacket, typename TgtPacket>
00144 EIGEN_DEVICE_FUNC inline TgtPacket
00145 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
00146   return static_cast<TgtPacket>(a);
00147 }
00148 
00150 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00151 padd(const Packet& a,
00152         const Packet& b) { return a+b; }
00153 
00155 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00156 psub(const Packet& a,
00157         const Packet& b) { return a-b; }
00158 
00160 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00161 pnegate(const Packet& a) { return -a; }
00162 
00165 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00166 pconj(const Packet& a) { return numext::conj(a); }
00167 
00169 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00170 pmul(const Packet& a,
00171         const Packet& b) { return a*b; }
00172 
00174 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00175 pdiv(const Packet& a,
00176         const Packet& b) { return a/b; }
00177 
00179 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00180 pmin(const Packet& a,
00181         const Packet& b) { return numext::mini(a, b); }
00182 
00184 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00185 pmax(const Packet& a,
00186         const Packet& b) { return numext::maxi(a, b); }
00187 
00189 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00190 pabs(const Packet& a) { using std::abs; return abs(a); }
00191 
00193 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00194 parg(const Packet& a) { using numext::arg; return arg(a); }
00195 
00197 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00198 pand(const Packet& a, const Packet& b) { return a & b; }
00199 
00201 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00202 por(const Packet& a, const Packet& b) { return a | b; }
00203 
00205 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00206 pxor(const Packet& a, const Packet& b) { return a ^ b; }
00207 
00209 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00210 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
00211 
00213 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00214 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
00215 
00217 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00218 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
00219 
00221 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00222 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
00223 
00225 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00226 pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
00227 
00233 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00234 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
00235 
00242 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00243 ploadquad(const typename unpacket_traits<Packet>::type* from)
00244 { return pload1<Packet>(from); }
00245 
00255 template<typename Packet> EIGEN_DEVICE_FUNC
00256 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
00257                         Packet& a0, Packet& a1, Packet& a2, Packet& a3)
00258 {
00259   a0 = pload1<Packet>(a+0);
00260   a1 = pload1<Packet>(a+1);
00261   a2 = pload1<Packet>(a+2);
00262   a3 = pload1<Packet>(a+3);
00263 }
00264 
00272 template<typename Packet> EIGEN_DEVICE_FUNC
00273 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
00274                         Packet& a0, Packet& a1)
00275 {
00276   a0 = pload1<Packet>(a+0);
00277   a1 = pload1<Packet>(a+1);
00278 }
00279 
00281 template<typename Packet> inline Packet
00282 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
00283 
00285 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
00286 { (*to) = from; }
00287 
00289 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
00290 {  (*to) = from; }
00291 
00292  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
00293  { return ploadu<Packet>(from); }
00294 
00295  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
00296  { pstore(to, from); }
00297 
00299 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
00300 {
00301 #ifdef __CUDA_ARCH__
00302 #if defined(__LP64__)
00303   // 64-bit pointer operand constraint for inlined asm
00304   asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
00305 #else
00306   // 32-bit pointer operand constraint for inlined asm
00307   asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
00308 #endif
00309 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
00310   __builtin_prefetch(addr);
00311 #endif
00312 }
00313 
00315 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
00316 { return a; }
00317 
00319 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00320 preduxp(const Packet* vecs) { return vecs[0]; }
00321 
00323 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
00324 { return a; }
00325 
00330 template<typename Packet> EIGEN_DEVICE_FUNC inline
00331 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
00332 predux_downto4(const Packet& a)
00333 { return a; }
00334 
00336 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
00337 { return a; }
00338 
00340 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
00341 { return a; }
00342 
00344 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
00345 { return a; }
00346 
00348 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
00349 { return a; }
00350 
00352 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
00353 {
00354   // FIXME: uncomment the following in case we drop the internal imag and real functions.
00355 //   using std::imag;
00356 //   using std::real;
00357   return Packet(imag(a),real(a));
00358 }
00359 
00360 /**************************
00361 * Special math functions
00362 ***************************/
00363 
00365 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00366 Packet psin(const Packet& a) { using std::sin; return sin(a); }
00367 
00369 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00370 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
00371 
00373 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00374 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
00375 
00377 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00378 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
00379 
00381 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00382 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
00383 
00385 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00386 Packet patan(const Packet& a) { using std::atan; return atan(a); }
00387 
00389 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00390 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
00391 
00393 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00394 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
00395 
00397 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00398 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
00399 
00401 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00402 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
00403 
00405 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00406 Packet plog(const Packet& a) { using std::log; return log(a); }
00407 
00409 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00410 Packet plog1p(const Packet& a) { return numext::log1p(a); }
00411 
00413 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00414 Packet plog10(const Packet& a) { using std::log10; return log10(a); }
00415 
00417 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00418 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
00419 
00421 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00422 Packet prsqrt(const Packet& a) {
00423   return pdiv(pset1<Packet>(1), psqrt(a));
00424 }
00425 
00427 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00428 Packet pround(const Packet& a) { using numext::round; return round(a); }
00429 
00431 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00432 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
00433 
00435 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00436 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
00437 
00438 /***************************************************************************
00439 * The following functions might not have to be overwritten for vectorized types
00440 ***************************************************************************/
00441 
00443 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
00444 template<typename Packet>
00445 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
00446 {
00447   pstore(to, pset1<Packet>(a));
00448 }
00449 
00451 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00452 pmadd(const Packet&  a,
00453          const Packet&  b,
00454          const Packet&  c)
00455 { return padd(pmul(a, b),c); }
00456 
00459 template<typename Packet, int Alignment>
00460 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
00461 {
00462   if(Alignment >= unpacket_traits<Packet>::alignment)
00463     return pload<Packet>(from);
00464   else
00465     return ploadu<Packet>(from);
00466 }
00467 
00470 template<typename Scalar, typename Packet, int Alignment>
00471 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
00472 {
00473   if(Alignment >= unpacket_traits<Packet>::alignment)
00474     pstore(to, from);
00475   else
00476     pstoreu(to, from);
00477 }
00478 
00484 template<typename Packet, int LoadMode>
00485 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
00486 {
00487   return ploadt<Packet, LoadMode>(from);
00488 }
00489 
00491 template<int Offset,typename PacketType>
00492 struct palign_impl
00493 {
00494   // by default data are aligned, so there is nothing to be done :)
00495   static inline void run(PacketType&, const PacketType&) {}
00496 };
00497 
00513 template<int Offset,typename PacketType>
00514 inline void palign(PacketType& first, const PacketType& second)
00515 {
00516   palign_impl<Offset,PacketType>::run(first,second);
00517 }
00518 
00519 /***************************************************************************
00520 * Fast complex products (GCC generates a function call which is very slow)
00521 ***************************************************************************/
00522 
00523 // Eigen+CUDA does not support complexes.
00524 #ifndef __CUDACC__
00525 
00526 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
00527 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00528 
00529 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
00530 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00531 
00532 #endif
00533 
00534 
00535 /***************************************************************************
00536  * PacketBlock, that is a collection of N packets where the number of words
00537  * in the packet is a multiple of N.
00538 ***************************************************************************/
00539 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
00540   Packet packet[N];
00541 };
00542 
00543 template<typename Packet> EIGEN_DEVICE_FUNC inline void
00544 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
00545   // Nothing to do in the scalar case, i.e. a 1x1 matrix.
00546 }
00547 
00548 /***************************************************************************
00549  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
00550  * words from 2 packets.
00551 ***************************************************************************/
00552 template <size_t N> struct Selector {
00553   bool select[N];
00554 };
00555 
00556 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00557 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
00558   return ifPacket.select[0] ? thenPacket : elsePacket;
00559 }
00560 
00562 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00563 pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
00564 {
00565   // Default implementation based on pblend.
00566   // It must be specialized for higher performance.
00567   Selector<unpacket_traits<Packet>::size> mask;
00568   mask.select[0] = true;
00569   // This for loop should be optimized away by the compiler.
00570   for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
00571     mask.select[i] = false;
00572   return pblend(mask, pset1<Packet>(b), a);
00573 }
00574 
00576 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
00577 pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
00578 {
00579   // Default implementation based on pblend.
00580   // It must be specialized for higher performance.
00581   Selector<unpacket_traits<Packet>::size> mask;
00582   // This for loop should be optimized away by the compiler.
00583   for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
00584     mask.select[i] = false;
00585   mask.select[unpacket_traits<Packet>::size-1] = true;
00586   return pblend(mask, pset1<Packet>(b), a);
00587 }
00588 
00589 } // end namespace internal
00590 
00591 } // end namespace Eigen
00592 
00593 #endif // EIGEN_GENERIC_PACKET_MATH_H
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends