![]() |
Eigen
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H 00012 #define EIGEN_GENERIC_PACKET_MATH_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00026 #ifndef EIGEN_DEBUG_ALIGNED_LOAD 00027 #define EIGEN_DEBUG_ALIGNED_LOAD 00028 #endif 00029 00030 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD 00031 #define EIGEN_DEBUG_UNALIGNED_LOAD 00032 #endif 00033 00034 #ifndef EIGEN_DEBUG_ALIGNED_STORE 00035 #define EIGEN_DEBUG_ALIGNED_STORE 00036 #endif 00037 00038 #ifndef EIGEN_DEBUG_UNALIGNED_STORE 00039 #define EIGEN_DEBUG_UNALIGNED_STORE 00040 #endif 00041 00042 struct default_packet_traits 00043 { 00044 enum { 00045 HasHalfPacket = 0, 00046 00047 HasAdd = 1, 00048 HasSub = 1, 00049 HasMul = 1, 00050 HasNegate = 1, 00051 HasAbs = 1, 00052 HasArg = 0, 00053 HasAbs2 = 1, 00054 HasMin = 1, 00055 HasMax = 1, 00056 HasConj = 1, 00057 HasSetLinear = 1, 00058 HasBlend = 0, 00059 00060 HasDiv = 0, 00061 HasSqrt = 0, 00062 HasRsqrt = 0, 00063 HasExp = 0, 00064 HasLog = 0, 00065 HasLog1p = 0, 00066 HasLog10 = 0, 00067 HasPow = 0, 00068 00069 HasSin = 0, 00070 HasCos = 0, 00071 HasTan = 0, 00072 HasASin = 0, 00073 HasACos = 0, 00074 HasATan = 0, 00075 HasSinh = 0, 00076 HasCosh = 0, 00077 HasTanh = 0, 00078 HasLGamma = 0, 00079 HasDiGamma = 0, 00080 HasZeta = 0, 00081 HasPolygamma = 0, 00082 HasErf = 0, 00083 HasErfc = 0, 00084 HasIGamma = 0, 00085 HasIGammac = 0, 00086 HasBetaInc = 0, 00087 00088 HasRound = 0, 00089 HasFloor = 0, 00090 HasCeil = 0, 00091 00092 HasSign = 0 00093 }; 00094 }; 00095 00096 template<typename T> struct packet_traits : default_packet_traits 00097 { 00098 typedef T type; 00099 typedef T half; 00100 enum { 00101 Vectorizable = 0, 00102 size = 1, 00103 AlignedOnScalar = 0, 00104 HasHalfPacket = 0 00105 }; 00106 enum { 00107 HasAdd = 0, 00108 HasSub = 0, 00109 HasMul = 0, 00110 HasNegate = 0, 00111 HasAbs = 0, 00112 HasAbs2 = 0, 00113 HasMin = 0, 00114 HasMax = 0, 00115 HasConj = 0, 00116 HasSetLinear = 0 00117 }; 00118 }; 00119 00120 template<typename T> struct packet_traits<const T> : packet_traits<T> { }; 00121 00122 template <typename Src, typename Tgt> struct type_casting_traits { 00123 enum { 00124 VectorizedCast = 0, 00125 SrcCoeffRatio = 1, 00126 TgtCoeffRatio = 1 00127 }; 00128 }; 00129 00130 00132 template <typename SrcPacket, typename TgtPacket> 00133 EIGEN_DEVICE_FUNC inline TgtPacket 00134 pcast(const SrcPacket& a) { 00135 return static_cast<TgtPacket>(a); 00136 } 00137 template <typename SrcPacket, typename TgtPacket> 00138 EIGEN_DEVICE_FUNC inline TgtPacket 00139 pcast(const SrcPacket& a, const SrcPacket& /*b*/) { 00140 return static_cast<TgtPacket>(a); 00141 } 00142 00143 template <typename SrcPacket, typename TgtPacket> 00144 EIGEN_DEVICE_FUNC inline TgtPacket 00145 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { 00146 return static_cast<TgtPacket>(a); 00147 } 00148 00150 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00151 padd(const Packet& a, 00152 const Packet& b) { return a+b; } 00153 00155 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00156 psub(const Packet& a, 00157 const Packet& b) { return a-b; } 00158 00160 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00161 pnegate(const Packet& a) { return -a; } 00162 00165 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00166 pconj(const Packet& a) { return numext::conj(a); } 00167 00169 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00170 pmul(const Packet& a, 00171 const Packet& b) { return a*b; } 00172 00174 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00175 pdiv(const Packet& a, 00176 const Packet& b) { return a/b; } 00177 00179 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00180 pmin(const Packet& a, 00181 const Packet& b) { return numext::mini(a, b); } 00182 00184 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00185 pmax(const Packet& a, 00186 const Packet& b) { return numext::maxi(a, b); } 00187 00189 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00190 pabs(const Packet& a) { using std::abs; return abs(a); } 00191 00193 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00194 parg(const Packet& a) { using numext::arg; return arg(a); } 00195 00197 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00198 pand(const Packet& a, const Packet& b) { return a & b; } 00199 00201 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00202 por(const Packet& a, const Packet& b) { return a | b; } 00203 00205 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00206 pxor(const Packet& a, const Packet& b) { return a ^ b; } 00207 00209 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00210 pandnot(const Packet& a, const Packet& b) { return a & (!b); } 00211 00213 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00214 pload(const typename unpacket_traits<Packet>::type* from) { return *from; } 00215 00217 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00218 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } 00219 00221 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00222 pset1(const typename unpacket_traits<Packet>::type& a) { return a; } 00223 00225 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00226 pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); } 00227 00233 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00234 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } 00235 00242 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00243 ploadquad(const typename unpacket_traits<Packet>::type* from) 00244 { return pload1<Packet>(from); } 00245 00255 template<typename Packet> EIGEN_DEVICE_FUNC 00256 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a, 00257 Packet& a0, Packet& a1, Packet& a2, Packet& a3) 00258 { 00259 a0 = pload1<Packet>(a+0); 00260 a1 = pload1<Packet>(a+1); 00261 a2 = pload1<Packet>(a+2); 00262 a3 = pload1<Packet>(a+3); 00263 } 00264 00272 template<typename Packet> EIGEN_DEVICE_FUNC 00273 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a, 00274 Packet& a0, Packet& a1) 00275 { 00276 a0 = pload1<Packet>(a+0); 00277 a1 = pload1<Packet>(a+1); 00278 } 00279 00281 template<typename Packet> inline Packet 00282 plset(const typename unpacket_traits<Packet>::type& a) { return a; } 00283 00285 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) 00286 { (*to) = from; } 00287 00289 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) 00290 { (*to) = from; } 00291 00292 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) 00293 { return ploadu<Packet>(from); } 00294 00295 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) 00296 { pstore(to, from); } 00297 00299 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) 00300 { 00301 #ifdef __CUDA_ARCH__ 00302 #if defined(__LP64__) 00303 // 64-bit pointer operand constraint for inlined asm 00304 asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); 00305 #else 00306 // 32-bit pointer operand constraint for inlined asm 00307 asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); 00308 #endif 00309 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) 00310 __builtin_prefetch(addr); 00311 #endif 00312 } 00313 00315 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) 00316 { return a; } 00317 00319 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00320 preduxp(const Packet* vecs) { return vecs[0]; } 00321 00323 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) 00324 { return a; } 00325 00330 template<typename Packet> EIGEN_DEVICE_FUNC inline 00331 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type 00332 predux_downto4(const Packet& a) 00333 { return a; } 00334 00336 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) 00337 { return a; } 00338 00340 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) 00341 { return a; } 00342 00344 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) 00345 { return a; } 00346 00348 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) 00349 { return a; } 00350 00352 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) 00353 { 00354 // FIXME: uncomment the following in case we drop the internal imag and real functions. 00355 // using std::imag; 00356 // using std::real; 00357 return Packet(imag(a),real(a)); 00358 } 00359 00360 /************************** 00361 * Special math functions 00362 ***************************/ 00363 00365 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00366 Packet psin(const Packet& a) { using std::sin; return sin(a); } 00367 00369 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00370 Packet pcos(const Packet& a) { using std::cos; return cos(a); } 00371 00373 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00374 Packet ptan(const Packet& a) { using std::tan; return tan(a); } 00375 00377 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00378 Packet pasin(const Packet& a) { using std::asin; return asin(a); } 00379 00381 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00382 Packet pacos(const Packet& a) { using std::acos; return acos(a); } 00383 00385 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00386 Packet patan(const Packet& a) { using std::atan; return atan(a); } 00387 00389 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00390 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } 00391 00393 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00394 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } 00395 00397 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00398 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } 00399 00401 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00402 Packet pexp(const Packet& a) { using std::exp; return exp(a); } 00403 00405 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00406 Packet plog(const Packet& a) { using std::log; return log(a); } 00407 00409 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00410 Packet plog1p(const Packet& a) { return numext::log1p(a); } 00411 00413 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00414 Packet plog10(const Packet& a) { using std::log10; return log10(a); } 00415 00417 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00418 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } 00419 00421 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00422 Packet prsqrt(const Packet& a) { 00423 return pdiv(pset1<Packet>(1), psqrt(a)); 00424 } 00425 00427 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00428 Packet pround(const Packet& a) { using numext::round; return round(a); } 00429 00431 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00432 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } 00433 00435 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00436 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } 00437 00438 /*************************************************************************** 00439 * The following functions might not have to be overwritten for vectorized types 00440 ***************************************************************************/ 00441 00443 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) 00444 template<typename Packet> 00445 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) 00446 { 00447 pstore(to, pset1<Packet>(a)); 00448 } 00449 00451 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00452 pmadd(const Packet& a, 00453 const Packet& b, 00454 const Packet& c) 00455 { return padd(pmul(a, b),c); } 00456 00459 template<typename Packet, int Alignment> 00460 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) 00461 { 00462 if(Alignment >= unpacket_traits<Packet>::alignment) 00463 return pload<Packet>(from); 00464 else 00465 return ploadu<Packet>(from); 00466 } 00467 00470 template<typename Scalar, typename Packet, int Alignment> 00471 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) 00472 { 00473 if(Alignment >= unpacket_traits<Packet>::alignment) 00474 pstore(to, from); 00475 else 00476 pstoreu(to, from); 00477 } 00478 00484 template<typename Packet, int LoadMode> 00485 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) 00486 { 00487 return ploadt<Packet, LoadMode>(from); 00488 } 00489 00491 template<int Offset,typename PacketType> 00492 struct palign_impl 00493 { 00494 // by default data are aligned, so there is nothing to be done :) 00495 static inline void run(PacketType&, const PacketType&) {} 00496 }; 00497 00513 template<int Offset,typename PacketType> 00514 inline void palign(PacketType& first, const PacketType& second) 00515 { 00516 palign_impl<Offset,PacketType>::run(first,second); 00517 } 00518 00519 /*************************************************************************** 00520 * Fast complex products (GCC generates a function call which is very slow) 00521 ***************************************************************************/ 00522 00523 // Eigen+CUDA does not support complexes. 00524 #ifndef __CUDACC__ 00525 00526 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) 00527 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00528 00529 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) 00530 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00531 00532 #endif 00533 00534 00535 /*************************************************************************** 00536 * PacketBlock, that is a collection of N packets where the number of words 00537 * in the packet is a multiple of N. 00538 ***************************************************************************/ 00539 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock { 00540 Packet packet[N]; 00541 }; 00542 00543 template<typename Packet> EIGEN_DEVICE_FUNC inline void 00544 ptranspose(PacketBlock<Packet,1>& /*kernel*/) { 00545 // Nothing to do in the scalar case, i.e. a 1x1 matrix. 00546 } 00547 00548 /*************************************************************************** 00549 * Selector, i.e. vector of N boolean values used to select (i.e. blend) 00550 * words from 2 packets. 00551 ***************************************************************************/ 00552 template <size_t N> struct Selector { 00553 bool select[N]; 00554 }; 00555 00556 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00557 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { 00558 return ifPacket.select[0] ? thenPacket : elsePacket; 00559 } 00560 00562 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00563 pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b) 00564 { 00565 // Default implementation based on pblend. 00566 // It must be specialized for higher performance. 00567 Selector<unpacket_traits<Packet>::size> mask; 00568 mask.select[0] = true; 00569 // This for loop should be optimized away by the compiler. 00570 for(Index i=1; i<unpacket_traits<Packet>::size; ++i) 00571 mask.select[i] = false; 00572 return pblend(mask, pset1<Packet>(b), a); 00573 } 00574 00576 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet 00577 pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b) 00578 { 00579 // Default implementation based on pblend. 00580 // It must be specialized for higher performance. 00581 Selector<unpacket_traits<Packet>::size> mask; 00582 // This for loop should be optimized away by the compiler. 00583 for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i) 00584 mask.select[i] = false; 00585 mask.select[unpacket_traits<Packet>::size-1] = true; 00586 return pblend(mask, pset1<Packet>(b), a); 00587 } 00588 00589 } // end namespace internal 00590 00591 } // end namespace Eigen 00592 00593 #endif // EIGEN_GENERIC_PACKET_MATH_H