![]() |
Eigen
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_TYPE_CASTING_CUDA_H 00011 #define EIGEN_TYPE_CASTING_CUDA_H 00012 00013 namespace Eigen { 00014 00015 namespace internal { 00016 00017 template<> 00018 struct scalar_cast_op<float, Eigen::half> { 00019 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 00020 typedef Eigen::half result_type; 00021 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const { 00022 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 00023 return __float2half(a); 00024 #else 00025 return Eigen::half(a); 00026 #endif 00027 } 00028 }; 00029 00030 template<> 00031 struct functor_traits<scalar_cast_op<float, Eigen::half> > 00032 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 00033 00034 00035 template<> 00036 struct scalar_cast_op<int, Eigen::half> { 00037 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 00038 typedef Eigen::half result_type; 00039 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const { 00040 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 00041 return __float2half(static_cast<float>(a)); 00042 #else 00043 return Eigen::half(static_cast<float>(a)); 00044 #endif 00045 } 00046 }; 00047 00048 template<> 00049 struct functor_traits<scalar_cast_op<int, Eigen::half> > 00050 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 00051 00052 00053 template<> 00054 struct scalar_cast_op<Eigen::half, float> { 00055 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 00056 typedef float result_type; 00057 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const { 00058 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 00059 return __half2float(a); 00060 #else 00061 return static_cast<float>(a); 00062 #endif 00063 } 00064 }; 00065 00066 template<> 00067 struct functor_traits<scalar_cast_op<Eigen::half, float> > 00068 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 00069 00070 00071 00072 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 00073 00074 template <> 00075 struct type_casting_traits<Eigen::half, float> { 00076 enum { 00077 VectorizedCast = 1, 00078 SrcCoeffRatio = 2, 00079 TgtCoeffRatio = 1 00080 }; 00081 }; 00082 00083 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { 00084 float2 r1 = __half22float2(a); 00085 float2 r2 = __half22float2(b); 00086 return make_float4(r1.x, r1.y, r2.x, r2.y); 00087 } 00088 00089 template <> 00090 struct type_casting_traits<float, Eigen::half> { 00091 enum { 00092 VectorizedCast = 1, 00093 SrcCoeffRatio = 1, 00094 TgtCoeffRatio = 2 00095 }; 00096 }; 00097 00098 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { 00099 // Simply discard the second half of the input 00100 return __floats2half2_rn(a.x, a.y); 00101 } 00102 00103 #elif defined EIGEN_VECTORIZE_AVX512 00104 template <> 00105 struct type_casting_traits<half, float> { 00106 enum { 00107 VectorizedCast = 1, 00108 SrcCoeffRatio = 1, 00109 TgtCoeffRatio = 1 00110 }; 00111 }; 00112 00113 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) { 00114 return half2float(a); 00115 } 00116 00117 template <> 00118 struct type_casting_traits<float, half> { 00119 enum { 00120 VectorizedCast = 1, 00121 SrcCoeffRatio = 1, 00122 TgtCoeffRatio = 1 00123 }; 00124 }; 00125 00126 template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) { 00127 return float2half(a); 00128 } 00129 00130 #elif defined EIGEN_VECTORIZE_AVX 00131 00132 template <> 00133 struct type_casting_traits<Eigen::half, float> { 00134 enum { 00135 VectorizedCast = 1, 00136 SrcCoeffRatio = 1, 00137 TgtCoeffRatio = 1 00138 }; 00139 }; 00140 00141 template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) { 00142 return half2float(a); 00143 } 00144 00145 template <> 00146 struct type_casting_traits<float, Eigen::half> { 00147 enum { 00148 VectorizedCast = 1, 00149 SrcCoeffRatio = 1, 00150 TgtCoeffRatio = 1 00151 }; 00152 }; 00153 00154 template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) { 00155 return float2half(a); 00156 } 00157 00158 // Disable the following code since it's broken on too many platforms / compilers. 00159 //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC) 00160 #elif 0 00161 00162 template <> 00163 struct type_casting_traits<Eigen::half, float> { 00164 enum { 00165 VectorizedCast = 1, 00166 SrcCoeffRatio = 1, 00167 TgtCoeffRatio = 1 00168 }; 00169 }; 00170 00171 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) { 00172 __int64_t a64 = _mm_cvtm64_si64(a.x); 00173 Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64)); 00174 float f1 = static_cast<float>(h); 00175 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16)); 00176 float f2 = static_cast<float>(h); 00177 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32)); 00178 float f3 = static_cast<float>(h); 00179 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48)); 00180 float f4 = static_cast<float>(h); 00181 return _mm_set_ps(f4, f3, f2, f1); 00182 } 00183 00184 template <> 00185 struct type_casting_traits<float, Eigen::half> { 00186 enum { 00187 VectorizedCast = 1, 00188 SrcCoeffRatio = 1, 00189 TgtCoeffRatio = 1 00190 }; 00191 }; 00192 00193 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) { 00194 EIGEN_ALIGN16 float aux[4]; 00195 pstore(aux, a); 00196 Eigen::half h0(aux[0]); 00197 Eigen::half h1(aux[1]); 00198 Eigen::half h2(aux[2]); 00199 Eigen::half h3(aux[3]); 00200 00201 Packet4h result; 00202 result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x); 00203 return result; 00204 } 00205 00206 #endif 00207 00208 } // end namespace internal 00209 00210 } // end namespace Eigen 00211 00212 #endif // EIGEN_TYPE_CASTING_CUDA_H