![]() |
Eigen-unsupported
3.3.3
|
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H 00011 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H 00012 00013 namespace Eigen { 00014 namespace internal { 00015 00016 namespace { 00017 00018 EIGEN_DEVICE_FUNC uint64_t get_random_seed() { 00019 #ifdef __CUDA_ARCH__ 00020 // We don't support 3d kernels since we currently only use 1 and 00021 // 2d kernels. 00022 assert(threadIdx.z == 0); 00023 return clock64() + 00024 blockIdx.x * blockDim.x + threadIdx.x + 00025 gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); 00026 00027 #elif defined _WIN32 00028 // Use the current time as a baseline. 00029 SYSTEMTIME st; 00030 GetSystemTime(&st); 00031 int time = st.wSecond + 1000 * st.wMilliseconds; 00032 // Mix in a random number to make sure that we get different seeds if 00033 // we try to generate seeds faster than the clock resolution. 00034 // We need 2 random values since the generator only generate 16 bits at 00035 // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) 00036 int rnd1 = ::rand(); 00037 int rnd2 = ::rand(); 00038 uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; 00039 return rnd; 00040 00041 #elif defined __APPLE__ 00042 // Same approach as for win32, except that the random number generator 00043 // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). 00044 uint64_t rnd = ::random() ^ mach_absolute_time(); 00045 return rnd; 00046 00047 #else 00048 // Augment the current time with pseudo random number generation 00049 // to ensure that we get different seeds if we try to generate seeds 00050 // faster than the clock resolution. 00051 timespec ts; 00052 clock_gettime(CLOCK_REALTIME, &ts); 00053 uint64_t rnd = ::random() ^ ts.tv_nsec; 00054 return rnd; 00055 #endif 00056 } 00057 00058 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { 00059 // TODO: Unify with the implementation in the non blocking thread pool. 00060 uint64_t current = *state; 00061 // Update the internal state 00062 *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; 00063 // Generate the random output (using the PCG-XSH-RS scheme) 00064 return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); 00065 } 00066 00067 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { 00068 seed = seed ? seed : get_random_seed(); 00069 return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; 00070 } 00071 00072 } // namespace 00073 00074 00075 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00076 T RandomToTypeUniform(uint64_t* state) { 00077 unsigned rnd = PCG_XSH_RS_generator(state); 00078 return static_cast<T>(rnd); 00079 } 00080 00081 00082 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00083 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) { 00084 Eigen::half result; 00085 // Generate 10 random bits for the mantissa 00086 unsigned rnd = PCG_XSH_RS_generator(state); 00087 result.x = static_cast<uint16_t>(rnd & 0x3ffu); 00088 // Set the exponent 00089 result.x |= (static_cast<uint16_t>(15) << 10); 00090 // Return the final result 00091 return result - Eigen::half(1.0f); 00092 } 00093 00094 00095 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00096 float RandomToTypeUniform<float>(uint64_t* state) { 00097 typedef union { 00098 uint32_t raw; 00099 float fp; 00100 } internal; 00101 internal result; 00102 // Generate 23 random bits for the mantissa mantissa 00103 const unsigned rnd = PCG_XSH_RS_generator(state); 00104 result.raw = rnd & 0x7fffffu; 00105 // Set the exponent 00106 result.raw |= (static_cast<uint32_t>(127) << 23); 00107 // Return the final result 00108 return result.fp - 1.0f; 00109 } 00110 00111 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00112 double RandomToTypeUniform<double>(uint64_t* state) { 00113 typedef union { 00114 uint64_t raw; 00115 double dp; 00116 } internal; 00117 internal result; 00118 result.raw = 0; 00119 // Generate 52 random bits for the mantissa 00120 // First generate the upper 20 bits 00121 unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; 00122 // The generate the lower 32 bits 00123 unsigned rnd2 = PCG_XSH_RS_generator(state); 00124 result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2; 00125 // Set the exponent 00126 result.raw |= (static_cast<uint64_t>(1023) << 52); 00127 // Return the final result 00128 return result.dp - 1.0; 00129 } 00130 00131 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00132 std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) { 00133 return std::complex<float>(RandomToTypeUniform<float>(state), 00134 RandomToTypeUniform<float>(state)); 00135 } 00136 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00137 std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) { 00138 return std::complex<double>(RandomToTypeUniform<double>(state), 00139 RandomToTypeUniform<double>(state)); 00140 } 00141 00142 template <typename T> class UniformRandomGenerator { 00143 public: 00144 static const bool PacketAccess = true; 00145 00146 // Uses the given "seed" if non-zero, otherwise uses a random seed. 00147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( 00148 uint64_t seed = 0) { 00149 m_state = PCG_XSH_RS_state(seed); 00150 } 00151 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( 00152 const UniformRandomGenerator& other) { 00153 m_state = other.m_state; 00154 } 00155 00156 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00157 T operator()(Index i) const { 00158 uint64_t local_state = m_state + i; 00159 T result = RandomToTypeUniform<T>(&local_state); 00160 m_state = local_state; 00161 return result; 00162 } 00163 00164 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00165 Packet packetOp(Index i) const { 00166 const int packetSize = internal::unpacket_traits<Packet>::size; 00167 EIGEN_ALIGN_MAX T values[packetSize]; 00168 uint64_t local_state = m_state + i; 00169 for (int j = 0; j < packetSize; ++j) { 00170 values[j] = RandomToTypeUniform<T>(&local_state); 00171 } 00172 m_state = local_state; 00173 return internal::pload<Packet>(values); 00174 } 00175 00176 private: 00177 mutable uint64_t m_state; 00178 }; 00179 00180 template <typename Scalar> 00181 struct functor_traits<UniformRandomGenerator<Scalar> > { 00182 enum { 00183 // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). 00184 Cost = 12 * NumTraits<Scalar>::AddCost * 00185 ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), 00186 PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess 00187 }; 00188 }; 00189 00190 00191 00192 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00193 T RandomToTypeNormal(uint64_t* state) { 00194 // Use the ratio of uniform method to generate numbers following a normal 00195 // distribution. See for example Numerical Recipes chapter 7.3.9 for the 00196 // details. 00197 T u, v, q; 00198 do { 00199 u = RandomToTypeUniform<T>(state); 00200 v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5)); 00201 const T x = u - T(0.449871); 00202 const T y = numext::abs(v) + T(0.386595); 00203 q = x*x + y * (T(0.196)*y - T(0.25472)*x); 00204 } while (q > T(0.27597) && 00205 (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); 00206 00207 return v/u; 00208 } 00209 00210 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00211 std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) { 00212 return std::complex<float>(RandomToTypeNormal<float>(state), 00213 RandomToTypeNormal<float>(state)); 00214 } 00215 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00216 std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) { 00217 return std::complex<double>(RandomToTypeNormal<double>(state), 00218 RandomToTypeNormal<double>(state)); 00219 } 00220 00221 00222 template <typename T> class NormalRandomGenerator { 00223 public: 00224 static const bool PacketAccess = true; 00225 00226 // Uses the given "seed" if non-zero, otherwise uses a random seed. 00227 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { 00228 m_state = PCG_XSH_RS_state(seed); 00229 } 00230 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( 00231 const NormalRandomGenerator& other) { 00232 m_state = other.m_state; 00233 } 00234 00235 template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00236 T operator()(Index i) const { 00237 uint64_t local_state = m_state + i; 00238 T result = RandomToTypeNormal<T>(&local_state); 00239 m_state = local_state; 00240 return result; 00241 } 00242 00243 template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 00244 Packet packetOp(Index i) const { 00245 const int packetSize = internal::unpacket_traits<Packet>::size; 00246 EIGEN_ALIGN_MAX T values[packetSize]; 00247 uint64_t local_state = m_state + i; 00248 for (int j = 0; j < packetSize; ++j) { 00249 values[j] = RandomToTypeNormal<T>(&local_state); 00250 } 00251 m_state = local_state; 00252 return internal::pload<Packet>(values); 00253 } 00254 00255 private: 00256 mutable uint64_t m_state; 00257 }; 00258 00259 00260 template <typename Scalar> 00261 struct functor_traits<NormalRandomGenerator<Scalar> > { 00262 enum { 00263 // On average, we need to generate about 3 random numbers 00264 // 15 mul, 8 add, 1.5 logs 00265 Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost + 00266 15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost + 00267 3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2, 00268 PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess 00269 }; 00270 }; 00271 00272 00273 } // end namespace internal 00274 } // end namespace Eigen 00275 00276 #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H