TensorRandom.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
00011 #define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
00012 
00013 namespace Eigen {
00014 namespace internal {
00015 
00016 namespace {
00017 
00018 EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
00019 #ifdef __CUDA_ARCH__
00020   // We don't support 3d kernels since we currently only use 1 and
00021   // 2d kernels.
00022   assert(threadIdx.z == 0);
00023   return clock64() +
00024       blockIdx.x * blockDim.x + threadIdx.x +
00025       gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y);
00026 
00027 #elif defined _WIN32
00028   // Use the current time as a baseline.
00029   SYSTEMTIME st;
00030   GetSystemTime(&st);
00031   int time = st.wSecond + 1000 * st.wMilliseconds;
00032   // Mix in a random number to make sure that we get different seeds if
00033   // we try to generate seeds faster than the clock resolution.
00034   // We need 2 random values since the generator only generate 16 bits at
00035   // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx)
00036   int rnd1 = ::rand();
00037   int rnd2 = ::rand();
00038   uint64_t rnd = (rnd1 | rnd2 << 16) ^ time;
00039   return rnd;
00040 
00041 #elif defined __APPLE__
00042   // Same approach as for win32, except that the random number generator
00043   // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random).
00044   uint64_t rnd = ::random() ^ mach_absolute_time();
00045   return rnd;
00046 
00047 #else
00048   // Augment the current time with pseudo random number generation
00049   // to ensure that we get different seeds if we try to generate seeds
00050   // faster than the clock resolution.
00051   timespec ts;
00052   clock_gettime(CLOCK_REALTIME, &ts);
00053   uint64_t rnd = ::random() ^ ts.tv_nsec;
00054   return rnd;
00055 #endif
00056 }
00057 
00058 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) {
00059   // TODO: Unify with the implementation in the non blocking thread pool.
00060   uint64_t current = *state;
00061   // Update the internal state
00062   *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
00063   // Generate the random output (using the PCG-XSH-RS scheme)
00064   return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
00065 }
00066 
00067 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) {
00068   seed = seed ? seed : get_random_seed();
00069   return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
00070 }
00071 
00072 }  // namespace
00073 
00074 
00075 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00076 T RandomToTypeUniform(uint64_t* state) {
00077   unsigned rnd = PCG_XSH_RS_generator(state);
00078   return static_cast<T>(rnd);
00079 }
00080 
00081 
00082 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00083 Eigen::half RandomToTypeUniform<Eigen::half>(uint64_t* state) {
00084   Eigen::half result;
00085   // Generate 10 random bits for the mantissa
00086   unsigned rnd = PCG_XSH_RS_generator(state);
00087   result.x = static_cast<uint16_t>(rnd & 0x3ffu);
00088   // Set the exponent
00089   result.x |= (static_cast<uint16_t>(15) << 10);
00090   // Return the final result
00091   return result - Eigen::half(1.0f);
00092 }
00093 
00094 
00095 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00096 float RandomToTypeUniform<float>(uint64_t* state) {
00097   typedef union {
00098     uint32_t raw;
00099     float fp;
00100   } internal;
00101   internal result;
00102   // Generate 23 random bits for the mantissa mantissa
00103   const unsigned rnd = PCG_XSH_RS_generator(state);
00104   result.raw = rnd & 0x7fffffu;
00105   // Set the exponent
00106   result.raw |= (static_cast<uint32_t>(127) << 23);
00107   // Return the final result
00108   return result.fp - 1.0f;
00109 }
00110 
00111 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00112 double RandomToTypeUniform<double>(uint64_t* state) {
00113   typedef union {
00114     uint64_t raw;
00115     double dp;
00116   } internal;
00117   internal result;
00118   result.raw = 0;
00119   // Generate 52 random bits for the mantissa
00120   // First generate the upper 20 bits
00121   unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu;
00122   // The generate the lower 32 bits
00123   unsigned rnd2 = PCG_XSH_RS_generator(state);
00124   result.raw = (static_cast<uint64_t>(rnd1) << 32) | rnd2;
00125   // Set the exponent
00126   result.raw |= (static_cast<uint64_t>(1023) << 52);
00127   // Return the final result
00128   return result.dp - 1.0;
00129 }
00130 
00131 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00132 std::complex<float> RandomToTypeUniform<std::complex<float> >(uint64_t* state) {
00133   return std::complex<float>(RandomToTypeUniform<float>(state),
00134                              RandomToTypeUniform<float>(state));
00135 }
00136 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00137 std::complex<double> RandomToTypeUniform<std::complex<double> >(uint64_t* state) {
00138   return std::complex<double>(RandomToTypeUniform<double>(state),
00139                               RandomToTypeUniform<double>(state));
00140 }
00141 
00142 template <typename T> class UniformRandomGenerator {
00143  public:
00144   static const bool PacketAccess = true;
00145 
00146   // Uses the given "seed" if non-zero, otherwise uses a random seed.
00147   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
00148       uint64_t seed = 0) {
00149     m_state = PCG_XSH_RS_state(seed);
00150   }
00151   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
00152       const UniformRandomGenerator& other) {
00153     m_state = other.m_state;
00154   }
00155 
00156   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00157   T operator()(Index i) const {
00158     uint64_t local_state = m_state + i;
00159     T result = RandomToTypeUniform<T>(&local_state);
00160     m_state = local_state;
00161     return result;
00162   }
00163 
00164   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00165   Packet packetOp(Index i) const {
00166     const int packetSize = internal::unpacket_traits<Packet>::size;
00167     EIGEN_ALIGN_MAX T values[packetSize];
00168     uint64_t local_state = m_state + i;
00169     for (int j = 0; j < packetSize; ++j) {
00170       values[j] = RandomToTypeUniform<T>(&local_state);
00171     }
00172     m_state = local_state;
00173     return internal::pload<Packet>(values);
00174   }
00175 
00176  private:
00177   mutable uint64_t m_state;
00178 };
00179 
00180 template <typename Scalar>
00181 struct functor_traits<UniformRandomGenerator<Scalar> > {
00182   enum {
00183     // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)).
00184     Cost = 12 * NumTraits<Scalar>::AddCost *
00185            ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)),
00186     PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
00187   };
00188 };
00189 
00190 
00191 
00192 template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00193 T RandomToTypeNormal(uint64_t* state) {
00194   // Use the ratio of uniform method to generate numbers following a normal
00195   // distribution. See for example Numerical Recipes chapter 7.3.9 for the
00196   // details.
00197   T u, v, q;
00198   do {
00199     u = RandomToTypeUniform<T>(state);
00200     v = T(1.7156) * (RandomToTypeUniform<T>(state) - T(0.5));
00201     const T x = u - T(0.449871);
00202     const T y = numext::abs(v) + T(0.386595);
00203     q = x*x + y * (T(0.196)*y - T(0.25472)*x);
00204   } while (q > T(0.27597) &&
00205            (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u));
00206 
00207   return v/u;
00208 }
00209 
00210 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00211 std::complex<float> RandomToTypeNormal<std::complex<float> >(uint64_t* state) {
00212   return std::complex<float>(RandomToTypeNormal<float>(state),
00213                              RandomToTypeNormal<float>(state));
00214 }
00215 template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00216 std::complex<double> RandomToTypeNormal<std::complex<double> >(uint64_t* state) {
00217   return std::complex<double>(RandomToTypeNormal<double>(state),
00218                               RandomToTypeNormal<double>(state));
00219 }
00220 
00221 
00222 template <typename T> class NormalRandomGenerator {
00223  public:
00224   static const bool PacketAccess = true;
00225 
00226   // Uses the given "seed" if non-zero, otherwise uses a random seed.
00227   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
00228     m_state = PCG_XSH_RS_state(seed);
00229   }
00230   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
00231       const NormalRandomGenerator& other) {
00232     m_state = other.m_state;
00233   }
00234 
00235  template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00236   T operator()(Index i) const {
00237     uint64_t local_state = m_state + i;
00238     T result = RandomToTypeNormal<T>(&local_state);
00239     m_state = local_state;
00240     return result;
00241   }
00242 
00243   template<typename Packet, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
00244   Packet packetOp(Index i) const {
00245     const int packetSize = internal::unpacket_traits<Packet>::size;
00246     EIGEN_ALIGN_MAX T values[packetSize];
00247     uint64_t local_state = m_state + i;
00248     for (int j = 0; j < packetSize; ++j) {
00249       values[j] = RandomToTypeNormal<T>(&local_state);
00250     }
00251     m_state = local_state;
00252     return internal::pload<Packet>(values);
00253   }
00254 
00255  private:
00256   mutable uint64_t m_state;
00257 };
00258 
00259 
00260 template <typename Scalar>
00261 struct functor_traits<NormalRandomGenerator<Scalar> > {
00262   enum {
00263     // On average, we need to generate about 3 random numbers
00264     // 15 mul, 8 add, 1.5 logs
00265     Cost = 3 * functor_traits<UniformRandomGenerator<Scalar> >::Cost +
00266            15 * NumTraits<Scalar>::AddCost + 8 * NumTraits<Scalar>::AddCost +
00267            3 * functor_traits<scalar_log_op<Scalar> >::Cost / 2,
00268     PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
00269   };
00270 };
00271 
00272 
00273 } // end namespace internal
00274 } // end namespace Eigen
00275 
00276 #endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H
 All Classes Functions Variables Typedefs Enumerator