SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Random.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Viktor Gal
00008  * Copyright (C) 2013 Viktor Gal
00009  */
00010 
00011 #include <shogun/mathematics/Random.h>
00012 #include <shogun/base/Parameter.h>
00013 #include <shogun/lib/external/SFMT/SFMT.h>
00014 #include <shogun/lib/external/dSFMT/dSFMT.h>
00015 #include <shogun/lib/Time.h>
00016 #include <shogun/lib/Lock.h>
00017 
00018 #ifdef DEV_RANDOM
00019 #include <fcntl.h>
00020 #endif
00021 
00022 #ifdef _WIN32
00023 #define _CRT_RAND_S
00024 #include <stdlib.h>
00025 #endif
00026 
00027 using namespace shogun;
00028 
00029 CRandom::CRandom()
00030  : m_sfmt_32(NULL),
00031  m_sfmt_64(NULL),
00032  m_dsfmt(NULL)
00033 {
00034     m_seed = CRandom::generate_seed();
00035     init();
00036 }
00037 
00038 CRandom::CRandom(uint32_t seed)
00039  : m_seed(seed),
00040  m_sfmt_32(NULL),
00041  m_sfmt_64(NULL),
00042  m_dsfmt(NULL)
00043 {
00044     init();
00045 }
00046 
00047 CRandom::~CRandom()
00048 {
00049     SG_FREE(m_x);
00050     SG_FREE(m_y);
00051     SG_FREE(m_xComp);
00052     SG_FREE(m_sfmt_32);
00053     SG_FREE(m_sfmt_64);
00054     SG_FREE(m_dsfmt);
00055 }
00056 
00057 void CRandom::set_seed(uint32_t seed)
00058 {
00059     reinit(seed);
00060 }
00061 
00062 uint32_t CRandom::get_seed() const
00063 {
00064     return m_seed;
00065 }
00066 
00067 void CRandom::init()
00068 {
00070     m_blockCount = 128;
00071     m_R = 3.442619855899;
00072     m_A = 9.91256303526217e-3;
00073     m_uint32ToU = 1.0 / (float64_t)std::numeric_limits<uint32_t>::max();
00074 
00075     m_x = SG_MALLOC(float64_t, m_blockCount + 1);
00076     m_y = SG_MALLOC(float64_t, m_blockCount);
00077     m_xComp = SG_MALLOC(uint32_t, m_blockCount);
00078 
00079     // Initialise rectangle position data.
00080     // m_x[i] and m_y[i] describe the top-right position ox Box i.
00081 
00082     // Determine top right position of the base rectangle/box (the rectangle with the Gaussian tale attached).
00083     // We call this Box 0 or B0 for short.
00084     // Note. x[0] also describes the right-hand edge of B1. (See diagram).
00085     m_x[0] = m_R;
00086     m_y[0] = GaussianPdfDenorm(m_R);
00087 
00088     // The next box (B1) has a right hand X edge the same as B0.
00089     // Note. B1's height is the box area divided by its width, hence B1 has a smaller height than B0 because
00090     // B0's total area includes the attached distribution tail.
00091     m_x[1] = m_R;
00092     m_y[1] = m_y[0] + (m_A / m_x[1]);
00093 
00094     // Calc positions of all remaining rectangles.
00095     for(int i=2; i < m_blockCount; i++)
00096     {
00097         m_x[i] = GaussianPdfDenormInv(m_y[i-1]);
00098         m_y[i] = m_y[i-1] + (m_A / m_x[i]);
00099     }
00100 
00101     // For completeness we define the right-hand edge of a notional box 6 as being zero (a box with no area).
00102     m_x[m_blockCount] = 0.0;
00103 
00104     // Useful precomputed values.
00105     m_A_div_y0 = m_A / m_y[0];
00106 
00107     // Special case for base box. m_xComp[0] stores the area of B0 as a proportion of R
00108     // (recalling that all segments have area A, but that the base segment is the combination of B0 and the distribution tail).
00109     // Thus -m_xComp[0] is the probability that a sample point is within the box part of the segment.
00110     m_xComp[0] = (uint32_t)(((m_R * m_y[0]) / m_A) * (float64_t)std::numeric_limits<uint32_t>::max());
00111 
00112     for(int32_t i=1; i < m_blockCount-1; i++)
00113     {
00114         m_xComp[i] = (uint32_t)((m_x[i+1] / m_x[i]) * (float64_t)std::numeric_limits<uint32_t>::max());
00115     }
00116     m_xComp[m_blockCount-1] = 0;  // Shown for completeness.
00117 
00118     // Sanity check. Test that the top edge of the topmost rectangle is at y=1.0.
00119     // Note. We expect there to be a tiny drift away from 1.0 due to the inexactness of floating
00120     // point arithmetic.
00121     ASSERT(CMath::abs(1.0 - m_y[m_blockCount-1]) < 1e-10);
00122 
00124     m_sfmt_32 = SG_MALLOC(sfmt_t, 1);
00125     m_sfmt_64 = SG_MALLOC(sfmt_t, 1);
00126     m_dsfmt = SG_MALLOC(dsfmt_t, 1);
00127     reinit(m_seed);
00128 }
00129 
00130 uint32_t CRandom::random_32() const
00131 {
00132     return sfmt_genrand_uint32(m_sfmt_32);
00133 }
00134 
00135 uint64_t CRandom::random_64() const
00136 {
00137     return sfmt_genrand_uint64(m_sfmt_64);
00138 }
00139 
00140 void CRandom::fill_array(uint32_t* array, int32_t size) const
00141 {
00142 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00143     if ((size >= sfmt_get_min_array_size32(m_sfmt_32)) && (size % 4) == 0)
00144     {
00145         sfmt_fill_array32(m_sfmt_32, array, size);
00146         return;
00147     }
00148 #endif
00149     for (int32_t i=0; i < size; i++)
00150         array[i] = random_32();
00151 }
00152 
00153 void CRandom::fill_array(uint64_t* array, int32_t size) const
00154 {
00155 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00156     if ((size >= sfmt_get_min_array_size64(m_sfmt_64)) && (size % 2) == 0)
00157     {
00158         sfmt_fill_array64(m_sfmt_64, array, size);
00159         return;
00160     }
00161 #endif
00162     for (int32_t i=0; i < size; i++)
00163         array[i] = random_64();
00164 }
00165 
00166 void CRandom::fill_array_oc(float64_t* array, int32_t size) const
00167 {
00168 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00169     if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
00170     {
00171         dsfmt_fill_array_open_close(m_dsfmt, array, size);
00172         return;
00173     }
00174 #endif
00175     for (int32_t i=0; i < size; i++)
00176         array[i] = dsfmt_genrand_open_close(m_dsfmt);
00177 }
00178 
00179 void CRandom::fill_array_co(float64_t* array, int32_t size) const
00180 {
00181 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00182     if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
00183     {
00184         dsfmt_fill_array_close_open(m_dsfmt, array, size);
00185         return;
00186     }
00187 #endif
00188     for (int32_t i=0; i < size; i++)
00189         array[i] = dsfmt_genrand_close_open(m_dsfmt);
00190 }
00191 
00192 void CRandom::fill_array_oo(float64_t* array, int32_t size) const
00193 {
00194 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00195     if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
00196     {
00197         dsfmt_fill_array_open_open(m_dsfmt, array, size);
00198         return;
00199     }
00200 #endif
00201     for (int32_t i=0; i < size; i++)
00202         array[i] = dsfmt_genrand_open_open(m_dsfmt);
00203 }
00204 
00205 void CRandom::fill_array_c1o2(float64_t* array, int32_t size) const
00206 {
00207 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN)
00208     if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0)
00209     {
00210         dsfmt_fill_array_close1_open2(m_dsfmt, array, size);
00211         return;
00212     }
00213 #endif
00214     for (int32_t i=0; i < size; i++)
00215         array[i] = dsfmt_genrand_close1_open2(m_dsfmt);
00216 }
00217 
00218 float64_t CRandom::random_close() const
00219 {
00220     return sfmt_genrand_real1(m_sfmt_32);
00221 }
00222 
00223 float64_t CRandom::random_open() const
00224 {
00225     return dsfmt_genrand_open_open(m_dsfmt);
00226 }
00227 
00228 float64_t CRandom::random_half_open() const
00229 {
00230     return dsfmt_genrand_close_open(m_dsfmt);
00231 }
00232 
00233 float64_t CRandom::normal_distrib(float64_t mu, float64_t sigma) const
00234 {
00235     return mu + (std_normal_distrib() * sigma);
00236 }
00237 
00238 float64_t CRandom::std_normal_distrib() const
00239 {
00240     for (;;)
00241     {
00242         // Select box at random.
00243         uint8_t u = random_32();
00244         int32_t i = (int32_t)(u & 0x7F);
00245         float64_t sign = ((u & 0x80) == 0) ? -1.0 : 1.0;
00246 
00247         // Generate uniform random value with range [0,0xffffffff].
00248         uint32_t u2 = random_32();
00249 
00250         // Special case for the base segment.
00251         if(0 == i)
00252         {
00253             if(u2 < m_xComp[0])
00254             {
00255                 // Generated x is within R0.
00256                 return u2 * m_uint32ToU * m_A_div_y0 * sign;
00257             }
00258             // Generated x is in the tail of the distribution.
00259             return sample_tail() * sign;
00260         }
00261 
00262         // All other segments.
00263         if(u2 < m_xComp[i])
00264         {   // Generated x is within the rectangle.
00265             return u2 * m_uint32ToU * m_x[i] * sign;
00266         }
00267 
00268         // Generated x is outside of the rectangle.
00269         // Generate a random y coordinate and test if our (x,y) is within the distribution curve.
00270         // This execution path is relatively slow/expensive (makes a call to Math.Exp()) but relatively rarely executed,
00271         // although more often than the 'tail' path (above).
00272         float64_t x = u2 * m_uint32ToU * m_x[i];
00273         if(m_y[i-1] + ((m_y[i] - m_y[i-1]) * random_half_open()) < GaussianPdfDenorm(x) ) {
00274             return x * sign;
00275         }
00276     }
00277 }
00278 
00279 float64_t CRandom::sample_tail() const
00280 {
00281     float64_t x, y;
00282     do
00283     {
00284         x = -CMath::log(random_half_open()) / m_R;
00285         y = -CMath::log(random_half_open());
00286     } while(y+y < x*x);
00287     return m_R + x;
00288 }
00289 
00290 float64_t CRandom::GaussianPdfDenorm(float64_t x) const
00291 {
00292     return CMath::exp(-(x*x / 2.0));
00293 }
00294 
00295 float64_t CRandom::GaussianPdfDenormInv(float64_t y) const
00296 {
00297     // Operates over the y range (0,1], which happens to be the y range of the pdf,
00298     // with the exception that it does not include y=0, but we would never call with
00299     // y=0 so it doesn't matter. Remember that a Gaussian effectively has a tail going
00300     // off into x == infinity, hence asking what is x when y=0 is an invalid question
00301     // in the context of this class.
00302     return CMath::sqrt(-2.0 * CMath::log(y));
00303 }
00304 
00305 void CRandom::reinit(uint32_t seed)
00306 {
00307     m_state_lock.lock();
00308     m_seed = seed;
00309     sfmt_init_gen_rand(m_sfmt_32, m_seed);
00310     sfmt_init_gen_rand(m_sfmt_64, m_seed);
00311     dsfmt_init_gen_rand(m_dsfmt, m_seed);
00312     m_state_lock.unlock();
00313 }
00314 
00315 uint32_t CRandom::generate_seed()
00316 {
00317     uint32_t seed;
00318 #if defined(_WIN32)
00319     rand_s(&seed);
00320 #elif defined(HAVE_ARC4RANDOM)
00321     seed = arc4random();
00322 #elif defined(DEV_RANDOM)
00323     int fd = open(DEV_RANDOM, O_RDONLY);
00324     ASSERT(fd >= 0);
00325     ssize_t actual_read = 
00326         read(fd, reinterpret_cast<char*>(&seed), sizeof(seed));
00327     close(fd);
00328     ASSERT(actual_read == sizeof(seed));
00329 #else
00330     SG_SWARNING("Not safe seed for the PRNG\n");
00331     struct timeval tv;
00332     gettimeofday(&tv, NULL);
00333     seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
00334 #endif
00335     return seed;
00336 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation