SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Viktor Gal 00008 * Copyright (C) 2013 Viktor Gal 00009 */ 00010 00011 #include <shogun/mathematics/Random.h> 00012 #include <shogun/base/Parameter.h> 00013 #include <shogun/lib/external/SFMT/SFMT.h> 00014 #include <shogun/lib/external/dSFMT/dSFMT.h> 00015 #include <shogun/lib/Time.h> 00016 #include <shogun/lib/Lock.h> 00017 00018 #ifdef DEV_RANDOM 00019 #include <fcntl.h> 00020 #endif 00021 00022 #ifdef _WIN32 00023 #define _CRT_RAND_S 00024 #include <stdlib.h> 00025 #endif 00026 00027 using namespace shogun; 00028 00029 CRandom::CRandom() 00030 : m_sfmt_32(NULL), 00031 m_sfmt_64(NULL), 00032 m_dsfmt(NULL) 00033 { 00034 m_seed = CRandom::generate_seed(); 00035 init(); 00036 } 00037 00038 CRandom::CRandom(uint32_t seed) 00039 : m_seed(seed), 00040 m_sfmt_32(NULL), 00041 m_sfmt_64(NULL), 00042 m_dsfmt(NULL) 00043 { 00044 init(); 00045 } 00046 00047 CRandom::~CRandom() 00048 { 00049 SG_FREE(m_x); 00050 SG_FREE(m_y); 00051 SG_FREE(m_xComp); 00052 SG_FREE(m_sfmt_32); 00053 SG_FREE(m_sfmt_64); 00054 SG_FREE(m_dsfmt); 00055 } 00056 00057 void CRandom::set_seed(uint32_t seed) 00058 { 00059 reinit(seed); 00060 } 00061 00062 uint32_t CRandom::get_seed() const 00063 { 00064 return m_seed; 00065 } 00066 00067 void CRandom::init() 00068 { 00070 m_blockCount = 128; 00071 m_R = 3.442619855899; 00072 m_A = 9.91256303526217e-3; 00073 m_uint32ToU = 1.0 / (float64_t)std::numeric_limits<uint32_t>::max(); 00074 00075 m_x = SG_MALLOC(float64_t, m_blockCount + 1); 00076 m_y = SG_MALLOC(float64_t, m_blockCount); 00077 m_xComp = SG_MALLOC(uint32_t, m_blockCount); 00078 00079 // Initialise rectangle position data. 00080 // m_x[i] and m_y[i] describe the top-right position ox Box i. 00081 00082 // Determine top right position of the base rectangle/box (the rectangle with the Gaussian tale attached). 00083 // We call this Box 0 or B0 for short. 00084 // Note. x[0] also describes the right-hand edge of B1. (See diagram). 00085 m_x[0] = m_R; 00086 m_y[0] = GaussianPdfDenorm(m_R); 00087 00088 // The next box (B1) has a right hand X edge the same as B0. 00089 // Note. B1's height is the box area divided by its width, hence B1 has a smaller height than B0 because 00090 // B0's total area includes the attached distribution tail. 00091 m_x[1] = m_R; 00092 m_y[1] = m_y[0] + (m_A / m_x[1]); 00093 00094 // Calc positions of all remaining rectangles. 00095 for(int i=2; i < m_blockCount; i++) 00096 { 00097 m_x[i] = GaussianPdfDenormInv(m_y[i-1]); 00098 m_y[i] = m_y[i-1] + (m_A / m_x[i]); 00099 } 00100 00101 // For completeness we define the right-hand edge of a notional box 6 as being zero (a box with no area). 00102 m_x[m_blockCount] = 0.0; 00103 00104 // Useful precomputed values. 00105 m_A_div_y0 = m_A / m_y[0]; 00106 00107 // Special case for base box. m_xComp[0] stores the area of B0 as a proportion of R 00108 // (recalling that all segments have area A, but that the base segment is the combination of B0 and the distribution tail). 00109 // Thus -m_xComp[0] is the probability that a sample point is within the box part of the segment. 00110 m_xComp[0] = (uint32_t)(((m_R * m_y[0]) / m_A) * (float64_t)std::numeric_limits<uint32_t>::max()); 00111 00112 for(int32_t i=1; i < m_blockCount-1; i++) 00113 { 00114 m_xComp[i] = (uint32_t)((m_x[i+1] / m_x[i]) * (float64_t)std::numeric_limits<uint32_t>::max()); 00115 } 00116 m_xComp[m_blockCount-1] = 0; // Shown for completeness. 00117 00118 // Sanity check. Test that the top edge of the topmost rectangle is at y=1.0. 00119 // Note. We expect there to be a tiny drift away from 1.0 due to the inexactness of floating 00120 // point arithmetic. 00121 ASSERT(CMath::abs(1.0 - m_y[m_blockCount-1]) < 1e-10); 00122 00124 m_sfmt_32 = SG_MALLOC(sfmt_t, 1); 00125 m_sfmt_64 = SG_MALLOC(sfmt_t, 1); 00126 m_dsfmt = SG_MALLOC(dsfmt_t, 1); 00127 reinit(m_seed); 00128 } 00129 00130 uint32_t CRandom::random_32() const 00131 { 00132 return sfmt_genrand_uint32(m_sfmt_32); 00133 } 00134 00135 uint64_t CRandom::random_64() const 00136 { 00137 return sfmt_genrand_uint64(m_sfmt_64); 00138 } 00139 00140 void CRandom::fill_array(uint32_t* array, int32_t size) const 00141 { 00142 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00143 if ((size >= sfmt_get_min_array_size32(m_sfmt_32)) && (size % 4) == 0) 00144 { 00145 sfmt_fill_array32(m_sfmt_32, array, size); 00146 return; 00147 } 00148 #endif 00149 for (int32_t i=0; i < size; i++) 00150 array[i] = random_32(); 00151 } 00152 00153 void CRandom::fill_array(uint64_t* array, int32_t size) const 00154 { 00155 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00156 if ((size >= sfmt_get_min_array_size64(m_sfmt_64)) && (size % 2) == 0) 00157 { 00158 sfmt_fill_array64(m_sfmt_64, array, size); 00159 return; 00160 } 00161 #endif 00162 for (int32_t i=0; i < size; i++) 00163 array[i] = random_64(); 00164 } 00165 00166 void CRandom::fill_array_oc(float64_t* array, int32_t size) const 00167 { 00168 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00169 if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0) 00170 { 00171 dsfmt_fill_array_open_close(m_dsfmt, array, size); 00172 return; 00173 } 00174 #endif 00175 for (int32_t i=0; i < size; i++) 00176 array[i] = dsfmt_genrand_open_close(m_dsfmt); 00177 } 00178 00179 void CRandom::fill_array_co(float64_t* array, int32_t size) const 00180 { 00181 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00182 if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0) 00183 { 00184 dsfmt_fill_array_close_open(m_dsfmt, array, size); 00185 return; 00186 } 00187 #endif 00188 for (int32_t i=0; i < size; i++) 00189 array[i] = dsfmt_genrand_close_open(m_dsfmt); 00190 } 00191 00192 void CRandom::fill_array_oo(float64_t* array, int32_t size) const 00193 { 00194 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00195 if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0) 00196 { 00197 dsfmt_fill_array_open_open(m_dsfmt, array, size); 00198 return; 00199 } 00200 #endif 00201 for (int32_t i=0; i < size; i++) 00202 array[i] = dsfmt_genrand_open_open(m_dsfmt); 00203 } 00204 00205 void CRandom::fill_array_c1o2(float64_t* array, int32_t size) const 00206 { 00207 #if defined(USE_ALIGNED_MEMORY) || defined(DARWIN) 00208 if ((size >= dsfmt_get_min_array_size()) && (size % 2) == 0) 00209 { 00210 dsfmt_fill_array_close1_open2(m_dsfmt, array, size); 00211 return; 00212 } 00213 #endif 00214 for (int32_t i=0; i < size; i++) 00215 array[i] = dsfmt_genrand_close1_open2(m_dsfmt); 00216 } 00217 00218 float64_t CRandom::random_close() const 00219 { 00220 return sfmt_genrand_real1(m_sfmt_32); 00221 } 00222 00223 float64_t CRandom::random_open() const 00224 { 00225 return dsfmt_genrand_open_open(m_dsfmt); 00226 } 00227 00228 float64_t CRandom::random_half_open() const 00229 { 00230 return dsfmt_genrand_close_open(m_dsfmt); 00231 } 00232 00233 float64_t CRandom::normal_distrib(float64_t mu, float64_t sigma) const 00234 { 00235 return mu + (std_normal_distrib() * sigma); 00236 } 00237 00238 float64_t CRandom::std_normal_distrib() const 00239 { 00240 for (;;) 00241 { 00242 // Select box at random. 00243 uint8_t u = random_32(); 00244 int32_t i = (int32_t)(u & 0x7F); 00245 float64_t sign = ((u & 0x80) == 0) ? -1.0 : 1.0; 00246 00247 // Generate uniform random value with range [0,0xffffffff]. 00248 uint32_t u2 = random_32(); 00249 00250 // Special case for the base segment. 00251 if(0 == i) 00252 { 00253 if(u2 < m_xComp[0]) 00254 { 00255 // Generated x is within R0. 00256 return u2 * m_uint32ToU * m_A_div_y0 * sign; 00257 } 00258 // Generated x is in the tail of the distribution. 00259 return sample_tail() * sign; 00260 } 00261 00262 // All other segments. 00263 if(u2 < m_xComp[i]) 00264 { // Generated x is within the rectangle. 00265 return u2 * m_uint32ToU * m_x[i] * sign; 00266 } 00267 00268 // Generated x is outside of the rectangle. 00269 // Generate a random y coordinate and test if our (x,y) is within the distribution curve. 00270 // This execution path is relatively slow/expensive (makes a call to Math.Exp()) but relatively rarely executed, 00271 // although more often than the 'tail' path (above). 00272 float64_t x = u2 * m_uint32ToU * m_x[i]; 00273 if(m_y[i-1] + ((m_y[i] - m_y[i-1]) * random_half_open()) < GaussianPdfDenorm(x) ) { 00274 return x * sign; 00275 } 00276 } 00277 } 00278 00279 float64_t CRandom::sample_tail() const 00280 { 00281 float64_t x, y; 00282 do 00283 { 00284 x = -CMath::log(random_half_open()) / m_R; 00285 y = -CMath::log(random_half_open()); 00286 } while(y+y < x*x); 00287 return m_R + x; 00288 } 00289 00290 float64_t CRandom::GaussianPdfDenorm(float64_t x) const 00291 { 00292 return CMath::exp(-(x*x / 2.0)); 00293 } 00294 00295 float64_t CRandom::GaussianPdfDenormInv(float64_t y) const 00296 { 00297 // Operates over the y range (0,1], which happens to be the y range of the pdf, 00298 // with the exception that it does not include y=0, but we would never call with 00299 // y=0 so it doesn't matter. Remember that a Gaussian effectively has a tail going 00300 // off into x == infinity, hence asking what is x when y=0 is an invalid question 00301 // in the context of this class. 00302 return CMath::sqrt(-2.0 * CMath::log(y)); 00303 } 00304 00305 void CRandom::reinit(uint32_t seed) 00306 { 00307 m_state_lock.lock(); 00308 m_seed = seed; 00309 sfmt_init_gen_rand(m_sfmt_32, m_seed); 00310 sfmt_init_gen_rand(m_sfmt_64, m_seed); 00311 dsfmt_init_gen_rand(m_dsfmt, m_seed); 00312 m_state_lock.unlock(); 00313 } 00314 00315 uint32_t CRandom::generate_seed() 00316 { 00317 uint32_t seed; 00318 #if defined(_WIN32) 00319 rand_s(&seed); 00320 #elif defined(HAVE_ARC4RANDOM) 00321 seed = arc4random(); 00322 #elif defined(DEV_RANDOM) 00323 int fd = open(DEV_RANDOM, O_RDONLY); 00324 ASSERT(fd >= 0); 00325 ssize_t actual_read = 00326 read(fd, reinterpret_cast<char*>(&seed), sizeof(seed)); 00327 close(fd); 00328 ASSERT(actual_read == sizeof(seed)); 00329 #else 00330 SG_SWARNING("Not safe seed for the PRNG\n"); 00331 struct timeval tv; 00332 gettimeofday(&tv, NULL); 00333 seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec); 00334 #endif 00335 return seed; 00336 }