SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include <shogun/lib/config.h> 00012 #include <shogun/base/SGObject.h> 00013 #include <shogun/lib/common.h> 00014 #include <cmath> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/mathematics/lapack.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/lib/SGVector.h> 00019 00020 #include <stdio.h> 00021 #include <stdlib.h> 00022 #include <math.h> 00023 00024 #ifndef NAN 00025 #include <stdlib.h> 00026 #define NAN (strtod("NAN",NULL)) 00027 #endif 00028 00029 00030 using namespace shogun; 00031 00032 #ifdef USE_LOGCACHE 00033 #ifdef USE_HMMDEBUG 00034 #define MAX_LOG_TABLE_SIZE 10*1024*1024 00035 #define LOG_TABLE_PRECISION 1e-6 00036 #else //USE_HMMDEBUG 00037 #define MAX_LOG_TABLE_SIZE 123*1024*1024 00038 #define LOG_TABLE_PRECISION 1e-15 00039 #endif //USE_HMMDEBUG 00040 int32_t CMath::LOGACCURACY = 0; // 100000 steps per integer 00041 #endif // USE_LOGCACHE 00042 00043 int32_t CMath::LOGRANGE = 0; // range for logtable: log(1+exp(x)) -25 <= x <= 0 00044 00045 const float64_t CMath::NOT_A_NUMBER = NAN; 00046 const float64_t CMath::INFTY = INFINITY; // infinity 00047 const float64_t CMath::ALMOST_INFTY = +1e+20; //a large number 00048 const float64_t CMath::ALMOST_NEG_INFTY = -1000; 00049 const float64_t CMath::PI=M_PI; 00050 const float64_t CMath::MACHINE_EPSILON=5E-16; 00051 const float64_t CMath::MAX_REAL_NUMBER=1E300; 00052 const float64_t CMath::MIN_REAL_NUMBER=1E-300; 00053 00054 #ifdef USE_LOGCACHE 00055 float64_t* CMath::logtable = NULL; 00056 #endif 00057 uint32_t CMath::seed = 0; 00058 00059 CMath::CMath() 00060 : CSGObject() 00061 { 00062 #ifdef USE_LOGCACHE 00063 LOGRANGE=CMath::determine_logrange(); 00064 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE); 00065 CMath::logtable=SG_MALLOC(float64_t, LOGRANGE*LOGACCURACY); 00066 init_log_table(); 00067 #else 00068 int32_t i=0; 00069 while ((float64_t)log(1+((float64_t)exp(-float64_t(i))))) 00070 i++; 00071 00072 LOGRANGE=i; 00073 #endif 00074 } 00075 00076 CMath::~CMath() 00077 { 00078 #ifdef USE_LOGCACHE 00079 SG_FREE(CMath::logtable); 00080 CMath::logtable=NULL; 00081 #endif 00082 } 00083 00084 #ifdef USE_LOGCACHE 00085 int32_t CMath::determine_logrange() 00086 { 00087 int32_t i; 00088 float64_t acc=0; 00089 for (i=0; i<50; i++) 00090 { 00091 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i))))); 00092 if (acc<=(float64_t)LOG_TABLE_PRECISION) 00093 break; 00094 } 00095 00096 SG_SINFO("determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc) 00097 return i; 00098 } 00099 00100 int32_t CMath::determine_logaccuracy(int32_t range) 00101 { 00102 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t)); 00103 SG_SINFO("determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range) 00104 return range; 00105 } 00106 00107 //init log table of form log(1+exp(x)) 00108 void CMath::init_log_table() 00109 { 00110 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++) 00111 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY))); 00112 } 00113 #endif 00114 00115 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col) 00116 { 00117 int32_t changed=1; 00118 if (a[0]==-1) return ; 00119 while (changed) 00120 { 00121 changed=0; int32_t i=0 ; 00122 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure 00123 { 00124 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col]) 00125 { 00126 for (int32_t j=0; j<cols; j++) 00127 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ; 00128 changed=1 ; 00129 } ; 00130 i++ ; 00131 } ; 00132 } ; 00133 } 00134 00135 void CMath::sort(float64_t *a, int32_t* idx, int32_t N) 00136 { 00137 int32_t changed=1; 00138 while (changed) 00139 { 00140 changed=0; 00141 for (int32_t i=0; i<N-1; i++) 00142 { 00143 if (a[i]>a[i+1]) 00144 { 00145 swap(a[i],a[i+1]) ; 00146 swap(idx[i],idx[i+1]) ; 00147 changed=1 ; 00148 } ; 00149 } ; 00150 } ; 00151 00152 } 00153 00154 float64_t CMath::Align( 00155 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost) 00156 { 00157 float64_t actCost=0 ; 00158 int32_t i1, i2 ; 00159 float64_t* const gapCosts1 = SG_MALLOC(float64_t, l1 ); 00160 float64_t* const gapCosts2 = SG_MALLOC(float64_t, l2 ); 00161 float64_t* costs2_0 = SG_MALLOC(float64_t, l2 + 1 ); 00162 float64_t* costs2_1 = SG_MALLOC(float64_t, l2 + 1 ); 00163 00164 // initialize borders 00165 for( i1 = 0; i1 < l1; ++i1 ) { 00166 gapCosts1[ i1 ] = gapCost * i1; 00167 } 00168 costs2_1[ 0 ] = 0; 00169 for( i2 = 0; i2 < l2; ++i2 ) { 00170 gapCosts2[ i2 ] = gapCost * i2; 00171 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ]; 00172 } 00173 // compute alignment 00174 for( i1 = 0; i1 < l1; ++i1 ) { 00175 swap( costs2_0, costs2_1 ); 00176 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ]; 00177 costs2_1[ 0 ] = actCost; 00178 for( i2 = 0; i2 < l2; ++i2 ) { 00179 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] ); 00180 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ]; 00181 const float64_t actGap2 = actCost + gapCosts2[ i2 ]; 00182 const float64_t actGap = min( actGap1, actGap2 ); 00183 actCost = min( actMatch, actGap ); 00184 costs2_1[ i2+1 ] = actCost; 00185 } 00186 } 00187 00188 SG_FREE(gapCosts1); 00189 SG_FREE(gapCosts2); 00190 SG_FREE(costs2_0); 00191 SG_FREE(costs2_1); 00192 00193 // return the final cost 00194 return actCost; 00195 } 00196 00197 void CMath::linspace(float64_t* output, float64_t start, float64_t end, int32_t n) 00198 { 00199 float64_t delta = (end-start) / (n-1); 00200 float64_t v = start; 00201 index_t i = 0; 00202 while ( v <= end ) 00203 { 00204 output[i++] = v; 00205 v += delta; 00206 } 00207 output[n-1] = end; 00208 } 00209 00210 00211 int CMath::is_nan(double f) 00212 { 00213 #ifndef HAVE_STD_ISNAN 00214 #if (HAVE_DECL_ISNAN == 1) || defined(HAVE_ISNAN) 00215 return ::isnan(f); 00216 #else 00217 return ((f != f) ? 1 : 0); 00218 #endif // #if (HAVE_DECL_ISNAN == 1) || defined(HAVE_ISNAN) 00219 #endif // #ifndef HAVE_STD_ISNAN 00220 00221 return std::isnan(f); 00222 } 00223 00224 int CMath::is_infinity(double f) 00225 { 00226 #ifndef HAVE_STD_ISINF 00227 #if (HAVE_DECL_ISINF == 1) || defined(HAVE_ISINF) 00228 return ::isinf(f); 00229 #elif defined(FPCLASS) 00230 if (::fpclass(f) == FP_NINF) return -1; 00231 else if (::fpclass(f) == FP_PINF) return 1; 00232 else return 0; 00233 #else 00234 if ((f == f) && ((f - f) != 0.0)) return (f < 0.0 ? -1 : 1); 00235 else return 0; 00236 } 00237 #endif // #if (HAVE_DECL_ISINF == 1) || defined(HAVE_ISINF) 00238 #endif // #ifndef HAVE_STD_ISINF 00239 00240 return std::isinf(f); 00241 } 00242 00243 int CMath::is_finite(double f) 00244 { 00245 #ifndef HAVE_STD_ISFINITE 00246 #if (HAVE_DECL_ISFINITE == 1) || defined(HAVE_ISFINITE) 00247 return ::isfinite(f); 00248 #elif defined(HAVE_FINITE) 00249 return ::finite(f); 00250 #else 00251 return ((!std::isnan(f) && !std::isinf(f)) ? 1 : 0); 00252 #endif // #if (HAVE_DECL_ISFINITE == 1) || defined(HAVE_ISFINITE) 00253 #endif // #ifndef HAVE_STD_ISFINITE 00254 00255 return std::isfinite(f); 00256 } 00257 00258 bool CMath::strtof(const char* str, float32_t* float_result) 00259 { 00260 ASSERT(str); 00261 ASSERT(float_result); 00262 00263 SGVector<char> buf(strlen(str)+1); 00264 00265 for (index_t i=0; i<buf.vlen-1; i++) 00266 buf[i]=tolower(str[i]); 00267 buf[buf.vlen-1]='\0'; 00268 00269 if (strstr(buf, "inf") != NULL) 00270 { 00271 *float_result = CMath::INFTY; 00272 00273 if (strchr(buf,'-') != NULL) 00274 *float_result *= -1; 00275 return true; 00276 } 00277 00278 if (strstr(buf, "nan") != NULL) 00279 { 00280 *float_result = CMath::NOT_A_NUMBER; 00281 return true; 00282 } 00283 00284 char* endptr = buf.vector; 00285 *float_result=::strtof(str, &endptr); 00286 return endptr != buf.vector; 00287 } 00288 00289 bool CMath::strtod(const char* str, float64_t* double_result) 00290 { 00291 ASSERT(str); 00292 ASSERT(double_result); 00293 00294 SGVector<char> buf(strlen(str)+1); 00295 00296 for (index_t i=0; i<buf.vlen-1; i++) 00297 buf[i]=tolower(str[i]); 00298 buf[buf.vlen-1]='\0'; 00299 00300 if (strstr(buf, "inf") != NULL) 00301 { 00302 *double_result = CMath::INFTY; 00303 00304 if (strchr(buf,'-') != NULL) 00305 *double_result *= -1; 00306 return true; 00307 } 00308 00309 if (strstr(buf, "nan") != NULL) 00310 { 00311 *double_result = CMath::NOT_A_NUMBER; 00312 return true; 00313 } 00314 00315 char* endptr = buf.vector; 00316 *double_result=::strtod(str, &endptr); 00317 return endptr != buf.vector; 00318 } 00319 00320 bool CMath::strtold(const char* str, floatmax_t* long_double_result) 00321 { 00322 ASSERT(str); 00323 ASSERT(long_double_result); 00324 00325 SGVector<char> buf(strlen(str)+1); 00326 00327 for (index_t i=0; i<buf.vlen-1; i++) 00328 buf[i]=tolower(str[i]); 00329 buf[buf.vlen-1]='\0'; 00330 00331 if (strstr(buf, "inf") != NULL) 00332 { 00333 *long_double_result = CMath::INFTY; 00334 00335 if (strchr(buf,'-') != NULL) 00336 *long_double_result *= -1; 00337 return true; 00338 } 00339 00340 if (strstr(buf, "nan") != NULL) 00341 { 00342 *long_double_result = CMath::NOT_A_NUMBER; 00343 return true; 00344 } 00345 00346 char* endptr = buf.vector; 00347 00348 // fall back to double on win32 / cygwin since strtold is undefined there 00349 #if defined(WIN32) || defined(__CYGWIN__) 00350 *long_double_result=::strtod(str, &endptr); 00351 #else 00352 *long_double_result=::strtold(str, &endptr); 00353 #endif 00354 00355 return endptr != buf.vector; 00356 } 00357