SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011-2012 Heiko Strathmann 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 * 00010 * ALGLIB Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier under GPL2+ 00011 * http://www.alglib.net/ 00012 * See header file for which functions are taken from ALGLIB (with adjustments 00013 * for shogun) 00014 */ 00015 00016 #include <shogun/mathematics/Statistics.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/lib/SGMatrix.h> 00019 #include <shogun/lib/SGVector.h> 00020 #include <shogun/lib/SGSparseMatrix.h> 00021 #include <shogun/lib/SGSparseVector.h> 00022 00023 #ifdef HAVE_LAPACK 00024 #include <shogun/mathematics/lapack.h> 00025 #endif //HAVE_LAPACK 00026 00027 #ifdef HAVE_EIGEN3 00028 #include <shogun/mathematics/eigen3.h> 00029 using namespace Eigen; 00030 #endif //HAVE_EIGEN3 00031 00032 using namespace shogun; 00033 00034 float64_t CStatistics::mean(SGVector<float64_t> values) 00035 { 00036 ASSERT(values.vlen>0) 00037 ASSERT(values.vector) 00038 00039 float64_t sum=0; 00040 for (index_t i=0; i<values.vlen; ++i) 00041 sum+=values.vector[i]; 00042 00043 return sum/values.vlen; 00044 } 00045 00046 float64_t CStatistics::median(SGVector<float64_t> values, bool modify, 00047 bool in_place) 00048 { 00049 float64_t result; 00050 if (modify) 00051 { 00052 /* use QuickSelect method 00053 * This Quickselect routine is based on the algorithm described in 00054 * "Numerical recipes in C", Second Edition, 00055 * Cambridge University Press, 1992, Section 8.5, ISBN 0-521-43108-5 00056 * This code by Nicolas Devillard - 1998. Public domain. 00057 * Adapted to SHOGUN by Heiko Strathmann 00058 */ 00059 int32_t low; 00060 int32_t high; 00061 int32_t median; 00062 int32_t middle; 00063 int32_t l; 00064 int32_t h; 00065 00066 low=0; 00067 high=values.vlen-1; 00068 median=(low+high)/2; 00069 00070 while (true) 00071 { 00072 if (high<=low) 00073 { 00074 result=values[median]; 00075 break; 00076 } 00077 00078 if (high==low+1) 00079 { 00080 if (values[low]>values[high]) 00081 CMath::CMath::swap(values[low], values[high]); 00082 result=values[median]; 00083 break; 00084 } 00085 00086 middle=(low+high)/2; 00087 if (values[middle]>values[high]) 00088 CMath::swap(values[middle], values[high]); 00089 if (values[low]>values[high]) 00090 CMath::swap(values[low], values[high]); 00091 if (values[middle]>values[low]) 00092 CMath::swap(values[middle], values[low]); 00093 00094 CMath::swap(values[middle], values[low+1]); 00095 00096 l=low+1; 00097 h=high; 00098 for (;;) 00099 { 00100 do 00101 l++; 00102 while (values[low]>values[l]); 00103 do 00104 h--; 00105 while (values[h]>values[low]); 00106 if (h<l) 00107 break; 00108 CMath::swap(values[l], values[h]); 00109 } 00110 00111 CMath::swap(values[low], values[h]); 00112 if (h<=median) 00113 low=l; 00114 if (h>=median) 00115 high=h-1; 00116 } 00117 00118 } 00119 else 00120 { 00121 if (in_place) 00122 { 00123 /* use Torben method 00124 * The following code is public domain. 00125 * Algorithm by Torben Mogensen, implementation by N. Devillard. 00126 * This code in public domain. 00127 * Adapted to SHOGUN by Heiko Strathmann 00128 */ 00129 int32_t i; 00130 int32_t less; 00131 int32_t greater; 00132 int32_t equal; 00133 float64_t min; 00134 float64_t max; 00135 float64_t guess; 00136 float64_t maxltguess; 00137 float64_t mingtguess; 00138 min=max=values[0]; 00139 for (i=1; i<values.vlen; i++) 00140 { 00141 if (values[i]<min) 00142 min=values[i]; 00143 if (values[i]>max) 00144 max=values[i]; 00145 } 00146 while (1) 00147 { 00148 guess=(min+max)/2; 00149 less=0; 00150 greater=0; 00151 equal=0; 00152 maxltguess=min; 00153 mingtguess=max; 00154 for (i=0; i<values.vlen; i++) 00155 { 00156 if (values[i]<guess) 00157 { 00158 less++; 00159 if (values[i]>maxltguess) 00160 maxltguess=values[i]; 00161 } 00162 else if (values[i]>guess) 00163 { 00164 greater++; 00165 if (values[i]<mingtguess) 00166 mingtguess=values[i]; 00167 } 00168 else 00169 equal++; 00170 } 00171 if (less<=(values.vlen+1)/2&&greater<=(values.vlen+1)/2) 00172 break; 00173 else if (less>greater) 00174 max=maxltguess; 00175 else 00176 min=mingtguess; 00177 } 00178 00179 if (less>=(values.vlen+1)/2) 00180 result=maxltguess; 00181 else if (less+equal>=(values.vlen+1)/2) 00182 result=guess; 00183 else 00184 result=mingtguess; 00185 } 00186 else 00187 { 00188 /* copy vector and do recursive call which modifies copy */ 00189 SGVector<float64_t> copy(values.vlen); 00190 memcpy(copy.vector, values.vector, sizeof(float64_t)*values.vlen); 00191 result=median(copy, true); 00192 } 00193 } 00194 00195 return result; 00196 } 00197 00198 float64_t CStatistics::matrix_median(SGMatrix<float64_t> values, 00199 bool modify, bool in_place) 00200 { 00201 /* create a vector that uses the matrix data, dont do reference counting */ 00202 SGVector<float64_t> as_vector(values.matrix, 00203 values.num_rows*values.num_cols, false); 00204 00205 /* return vector median method */ 00206 return median(as_vector, modify, in_place); 00207 } 00208 00209 00210 float64_t CStatistics::variance(SGVector<float64_t> values) 00211 { 00212 ASSERT(values.vlen>1) 00213 ASSERT(values.vector) 00214 00215 float64_t mean=CStatistics::mean(values); 00216 00217 float64_t sum_squared_diff=0; 00218 for (index_t i=0; i<values.vlen; ++i) 00219 sum_squared_diff+=CMath::pow(values.vector[i]-mean, 2); 00220 00221 return sum_squared_diff/(values.vlen-1); 00222 } 00223 00224 SGVector<float64_t> CStatistics::matrix_mean(SGMatrix<float64_t> values, 00225 bool col_wise) 00226 { 00227 ASSERT(values.num_rows>0) 00228 ASSERT(values.num_cols>0) 00229 ASSERT(values.matrix) 00230 00231 SGVector<float64_t> result; 00232 00233 if (col_wise) 00234 { 00235 result=SGVector<float64_t>(values.num_cols); 00236 for (index_t j=0; j<values.num_cols; ++j) 00237 { 00238 result[j]=0; 00239 for (index_t i=0; i<values.num_rows; ++i) 00240 result[j]+=values(i,j); 00241 00242 result[j]/=values.num_rows; 00243 } 00244 } 00245 else 00246 { 00247 result=SGVector<float64_t>(values.num_rows); 00248 for (index_t j=0; j<values.num_rows; ++j) 00249 { 00250 result[j]=0; 00251 for (index_t i=0; i<values.num_cols; ++i) 00252 result[j]+=values(j,i); 00253 00254 result[j]/=values.num_cols; 00255 } 00256 } 00257 00258 return result; 00259 } 00260 00261 SGVector<float64_t> CStatistics::matrix_variance(SGMatrix<float64_t> values, 00262 bool col_wise) 00263 { 00264 ASSERT(values.num_rows>0) 00265 ASSERT(values.num_cols>0) 00266 ASSERT(values.matrix) 00267 00268 /* first compute mean */ 00269 SGVector<float64_t> mean=CStatistics::matrix_mean(values, col_wise); 00270 00271 SGVector<float64_t> result; 00272 00273 if (col_wise) 00274 { 00275 result=SGVector<float64_t>(values.num_cols); 00276 for (index_t j=0; j<values.num_cols; ++j) 00277 { 00278 result[j]=0; 00279 for (index_t i=0; i<values.num_rows; ++i) 00280 result[j]+=CMath::pow(values(i,j)-mean[j], 2); 00281 00282 result[j]/=(values.num_rows-1); 00283 } 00284 } 00285 else 00286 { 00287 result=SGVector<float64_t>(values.num_rows); 00288 for (index_t j=0; j<values.num_rows; ++j) 00289 { 00290 result[j]=0; 00291 for (index_t i=0; i<values.num_cols; ++i) 00292 result[j]+=CMath::pow(values(j,i)-mean[j], 2); 00293 00294 result[j]/=(values.num_cols-1); 00295 } 00296 } 00297 00298 return result; 00299 } 00300 00301 float64_t CStatistics::std_deviation(SGVector<float64_t> values) 00302 { 00303 return CMath::sqrt(variance(values)); 00304 } 00305 00306 SGVector<float64_t> CStatistics::matrix_std_deviation( 00307 SGMatrix<float64_t> values, bool col_wise) 00308 { 00309 SGVector<float64_t> var=CStatistics::matrix_variance(values, col_wise); 00310 for (index_t i=0; i<var.vlen; ++i) 00311 var[i]=CMath::sqrt(var[i]); 00312 00313 return var; 00314 } 00315 00316 #ifdef HAVE_LAPACK 00317 SGMatrix<float64_t> CStatistics::covariance_matrix( 00318 SGMatrix<float64_t> observations, bool in_place) 00319 { 00320 SGMatrix<float64_t> centered= 00321 in_place ? 00322 observations : 00323 SGMatrix<float64_t>(observations.num_rows, 00324 observations.num_cols); 00325 00326 if (!in_place) 00327 { 00328 memcpy(centered.matrix, observations.matrix, 00329 sizeof(float64_t)*observations.num_rows*observations.num_cols); 00330 } 00331 centered.remove_column_mean(); 00332 00333 /* compute 1/(m-1) * X' * X */ 00334 SGMatrix<float64_t> cov=SGMatrix<float64_t>::matrix_multiply(centered, 00335 centered, true, false, 1.0/(observations.num_rows-1)); 00336 00337 return cov; 00338 } 00339 #endif //HAVE_LAPACK 00340 00341 float64_t CStatistics::confidence_intervals_mean(SGVector<float64_t> values, 00342 float64_t alpha, float64_t& conf_int_low, float64_t& conf_int_up) 00343 { 00344 ASSERT(values.vlen>1) 00345 ASSERT(values.vector) 00346 00347 /* using one sided student t distribution evaluation */ 00348 alpha=alpha/2; 00349 00350 /* degrees of freedom */ 00351 int32_t deg=values.vlen-1; 00352 00353 /* compute absolute value of t-value */ 00354 float64_t t=CMath::abs(inverse_student_t(deg, alpha)); 00355 00356 /* values for calculating confidence interval */ 00357 float64_t std_dev=CStatistics::std_deviation(values); 00358 float64_t mean=CStatistics::mean(values); 00359 00360 /* compute confidence interval */ 00361 float64_t interval=t*std_dev/CMath::sqrt((float64_t)values.vlen); 00362 conf_int_low=mean-interval; 00363 conf_int_up=mean+interval; 00364 00365 return mean; 00366 } 00367 00368 float64_t CStatistics::inverse_student_t(int32_t k, float64_t p) 00369 { 00370 float64_t t; 00371 float64_t rk; 00372 float64_t z; 00373 int32_t rflg; 00374 float64_t result; 00375 00376 if (!(k>0 && greater(p, 0)) && less(p, 1)) 00377 { 00378 SG_SERROR("CStatistics::inverse_student_t_distribution(): " 00379 "Domain error\n"); 00380 } 00381 rk=k; 00382 if (greater(p, 0.25) && less(p, 0.75)) 00383 { 00384 if (equal(p, 0.5)) 00385 { 00386 result=0; 00387 return result; 00388 } 00389 z=1.0-2.0*p; 00390 z=inverse_incomplete_beta(0.5, 0.5*rk, CMath::abs(z)); 00391 t=CMath::sqrt(rk*z/(1.0-z)); 00392 if (less(p, 0.5)) 00393 { 00394 t=-t; 00395 } 00396 result=t; 00397 return result; 00398 } 00399 rflg=-1; 00400 if (greater_equal(p, 0.5)) 00401 { 00402 p=1.0-p; 00403 rflg=1; 00404 } 00405 z=inverse_incomplete_beta(0.5*rk, 0.5, 2.0*p); 00406 if (less(CMath::MAX_REAL_NUMBER*z, rk)) 00407 { 00408 result=rflg*CMath::MAX_REAL_NUMBER; 00409 return result; 00410 } 00411 t=CMath::sqrt(rk/z-rk); 00412 result=rflg*t; 00413 return result; 00414 } 00415 00416 float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b, 00417 float64_t y) 00418 { 00419 float64_t aaa; 00420 float64_t bbb; 00421 float64_t y0; 00422 float64_t d; 00423 float64_t yyy; 00424 float64_t x; 00425 float64_t x0; 00426 float64_t x1; 00427 float64_t lgm; 00428 float64_t yp; 00429 float64_t di; 00430 float64_t dithresh; 00431 float64_t yl; 00432 float64_t yh; 00433 float64_t xt; 00434 int32_t i; 00435 int32_t rflg; 00436 int32_t dir; 00437 int32_t nflg; 00438 int32_t mainlooppos; 00439 int32_t ihalve; 00440 int32_t ihalvecycle; 00441 int32_t newt; 00442 int32_t newtcycle; 00443 int32_t breaknewtcycle; 00444 int32_t breakihalvecycle; 00445 float64_t result; 00446 00447 i=0; 00448 if (!(greater_equal(y, 0) && less_equal(y, 1))) 00449 { 00450 SG_SERROR("CStatistics::inverse_incomplete_beta(): " 00451 "Domain error\n"); 00452 } 00453 00454 /* 00455 * special cases 00456 */ 00457 if (equal(y, 0)) 00458 { 00459 result=0; 00460 return result; 00461 } 00462 if (equal(y, 1.0)) 00463 { 00464 result=1; 00465 return result; 00466 } 00467 00468 /* 00469 * these initializations are not really necessary, 00470 * but without them compiler complains about 'possibly uninitialized variables'. 00471 */ 00472 dithresh=0; 00473 rflg=0; 00474 aaa=0; 00475 bbb=0; 00476 y0=0; 00477 x=0; 00478 yyy=0; 00479 lgm=0; 00480 dir=0; 00481 di=0; 00482 00483 /* 00484 * normal initializations 00485 */ 00486 x0=0.0; 00487 yl=0.0; 00488 x1=1.0; 00489 yh=1.0; 00490 nflg=0; 00491 mainlooppos=0; 00492 ihalve=1; 00493 ihalvecycle=2; 00494 newt=3; 00495 newtcycle=4; 00496 breaknewtcycle=5; 00497 breakihalvecycle=6; 00498 00499 /* 00500 * main loop 00501 */ 00502 for (;;) 00503 { 00504 00505 /* 00506 * start 00507 */ 00508 if (mainlooppos==0) 00509 { 00510 if (less_equal(a, 1.0) || less_equal(b, 1.0)) 00511 { 00512 dithresh=1.0e-6; 00513 rflg=0; 00514 aaa=a; 00515 bbb=b; 00516 y0=y; 00517 x=aaa/(aaa+bbb); 00518 yyy=incomplete_beta(aaa, bbb, x); 00519 mainlooppos=ihalve; 00520 continue; 00521 } 00522 else 00523 { 00524 dithresh=1.0e-4; 00525 } 00526 yp=-inverse_normal_cdf(y); 00527 if (greater(y, 0.5)) 00528 { 00529 rflg=1; 00530 aaa=b; 00531 bbb=a; 00532 y0=1.0-y; 00533 yp=-yp; 00534 } 00535 else 00536 { 00537 rflg=0; 00538 aaa=a; 00539 bbb=b; 00540 y0=y; 00541 } 00542 lgm=(yp*yp-3.0)/6.0; 00543 x=2.0/(1.0/(2.0*aaa-1.0)+1.0/(2.0*bbb-1.0)); 00544 d=yp*CMath::sqrt(x+lgm)/x 00545 -(1.0/(2.0*bbb-1.0)-1.0/(2.0*aaa-1.0)) 00546 *(lgm+5.0/6.0-2.0/(3.0*x)); 00547 d=2.0*d; 00548 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER))) 00549 { 00550 x=0; 00551 break; 00552 } 00553 x=aaa/(aaa+bbb*CMath::exp(d)); 00554 yyy=incomplete_beta(aaa, bbb, x); 00555 yp=(yyy-y0)/y0; 00556 if (less(CMath::abs(yp), 0.2)) 00557 { 00558 mainlooppos=newt; 00559 continue; 00560 } 00561 mainlooppos=ihalve; 00562 continue; 00563 } 00564 00565 /* 00566 * ihalve 00567 */ 00568 if (mainlooppos==ihalve) 00569 { 00570 dir=0; 00571 di=0.5; 00572 i=0; 00573 mainlooppos=ihalvecycle; 00574 continue; 00575 } 00576 00577 /* 00578 * ihalvecycle 00579 */ 00580 if (mainlooppos==ihalvecycle) 00581 { 00582 if (i<=99) 00583 { 00584 if (i!=0) 00585 { 00586 x=x0+di*(x1-x0); 00587 if (equal(x, 1.0)) 00588 { 00589 x=1.0-CMath::MACHINE_EPSILON; 00590 } 00591 if (equal(x, 0.0)) 00592 { 00593 di=0.5; 00594 x=x0+di*(x1-x0); 00595 if (equal(x, 0.0)) 00596 { 00597 break; 00598 } 00599 } 00600 yyy=incomplete_beta(aaa, bbb, x); 00601 yp=(x1-x0)/(x1+x0); 00602 if (less(CMath::abs(yp), dithresh)) 00603 { 00604 mainlooppos=newt; 00605 continue; 00606 } 00607 yp=(yyy-y0)/y0; 00608 if (less(CMath::abs(yp), dithresh)) 00609 { 00610 mainlooppos=newt; 00611 continue; 00612 } 00613 } 00614 if (less(yyy, y0)) 00615 { 00616 x0=x; 00617 yl=yyy; 00618 if (dir<0) 00619 { 00620 dir=0; 00621 di=0.5; 00622 } 00623 else 00624 { 00625 if (dir>3) 00626 { 00627 di=1.0-(1.0-di)*(1.0-di); 00628 } 00629 else 00630 { 00631 if (dir>1) 00632 { 00633 di=0.5*di+0.5; 00634 } 00635 else 00636 { 00637 di=(y0-yyy)/(yh-yl); 00638 } 00639 } 00640 } 00641 dir=dir+1; 00642 if (greater(x0, 0.75)) 00643 { 00644 if (rflg==1) 00645 { 00646 rflg=0; 00647 aaa=a; 00648 bbb=b; 00649 y0=y; 00650 } 00651 else 00652 { 00653 rflg=1; 00654 aaa=b; 00655 bbb=a; 00656 y0=1.0-y; 00657 } 00658 x=1.0-x; 00659 yyy=incomplete_beta(aaa, bbb, x); 00660 x0=0.0; 00661 yl=0.0; 00662 x1=1.0; 00663 yh=1.0; 00664 mainlooppos=ihalve; 00665 continue; 00666 } 00667 } 00668 else 00669 { 00670 x1=x; 00671 if (rflg==1 && less(x1, CMath::MACHINE_EPSILON)) 00672 { 00673 x=0.0; 00674 break; 00675 } 00676 yh=yyy; 00677 if (dir>0) 00678 { 00679 dir=0; 00680 di=0.5; 00681 } 00682 else 00683 { 00684 if (dir<-3) 00685 { 00686 di=di*di; 00687 } 00688 else 00689 { 00690 if (dir<-1) 00691 { 00692 di=0.5*di; 00693 } 00694 else 00695 { 00696 di=(yyy-y0)/(yh-yl); 00697 } 00698 } 00699 } 00700 dir=dir-1; 00701 } 00702 i=i+1; 00703 mainlooppos=ihalvecycle; 00704 continue; 00705 } 00706 else 00707 { 00708 mainlooppos=breakihalvecycle; 00709 continue; 00710 } 00711 } 00712 00713 /* 00714 * breakihalvecycle 00715 */ 00716 if (mainlooppos==breakihalvecycle) 00717 { 00718 if (greater_equal(x0, 1.0)) 00719 { 00720 x=1.0-CMath::MACHINE_EPSILON; 00721 break; 00722 } 00723 if (less_equal(x, 0.0)) 00724 { 00725 x=0.0; 00726 break; 00727 } 00728 mainlooppos=newt; 00729 continue; 00730 } 00731 00732 /* 00733 * newt 00734 */ 00735 if (mainlooppos==newt) 00736 { 00737 if (nflg!=0) 00738 { 00739 break; 00740 } 00741 nflg=1; 00742 lgm=lgamma(aaa+bbb)-lgamma(aaa)-lgamma(bbb); 00743 i=0; 00744 mainlooppos=newtcycle; 00745 continue; 00746 } 00747 00748 /* 00749 * newtcycle 00750 */ 00751 if (mainlooppos==newtcycle) 00752 { 00753 if (i<=7) 00754 { 00755 if (i!=0) 00756 { 00757 yyy=incomplete_beta(aaa, bbb, x); 00758 } 00759 if (less(yyy, yl)) 00760 { 00761 x=x0; 00762 yyy=yl; 00763 } 00764 else 00765 { 00766 if (greater(yyy, yh)) 00767 { 00768 x=x1; 00769 yyy=yh; 00770 } 00771 else 00772 { 00773 if (less(yyy, y0)) 00774 { 00775 x0=x; 00776 yl=yyy; 00777 } 00778 else 00779 { 00780 x1=x; 00781 yh=yyy; 00782 } 00783 } 00784 } 00785 if (equal(x, 1.0) || equal(x, 0.0)) 00786 { 00787 mainlooppos=breaknewtcycle; 00788 continue; 00789 } 00790 d=(aaa-1.0)*CMath::log(x)+(bbb-1.0)*CMath::log(1.0-x)+lgm; 00791 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER))) 00792 { 00793 break; 00794 } 00795 if (greater(d, CMath::log(CMath::MAX_REAL_NUMBER))) 00796 { 00797 mainlooppos=breaknewtcycle; 00798 continue; 00799 } 00800 d=CMath::exp(d); 00801 d=(yyy-y0)/d; 00802 xt=x-d; 00803 if (less_equal(xt, x0)) 00804 { 00805 yyy=(x-x0)/(x1-x0); 00806 xt=x0+0.5*yyy*(x-x0); 00807 if (less_equal(xt, 0.0)) 00808 { 00809 mainlooppos=breaknewtcycle; 00810 continue; 00811 } 00812 } 00813 if (greater_equal(xt, x1)) 00814 { 00815 yyy=(x1-x)/(x1-x0); 00816 xt=x1-0.5*yyy*(x1-x); 00817 if (greater_equal(xt, 1.0)) 00818 { 00819 mainlooppos=breaknewtcycle; 00820 continue; 00821 } 00822 } 00823 x=xt; 00824 if (less(CMath::abs(d/x), 128.0*CMath::MACHINE_EPSILON)) 00825 { 00826 break; 00827 } 00828 i=i+1; 00829 mainlooppos=newtcycle; 00830 continue; 00831 } 00832 else 00833 { 00834 mainlooppos=breaknewtcycle; 00835 continue; 00836 } 00837 } 00838 00839 /* 00840 * breaknewtcycle 00841 */ 00842 if (mainlooppos==breaknewtcycle) 00843 { 00844 dithresh=256.0*CMath::MACHINE_EPSILON; 00845 mainlooppos=ihalve; 00846 continue; 00847 } 00848 } 00849 00850 /* 00851 * done 00852 */ 00853 if (rflg!=0) 00854 { 00855 if (less_equal(x, CMath::MACHINE_EPSILON)) 00856 { 00857 x=1.0-CMath::MACHINE_EPSILON; 00858 } 00859 else 00860 { 00861 x=1.0-x; 00862 } 00863 } 00864 result=x; 00865 return result; 00866 } 00867 00868 float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x) 00869 { 00870 float64_t t; 00871 float64_t xc; 00872 float64_t w; 00873 float64_t y; 00874 int32_t flag; 00875 float64_t big; 00876 float64_t biginv; 00877 float64_t maxgam; 00878 float64_t minlog; 00879 float64_t maxlog; 00880 float64_t result; 00881 00882 big=4.503599627370496e15; 00883 biginv=2.22044604925031308085e-16; 00884 maxgam=171.624376956302725; 00885 minlog=CMath::log(CMath::MIN_REAL_NUMBER); 00886 maxlog=CMath::log(CMath::MAX_REAL_NUMBER); 00887 00888 if (!(greater(a, 0) && greater(b, 0))) 00889 { 00890 SG_SERROR("CStatistics::incomplete_beta(): " 00891 "Domain error\n"); 00892 } 00893 00894 if (!(greater_equal(x, 0) && less_equal(x, 1))) 00895 { 00896 SG_SERROR("CStatistics::incomplete_beta(): " 00897 "Domain error\n"); 00898 } 00899 00900 if (equal(x, 0)) 00901 { 00902 result=0; 00903 return result; 00904 } 00905 if (equal(x, 1)) 00906 { 00907 result=1; 00908 return result; 00909 } 00910 flag=0; 00911 if (less_equal(b*x, 1.0) && less_equal(x, 0.95)) 00912 { 00913 result=ibetaf_incompletebetaps(a, b, x, maxgam); 00914 return result; 00915 } 00916 w=1.0-x; 00917 if (greater(x, a/(a+b))) 00918 { 00919 flag=1; 00920 t=a; 00921 a=b; 00922 b=t; 00923 xc=x; 00924 x=w; 00925 } 00926 else 00927 { 00928 xc=w; 00929 } 00930 if ((flag==1 && less_equal(b*x, 1.0)) && less_equal(x, 0.95)) 00931 { 00932 t=ibetaf_incompletebetaps(a, b, x, maxgam); 00933 if (less_equal(t, CMath::MACHINE_EPSILON)) 00934 { 00935 result=1.0-CMath::MACHINE_EPSILON; 00936 } 00937 else 00938 { 00939 result=1.0-t; 00940 } 00941 return result; 00942 } 00943 y=x*(a+b-2.0)-(a-1.0); 00944 if (less(y, 0.0)) 00945 { 00946 w=ibetaf_incompletebetafe(a, b, x, big, biginv); 00947 } 00948 else 00949 { 00950 w=ibetaf_incompletebetafe2(a, b, x, big, biginv)/xc; 00951 } 00952 y=a*CMath::log(x); 00953 t=b*CMath::log(xc); 00954 if ((less(a+b, maxgam) && less(CMath::abs(y), maxlog)) 00955 && less(CMath::abs(t), maxlog)) 00956 { 00957 t=CMath::pow(xc, b); 00958 t=t*CMath::pow(x, a); 00959 t=t/a; 00960 t=t*w; 00961 t=t*(tgamma(a+b)/(tgamma(a)*tgamma(b))); 00962 if (flag==1) 00963 { 00964 if (less_equal(t, CMath::MACHINE_EPSILON)) 00965 { 00966 result=1.0-CMath::MACHINE_EPSILON; 00967 } 00968 else 00969 { 00970 result=1.0-t; 00971 } 00972 } 00973 else 00974 { 00975 result=t; 00976 } 00977 return result; 00978 } 00979 y=y+t+lgamma(a+b)-lgamma(a)-lgamma(b); 00980 y=y+CMath::log(w/a); 00981 if (less(y, minlog)) 00982 { 00983 t=0.0; 00984 } 00985 else 00986 { 00987 t=CMath::exp(y); 00988 } 00989 if (flag==1) 00990 { 00991 if (less_equal(t, CMath::MACHINE_EPSILON)) 00992 { 00993 t=1.0-CMath::MACHINE_EPSILON; 00994 } 00995 else 00996 { 00997 t=1.0-t; 00998 } 00999 } 01000 result=t; 01001 return result; 01002 } 01003 01004 float64_t CStatistics::inverse_normal_cdf(float64_t y0, float64_t mean, 01005 float64_t std_dev) 01006 { 01007 return inverse_normal_cdf(y0)*std_dev+mean; 01008 } 01009 01010 float64_t CStatistics::inverse_normal_cdf(float64_t y0) 01011 { 01012 float64_t expm2; 01013 float64_t s2pi; 01014 float64_t x; 01015 float64_t y; 01016 float64_t z; 01017 float64_t y2; 01018 float64_t x0; 01019 float64_t x1; 01020 int32_t code; 01021 float64_t p0; 01022 float64_t q0; 01023 float64_t p1; 01024 float64_t q1; 01025 float64_t p2; 01026 float64_t q2; 01027 float64_t result; 01028 01029 expm2=0.13533528323661269189; 01030 s2pi=2.50662827463100050242; 01031 if (less_equal(y0, 0)) 01032 { 01033 result=-CMath::MAX_REAL_NUMBER; 01034 return result; 01035 } 01036 if (greater_equal(y0, 1)) 01037 { 01038 result=CMath::MAX_REAL_NUMBER; 01039 return result; 01040 } 01041 code=1; 01042 y=y0; 01043 if (greater(y, 1.0-expm2)) 01044 { 01045 y=1.0-y; 01046 code=0; 01047 } 01048 if (greater(y, expm2)) 01049 { 01050 y=y-0.5; 01051 y2=y*y; 01052 p0=-59.9633501014107895267; 01053 p0=98.0010754185999661536+y2*p0; 01054 p0=-56.6762857469070293439+y2*p0; 01055 p0=13.9312609387279679503+y2*p0; 01056 p0=-1.23916583867381258016+y2*p0; 01057 q0=1; 01058 q0=1.95448858338141759834+y2*q0; 01059 q0=4.67627912898881538453+y2*q0; 01060 q0=86.3602421390890590575+y2*q0; 01061 q0=-225.462687854119370527+y2*q0; 01062 q0=200.260212380060660359+y2*q0; 01063 q0=-82.0372256168333339912+y2*q0; 01064 q0=15.9056225126211695515+y2*q0; 01065 q0=-1.18331621121330003142+y2*q0; 01066 x=y+y*y2*p0/q0; 01067 x=x*s2pi; 01068 result=x; 01069 return result; 01070 } 01071 x=CMath::sqrt(-2.0*CMath::log(y)); 01072 x0=x-CMath::log(x)/x; 01073 z=1.0/x; 01074 if (less(x, 8.0)) 01075 { 01076 p1=4.05544892305962419923; 01077 p1=31.5251094599893866154+z*p1; 01078 p1=57.1628192246421288162+z*p1; 01079 p1=44.0805073893200834700+z*p1; 01080 p1=14.6849561928858024014+z*p1; 01081 p1=2.18663306850790267539+z*p1; 01082 p1=-1.40256079171354495875*0.1+z*p1; 01083 p1=-3.50424626827848203418*0.01+z*p1; 01084 p1=-8.57456785154685413611*0.0001+z*p1; 01085 q1=1; 01086 q1=15.7799883256466749731+z*q1; 01087 q1=45.3907635128879210584+z*q1; 01088 q1=41.3172038254672030440+z*q1; 01089 q1=15.0425385692907503408+z*q1; 01090 q1=2.50464946208309415979+z*q1; 01091 q1=-1.42182922854787788574*0.1+z*q1; 01092 q1=-3.80806407691578277194*0.01+z*q1; 01093 q1=-9.33259480895457427372*0.0001+z*q1; 01094 x1=z*p1/q1; 01095 } 01096 else 01097 { 01098 p2=3.23774891776946035970; 01099 p2=6.91522889068984211695+z*p2; 01100 p2=3.93881025292474443415+z*p2; 01101 p2=1.33303460815807542389+z*p2; 01102 p2=2.01485389549179081538*0.1+z*p2; 01103 p2=1.23716634817820021358*0.01+z*p2; 01104 p2=3.01581553508235416007*0.0001+z*p2; 01105 p2=2.65806974686737550832*0.000001+z*p2; 01106 p2=6.23974539184983293730*0.000000001+z*p2; 01107 q2=1; 01108 q2=6.02427039364742014255+z*q2; 01109 q2=3.67983563856160859403+z*q2; 01110 q2=1.37702099489081330271+z*q2; 01111 q2=2.16236993594496635890*0.1+z*q2; 01112 q2=1.34204006088543189037*0.01+z*q2; 01113 q2=3.28014464682127739104*0.0001+z*q2; 01114 q2=2.89247864745380683936*0.000001+z*q2; 01115 q2=6.79019408009981274425*0.000000001+z*q2; 01116 x1=z*p2/q2; 01117 } 01118 x=x0-x1; 01119 if (code!=0) 01120 { 01121 x=-x; 01122 } 01123 result=x; 01124 return result; 01125 } 01126 01127 float64_t CStatistics::ibetaf_incompletebetaps(float64_t a, float64_t b, 01128 float64_t x, float64_t maxgam) 01129 { 01130 float64_t s; 01131 float64_t t; 01132 float64_t u; 01133 float64_t v; 01134 float64_t n; 01135 float64_t t1; 01136 float64_t z; 01137 float64_t ai; 01138 float64_t result; 01139 01140 ai=1.0/a; 01141 u=(1.0-b)*x; 01142 v=u/(a+1.0); 01143 t1=v; 01144 t=u; 01145 n=2.0; 01146 s=0.0; 01147 z=CMath::MACHINE_EPSILON*ai; 01148 while (greater(CMath::abs(v), z)) 01149 { 01150 u=(n-b)*x/n; 01151 t=t*u; 01152 v=t/(a+n); 01153 s=s+v; 01154 n=n+1.0; 01155 } 01156 s=s+t1; 01157 s=s+ai; 01158 u=a*CMath::log(x); 01159 if (less(a+b, maxgam) 01160 && less(CMath::abs(u), CMath::log(CMath::MAX_REAL_NUMBER))) 01161 { 01162 t=tgamma(a+b)/(tgamma(a)*tgamma(b)); 01163 s=s*t*CMath::pow(x, a); 01164 } 01165 else 01166 { 01167 t=lgamma(a+b)-lgamma(a)-lgamma(b)+u+CMath::log(s); 01168 if (less(t, CMath::log(CMath::MIN_REAL_NUMBER))) 01169 { 01170 s=0.0; 01171 } 01172 else 01173 { 01174 s=CMath::exp(t); 01175 } 01176 } 01177 result=s; 01178 return result; 01179 } 01180 01181 float64_t CStatistics::ibetaf_incompletebetafe(float64_t a, float64_t b, 01182 float64_t x, float64_t big, float64_t biginv) 01183 { 01184 float64_t xk; 01185 float64_t pk; 01186 float64_t pkm1; 01187 float64_t pkm2; 01188 float64_t qk; 01189 float64_t qkm1; 01190 float64_t qkm2; 01191 float64_t k1; 01192 float64_t k2; 01193 float64_t k3; 01194 float64_t k4; 01195 float64_t k5; 01196 float64_t k6; 01197 float64_t k7; 01198 float64_t k8; 01199 float64_t r; 01200 float64_t t; 01201 float64_t ans; 01202 float64_t thresh; 01203 int32_t n; 01204 float64_t result; 01205 01206 k1=a; 01207 k2=a+b; 01208 k3=a; 01209 k4=a+1.0; 01210 k5=1.0; 01211 k6=b-1.0; 01212 k7=k4; 01213 k8=a+2.0; 01214 pkm2=0.0; 01215 qkm2=1.0; 01216 pkm1=1.0; 01217 qkm1=1.0; 01218 ans=1.0; 01219 r=1.0; 01220 n=0; 01221 thresh=3.0*CMath::MACHINE_EPSILON; 01222 do 01223 { 01224 xk=-x*k1*k2/(k3*k4); 01225 pk=pkm1+pkm2*xk; 01226 qk=qkm1+qkm2*xk; 01227 pkm2=pkm1; 01228 pkm1=pk; 01229 qkm2=qkm1; 01230 qkm1=qk; 01231 xk=x*k5*k6/(k7*k8); 01232 pk=pkm1+pkm2*xk; 01233 qk=qkm1+qkm2*xk; 01234 pkm2=pkm1; 01235 pkm1=pk; 01236 qkm2=qkm1; 01237 qkm1=qk; 01238 if (not_equal(qk, 0)) 01239 { 01240 r=pk/qk; 01241 } 01242 if (not_equal(r, 0)) 01243 { 01244 t=CMath::abs((ans-r)/r); 01245 ans=r; 01246 } 01247 else 01248 { 01249 t=1.0; 01250 } 01251 if (less(t, thresh)) 01252 { 01253 break; 01254 } 01255 k1=k1+1.0; 01256 k2=k2+1.0; 01257 k3=k3+2.0; 01258 k4=k4+2.0; 01259 k5=k5+1.0; 01260 k6=k6-1.0; 01261 k7=k7+2.0; 01262 k8=k8+2.0; 01263 if (greater(CMath::abs(qk)+CMath::abs(pk), big)) 01264 { 01265 pkm2=pkm2*biginv; 01266 pkm1=pkm1*biginv; 01267 qkm2=qkm2*biginv; 01268 qkm1=qkm1*biginv; 01269 } 01270 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv)) 01271 { 01272 pkm2=pkm2*big; 01273 pkm1=pkm1*big; 01274 qkm2=qkm2*big; 01275 qkm1=qkm1*big; 01276 } 01277 n=n+1; 01278 } 01279 while (n!=300); 01280 result=ans; 01281 return result; 01282 } 01283 01284 float64_t CStatistics::ibetaf_incompletebetafe2(float64_t a, float64_t b, 01285 float64_t x, float64_t big, float64_t biginv) 01286 { 01287 float64_t xk; 01288 float64_t pk; 01289 float64_t pkm1; 01290 float64_t pkm2; 01291 float64_t qk; 01292 float64_t qkm1; 01293 float64_t qkm2; 01294 float64_t k1; 01295 float64_t k2; 01296 float64_t k3; 01297 float64_t k4; 01298 float64_t k5; 01299 float64_t k6; 01300 float64_t k7; 01301 float64_t k8; 01302 float64_t r; 01303 float64_t t; 01304 float64_t ans; 01305 float64_t z; 01306 float64_t thresh; 01307 int32_t n; 01308 float64_t result; 01309 01310 k1=a; 01311 k2=b-1.0; 01312 k3=a; 01313 k4=a+1.0; 01314 k5=1.0; 01315 k6=a+b; 01316 k7=a+1.0; 01317 k8=a+2.0; 01318 pkm2=0.0; 01319 qkm2=1.0; 01320 pkm1=1.0; 01321 qkm1=1.0; 01322 z=x/(1.0-x); 01323 ans=1.0; 01324 r=1.0; 01325 n=0; 01326 thresh=3.0*CMath::MACHINE_EPSILON; 01327 do 01328 { 01329 xk=-z*k1*k2/(k3*k4); 01330 pk=pkm1+pkm2*xk; 01331 qk=qkm1+qkm2*xk; 01332 pkm2=pkm1; 01333 pkm1=pk; 01334 qkm2=qkm1; 01335 qkm1=qk; 01336 xk=z*k5*k6/(k7*k8); 01337 pk=pkm1+pkm2*xk; 01338 qk=qkm1+qkm2*xk; 01339 pkm2=pkm1; 01340 pkm1=pk; 01341 qkm2=qkm1; 01342 qkm1=qk; 01343 if (not_equal(qk, 0)) 01344 { 01345 r=pk/qk; 01346 } 01347 if (not_equal(r, 0)) 01348 { 01349 t=CMath::abs((ans-r)/r); 01350 ans=r; 01351 } 01352 else 01353 { 01354 t=1.0; 01355 } 01356 if (less(t, thresh)) 01357 { 01358 break; 01359 } 01360 k1=k1+1.0; 01361 k2=k2-1.0; 01362 k3=k3+2.0; 01363 k4=k4+2.0; 01364 k5=k5+1.0; 01365 k6=k6+1.0; 01366 k7=k7+2.0; 01367 k8=k8+2.0; 01368 if (greater(CMath::abs(qk)+CMath::abs(pk), big)) 01369 { 01370 pkm2=pkm2*biginv; 01371 pkm1=pkm1*biginv; 01372 qkm2=qkm2*biginv; 01373 qkm1=qkm1*biginv; 01374 } 01375 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv)) 01376 { 01377 pkm2=pkm2*big; 01378 pkm1=pkm1*big; 01379 qkm2=qkm2*big; 01380 qkm1=qkm1*big; 01381 } 01382 n=n+1; 01383 } 01384 while (n!=300); 01385 result=ans; 01386 return result; 01387 } 01388 01389 float64_t CStatistics::incomplete_gamma(float64_t a, float64_t x) 01390 { 01391 float64_t igammaepsilon; 01392 float64_t ans; 01393 float64_t ax; 01394 float64_t c; 01395 float64_t r; 01396 float64_t result; 01397 01398 igammaepsilon=0.000000000000001; 01399 if (less_equal(x, 0) || less_equal(a, 0)) 01400 { 01401 result=0; 01402 return result; 01403 } 01404 if (greater(x, 1) && greater(x, a)) 01405 { 01406 result=1-incomplete_gamma_completed(a, x); 01407 return result; 01408 } 01409 ax=a*CMath::log(x)-x-lgamma(a); 01410 if (less(ax, -709.78271289338399)) 01411 { 01412 result=0; 01413 return result; 01414 } 01415 ax=CMath::exp(ax); 01416 r=a; 01417 c=1; 01418 ans=1; 01419 do 01420 { 01421 r=r+1; 01422 c=c*x/r; 01423 ans=ans+c; 01424 } 01425 while (greater(c/ans, igammaepsilon)); 01426 result=ans*ax/a; 01427 return result; 01428 } 01429 01430 float64_t CStatistics::incomplete_gamma_completed(float64_t a, float64_t x) 01431 { 01432 float64_t igammaepsilon; 01433 float64_t igammabignumber; 01434 float64_t igammabignumberinv; 01435 float64_t ans; 01436 float64_t ax; 01437 float64_t c; 01438 float64_t yc; 01439 float64_t r; 01440 float64_t t; 01441 float64_t y; 01442 float64_t z; 01443 float64_t pk; 01444 float64_t pkm1; 01445 float64_t pkm2; 01446 float64_t qk; 01447 float64_t qkm1; 01448 float64_t qkm2; 01449 float64_t result; 01450 01451 igammaepsilon=0.000000000000001; 01452 igammabignumber=4503599627370496.0; 01453 igammabignumberinv=2.22044604925031308085*0.0000000000000001; 01454 if (less_equal(x, 0) || less_equal(a, 0)) 01455 { 01456 result=1; 01457 return result; 01458 } 01459 if (less(x, 1) || less(x, a)) 01460 { 01461 result=1-incomplete_gamma(a, x); 01462 return result; 01463 } 01464 ax=a*CMath::log(x)-x-lgamma(a); 01465 if (less(ax, -709.78271289338399)) 01466 { 01467 result=0; 01468 return result; 01469 } 01470 ax=CMath::exp(ax); 01471 y=1-a; 01472 z=x+y+1; 01473 c=0; 01474 pkm2=1; 01475 qkm2=x; 01476 pkm1=x+1; 01477 qkm1=z*x; 01478 ans=pkm1/qkm1; 01479 do 01480 { 01481 c=c+1; 01482 y=y+1; 01483 z=z+2; 01484 yc=y*c; 01485 pk=pkm1*z-pkm2*yc; 01486 qk=qkm1*z-qkm2*yc; 01487 if (not_equal(qk, 0)) 01488 { 01489 r=pk/qk; 01490 t=CMath::abs((ans-r)/r); 01491 ans=r; 01492 } 01493 else 01494 { 01495 t=1; 01496 } 01497 pkm2=pkm1; 01498 pkm1=pk; 01499 qkm2=qkm1; 01500 qkm1=qk; 01501 if (greater(CMath::abs(pk), igammabignumber)) 01502 { 01503 pkm2=pkm2*igammabignumberinv; 01504 pkm1=pkm1*igammabignumberinv; 01505 qkm2=qkm2*igammabignumberinv; 01506 qkm1=qkm1*igammabignumberinv; 01507 } 01508 } 01509 while (greater(t, igammaepsilon)); 01510 result=ans*ax; 01511 return result; 01512 } 01513 01514 float64_t CStatistics::gamma_cdf(float64_t x, float64_t a, float64_t b) 01515 { 01516 /* definition of wikipedia: incomplete gamma devised by true gamma */ 01517 return incomplete_gamma(a, x/b); 01518 } 01519 01520 float64_t CStatistics::inverse_gamma_cdf(float64_t p, float64_t a, 01521 float64_t b) 01522 { 01523 /* inverse of gamma(a,b) CDF is 01524 * inverse_incomplete_gamma_completed(a, 1. - p) * b */ 01525 return inverse_incomplete_gamma_completed(a, 1-p)*b; 01526 } 01527 01528 float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a, 01529 float64_t y0) 01530 { 01531 float64_t igammaepsilon; 01532 float64_t iinvgammabignumber; 01533 float64_t x0; 01534 float64_t x1; 01535 float64_t x; 01536 float64_t yl; 01537 float64_t yh; 01538 float64_t y; 01539 float64_t d; 01540 float64_t lgm; 01541 float64_t dithresh; 01542 int32_t i; 01543 int32_t dir; 01544 float64_t result; 01545 01546 igammaepsilon=0.000000000000001; 01547 iinvgammabignumber=4503599627370496.0; 01548 x0=iinvgammabignumber; 01549 yl=0; 01550 x1=0; 01551 yh=1; 01552 dithresh=5*igammaepsilon; 01553 d=1/(9*a); 01554 y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d); 01555 x=a*y*y*y; 01556 lgm=lgamma(a); 01557 i=0; 01558 while (i<10) 01559 { 01560 if (greater(x, x0) || less(x, x1)) 01561 { 01562 d=0.0625; 01563 break; 01564 } 01565 y=incomplete_gamma_completed(a, x); 01566 if (less(y, yl) || greater(y, yh)) 01567 { 01568 d=0.0625; 01569 break; 01570 } 01571 if (less(y, y0)) 01572 { 01573 x0=x; 01574 yl=y; 01575 } 01576 else 01577 { 01578 x1=x; 01579 yh=y; 01580 } 01581 d=(a-1)*CMath::log(x)-x-lgm; 01582 if (less(d, -709.78271289338399)) 01583 { 01584 d=0.0625; 01585 break; 01586 } 01587 d=-CMath::exp(d); 01588 d=(y-y0)/d; 01589 if (less(CMath::abs(d/x), igammaepsilon)) 01590 { 01591 result=x; 01592 return result; 01593 } 01594 x=x-d; 01595 i=i+1; 01596 } 01597 if (equal(x0, iinvgammabignumber)) 01598 { 01599 if (less_equal(x, 0)) 01600 { 01601 x=1; 01602 } 01603 while (equal(x0, iinvgammabignumber)) 01604 { 01605 x=(1+d)*x; 01606 y=incomplete_gamma_completed(a, x); 01607 if (less(y, y0)) 01608 { 01609 x0=x; 01610 yl=y; 01611 break; 01612 } 01613 d=d+d; 01614 } 01615 } 01616 d=0.5; 01617 dir=0; 01618 i=0; 01619 while (i<400) 01620 { 01621 x=x1+d*(x0-x1); 01622 y=incomplete_gamma_completed(a, x); 01623 lgm=(x0-x1)/(x1+x0); 01624 if (less(CMath::abs(lgm), dithresh)) 01625 { 01626 break; 01627 } 01628 lgm=(y-y0)/y0; 01629 if (less(CMath::abs(lgm), dithresh)) 01630 { 01631 break; 01632 } 01633 if (less_equal(x, 0.0)) 01634 { 01635 break; 01636 } 01637 if (greater_equal(y, y0)) 01638 { 01639 x1=x; 01640 yh=y; 01641 if (dir<0) 01642 { 01643 dir=0; 01644 d=0.5; 01645 } 01646 else 01647 { 01648 if (dir>1) 01649 { 01650 d=0.5*d+0.5; 01651 } 01652 else 01653 { 01654 d=(y0-yl)/(yh-yl); 01655 } 01656 } 01657 dir=dir+1; 01658 } 01659 else 01660 { 01661 x0=x; 01662 yl=y; 01663 if (dir>0) 01664 { 01665 dir=0; 01666 d=0.5; 01667 } 01668 else 01669 { 01670 if (dir<-1) 01671 { 01672 d=0.5*d; 01673 } 01674 else 01675 { 01676 d=(y0-yl)/(yh-yl); 01677 } 01678 } 01679 dir=dir-1; 01680 } 01681 i=i+1; 01682 } 01683 result=x; 01684 return result; 01685 } 01686 01687 float64_t CStatistics::normal_cdf(float64_t x, float64_t std_dev) 01688 { 01689 return 0.5*(error_function(x/std_dev/1.41421356237309504880)+1); 01690 } 01691 01692 float64_t CStatistics::lnormal_cdf(float64_t x) 01693 { 01694 float64_t result; 01695 01696 if (x<-10.0) 01697 { 01698 float64_t x2=x*x; 01699 float64_t s=1.0-1.0/x2*(1.0-3.0/x2*(1.0-5.0/x2*(1.0-7.0/x2))); 01700 result=-0.5*CMath::log(2*CMath::PI)-x2*0.5-CMath::log(-x)+CMath::log(s); 01701 } 01702 else 01703 result=CMath::log(normal_cdf(x)); 01704 01705 return result; 01706 } 01707 01708 float64_t CStatistics::error_function(float64_t x) 01709 { 01710 float64_t xsq; 01711 float64_t s; 01712 float64_t p; 01713 float64_t q; 01714 float64_t result; 01715 01716 s=CMath::sign(x); 01717 x=CMath::abs(x); 01718 if (less(x, 0.5)) 01719 { 01720 xsq=x*x; 01721 p=0.007547728033418631287834; 01722 p=0.288805137207594084924010+xsq*p; 01723 p=14.3383842191748205576712+xsq*p; 01724 p=38.0140318123903008244444+xsq*p; 01725 p=3017.82788536507577809226+xsq*p; 01726 p=7404.07142710151470082064+xsq*p; 01727 p=80437.3630960840172832162+xsq*p; 01728 q=0.0; 01729 q=1.00000000000000000000000+xsq*q; 01730 q=38.0190713951939403753468+xsq*q; 01731 q=658.070155459240506326937+xsq*q; 01732 q=6379.60017324428279487120+xsq*q; 01733 q=34216.5257924628539769006+xsq*q; 01734 q=80437.3630960840172826266+xsq*q; 01735 result=s*1.1283791670955125738961589031*x*p/q; 01736 return result; 01737 } 01738 if (greater_equal(x, 10)) 01739 { 01740 result=s; 01741 return result; 01742 } 01743 result=s*(1-error_function_complement(x)); 01744 return result; 01745 } 01746 01747 float64_t CStatistics::error_function_complement(float64_t x) 01748 { 01749 float64_t p; 01750 float64_t q; 01751 float64_t result; 01752 01753 if (less(x, 0)) 01754 { 01755 result=2-error_function_complement(-x); 01756 return result; 01757 } 01758 if (less(x, 0.5)) 01759 { 01760 result=1.0-error_function(x); 01761 return result; 01762 } 01763 if (greater_equal(x, 10)) 01764 { 01765 result=0; 01766 return result; 01767 } 01768 p=0.0; 01769 p=0.5641877825507397413087057563+x*p; 01770 p=9.675807882987265400604202961+x*p; 01771 p=77.08161730368428609781633646+x*p; 01772 p=368.5196154710010637133875746+x*p; 01773 p=1143.262070703886173606073338+x*p; 01774 p=2320.439590251635247384768711+x*p; 01775 p=2898.0293292167655611275846+x*p; 01776 p=1826.3348842295112592168999+x*p; 01777 q=1.0; 01778 q=17.14980943627607849376131193+x*q; 01779 q=137.1255960500622202878443578+x*q; 01780 q=661.7361207107653469211984771+x*q; 01781 q=2094.384367789539593790281779+x*q; 01782 q=4429.612803883682726711528526+x*q; 01783 q=6089.5424232724435504633068+x*q; 01784 q=4958.82756472114071495438422+x*q; 01785 q=1826.3348842295112595576438+x*q; 01786 result=CMath::exp(-x*x)*p/q; 01787 return result; 01788 } 01789 01790 SGVector<float64_t> CStatistics::fishers_exact_test_for_multiple_2x3_tables( 01791 SGMatrix<float64_t> tables) 01792 { 01793 SGMatrix<float64_t> table(NULL, 2, 3, false); 01794 int32_t len=tables.num_cols/3; 01795 01796 SGVector<float64_t> v(len); 01797 for (int32_t i=0; i<len; i++) 01798 { 01799 table.matrix=&tables.matrix[2*3*i]; 01800 v.vector[i]=fishers_exact_test_for_2x3_table(table); 01801 } 01802 return v; 01803 } 01804 01805 float64_t CStatistics::fishers_exact_test_for_2x3_table( 01806 SGMatrix<float64_t> table) 01807 { 01808 ASSERT(table.num_rows==2) 01809 ASSERT(table.num_cols==3) 01810 01811 int32_t m_len=3+2; 01812 float64_t* m=SG_MALLOC(float64_t, 3+2); 01813 m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4]; 01814 m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5]; 01815 m[2]=table.matrix[0]+table.matrix[1]; 01816 m[3]=table.matrix[2]+table.matrix[3]; 01817 m[4]=table.matrix[4]+table.matrix[5]; 01818 01819 float64_t n=SGVector<float64_t>::sum(m, m_len)/2.0; 01820 int32_t x_len=2*3*CMath::sq(SGVector<float64_t>::max(m, m_len)); 01821 float64_t* x=SG_MALLOC(float64_t, x_len); 01822 SGVector<float64_t>::fill_vector(x, x_len, 0.0); 01823 01824 float64_t log_nom=0.0; 01825 for (int32_t i=0; i<3+2; i++) 01826 log_nom+=lgamma(m[i]+1); 01827 log_nom-=lgamma(n+1.0); 01828 01829 float64_t log_denomf=0; 01830 floatmax_t log_denom=0; 01831 01832 for (int32_t i=0; i<3*2; i++) 01833 { 01834 log_denom+=lgammal((floatmax_t)table.matrix[i]+1); 01835 log_denomf+=lgammal((floatmax_t)table.matrix[i]+1); 01836 } 01837 01838 floatmax_t prob_table_log=log_nom-log_denom; 01839 01840 int32_t dim1=CMath::min(m[0], m[2]); 01841 01842 //traverse all possible tables with given m 01843 int32_t counter=0; 01844 for (int32_t k=0; k<=dim1; k++) 01845 { 01846 for (int32_t l=CMath::max(0.0, m[0]-m[4]-k); 01847 l<=CMath::min(m[0]-k, m[3]); l++) 01848 { 01849 x[0+0*2+counter*2*3]=k; 01850 x[0+1*2+counter*2*3]=l; 01851 x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3]; 01852 x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3]; 01853 x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3]; 01854 x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3]; 01855 01856 counter++; 01857 } 01858 } 01859 01860 //#define DEBUG_FISHER_TABLE 01861 #ifdef DEBUG_FISHER_TABLE 01862 SG_SPRINT("counter=%d\n", counter) 01863 SG_SPRINT("dim1=%d\n", dim1) 01864 SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3])) 01865 SG_SPRINT("n=%g\n", n) 01866 SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log) 01867 SG_SPRINT("log_denomf=%.18g\n", log_denomf) 01868 SG_SPRINT("log_denom=%.18Lg\n", log_denom) 01869 SG_SPRINT("log_nom=%.18g\n", log_nom) 01870 display_vector(m, m_len, "marginals"); 01871 display_vector(x, 2*3*counter, "x"); 01872 #endif // DEBUG_FISHER_TABLE 01873 01874 floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter); 01875 SGVector<floatmax_t>::fill_vector(log_denom_vec, counter, (floatmax_t)0.0); 01876 01877 for (int32_t k=0; k<counter; k++) 01878 { 01879 for (int32_t j=0; j<3; j++) 01880 { 01881 for (int32_t i=0; i<2; i++) 01882 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0); 01883 } 01884 } 01885 01886 for (int32_t i=0; i<counter; i++) 01887 log_denom_vec[i]=log_nom-log_denom_vec[i]; 01888 01889 #ifdef DEBUG_FISHER_TABLE 01890 display_vector(log_denom_vec, counter, "log_denom_vec"); 01891 #endif // DEBUG_FISHER_TABLE 01892 01893 float64_t nonrand_p=-CMath::INFTY; 01894 for (int32_t i=0; i<counter; i++) 01895 { 01896 if (log_denom_vec[i]<=prob_table_log) 01897 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]); 01898 } 01899 01900 #ifdef DEBUG_FISHER_TABLE 01901 SG_SPRINT("nonrand_p=%.18g\n", nonrand_p) 01902 SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p)) 01903 #endif // DEBUG_FISHER_TABLE 01904 nonrand_p=CMath::exp(nonrand_p); 01905 01906 SG_FREE(log_denom_vec); 01907 SG_FREE(x); 01908 SG_FREE(m); 01909 01910 return nonrand_p; 01911 } 01912 01913 float64_t CStatistics::mutual_info(float64_t* p1, float64_t* p2, int32_t len) 01914 { 01915 double e=0; 01916 01917 for (int32_t i=0; i<len; i++) 01918 for (int32_t j=0; j<len; j++) 01919 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]); 01920 01921 return (float64_t)e; 01922 } 01923 01924 float64_t CStatistics::relative_entropy(float64_t* p, float64_t* q, int32_t len) 01925 { 01926 double e=0; 01927 01928 for (int32_t i=0; i<len; i++) 01929 e+=exp(p[i])*(p[i]-q[i]); 01930 01931 return (float64_t)e; 01932 } 01933 01934 float64_t CStatistics::entropy(float64_t* p, int32_t len) 01935 { 01936 double e=0; 01937 01938 for (int32_t i=0; i<len; i++) 01939 e-=exp(p[i])*p[i]; 01940 01941 return (float64_t)e; 01942 } 01943 01944 SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N) 01945 { 01946 REQUIRE(sample_size<N, 01947 "sample size should be less than number of indices\n"); 01948 int32_t* idxs=SG_MALLOC(int32_t,N); 01949 int32_t i, rnd; 01950 int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size); 01951 01952 // reservoir sampling 01953 for (i=0; i<N; i++) 01954 idxs[i]=i; 01955 for (i=0; i<sample_size; i++) 01956 permuted_idxs[i]=idxs[i]; 01957 for (i=sample_size; i<N; i++) 01958 { 01959 rnd=CMath::random(1, i); 01960 if (rnd<sample_size) 01961 permuted_idxs[rnd]=idxs[i]; 01962 } 01963 SG_FREE(idxs); 01964 01965 SGVector<int32_t> result=SGVector<int32_t>(permuted_idxs, sample_size); 01966 result.qsort(); 01967 return result; 01968 } 01969 01970 float64_t CStatistics::dlgamma(float64_t x) 01971 { 01972 float64_t result=0.0; 01973 01974 if (x<0.0) 01975 { 01976 // use reflection formula 01977 x=1.0-x; 01978 result=CMath::PI/CMath::tan(CMath::PI*x); 01979 } 01980 01981 // make x>7 for approximation 01982 // (use reccurent formula: psi(x+1) = psi(x) + 1/x) 01983 while (x<=7.0) 01984 { 01985 result-=1.0/x; 01986 x++; 01987 } 01988 01989 // perform approximation 01990 x-=0.5; 01991 result+=log(x); 01992 01993 float64_t coeff[10]={ 01994 0.04166666666666666667, 01995 -0.00729166666666666667, 01996 0.00384424603174603175, 01997 -0.00413411458333333333, 01998 0.00756096117424242424, 01999 -0.02108249687595390720, 02000 0.08332316080729166666, 02001 -0.44324627670587277880, 02002 3.05393103044765369366, 02003 -26.45616165999210241989}; 02004 02005 float64_t power=1.0; 02006 float64_t ix2=1.0/CMath::sq(x); 02007 02008 // perform approximation 02009 for (index_t i=0; i<10; i++) 02010 { 02011 power*=ix2; 02012 result+=coeff[i]*power; 02013 } 02014 02015 return result; 02016 } 02017 02018 #ifdef HAVE_EIGEN3 02019 float64_t CStatistics::log_det(SGMatrix<float64_t> m) 02020 { 02021 /* map the matrix to eigen3 to perform cholesky */ 02022 Map<MatrixXd> M(m.matrix, m.num_rows, m.num_cols); 02023 02024 /* computing the cholesky decomposition */ 02025 LLT<MatrixXd> llt; 02026 llt.compute(M); 02027 02028 /* the lower triangular matrix */ 02029 MatrixXd l = llt.matrixL(); 02030 02031 /* calculate the log-determinant */ 02032 VectorXd diag = l.diagonal(); 02033 float64_t retval = 0.0; 02034 for( int32_t i = 0; i < diag.rows(); ++i ) { 02035 retval += log(diag(i)); 02036 } 02037 retval *= 2; 02038 02039 return retval; 02040 } 02041 02042 float64_t CStatistics::log_det(const SGSparseMatrix<float64_t> m) 02043 { 02044 typedef SparseMatrix<float64_t> MatrixType; 02045 const MatrixType &M=EigenSparseUtil<float64_t>::toEigenSparse(m); 02046 02047 SimplicialLLT<MatrixType> llt; 02048 02049 // factorize using cholesky with amd permutation 02050 llt.compute(M); 02051 MatrixType L=llt.matrixL(); 02052 02053 // calculate the log-determinant 02054 float64_t retval=0.0; 02055 for( index_t i=0; i<M.rows(); ++i ) 02056 retval+=log(L.coeff(i,i)); 02057 retval*=2; 02058 02059 return retval; 02060 } 02061 02062 SGMatrix<float64_t> CStatistics::sample_from_gaussian(SGVector<float64_t> mean, 02063 SGMatrix<float64_t> cov, int32_t N, bool precision_matrix) 02064 { 02065 REQUIRE(cov.num_rows>0, 02066 "CStatistics::sample_from_gaussian(): \ 02067 Number of covariance rows must be positive!\n"); 02068 REQUIRE(cov.num_cols>0, 02069 "CStatistics::sample_from_gaussian(): \ 02070 Number of covariance cols must be positive!\n"); 02071 REQUIRE(cov.matrix, 02072 "CStatistics::sample_from_gaussian(): \ 02073 Covariance is not initialized!\n"); 02074 REQUIRE(cov.num_rows==cov.num_cols, 02075 "CStatistics::sample_from_gaussian(): \ 02076 Covariance should be square matrix!\n"); 02077 REQUIRE(mean.vlen==cov.num_rows, 02078 "CStatistics::sample_from_gaussian(): \ 02079 Mean and covariance dimension mismatch!\n"); 02080 02081 int32_t dim=mean.vlen; 02082 Map<VectorXd> mu(mean.vector, mean.vlen); 02083 Map<MatrixXd> c(cov.matrix, cov.num_rows, cov.num_cols); 02084 02085 // generate samples, z, from N(0, I), DxN 02086 SGMatrix<float64_t> S(dim, N); 02087 for( int32_t j=0; j<N; ++j ) 02088 for( int32_t i=0; i<dim; ++i ) 02089 S(i,j)=CMath::randn_double(); 02090 02091 // the cholesky factorization c=L*U 02092 MatrixXd U=c.llt().matrixU(); 02093 02094 // generate samples, x, from N(mean, cov) or N(mean, cov^-1) 02095 // return samples of dimension NxD 02096 if( precision_matrix ) 02097 { 02098 // here we have U*x=z, to solve this, we use cholesky again 02099 Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols); 02100 LDLT<MatrixXd> ldlt; 02101 ldlt.compute(U); 02102 s=ldlt.solve(s); 02103 } 02104 02105 SGMatrix<float64_t>::transpose_matrix(S.matrix, S.num_rows, S.num_cols); 02106 02107 if( !precision_matrix ) 02108 { 02109 // here we need to find x=L*z, so, x'=z'*L' i.e. x'=z'*U 02110 Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols); 02111 s=s*U; 02112 } 02113 02114 // add the mean 02115 Map<MatrixXd> x(S.matrix, S.num_rows, S.num_cols); 02116 for( int32_t i=0; i<N; ++i ) 02117 x.row(i)+=mu; 02118 02119 return S; 02120 } 02121 02122 SGMatrix<float64_t> CStatistics::sample_from_gaussian(SGVector<float64_t> mean, 02123 SGSparseMatrix<float64_t> cov, int32_t N, bool precision_matrix) 02124 { 02125 REQUIRE(cov.num_vectors>0, 02126 "CStatistics::sample_from_gaussian(): \ 02127 Number of covariance rows must be positive!\n"); 02128 REQUIRE(cov.num_features>0, 02129 "CStatistics::sample_from_gaussian(): \ 02130 Number of covariance cols must be positive!\n"); 02131 REQUIRE(cov.sparse_matrix, 02132 "CStatistics::sample_from_gaussian(): \ 02133 Covariance is not initialized!\n"); 02134 REQUIRE(cov.num_vectors==cov.num_features, 02135 "CStatistics::sample_from_gaussian(): \ 02136 Covariance should be square matrix!\n"); 02137 REQUIRE(mean.vlen==cov.num_vectors, 02138 "CStatistics::sample_from_gaussian(): \ 02139 Mean and covariance dimension mismatch!\n"); 02140 02141 int32_t dim=mean.vlen; 02142 Map<VectorXd> mu(mean.vector, mean.vlen); 02143 02144 typedef SparseMatrix<float64_t> MatrixType; 02145 const MatrixType &c=EigenSparseUtil<float64_t>::toEigenSparse(cov); 02146 02147 SimplicialLLT<MatrixType> llt; 02148 02149 // generate samples, z, from N(0, I), DxN 02150 SGMatrix<float64_t> S(dim, N); 02151 for( int32_t j=0; j<N; ++j ) 02152 for( int32_t i=0; i<dim; ++i ) 02153 S(i,j)=CMath::randn_double(); 02154 02155 Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols); 02156 02157 // the cholesky factorization P*c*P^-1 = LP*UP, with LP=P*L, UP=U*P^-1 02158 llt.compute(c); 02159 MatrixType LP=llt.matrixL(); 02160 MatrixType UP=llt.matrixU(); 02161 02162 // generate samples, x, from N(mean, cov) or N(mean, cov^-1) 02163 // return samples of dimension NxD 02164 if( precision_matrix ) 02165 { 02166 // here we have UP*xP=z, to solve this, we use cholesky again 02167 SimplicialLLT<MatrixType> lltUP; 02168 lltUP.compute(UP); 02169 s=lltUP.solve(s); 02170 } 02171 else 02172 { 02173 // here we need to find xP=LP*z 02174 s=LP*s; 02175 } 02176 02177 // permute the samples back with x=P^-1*xP 02178 s=llt.permutationPinv()*s; 02179 02180 SGMatrix<float64_t>::transpose_matrix(S.matrix, S.num_rows, S.num_cols); 02181 // add the mean 02182 Map<MatrixXd> x(S.matrix, S.num_rows, S.num_cols); 02183 for( int32_t i=0; i<N; ++i ) 02184 x.row(i)+=mu; 02185 02186 return S; 02187 } 02188 02189 #endif //HAVE_EIGEN3 02190 02191 CStatistics::SigmoidParamters CStatistics::fit_sigmoid(SGVector<float64_t> scores) 02192 { 02193 SG_SDEBUG("entering CStatistics::fit_sigmoid()\n") 02194 02195 REQUIRE(scores.vector, "CStatistics::fit_sigmoid() requires " 02196 "scores vector!\n"); 02197 02198 /* count prior0 and prior1 if needed */ 02199 int32_t prior0=0; 02200 int32_t prior1=0; 02201 SG_SDEBUG("counting number of positive and negative labels\n") 02202 { 02203 for (index_t i=0; i<scores.vlen; ++i) 02204 { 02205 if (scores[i]>0) 02206 prior1++; 02207 else 02208 prior0++; 02209 } 02210 } 02211 SG_SDEBUG("%d pos; %d neg\n", prior1, prior0) 02212 02213 /* parameter setting */ 02214 /* maximum number of iterations */ 02215 index_t maxiter=100; 02216 02217 /* minimum step taken in line search */ 02218 float64_t minstep=1E-10; 02219 02220 /* for numerically strict pd of hessian */ 02221 float64_t sigma=1E-12; 02222 float64_t eps=1E-5; 02223 02224 /* construct target support */ 02225 float64_t hiTarget=(prior1+1.0)/(prior1+2.0); 02226 float64_t loTarget=1/(prior0+2.0); 02227 index_t length=prior1+prior0; 02228 02229 SGVector<float64_t> t(length); 02230 for (index_t i=0; i<length; ++i) 02231 { 02232 if (scores[i]>0) 02233 t[i]=hiTarget; 02234 else 02235 t[i]=loTarget; 02236 } 02237 02238 /* initial Point and Initial Fun Value */ 02239 /* result parameters of sigmoid */ 02240 float64_t a=0; 02241 float64_t b=CMath::log((prior0+1.0)/(prior1+1.0)); 02242 float64_t fval=0.0; 02243 02244 for (index_t i=0; i<length; ++i) 02245 { 02246 float64_t fApB=scores[i]*a+b; 02247 if (fApB>=0) 02248 fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); 02249 else 02250 fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); 02251 } 02252 02253 index_t it; 02254 float64_t g1; 02255 float64_t g2; 02256 for (it=0; it<maxiter; ++it) 02257 { 02258 SG_SDEBUG("Iteration %d, a=%f, b=%f, fval=%f\n", it, a, b, fval) 02259 02260 /* Update Gradient and Hessian (use H' = H + sigma I) */ 02261 float64_t h11=sigma; //Numerically ensures strict PD 02262 float64_t h22=h11; 02263 float64_t h21=0; 02264 g1=0; 02265 g2=0; 02266 02267 for (index_t i=0; i<length; ++i) 02268 { 02269 float64_t fApB=scores[i]*a+b; 02270 float64_t p; 02271 float64_t q; 02272 if (fApB>=0) 02273 { 02274 p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB)); 02275 q=1.0/(1.0+CMath::exp(-fApB)); 02276 } 02277 else 02278 { 02279 p=1.0/(1.0+CMath::exp(fApB)); 02280 q=CMath::exp(fApB)/(1.0+CMath::exp(fApB)); 02281 } 02282 02283 float64_t d2=p*q; 02284 h11+=scores[i]*scores[i]*d2; 02285 h22+=d2; 02286 h21+=scores[i]*d2; 02287 float64_t d1=t[i]-p; 02288 g1+=scores[i]*d1; 02289 g2+=d1; 02290 } 02291 02292 /* Stopping Criteria */ 02293 if (CMath::abs(g1)<eps && CMath::abs(g2)<eps) 02294 break; 02295 02296 /* Finding Newton direction: -inv(H') * g */ 02297 float64_t det=h11*h22-h21*h21; 02298 float64_t dA=-(h22*g1-h21*g2)/det; 02299 float64_t dB=-(-h21*g1+h11*g2)/det; 02300 float64_t gd=g1*dA+g2*dB; 02301 02302 /* Line Search */ 02303 float64_t stepsize=1; 02304 02305 while (stepsize>=minstep) 02306 { 02307 float64_t newA=a+stepsize*dA; 02308 float64_t newB=b+stepsize*dB; 02309 02310 /* New function value */ 02311 float64_t newf=0.0; 02312 for (index_t i=0; i<length; ++i) 02313 { 02314 float64_t fApB=scores[i]*newA+newB; 02315 if (fApB>=0) 02316 newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB)); 02317 else 02318 newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB)); 02319 } 02320 02321 /* Check sufficient decrease */ 02322 if (newf<fval+0.0001*stepsize*gd) 02323 { 02324 a=newA; 02325 b=newB; 02326 fval=newf; 02327 break; 02328 } 02329 else 02330 stepsize=stepsize/2.0; 02331 } 02332 02333 if (stepsize<minstep) 02334 { 02335 SG_SWARNING("CStatistics::fit_sigmoid(): line search fails, A=%f, " 02336 "B=%f, g1=%f, g2=%f, dA=%f, dB=%f, gd=%f\n", 02337 a, b, g1, g2, dA, dB, gd); 02338 } 02339 } 02340 02341 if (it>=maxiter-1) 02342 { 02343 SG_SWARNING("CStatistics::fit_sigmoid(): reaching maximal iterations," 02344 " g1=%f, g2=%f\n", g1, g2); 02345 } 02346 02347 SG_SDEBUG("fitted sigmoid: a=%f, b=%f\n", a, b) 02348 02349 CStatistics::SigmoidParamters result; 02350 result.a=a; 02351 result.b=b; 02352 02353 SG_SDEBUG("leaving CStatistics::fit_sigmoid()\n") 02354 return result; 02355 }