SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Statistics.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011-2012 Heiko Strathmann
00008  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00009  *
00010  * ALGLIB Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier under GPL2+
00011  * http://www.alglib.net/
00012  * See header file for which functions are taken from ALGLIB (with adjustments
00013  * for shogun)
00014  */
00015 
00016 #include <shogun/mathematics/Statistics.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/lib/SGMatrix.h>
00019 #include <shogun/lib/SGVector.h>
00020 #include <shogun/lib/SGSparseMatrix.h>
00021 #include <shogun/lib/SGSparseVector.h>
00022 
00023 #ifdef HAVE_LAPACK
00024 #include <shogun/mathematics/lapack.h>
00025 #endif //HAVE_LAPACK
00026 
00027 #ifdef HAVE_EIGEN3
00028 #include <shogun/mathematics/eigen3.h>
00029 using namespace Eigen;
00030 #endif //HAVE_EIGEN3
00031 
00032 using namespace shogun;
00033 
00034 float64_t CStatistics::mean(SGVector<float64_t> values)
00035 {
00036     ASSERT(values.vlen>0)
00037     ASSERT(values.vector)
00038 
00039     float64_t sum=0;
00040     for (index_t i=0; i<values.vlen; ++i)
00041         sum+=values.vector[i];
00042 
00043     return sum/values.vlen;
00044 }
00045 
00046 float64_t CStatistics::median(SGVector<float64_t> values, bool modify,
00047             bool in_place)
00048 {
00049     float64_t result;
00050     if (modify)
00051     {
00052         /* use QuickSelect method
00053          * This Quickselect routine is based on the algorithm described in
00054          * "Numerical recipes in C", Second Edition,
00055          * Cambridge University Press, 1992, Section 8.5, ISBN 0-521-43108-5
00056          * This code by Nicolas Devillard - 1998. Public domain.
00057          * Adapted to SHOGUN by Heiko Strathmann
00058          */
00059         int32_t low;
00060         int32_t high;
00061         int32_t median;
00062         int32_t middle;
00063         int32_t l;
00064         int32_t h;
00065 
00066         low=0;
00067         high=values.vlen-1;
00068         median=(low+high)/2;
00069 
00070         while (true)
00071         {
00072             if (high<=low)
00073             {
00074                 result=values[median];
00075                 break;
00076             }
00077 
00078             if (high==low+1)
00079             {
00080                 if (values[low]>values[high])
00081                     CMath::CMath::swap(values[low], values[high]);
00082                 result=values[median];
00083                 break;
00084             }
00085 
00086             middle=(low+high)/2;
00087             if (values[middle]>values[high])
00088                 CMath::swap(values[middle], values[high]);
00089             if (values[low]>values[high])
00090                 CMath::swap(values[low], values[high]);
00091             if (values[middle]>values[low])
00092                 CMath::swap(values[middle], values[low]);
00093 
00094             CMath::swap(values[middle], values[low+1]);
00095 
00096             l=low+1;
00097             h=high;
00098             for (;;)
00099             {
00100                 do
00101                     l++;
00102                 while (values[low]>values[l]);
00103                 do
00104                     h--;
00105                 while (values[h]>values[low]);
00106                 if (h<l)
00107                     break;
00108                 CMath::swap(values[l], values[h]);
00109             }
00110 
00111             CMath::swap(values[low], values[h]);
00112             if (h<=median)
00113                 low=l;
00114             if (h>=median)
00115                 high=h-1;
00116         }
00117 
00118     }
00119     else
00120     {
00121         if (in_place)
00122         {
00123             /* use Torben method
00124              * The following code is public domain.
00125              * Algorithm by Torben Mogensen, implementation by N. Devillard.
00126              * This code in public domain.
00127              * Adapted to SHOGUN by Heiko Strathmann
00128              */
00129             int32_t i;
00130             int32_t less;
00131             int32_t greater;
00132             int32_t equal;
00133             float64_t min;
00134             float64_t max;
00135             float64_t guess;
00136             float64_t maxltguess;
00137             float64_t mingtguess;
00138             min=max=values[0];
00139             for (i=1; i<values.vlen; i++)
00140             {
00141                 if (values[i]<min)
00142                     min=values[i];
00143                 if (values[i]>max)
00144                     max=values[i];
00145             }
00146             while (1)
00147             {
00148                 guess=(min+max)/2;
00149                 less=0;
00150                 greater=0;
00151                 equal=0;
00152                 maxltguess=min;
00153                 mingtguess=max;
00154                 for (i=0; i<values.vlen; i++)
00155                 {
00156                     if (values[i]<guess)
00157                     {
00158                         less++;
00159                         if (values[i]>maxltguess)
00160                             maxltguess=values[i];
00161                     }
00162                     else if (values[i]>guess)
00163                     {
00164                         greater++;
00165                         if (values[i]<mingtguess)
00166                             mingtguess=values[i];
00167                     }
00168                     else
00169                         equal++;
00170                 }
00171                 if (less<=(values.vlen+1)/2&&greater<=(values.vlen+1)/2)
00172                     break;
00173                 else if (less>greater)
00174                     max=maxltguess;
00175                 else
00176                     min=mingtguess;
00177             }
00178 
00179             if (less>=(values.vlen+1)/2)
00180                 result=maxltguess;
00181             else if (less+equal>=(values.vlen+1)/2)
00182                 result=guess;
00183             else
00184                 result=mingtguess;
00185         }
00186         else
00187         {
00188             /* copy vector and do recursive call which modifies copy */
00189             SGVector<float64_t> copy(values.vlen);
00190             memcpy(copy.vector, values.vector, sizeof(float64_t)*values.vlen);
00191             result=median(copy, true);
00192         }
00193     }
00194 
00195     return result;
00196 }
00197 
00198 float64_t CStatistics::matrix_median(SGMatrix<float64_t> values,
00199         bool modify, bool in_place)
00200 {
00201     /* create a vector that uses the matrix data, dont do reference counting */
00202     SGVector<float64_t> as_vector(values.matrix,
00203             values.num_rows*values.num_cols, false);
00204 
00205     /* return vector median method */
00206     return median(as_vector, modify, in_place);
00207 }
00208 
00209 
00210 float64_t CStatistics::variance(SGVector<float64_t> values)
00211 {
00212     ASSERT(values.vlen>1)
00213     ASSERT(values.vector)
00214 
00215     float64_t mean=CStatistics::mean(values);
00216 
00217     float64_t sum_squared_diff=0;
00218     for (index_t i=0; i<values.vlen; ++i)
00219         sum_squared_diff+=CMath::pow(values.vector[i]-mean, 2);
00220 
00221     return sum_squared_diff/(values.vlen-1);
00222 }
00223 
00224 SGVector<float64_t> CStatistics::matrix_mean(SGMatrix<float64_t> values,
00225         bool col_wise)
00226 {
00227     ASSERT(values.num_rows>0)
00228     ASSERT(values.num_cols>0)
00229     ASSERT(values.matrix)
00230 
00231     SGVector<float64_t> result;
00232 
00233     if (col_wise)
00234     {
00235         result=SGVector<float64_t>(values.num_cols);
00236         for (index_t j=0; j<values.num_cols; ++j)
00237         {
00238             result[j]=0;
00239             for (index_t i=0; i<values.num_rows; ++i)
00240                 result[j]+=values(i,j);
00241 
00242             result[j]/=values.num_rows;
00243         }
00244     }
00245     else
00246     {
00247         result=SGVector<float64_t>(values.num_rows);
00248         for (index_t j=0; j<values.num_rows; ++j)
00249         {
00250             result[j]=0;
00251             for (index_t i=0; i<values.num_cols; ++i)
00252                 result[j]+=values(j,i);
00253 
00254             result[j]/=values.num_cols;
00255         }
00256     }
00257 
00258     return result;
00259 }
00260 
00261 SGVector<float64_t> CStatistics::matrix_variance(SGMatrix<float64_t> values,
00262         bool col_wise)
00263 {
00264     ASSERT(values.num_rows>0)
00265     ASSERT(values.num_cols>0)
00266     ASSERT(values.matrix)
00267 
00268     /* first compute mean */
00269     SGVector<float64_t> mean=CStatistics::matrix_mean(values, col_wise);
00270 
00271     SGVector<float64_t> result;
00272 
00273     if (col_wise)
00274     {
00275         result=SGVector<float64_t>(values.num_cols);
00276         for (index_t j=0; j<values.num_cols; ++j)
00277         {
00278             result[j]=0;
00279             for (index_t i=0; i<values.num_rows; ++i)
00280                 result[j]+=CMath::pow(values(i,j)-mean[j], 2);
00281 
00282             result[j]/=(values.num_rows-1);
00283         }
00284     }
00285     else
00286     {
00287         result=SGVector<float64_t>(values.num_rows);
00288         for (index_t j=0; j<values.num_rows; ++j)
00289         {
00290             result[j]=0;
00291             for (index_t i=0; i<values.num_cols; ++i)
00292                 result[j]+=CMath::pow(values(j,i)-mean[j], 2);
00293 
00294             result[j]/=(values.num_cols-1);
00295         }
00296     }
00297 
00298     return result;
00299 }
00300 
00301 float64_t CStatistics::std_deviation(SGVector<float64_t> values)
00302 {
00303     return CMath::sqrt(variance(values));
00304 }
00305 
00306 SGVector<float64_t> CStatistics::matrix_std_deviation(
00307         SGMatrix<float64_t> values, bool col_wise)
00308 {
00309     SGVector<float64_t> var=CStatistics::matrix_variance(values, col_wise);
00310     for (index_t i=0; i<var.vlen; ++i)
00311         var[i]=CMath::sqrt(var[i]);
00312 
00313     return var;
00314 }
00315 
00316 #ifdef HAVE_LAPACK
00317 SGMatrix<float64_t> CStatistics::covariance_matrix(
00318         SGMatrix<float64_t> observations, bool in_place)
00319 {
00320     SGMatrix<float64_t> centered=
00321             in_place ?
00322                     observations :
00323                     SGMatrix<float64_t>(observations.num_rows,
00324                             observations.num_cols);
00325 
00326     if (!in_place)
00327     {
00328         memcpy(centered.matrix, observations.matrix,
00329                 sizeof(float64_t)*observations.num_rows*observations.num_cols);
00330     }
00331     centered.remove_column_mean();
00332 
00333     /* compute 1/(m-1) * X' * X */
00334     SGMatrix<float64_t> cov=SGMatrix<float64_t>::matrix_multiply(centered,
00335             centered, true, false, 1.0/(observations.num_rows-1));
00336 
00337     return cov;
00338 }
00339 #endif //HAVE_LAPACK
00340 
00341 float64_t CStatistics::confidence_intervals_mean(SGVector<float64_t> values,
00342         float64_t alpha, float64_t& conf_int_low, float64_t& conf_int_up)
00343 {
00344     ASSERT(values.vlen>1)
00345     ASSERT(values.vector)
00346 
00347     /* using one sided student t distribution evaluation */
00348     alpha=alpha/2;
00349 
00350     /* degrees of freedom */
00351     int32_t deg=values.vlen-1;
00352 
00353     /* compute absolute value of t-value */
00354     float64_t t=CMath::abs(inverse_student_t(deg, alpha));
00355 
00356     /* values for calculating confidence interval */
00357     float64_t std_dev=CStatistics::std_deviation(values);
00358     float64_t mean=CStatistics::mean(values);
00359 
00360     /* compute confidence interval */
00361     float64_t interval=t*std_dev/CMath::sqrt((float64_t)values.vlen);
00362     conf_int_low=mean-interval;
00363     conf_int_up=mean+interval;
00364 
00365     return mean;
00366 }
00367 
00368 float64_t CStatistics::inverse_student_t(int32_t k, float64_t p)
00369 {
00370     float64_t t;
00371     float64_t rk;
00372     float64_t z;
00373     int32_t rflg;
00374     float64_t result;
00375 
00376     if (!(k>0 && greater(p, 0)) && less(p, 1))
00377     {
00378         SG_SERROR("CStatistics::inverse_student_t_distribution(): "
00379         "Domain error\n");
00380     }
00381     rk=k;
00382     if (greater(p, 0.25) && less(p, 0.75))
00383     {
00384         if (equal(p, 0.5))
00385         {
00386             result=0;
00387             return result;
00388         }
00389         z=1.0-2.0*p;
00390         z=inverse_incomplete_beta(0.5, 0.5*rk, CMath::abs(z));
00391         t=CMath::sqrt(rk*z/(1.0-z));
00392         if (less(p, 0.5))
00393         {
00394             t=-t;
00395         }
00396         result=t;
00397         return result;
00398     }
00399     rflg=-1;
00400     if (greater_equal(p, 0.5))
00401     {
00402         p=1.0-p;
00403         rflg=1;
00404     }
00405     z=inverse_incomplete_beta(0.5*rk, 0.5, 2.0*p);
00406     if (less(CMath::MAX_REAL_NUMBER*z, rk))
00407     {
00408         result=rflg*CMath::MAX_REAL_NUMBER;
00409         return result;
00410     }
00411     t=CMath::sqrt(rk/z-rk);
00412     result=rflg*t;
00413     return result;
00414 }
00415 
00416 float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b,
00417         float64_t y)
00418 {
00419     float64_t aaa;
00420     float64_t bbb;
00421     float64_t y0;
00422     float64_t d;
00423     float64_t yyy;
00424     float64_t x;
00425     float64_t x0;
00426     float64_t x1;
00427     float64_t lgm;
00428     float64_t yp;
00429     float64_t di;
00430     float64_t dithresh;
00431     float64_t yl;
00432     float64_t yh;
00433     float64_t xt;
00434     int32_t i;
00435     int32_t rflg;
00436     int32_t dir;
00437     int32_t nflg;
00438     int32_t mainlooppos;
00439     int32_t ihalve;
00440     int32_t ihalvecycle;
00441     int32_t newt;
00442     int32_t newtcycle;
00443     int32_t breaknewtcycle;
00444     int32_t breakihalvecycle;
00445     float64_t result;
00446 
00447     i=0;
00448     if (!(greater_equal(y, 0) && less_equal(y, 1)))
00449     {
00450         SG_SERROR("CStatistics::inverse_incomplete_beta(): "
00451         "Domain error\n");
00452     }
00453 
00454     /*
00455      * special cases
00456      */
00457     if (equal(y, 0))
00458     {
00459         result=0;
00460         return result;
00461     }
00462     if (equal(y, 1.0))
00463     {
00464         result=1;
00465         return result;
00466     }
00467 
00468     /*
00469      * these initializations are not really necessary,
00470      * but without them compiler complains about 'possibly uninitialized variables'.
00471      */
00472     dithresh=0;
00473     rflg=0;
00474     aaa=0;
00475     bbb=0;
00476     y0=0;
00477     x=0;
00478     yyy=0;
00479     lgm=0;
00480     dir=0;
00481     di=0;
00482 
00483     /*
00484      * normal initializations
00485      */
00486     x0=0.0;
00487     yl=0.0;
00488     x1=1.0;
00489     yh=1.0;
00490     nflg=0;
00491     mainlooppos=0;
00492     ihalve=1;
00493     ihalvecycle=2;
00494     newt=3;
00495     newtcycle=4;
00496     breaknewtcycle=5;
00497     breakihalvecycle=6;
00498 
00499     /*
00500      * main loop
00501      */
00502     for (;;)
00503     {
00504 
00505         /*
00506          * start
00507          */
00508         if (mainlooppos==0)
00509         {
00510             if (less_equal(a, 1.0) || less_equal(b, 1.0))
00511             {
00512                 dithresh=1.0e-6;
00513                 rflg=0;
00514                 aaa=a;
00515                 bbb=b;
00516                 y0=y;
00517                 x=aaa/(aaa+bbb);
00518                 yyy=incomplete_beta(aaa, bbb, x);
00519                 mainlooppos=ihalve;
00520                 continue;
00521             }
00522             else
00523             {
00524                 dithresh=1.0e-4;
00525             }
00526             yp=-inverse_normal_cdf(y);
00527             if (greater(y, 0.5))
00528             {
00529                 rflg=1;
00530                 aaa=b;
00531                 bbb=a;
00532                 y0=1.0-y;
00533                 yp=-yp;
00534             }
00535             else
00536             {
00537                 rflg=0;
00538                 aaa=a;
00539                 bbb=b;
00540                 y0=y;
00541             }
00542             lgm=(yp*yp-3.0)/6.0;
00543             x=2.0/(1.0/(2.0*aaa-1.0)+1.0/(2.0*bbb-1.0));
00544             d=yp*CMath::sqrt(x+lgm)/x
00545                     -(1.0/(2.0*bbb-1.0)-1.0/(2.0*aaa-1.0))
00546                             *(lgm+5.0/6.0-2.0/(3.0*x));
00547             d=2.0*d;
00548             if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00549             {
00550                 x=0;
00551                 break;
00552             }
00553             x=aaa/(aaa+bbb*CMath::exp(d));
00554             yyy=incomplete_beta(aaa, bbb, x);
00555             yp=(yyy-y0)/y0;
00556             if (less(CMath::abs(yp), 0.2))
00557             {
00558                 mainlooppos=newt;
00559                 continue;
00560             }
00561             mainlooppos=ihalve;
00562             continue;
00563         }
00564 
00565         /*
00566          * ihalve
00567          */
00568         if (mainlooppos==ihalve)
00569         {
00570             dir=0;
00571             di=0.5;
00572             i=0;
00573             mainlooppos=ihalvecycle;
00574             continue;
00575         }
00576 
00577         /*
00578          * ihalvecycle
00579          */
00580         if (mainlooppos==ihalvecycle)
00581         {
00582             if (i<=99)
00583             {
00584                 if (i!=0)
00585                 {
00586                     x=x0+di*(x1-x0);
00587                     if (equal(x, 1.0))
00588                     {
00589                         x=1.0-CMath::MACHINE_EPSILON;
00590                     }
00591                     if (equal(x, 0.0))
00592                     {
00593                         di=0.5;
00594                         x=x0+di*(x1-x0);
00595                         if (equal(x, 0.0))
00596                         {
00597                             break;
00598                         }
00599                     }
00600                     yyy=incomplete_beta(aaa, bbb, x);
00601                     yp=(x1-x0)/(x1+x0);
00602                     if (less(CMath::abs(yp), dithresh))
00603                     {
00604                         mainlooppos=newt;
00605                         continue;
00606                     }
00607                     yp=(yyy-y0)/y0;
00608                     if (less(CMath::abs(yp), dithresh))
00609                     {
00610                         mainlooppos=newt;
00611                         continue;
00612                     }
00613                 }
00614                 if (less(yyy, y0))
00615                 {
00616                     x0=x;
00617                     yl=yyy;
00618                     if (dir<0)
00619                     {
00620                         dir=0;
00621                         di=0.5;
00622                     }
00623                     else
00624                     {
00625                         if (dir>3)
00626                         {
00627                             di=1.0-(1.0-di)*(1.0-di);
00628                         }
00629                         else
00630                         {
00631                             if (dir>1)
00632                             {
00633                                 di=0.5*di+0.5;
00634                             }
00635                             else
00636                             {
00637                                 di=(y0-yyy)/(yh-yl);
00638                             }
00639                         }
00640                     }
00641                     dir=dir+1;
00642                     if (greater(x0, 0.75))
00643                     {
00644                         if (rflg==1)
00645                         {
00646                             rflg=0;
00647                             aaa=a;
00648                             bbb=b;
00649                             y0=y;
00650                         }
00651                         else
00652                         {
00653                             rflg=1;
00654                             aaa=b;
00655                             bbb=a;
00656                             y0=1.0-y;
00657                         }
00658                         x=1.0-x;
00659                         yyy=incomplete_beta(aaa, bbb, x);
00660                         x0=0.0;
00661                         yl=0.0;
00662                         x1=1.0;
00663                         yh=1.0;
00664                         mainlooppos=ihalve;
00665                         continue;
00666                     }
00667                 }
00668                 else
00669                 {
00670                     x1=x;
00671                     if (rflg==1 && less(x1, CMath::MACHINE_EPSILON))
00672                     {
00673                         x=0.0;
00674                         break;
00675                     }
00676                     yh=yyy;
00677                     if (dir>0)
00678                     {
00679                         dir=0;
00680                         di=0.5;
00681                     }
00682                     else
00683                     {
00684                         if (dir<-3)
00685                         {
00686                             di=di*di;
00687                         }
00688                         else
00689                         {
00690                             if (dir<-1)
00691                             {
00692                                 di=0.5*di;
00693                             }
00694                             else
00695                             {
00696                                 di=(yyy-y0)/(yh-yl);
00697                             }
00698                         }
00699                     }
00700                     dir=dir-1;
00701                 }
00702                 i=i+1;
00703                 mainlooppos=ihalvecycle;
00704                 continue;
00705             }
00706             else
00707             {
00708                 mainlooppos=breakihalvecycle;
00709                 continue;
00710             }
00711         }
00712 
00713         /*
00714          * breakihalvecycle
00715          */
00716         if (mainlooppos==breakihalvecycle)
00717         {
00718             if (greater_equal(x0, 1.0))
00719             {
00720                 x=1.0-CMath::MACHINE_EPSILON;
00721                 break;
00722             }
00723             if (less_equal(x, 0.0))
00724             {
00725                 x=0.0;
00726                 break;
00727             }
00728             mainlooppos=newt;
00729             continue;
00730         }
00731 
00732         /*
00733          * newt
00734          */
00735         if (mainlooppos==newt)
00736         {
00737             if (nflg!=0)
00738             {
00739                 break;
00740             }
00741             nflg=1;
00742             lgm=lgamma(aaa+bbb)-lgamma(aaa)-lgamma(bbb);
00743             i=0;
00744             mainlooppos=newtcycle;
00745             continue;
00746         }
00747 
00748         /*
00749          * newtcycle
00750          */
00751         if (mainlooppos==newtcycle)
00752         {
00753             if (i<=7)
00754             {
00755                 if (i!=0)
00756                 {
00757                     yyy=incomplete_beta(aaa, bbb, x);
00758                 }
00759                 if (less(yyy, yl))
00760                 {
00761                     x=x0;
00762                     yyy=yl;
00763                 }
00764                 else
00765                 {
00766                     if (greater(yyy, yh))
00767                     {
00768                         x=x1;
00769                         yyy=yh;
00770                     }
00771                     else
00772                     {
00773                         if (less(yyy, y0))
00774                         {
00775                             x0=x;
00776                             yl=yyy;
00777                         }
00778                         else
00779                         {
00780                             x1=x;
00781                             yh=yyy;
00782                         }
00783                     }
00784                 }
00785                 if (equal(x, 1.0) || equal(x, 0.0))
00786                 {
00787                     mainlooppos=breaknewtcycle;
00788                     continue;
00789                 }
00790                 d=(aaa-1.0)*CMath::log(x)+(bbb-1.0)*CMath::log(1.0-x)+lgm;
00791                 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER)))
00792                 {
00793                     break;
00794                 }
00795                 if (greater(d, CMath::log(CMath::MAX_REAL_NUMBER)))
00796                 {
00797                     mainlooppos=breaknewtcycle;
00798                     continue;
00799                 }
00800                 d=CMath::exp(d);
00801                 d=(yyy-y0)/d;
00802                 xt=x-d;
00803                 if (less_equal(xt, x0))
00804                 {
00805                     yyy=(x-x0)/(x1-x0);
00806                     xt=x0+0.5*yyy*(x-x0);
00807                     if (less_equal(xt, 0.0))
00808                     {
00809                         mainlooppos=breaknewtcycle;
00810                         continue;
00811                     }
00812                 }
00813                 if (greater_equal(xt, x1))
00814                 {
00815                     yyy=(x1-x)/(x1-x0);
00816                     xt=x1-0.5*yyy*(x1-x);
00817                     if (greater_equal(xt, 1.0))
00818                     {
00819                         mainlooppos=breaknewtcycle;
00820                         continue;
00821                     }
00822                 }
00823                 x=xt;
00824                 if (less(CMath::abs(d/x), 128.0*CMath::MACHINE_EPSILON))
00825                 {
00826                     break;
00827                 }
00828                 i=i+1;
00829                 mainlooppos=newtcycle;
00830                 continue;
00831             }
00832             else
00833             {
00834                 mainlooppos=breaknewtcycle;
00835                 continue;
00836             }
00837         }
00838 
00839         /*
00840          * breaknewtcycle
00841          */
00842         if (mainlooppos==breaknewtcycle)
00843         {
00844             dithresh=256.0*CMath::MACHINE_EPSILON;
00845             mainlooppos=ihalve;
00846             continue;
00847         }
00848     }
00849 
00850     /*
00851      * done
00852      */
00853     if (rflg!=0)
00854     {
00855         if (less_equal(x, CMath::MACHINE_EPSILON))
00856         {
00857             x=1.0-CMath::MACHINE_EPSILON;
00858         }
00859         else
00860         {
00861             x=1.0-x;
00862         }
00863     }
00864     result=x;
00865     return result;
00866 }
00867 
00868 float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x)
00869 {
00870     float64_t t;
00871     float64_t xc;
00872     float64_t w;
00873     float64_t y;
00874     int32_t flag;
00875     float64_t big;
00876     float64_t biginv;
00877     float64_t maxgam;
00878     float64_t minlog;
00879     float64_t maxlog;
00880     float64_t result;
00881 
00882     big=4.503599627370496e15;
00883     biginv=2.22044604925031308085e-16;
00884     maxgam=171.624376956302725;
00885     minlog=CMath::log(CMath::MIN_REAL_NUMBER);
00886     maxlog=CMath::log(CMath::MAX_REAL_NUMBER);
00887 
00888     if (!(greater(a, 0) && greater(b, 0)))
00889     {
00890         SG_SERROR("CStatistics::incomplete_beta(): "
00891         "Domain error\n");
00892     }
00893 
00894     if (!(greater_equal(x, 0) && less_equal(x, 1)))
00895     {
00896         SG_SERROR("CStatistics::incomplete_beta(): "
00897         "Domain error\n");
00898     }
00899 
00900     if (equal(x, 0))
00901     {
00902         result=0;
00903         return result;
00904     }
00905     if (equal(x, 1))
00906     {
00907         result=1;
00908         return result;
00909     }
00910     flag=0;
00911     if (less_equal(b*x, 1.0) && less_equal(x, 0.95))
00912     {
00913         result=ibetaf_incompletebetaps(a, b, x, maxgam);
00914         return result;
00915     }
00916     w=1.0-x;
00917     if (greater(x, a/(a+b)))
00918     {
00919         flag=1;
00920         t=a;
00921         a=b;
00922         b=t;
00923         xc=x;
00924         x=w;
00925     }
00926     else
00927     {
00928         xc=w;
00929     }
00930     if ((flag==1 && less_equal(b*x, 1.0)) && less_equal(x, 0.95))
00931     {
00932         t=ibetaf_incompletebetaps(a, b, x, maxgam);
00933         if (less_equal(t, CMath::MACHINE_EPSILON))
00934         {
00935             result=1.0-CMath::MACHINE_EPSILON;
00936         }
00937         else
00938         {
00939             result=1.0-t;
00940         }
00941         return result;
00942     }
00943     y=x*(a+b-2.0)-(a-1.0);
00944     if (less(y, 0.0))
00945     {
00946         w=ibetaf_incompletebetafe(a, b, x, big, biginv);
00947     }
00948     else
00949     {
00950         w=ibetaf_incompletebetafe2(a, b, x, big, biginv)/xc;
00951     }
00952     y=a*CMath::log(x);
00953     t=b*CMath::log(xc);
00954     if ((less(a+b, maxgam) && less(CMath::abs(y), maxlog))
00955              && less(CMath::abs(t), maxlog))
00956     {
00957         t=CMath::pow(xc, b);
00958         t=t*CMath::pow(x, a);
00959         t=t/a;
00960         t=t*w;
00961         t=t*(tgamma(a+b)/(tgamma(a)*tgamma(b)));
00962         if (flag==1)
00963         {
00964             if (less_equal(t, CMath::MACHINE_EPSILON))
00965             {
00966                 result=1.0-CMath::MACHINE_EPSILON;
00967             }
00968             else
00969             {
00970                 result=1.0-t;
00971             }
00972         }
00973         else
00974         {
00975             result=t;
00976         }
00977         return result;
00978     }
00979     y=y+t+lgamma(a+b)-lgamma(a)-lgamma(b);
00980     y=y+CMath::log(w/a);
00981     if (less(y, minlog))
00982     {
00983         t=0.0;
00984     }
00985     else
00986     {
00987         t=CMath::exp(y);
00988     }
00989     if (flag==1)
00990     {
00991         if (less_equal(t, CMath::MACHINE_EPSILON))
00992         {
00993             t=1.0-CMath::MACHINE_EPSILON;
00994         }
00995         else
00996         {
00997             t=1.0-t;
00998         }
00999     }
01000     result=t;
01001     return result;
01002 }
01003 
01004 float64_t CStatistics::inverse_normal_cdf(float64_t y0, float64_t mean,
01005         float64_t std_dev)
01006 {
01007     return inverse_normal_cdf(y0)*std_dev+mean;
01008 }
01009 
01010 float64_t CStatistics::inverse_normal_cdf(float64_t y0)
01011 {
01012     float64_t expm2;
01013     float64_t s2pi;
01014     float64_t x;
01015     float64_t y;
01016     float64_t z;
01017     float64_t y2;
01018     float64_t x0;
01019     float64_t x1;
01020     int32_t code;
01021     float64_t p0;
01022     float64_t q0;
01023     float64_t p1;
01024     float64_t q1;
01025     float64_t p2;
01026     float64_t q2;
01027     float64_t result;
01028 
01029     expm2=0.13533528323661269189;
01030     s2pi=2.50662827463100050242;
01031     if (less_equal(y0, 0))
01032     {
01033         result=-CMath::MAX_REAL_NUMBER;
01034         return result;
01035     }
01036     if (greater_equal(y0, 1))
01037     {
01038         result=CMath::MAX_REAL_NUMBER;
01039         return result;
01040     }
01041     code=1;
01042     y=y0;
01043     if (greater(y, 1.0-expm2))
01044     {
01045         y=1.0-y;
01046         code=0;
01047     }
01048     if (greater(y, expm2))
01049     {
01050         y=y-0.5;
01051         y2=y*y;
01052         p0=-59.9633501014107895267;
01053         p0=98.0010754185999661536+y2*p0;
01054         p0=-56.6762857469070293439+y2*p0;
01055         p0=13.9312609387279679503+y2*p0;
01056         p0=-1.23916583867381258016+y2*p0;
01057         q0=1;
01058         q0=1.95448858338141759834+y2*q0;
01059         q0=4.67627912898881538453+y2*q0;
01060         q0=86.3602421390890590575+y2*q0;
01061         q0=-225.462687854119370527+y2*q0;
01062         q0=200.260212380060660359+y2*q0;
01063         q0=-82.0372256168333339912+y2*q0;
01064         q0=15.9056225126211695515+y2*q0;
01065         q0=-1.18331621121330003142+y2*q0;
01066         x=y+y*y2*p0/q0;
01067         x=x*s2pi;
01068         result=x;
01069         return result;
01070     }
01071     x=CMath::sqrt(-2.0*CMath::log(y));
01072     x0=x-CMath::log(x)/x;
01073     z=1.0/x;
01074     if (less(x, 8.0))
01075     {
01076         p1=4.05544892305962419923;
01077         p1=31.5251094599893866154+z*p1;
01078         p1=57.1628192246421288162+z*p1;
01079         p1=44.0805073893200834700+z*p1;
01080         p1=14.6849561928858024014+z*p1;
01081         p1=2.18663306850790267539+z*p1;
01082         p1=-1.40256079171354495875*0.1+z*p1;
01083         p1=-3.50424626827848203418*0.01+z*p1;
01084         p1=-8.57456785154685413611*0.0001+z*p1;
01085         q1=1;
01086         q1=15.7799883256466749731+z*q1;
01087         q1=45.3907635128879210584+z*q1;
01088         q1=41.3172038254672030440+z*q1;
01089         q1=15.0425385692907503408+z*q1;
01090         q1=2.50464946208309415979+z*q1;
01091         q1=-1.42182922854787788574*0.1+z*q1;
01092         q1=-3.80806407691578277194*0.01+z*q1;
01093         q1=-9.33259480895457427372*0.0001+z*q1;
01094         x1=z*p1/q1;
01095     }
01096     else
01097     {
01098         p2=3.23774891776946035970;
01099         p2=6.91522889068984211695+z*p2;
01100         p2=3.93881025292474443415+z*p2;
01101         p2=1.33303460815807542389+z*p2;
01102         p2=2.01485389549179081538*0.1+z*p2;
01103         p2=1.23716634817820021358*0.01+z*p2;
01104         p2=3.01581553508235416007*0.0001+z*p2;
01105         p2=2.65806974686737550832*0.000001+z*p2;
01106         p2=6.23974539184983293730*0.000000001+z*p2;
01107         q2=1;
01108         q2=6.02427039364742014255+z*q2;
01109         q2=3.67983563856160859403+z*q2;
01110         q2=1.37702099489081330271+z*q2;
01111         q2=2.16236993594496635890*0.1+z*q2;
01112         q2=1.34204006088543189037*0.01+z*q2;
01113         q2=3.28014464682127739104*0.0001+z*q2;
01114         q2=2.89247864745380683936*0.000001+z*q2;
01115         q2=6.79019408009981274425*0.000000001+z*q2;
01116         x1=z*p2/q2;
01117     }
01118     x=x0-x1;
01119     if (code!=0)
01120     {
01121         x=-x;
01122     }
01123     result=x;
01124     return result;
01125 }
01126 
01127 float64_t CStatistics::ibetaf_incompletebetaps(float64_t a, float64_t b,
01128         float64_t x, float64_t maxgam)
01129 {
01130     float64_t s;
01131     float64_t t;
01132     float64_t u;
01133     float64_t v;
01134     float64_t n;
01135     float64_t t1;
01136     float64_t z;
01137     float64_t ai;
01138     float64_t result;
01139 
01140     ai=1.0/a;
01141     u=(1.0-b)*x;
01142     v=u/(a+1.0);
01143     t1=v;
01144     t=u;
01145     n=2.0;
01146     s=0.0;
01147     z=CMath::MACHINE_EPSILON*ai;
01148     while (greater(CMath::abs(v), z))
01149     {
01150         u=(n-b)*x/n;
01151         t=t*u;
01152         v=t/(a+n);
01153         s=s+v;
01154         n=n+1.0;
01155     }
01156     s=s+t1;
01157     s=s+ai;
01158     u=a*CMath::log(x);
01159     if (less(a+b, maxgam)
01160              && less(CMath::abs(u), CMath::log(CMath::MAX_REAL_NUMBER)))
01161     {
01162         t=tgamma(a+b)/(tgamma(a)*tgamma(b));
01163         s=s*t*CMath::pow(x, a);
01164     }
01165     else
01166     {
01167         t=lgamma(a+b)-lgamma(a)-lgamma(b)+u+CMath::log(s);
01168         if (less(t, CMath::log(CMath::MIN_REAL_NUMBER)))
01169         {
01170             s=0.0;
01171         }
01172         else
01173         {
01174             s=CMath::exp(t);
01175         }
01176     }
01177     result=s;
01178     return result;
01179 }
01180 
01181 float64_t CStatistics::ibetaf_incompletebetafe(float64_t a, float64_t b,
01182         float64_t x, float64_t big, float64_t biginv)
01183 {
01184     float64_t xk;
01185     float64_t pk;
01186     float64_t pkm1;
01187     float64_t pkm2;
01188     float64_t qk;
01189     float64_t qkm1;
01190     float64_t qkm2;
01191     float64_t k1;
01192     float64_t k2;
01193     float64_t k3;
01194     float64_t k4;
01195     float64_t k5;
01196     float64_t k6;
01197     float64_t k7;
01198     float64_t k8;
01199     float64_t r;
01200     float64_t t;
01201     float64_t ans;
01202     float64_t thresh;
01203     int32_t n;
01204     float64_t result;
01205 
01206     k1=a;
01207     k2=a+b;
01208     k3=a;
01209     k4=a+1.0;
01210     k5=1.0;
01211     k6=b-1.0;
01212     k7=k4;
01213     k8=a+2.0;
01214     pkm2=0.0;
01215     qkm2=1.0;
01216     pkm1=1.0;
01217     qkm1=1.0;
01218     ans=1.0;
01219     r=1.0;
01220     n=0;
01221     thresh=3.0*CMath::MACHINE_EPSILON;
01222     do
01223     {
01224         xk=-x*k1*k2/(k3*k4);
01225         pk=pkm1+pkm2*xk;
01226         qk=qkm1+qkm2*xk;
01227         pkm2=pkm1;
01228         pkm1=pk;
01229         qkm2=qkm1;
01230         qkm1=qk;
01231         xk=x*k5*k6/(k7*k8);
01232         pk=pkm1+pkm2*xk;
01233         qk=qkm1+qkm2*xk;
01234         pkm2=pkm1;
01235         pkm1=pk;
01236         qkm2=qkm1;
01237         qkm1=qk;
01238         if (not_equal(qk, 0))
01239         {
01240             r=pk/qk;
01241         }
01242         if (not_equal(r, 0))
01243         {
01244             t=CMath::abs((ans-r)/r);
01245             ans=r;
01246         }
01247         else
01248         {
01249             t=1.0;
01250         }
01251         if (less(t, thresh))
01252         {
01253             break;
01254         }
01255         k1=k1+1.0;
01256         k2=k2+1.0;
01257         k3=k3+2.0;
01258         k4=k4+2.0;
01259         k5=k5+1.0;
01260         k6=k6-1.0;
01261         k7=k7+2.0;
01262         k8=k8+2.0;
01263         if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01264         {
01265             pkm2=pkm2*biginv;
01266             pkm1=pkm1*biginv;
01267             qkm2=qkm2*biginv;
01268             qkm1=qkm1*biginv;
01269         }
01270         if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01271         {
01272             pkm2=pkm2*big;
01273             pkm1=pkm1*big;
01274             qkm2=qkm2*big;
01275             qkm1=qkm1*big;
01276         }
01277         n=n+1;
01278     }
01279     while (n!=300);
01280     result=ans;
01281     return result;
01282 }
01283 
01284 float64_t CStatistics::ibetaf_incompletebetafe2(float64_t a, float64_t b,
01285         float64_t x, float64_t big, float64_t biginv)
01286 {
01287     float64_t xk;
01288     float64_t pk;
01289     float64_t pkm1;
01290     float64_t pkm2;
01291     float64_t qk;
01292     float64_t qkm1;
01293     float64_t qkm2;
01294     float64_t k1;
01295     float64_t k2;
01296     float64_t k3;
01297     float64_t k4;
01298     float64_t k5;
01299     float64_t k6;
01300     float64_t k7;
01301     float64_t k8;
01302     float64_t r;
01303     float64_t t;
01304     float64_t ans;
01305     float64_t z;
01306     float64_t thresh;
01307     int32_t n;
01308     float64_t result;
01309 
01310     k1=a;
01311     k2=b-1.0;
01312     k3=a;
01313     k4=a+1.0;
01314     k5=1.0;
01315     k6=a+b;
01316     k7=a+1.0;
01317     k8=a+2.0;
01318     pkm2=0.0;
01319     qkm2=1.0;
01320     pkm1=1.0;
01321     qkm1=1.0;
01322     z=x/(1.0-x);
01323     ans=1.0;
01324     r=1.0;
01325     n=0;
01326     thresh=3.0*CMath::MACHINE_EPSILON;
01327     do
01328     {
01329         xk=-z*k1*k2/(k3*k4);
01330         pk=pkm1+pkm2*xk;
01331         qk=qkm1+qkm2*xk;
01332         pkm2=pkm1;
01333         pkm1=pk;
01334         qkm2=qkm1;
01335         qkm1=qk;
01336         xk=z*k5*k6/(k7*k8);
01337         pk=pkm1+pkm2*xk;
01338         qk=qkm1+qkm2*xk;
01339         pkm2=pkm1;
01340         pkm1=pk;
01341         qkm2=qkm1;
01342         qkm1=qk;
01343         if (not_equal(qk, 0))
01344         {
01345             r=pk/qk;
01346         }
01347         if (not_equal(r, 0))
01348         {
01349             t=CMath::abs((ans-r)/r);
01350             ans=r;
01351         }
01352         else
01353         {
01354             t=1.0;
01355         }
01356         if (less(t, thresh))
01357         {
01358             break;
01359         }
01360         k1=k1+1.0;
01361         k2=k2-1.0;
01362         k3=k3+2.0;
01363         k4=k4+2.0;
01364         k5=k5+1.0;
01365         k6=k6+1.0;
01366         k7=k7+2.0;
01367         k8=k8+2.0;
01368         if (greater(CMath::abs(qk)+CMath::abs(pk), big))
01369         {
01370             pkm2=pkm2*biginv;
01371             pkm1=pkm1*biginv;
01372             qkm2=qkm2*biginv;
01373             qkm1=qkm1*biginv;
01374         }
01375         if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv))
01376         {
01377             pkm2=pkm2*big;
01378             pkm1=pkm1*big;
01379             qkm2=qkm2*big;
01380             qkm1=qkm1*big;
01381         }
01382         n=n+1;
01383     }
01384     while (n!=300);
01385     result=ans;
01386     return result;
01387 }
01388 
01389 float64_t CStatistics::incomplete_gamma(float64_t a, float64_t x)
01390 {
01391     float64_t igammaepsilon;
01392     float64_t ans;
01393     float64_t ax;
01394     float64_t c;
01395     float64_t r;
01396     float64_t result;
01397 
01398     igammaepsilon=0.000000000000001;
01399     if (less_equal(x, 0) || less_equal(a, 0))
01400     {
01401         result=0;
01402         return result;
01403     }
01404     if (greater(x, 1) && greater(x, a))
01405     {
01406         result=1-incomplete_gamma_completed(a, x);
01407         return result;
01408     }
01409     ax=a*CMath::log(x)-x-lgamma(a);
01410     if (less(ax, -709.78271289338399))
01411     {
01412         result=0;
01413         return result;
01414     }
01415     ax=CMath::exp(ax);
01416     r=a;
01417     c=1;
01418     ans=1;
01419     do
01420     {
01421         r=r+1;
01422         c=c*x/r;
01423         ans=ans+c;
01424     }
01425     while (greater(c/ans, igammaepsilon));
01426     result=ans*ax/a;
01427     return result;
01428 }
01429 
01430 float64_t CStatistics::incomplete_gamma_completed(float64_t a, float64_t x)
01431 {
01432     float64_t igammaepsilon;
01433     float64_t igammabignumber;
01434     float64_t igammabignumberinv;
01435     float64_t ans;
01436     float64_t ax;
01437     float64_t c;
01438     float64_t yc;
01439     float64_t r;
01440     float64_t t;
01441     float64_t y;
01442     float64_t z;
01443     float64_t pk;
01444     float64_t pkm1;
01445     float64_t pkm2;
01446     float64_t qk;
01447     float64_t qkm1;
01448     float64_t qkm2;
01449     float64_t result;
01450 
01451     igammaepsilon=0.000000000000001;
01452     igammabignumber=4503599627370496.0;
01453     igammabignumberinv=2.22044604925031308085*0.0000000000000001;
01454     if (less_equal(x, 0) || less_equal(a, 0))
01455     {
01456         result=1;
01457         return result;
01458     }
01459     if (less(x, 1) || less(x, a))
01460     {
01461         result=1-incomplete_gamma(a, x);
01462         return result;
01463     }
01464     ax=a*CMath::log(x)-x-lgamma(a);
01465     if (less(ax, -709.78271289338399))
01466     {
01467         result=0;
01468         return result;
01469     }
01470     ax=CMath::exp(ax);
01471     y=1-a;
01472     z=x+y+1;
01473     c=0;
01474     pkm2=1;
01475     qkm2=x;
01476     pkm1=x+1;
01477     qkm1=z*x;
01478     ans=pkm1/qkm1;
01479     do
01480     {
01481         c=c+1;
01482         y=y+1;
01483         z=z+2;
01484         yc=y*c;
01485         pk=pkm1*z-pkm2*yc;
01486         qk=qkm1*z-qkm2*yc;
01487         if (not_equal(qk, 0))
01488         {
01489             r=pk/qk;
01490             t=CMath::abs((ans-r)/r);
01491             ans=r;
01492         }
01493         else
01494         {
01495             t=1;
01496         }
01497         pkm2=pkm1;
01498         pkm1=pk;
01499         qkm2=qkm1;
01500         qkm1=qk;
01501         if (greater(CMath::abs(pk), igammabignumber))
01502         {
01503             pkm2=pkm2*igammabignumberinv;
01504             pkm1=pkm1*igammabignumberinv;
01505             qkm2=qkm2*igammabignumberinv;
01506             qkm1=qkm1*igammabignumberinv;
01507         }
01508     }
01509     while (greater(t, igammaepsilon));
01510     result=ans*ax;
01511     return result;
01512 }
01513 
01514 float64_t CStatistics::gamma_cdf(float64_t x, float64_t a, float64_t b)
01515 {
01516     /* definition of wikipedia: incomplete gamma devised by true gamma */
01517     return incomplete_gamma(a, x/b);
01518 }
01519 
01520 float64_t CStatistics::inverse_gamma_cdf(float64_t p, float64_t a,
01521         float64_t b)
01522 {
01523     /* inverse of gamma(a,b) CDF is
01524      * inverse_incomplete_gamma_completed(a, 1. - p) * b */
01525     return inverse_incomplete_gamma_completed(a, 1-p)*b;
01526 }
01527 
01528 float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a,
01529         float64_t y0)
01530 {
01531     float64_t igammaepsilon;
01532     float64_t iinvgammabignumber;
01533     float64_t x0;
01534     float64_t x1;
01535     float64_t x;
01536     float64_t yl;
01537     float64_t yh;
01538     float64_t y;
01539     float64_t d;
01540     float64_t lgm;
01541     float64_t dithresh;
01542     int32_t i;
01543     int32_t dir;
01544     float64_t result;
01545 
01546     igammaepsilon=0.000000000000001;
01547     iinvgammabignumber=4503599627370496.0;
01548     x0=iinvgammabignumber;
01549     yl=0;
01550     x1=0;
01551     yh=1;
01552     dithresh=5*igammaepsilon;
01553     d=1/(9*a);
01554     y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d);
01555     x=a*y*y*y;
01556     lgm=lgamma(a);
01557     i=0;
01558     while (i<10)
01559     {
01560         if (greater(x, x0) || less(x, x1))
01561         {
01562             d=0.0625;
01563             break;
01564         }
01565         y=incomplete_gamma_completed(a, x);
01566         if (less(y, yl) || greater(y, yh))
01567         {
01568             d=0.0625;
01569             break;
01570         }
01571         if (less(y, y0))
01572         {
01573             x0=x;
01574             yl=y;
01575         }
01576         else
01577         {
01578             x1=x;
01579             yh=y;
01580         }
01581         d=(a-1)*CMath::log(x)-x-lgm;
01582         if (less(d, -709.78271289338399))
01583         {
01584             d=0.0625;
01585             break;
01586         }
01587         d=-CMath::exp(d);
01588         d=(y-y0)/d;
01589         if (less(CMath::abs(d/x), igammaepsilon))
01590         {
01591             result=x;
01592             return result;
01593         }
01594         x=x-d;
01595         i=i+1;
01596     }
01597     if (equal(x0, iinvgammabignumber))
01598     {
01599         if (less_equal(x, 0))
01600         {
01601             x=1;
01602         }
01603         while (equal(x0, iinvgammabignumber))
01604         {
01605             x=(1+d)*x;
01606             y=incomplete_gamma_completed(a, x);
01607             if (less(y, y0))
01608             {
01609                 x0=x;
01610                 yl=y;
01611                 break;
01612             }
01613             d=d+d;
01614         }
01615     }
01616     d=0.5;
01617     dir=0;
01618     i=0;
01619     while (i<400)
01620     {
01621         x=x1+d*(x0-x1);
01622         y=incomplete_gamma_completed(a, x);
01623         lgm=(x0-x1)/(x1+x0);
01624         if (less(CMath::abs(lgm), dithresh))
01625         {
01626             break;
01627         }
01628         lgm=(y-y0)/y0;
01629         if (less(CMath::abs(lgm), dithresh))
01630         {
01631             break;
01632         }
01633         if (less_equal(x, 0.0))
01634         {
01635             break;
01636         }
01637         if (greater_equal(y, y0))
01638         {
01639             x1=x;
01640             yh=y;
01641             if (dir<0)
01642             {
01643                 dir=0;
01644                 d=0.5;
01645             }
01646             else
01647             {
01648                 if (dir>1)
01649                 {
01650                     d=0.5*d+0.5;
01651                 }
01652                 else
01653                 {
01654                     d=(y0-yl)/(yh-yl);
01655                 }
01656             }
01657             dir=dir+1;
01658         }
01659         else
01660         {
01661             x0=x;
01662             yl=y;
01663             if (dir>0)
01664             {
01665                 dir=0;
01666                 d=0.5;
01667             }
01668             else
01669             {
01670                 if (dir<-1)
01671                 {
01672                     d=0.5*d;
01673                 }
01674                 else
01675                 {
01676                     d=(y0-yl)/(yh-yl);
01677                 }
01678             }
01679             dir=dir-1;
01680         }
01681         i=i+1;
01682     }
01683     result=x;
01684     return result;
01685 }
01686 
01687 float64_t CStatistics::normal_cdf(float64_t x, float64_t std_dev)
01688 {
01689     return 0.5*(error_function(x/std_dev/1.41421356237309504880)+1);
01690 }
01691 
01692 float64_t CStatistics::lnormal_cdf(float64_t x)
01693 {
01694     float64_t result;
01695 
01696     if (x<-10.0)
01697     {
01698         float64_t x2=x*x;
01699         float64_t s=1.0-1.0/x2*(1.0-3.0/x2*(1.0-5.0/x2*(1.0-7.0/x2)));
01700         result=-0.5*CMath::log(2*CMath::PI)-x2*0.5-CMath::log(-x)+CMath::log(s);
01701     }
01702     else
01703         result=CMath::log(normal_cdf(x));
01704 
01705     return result;
01706 }
01707 
01708 float64_t CStatistics::error_function(float64_t x)
01709 {
01710     float64_t xsq;
01711     float64_t s;
01712     float64_t p;
01713     float64_t q;
01714     float64_t result;
01715 
01716     s=CMath::sign(x);
01717     x=CMath::abs(x);
01718     if (less(x, 0.5))
01719     {
01720         xsq=x*x;
01721         p=0.007547728033418631287834;
01722         p=0.288805137207594084924010+xsq*p;
01723         p=14.3383842191748205576712+xsq*p;
01724         p=38.0140318123903008244444+xsq*p;
01725         p=3017.82788536507577809226+xsq*p;
01726         p=7404.07142710151470082064+xsq*p;
01727         p=80437.3630960840172832162+xsq*p;
01728         q=0.0;
01729         q=1.00000000000000000000000+xsq*q;
01730         q=38.0190713951939403753468+xsq*q;
01731         q=658.070155459240506326937+xsq*q;
01732         q=6379.60017324428279487120+xsq*q;
01733         q=34216.5257924628539769006+xsq*q;
01734         q=80437.3630960840172826266+xsq*q;
01735         result=s*1.1283791670955125738961589031*x*p/q;
01736         return result;
01737     }
01738     if (greater_equal(x, 10))
01739     {
01740         result=s;
01741         return result;
01742     }
01743     result=s*(1-error_function_complement(x));
01744     return result;
01745 }
01746 
01747 float64_t CStatistics::error_function_complement(float64_t x)
01748 {
01749     float64_t p;
01750     float64_t q;
01751     float64_t result;
01752 
01753     if (less(x, 0))
01754     {
01755         result=2-error_function_complement(-x);
01756         return result;
01757     }
01758     if (less(x, 0.5))
01759     {
01760         result=1.0-error_function(x);
01761         return result;
01762     }
01763     if (greater_equal(x, 10))
01764     {
01765         result=0;
01766         return result;
01767     }
01768     p=0.0;
01769     p=0.5641877825507397413087057563+x*p;
01770     p=9.675807882987265400604202961+x*p;
01771     p=77.08161730368428609781633646+x*p;
01772     p=368.5196154710010637133875746+x*p;
01773     p=1143.262070703886173606073338+x*p;
01774     p=2320.439590251635247384768711+x*p;
01775     p=2898.0293292167655611275846+x*p;
01776     p=1826.3348842295112592168999+x*p;
01777     q=1.0;
01778     q=17.14980943627607849376131193+x*q;
01779     q=137.1255960500622202878443578+x*q;
01780     q=661.7361207107653469211984771+x*q;
01781     q=2094.384367789539593790281779+x*q;
01782     q=4429.612803883682726711528526+x*q;
01783     q=6089.5424232724435504633068+x*q;
01784     q=4958.82756472114071495438422+x*q;
01785     q=1826.3348842295112595576438+x*q;
01786     result=CMath::exp(-x*x)*p/q;
01787     return result;
01788 }
01789 
01790 SGVector<float64_t> CStatistics::fishers_exact_test_for_multiple_2x3_tables(
01791         SGMatrix<float64_t> tables)
01792 {
01793     SGMatrix<float64_t> table(NULL, 2, 3, false);
01794     int32_t len=tables.num_cols/3;
01795 
01796     SGVector<float64_t> v(len);
01797     for (int32_t i=0; i<len; i++)
01798     {
01799         table.matrix=&tables.matrix[2*3*i];
01800         v.vector[i]=fishers_exact_test_for_2x3_table(table);
01801     }
01802     return v;
01803 }
01804 
01805 float64_t CStatistics::fishers_exact_test_for_2x3_table(
01806         SGMatrix<float64_t> table)
01807 {
01808     ASSERT(table.num_rows==2)
01809     ASSERT(table.num_cols==3)
01810 
01811     int32_t m_len=3+2;
01812     float64_t* m=SG_MALLOC(float64_t, 3+2);
01813     m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4];
01814     m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5];
01815     m[2]=table.matrix[0]+table.matrix[1];
01816     m[3]=table.matrix[2]+table.matrix[3];
01817     m[4]=table.matrix[4]+table.matrix[5];
01818 
01819     float64_t n=SGVector<float64_t>::sum(m, m_len)/2.0;
01820     int32_t x_len=2*3*CMath::sq(SGVector<float64_t>::max(m, m_len));
01821     float64_t* x=SG_MALLOC(float64_t, x_len);
01822     SGVector<float64_t>::fill_vector(x, x_len, 0.0);
01823 
01824     float64_t log_nom=0.0;
01825     for (int32_t i=0; i<3+2; i++)
01826         log_nom+=lgamma(m[i]+1);
01827     log_nom-=lgamma(n+1.0);
01828 
01829     float64_t log_denomf=0;
01830     floatmax_t log_denom=0;
01831 
01832     for (int32_t i=0; i<3*2; i++)
01833     {
01834         log_denom+=lgammal((floatmax_t)table.matrix[i]+1);
01835         log_denomf+=lgammal((floatmax_t)table.matrix[i]+1);
01836     }
01837 
01838     floatmax_t prob_table_log=log_nom-log_denom;
01839 
01840     int32_t dim1=CMath::min(m[0], m[2]);
01841 
01842     //traverse all possible tables with given m
01843     int32_t counter=0;
01844     for (int32_t k=0; k<=dim1; k++)
01845     {
01846         for (int32_t l=CMath::max(0.0, m[0]-m[4]-k);
01847                 l<=CMath::min(m[0]-k, m[3]); l++)
01848         {
01849             x[0+0*2+counter*2*3]=k;
01850             x[0+1*2+counter*2*3]=l;
01851             x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3];
01852             x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3];
01853             x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3];
01854             x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3];
01855 
01856             counter++;
01857         }
01858     }
01859 
01860 //#define DEBUG_FISHER_TABLE
01861 #ifdef DEBUG_FISHER_TABLE
01862     SG_SPRINT("counter=%d\n", counter)
01863     SG_SPRINT("dim1=%d\n", dim1)
01864     SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3]))
01865     SG_SPRINT("n=%g\n", n)
01866     SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log)
01867     SG_SPRINT("log_denomf=%.18g\n", log_denomf)
01868     SG_SPRINT("log_denom=%.18Lg\n", log_denom)
01869     SG_SPRINT("log_nom=%.18g\n", log_nom)
01870     display_vector(m, m_len, "marginals");
01871     display_vector(x, 2*3*counter, "x");
01872 #endif // DEBUG_FISHER_TABLE
01873 
01874     floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter);
01875     SGVector<floatmax_t>::fill_vector(log_denom_vec, counter, (floatmax_t)0.0);
01876 
01877     for (int32_t k=0; k<counter; k++)
01878     {
01879         for (int32_t j=0; j<3; j++)
01880         {
01881             for (int32_t i=0; i<2; i++)
01882                 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0);
01883         }
01884     }
01885 
01886     for (int32_t i=0; i<counter; i++)
01887         log_denom_vec[i]=log_nom-log_denom_vec[i];
01888 
01889 #ifdef DEBUG_FISHER_TABLE
01890     display_vector(log_denom_vec, counter, "log_denom_vec");
01891 #endif // DEBUG_FISHER_TABLE
01892 
01893     float64_t nonrand_p=-CMath::INFTY;
01894     for (int32_t i=0; i<counter; i++)
01895     {
01896         if (log_denom_vec[i]<=prob_table_log)
01897             nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]);
01898     }
01899 
01900 #ifdef DEBUG_FISHER_TABLE
01901     SG_SPRINT("nonrand_p=%.18g\n", nonrand_p)
01902     SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p))
01903 #endif // DEBUG_FISHER_TABLE
01904     nonrand_p=CMath::exp(nonrand_p);
01905 
01906     SG_FREE(log_denom_vec);
01907     SG_FREE(x);
01908     SG_FREE(m);
01909 
01910     return nonrand_p;
01911 }
01912 
01913 float64_t CStatistics::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
01914 {
01915     double e=0;
01916 
01917     for (int32_t i=0; i<len; i++)
01918         for (int32_t j=0; j<len; j++)
01919             e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
01920 
01921     return (float64_t)e;
01922 }
01923 
01924 float64_t CStatistics::relative_entropy(float64_t* p, float64_t* q, int32_t len)
01925 {
01926     double e=0;
01927 
01928     for (int32_t i=0; i<len; i++)
01929         e+=exp(p[i])*(p[i]-q[i]);
01930 
01931     return (float64_t)e;
01932 }
01933 
01934 float64_t CStatistics::entropy(float64_t* p, int32_t len)
01935 {
01936     double e=0;
01937 
01938     for (int32_t i=0; i<len; i++)
01939         e-=exp(p[i])*p[i];
01940 
01941     return (float64_t)e;
01942 }
01943 
01944 SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N)
01945 {
01946     REQUIRE(sample_size<N,
01947             "sample size should be less than number of indices\n");
01948     int32_t* idxs=SG_MALLOC(int32_t,N);
01949     int32_t i, rnd;
01950     int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size);
01951 
01952     // reservoir sampling
01953     for (i=0; i<N; i++)
01954         idxs[i]=i;
01955     for (i=0; i<sample_size; i++)
01956         permuted_idxs[i]=idxs[i];
01957     for (i=sample_size; i<N; i++)
01958     {
01959         rnd=CMath::random(1, i);
01960         if (rnd<sample_size)
01961             permuted_idxs[rnd]=idxs[i];
01962     }
01963     SG_FREE(idxs);
01964 
01965     SGVector<int32_t> result=SGVector<int32_t>(permuted_idxs, sample_size);
01966     result.qsort();
01967     return result;
01968 }
01969 
01970 float64_t CStatistics::dlgamma(float64_t x)
01971 {
01972     float64_t result=0.0;
01973 
01974     if (x<0.0)
01975     {
01976         // use reflection formula
01977         x=1.0-x;
01978         result=CMath::PI/CMath::tan(CMath::PI*x);
01979     }
01980 
01981     // make x>7 for approximation
01982     // (use reccurent formula: psi(x+1) = psi(x) + 1/x)
01983     while (x<=7.0)
01984     {
01985         result-=1.0/x;
01986         x++;
01987     }
01988 
01989     // perform approximation
01990     x-=0.5;
01991     result+=log(x);
01992 
01993     float64_t coeff[10]={
01994         0.04166666666666666667,
01995         -0.00729166666666666667,
01996         0.00384424603174603175,
01997         -0.00413411458333333333,
01998         0.00756096117424242424,
01999         -0.02108249687595390720,
02000         0.08332316080729166666,
02001         -0.44324627670587277880,
02002         3.05393103044765369366,
02003         -26.45616165999210241989};
02004 
02005     float64_t power=1.0;
02006     float64_t ix2=1.0/CMath::sq(x);
02007 
02008     // perform approximation
02009     for (index_t i=0; i<10; i++)
02010     {
02011         power*=ix2;
02012         result+=coeff[i]*power;
02013     }
02014 
02015     return result;
02016 }
02017 
02018 #ifdef HAVE_EIGEN3
02019 float64_t CStatistics::log_det(SGMatrix<float64_t> m)
02020 {
02021     /* map the matrix to eigen3 to perform cholesky */
02022     Map<MatrixXd> M(m.matrix, m.num_rows, m.num_cols);
02023 
02024     /* computing the cholesky decomposition */
02025     LLT<MatrixXd> llt;
02026     llt.compute(M);
02027 
02028     /* the lower triangular matrix */
02029     MatrixXd l = llt.matrixL();
02030 
02031     /* calculate the log-determinant */
02032     VectorXd diag = l.diagonal();
02033     float64_t retval = 0.0;
02034     for( int32_t i = 0; i < diag.rows(); ++i ) {
02035         retval += log(diag(i));
02036     }
02037     retval *= 2;
02038 
02039     return retval;
02040 }
02041 
02042 float64_t CStatistics::log_det(const SGSparseMatrix<float64_t> m)
02043 {
02044     typedef SparseMatrix<float64_t> MatrixType;
02045     const MatrixType &M=EigenSparseUtil<float64_t>::toEigenSparse(m);
02046 
02047     SimplicialLLT<MatrixType> llt;
02048 
02049     // factorize using cholesky with amd permutation
02050     llt.compute(M);
02051     MatrixType L=llt.matrixL();
02052 
02053     // calculate the log-determinant
02054     float64_t retval=0.0;
02055     for( index_t i=0; i<M.rows(); ++i )
02056         retval+=log(L.coeff(i,i));
02057     retval*=2;
02058 
02059     return retval;
02060 }
02061 
02062 SGMatrix<float64_t> CStatistics::sample_from_gaussian(SGVector<float64_t> mean,
02063     SGMatrix<float64_t> cov, int32_t N, bool precision_matrix)
02064 {
02065     REQUIRE(cov.num_rows>0,
02066         "CStatistics::sample_from_gaussian(): \
02067         Number of covariance rows must be positive!\n");
02068     REQUIRE(cov.num_cols>0,
02069         "CStatistics::sample_from_gaussian(): \
02070         Number of covariance cols must be positive!\n");
02071     REQUIRE(cov.matrix,
02072         "CStatistics::sample_from_gaussian(): \
02073         Covariance is not initialized!\n");
02074     REQUIRE(cov.num_rows==cov.num_cols,
02075         "CStatistics::sample_from_gaussian(): \
02076         Covariance should be square matrix!\n");
02077     REQUIRE(mean.vlen==cov.num_rows,
02078         "CStatistics::sample_from_gaussian(): \
02079         Mean and covariance dimension mismatch!\n");
02080 
02081     int32_t dim=mean.vlen;
02082     Map<VectorXd> mu(mean.vector, mean.vlen);
02083     Map<MatrixXd> c(cov.matrix, cov.num_rows, cov.num_cols);
02084 
02085     // generate samples, z,  from N(0, I), DxN
02086     SGMatrix<float64_t> S(dim, N);
02087     for( int32_t j=0; j<N; ++j )
02088         for( int32_t i=0; i<dim; ++i )
02089             S(i,j)=CMath::randn_double();
02090 
02091     // the cholesky factorization c=L*U
02092     MatrixXd U=c.llt().matrixU();
02093 
02094     // generate samples, x, from N(mean, cov) or N(mean, cov^-1)
02095     // return samples of dimension NxD
02096     if( precision_matrix )
02097     {
02098         // here we have U*x=z, to solve this, we use cholesky again
02099         Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols);
02100         LDLT<MatrixXd> ldlt;
02101         ldlt.compute(U);
02102         s=ldlt.solve(s);
02103     }
02104 
02105     SGMatrix<float64_t>::transpose_matrix(S.matrix, S.num_rows, S.num_cols);
02106 
02107     if( !precision_matrix )
02108     {
02109         // here we need to find x=L*z, so, x'=z'*L' i.e. x'=z'*U
02110         Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols);
02111         s=s*U;
02112     }
02113 
02114     // add the mean
02115     Map<MatrixXd> x(S.matrix, S.num_rows, S.num_cols);
02116     for( int32_t i=0; i<N; ++i )
02117         x.row(i)+=mu;
02118 
02119     return S;
02120 }
02121 
02122 SGMatrix<float64_t> CStatistics::sample_from_gaussian(SGVector<float64_t> mean,
02123  SGSparseMatrix<float64_t> cov, int32_t N, bool precision_matrix)
02124 {
02125     REQUIRE(cov.num_vectors>0,
02126         "CStatistics::sample_from_gaussian(): \
02127         Number of covariance rows must be positive!\n");
02128     REQUIRE(cov.num_features>0,
02129         "CStatistics::sample_from_gaussian(): \
02130         Number of covariance cols must be positive!\n");
02131     REQUIRE(cov.sparse_matrix,
02132         "CStatistics::sample_from_gaussian(): \
02133         Covariance is not initialized!\n");
02134     REQUIRE(cov.num_vectors==cov.num_features,
02135         "CStatistics::sample_from_gaussian(): \
02136         Covariance should be square matrix!\n");
02137     REQUIRE(mean.vlen==cov.num_vectors,
02138         "CStatistics::sample_from_gaussian(): \
02139         Mean and covariance dimension mismatch!\n");
02140 
02141     int32_t dim=mean.vlen;
02142     Map<VectorXd> mu(mean.vector, mean.vlen);
02143 
02144     typedef SparseMatrix<float64_t> MatrixType;
02145     const MatrixType &c=EigenSparseUtil<float64_t>::toEigenSparse(cov);
02146 
02147     SimplicialLLT<MatrixType> llt;
02148 
02149     // generate samples, z,  from N(0, I), DxN
02150     SGMatrix<float64_t> S(dim, N);
02151     for( int32_t j=0; j<N; ++j )
02152         for( int32_t i=0; i<dim; ++i )
02153             S(i,j)=CMath::randn_double();
02154 
02155     Map<MatrixXd> s(S.matrix, S.num_rows, S.num_cols);
02156 
02157     // the cholesky factorization P*c*P^-1 = LP*UP, with LP=P*L, UP=U*P^-1
02158     llt.compute(c);
02159     MatrixType LP=llt.matrixL();
02160     MatrixType UP=llt.matrixU();
02161 
02162     // generate samples, x, from N(mean, cov) or N(mean, cov^-1)
02163     // return samples of dimension NxD
02164     if( precision_matrix )
02165     {
02166         // here we have UP*xP=z, to solve this, we use cholesky again
02167         SimplicialLLT<MatrixType> lltUP;
02168         lltUP.compute(UP);
02169         s=lltUP.solve(s);
02170     }
02171     else
02172     {
02173         // here we need to find xP=LP*z
02174         s=LP*s;
02175     }
02176 
02177     // permute the samples back with x=P^-1*xP
02178     s=llt.permutationPinv()*s;
02179 
02180     SGMatrix<float64_t>::transpose_matrix(S.matrix, S.num_rows, S.num_cols);
02181     // add the mean
02182     Map<MatrixXd> x(S.matrix, S.num_rows, S.num_cols);
02183     for( int32_t i=0; i<N; ++i )
02184         x.row(i)+=mu;
02185 
02186     return S;
02187 }
02188 
02189 #endif //HAVE_EIGEN3
02190 
02191 CStatistics::SigmoidParamters CStatistics::fit_sigmoid(SGVector<float64_t> scores)
02192 {
02193     SG_SDEBUG("entering CStatistics::fit_sigmoid()\n")
02194 
02195     REQUIRE(scores.vector, "CStatistics::fit_sigmoid() requires "
02196             "scores vector!\n");
02197 
02198     /* count prior0 and prior1 if needed */
02199     int32_t prior0=0;
02200     int32_t prior1=0;
02201     SG_SDEBUG("counting number of positive and negative labels\n")
02202     {
02203         for (index_t i=0; i<scores.vlen; ++i)
02204         {
02205             if (scores[i]>0)
02206                 prior1++;
02207             else
02208                 prior0++;
02209         }
02210     }
02211     SG_SDEBUG("%d pos; %d neg\n", prior1, prior0)
02212 
02213     /* parameter setting */
02214     /* maximum number of iterations */
02215     index_t maxiter=100;
02216 
02217     /* minimum step taken in line search */
02218     float64_t minstep=1E-10;
02219 
02220     /* for numerically strict pd of hessian */
02221     float64_t sigma=1E-12;
02222     float64_t eps=1E-5;
02223 
02224     /* construct target support */
02225     float64_t hiTarget=(prior1+1.0)/(prior1+2.0);
02226     float64_t loTarget=1/(prior0+2.0);
02227     index_t length=prior1+prior0;
02228 
02229     SGVector<float64_t> t(length);
02230     for (index_t i=0; i<length; ++i)
02231     {
02232         if (scores[i]>0)
02233             t[i]=hiTarget;
02234         else
02235             t[i]=loTarget;
02236     }
02237 
02238     /* initial Point and Initial Fun Value */
02239     /* result parameters of sigmoid */
02240     float64_t a=0;
02241     float64_t b=CMath::log((prior0+1.0)/(prior1+1.0));
02242     float64_t fval=0.0;
02243 
02244     for (index_t i=0; i<length; ++i)
02245     {
02246         float64_t fApB=scores[i]*a+b;
02247         if (fApB>=0)
02248             fval+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
02249         else
02250             fval+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
02251     }
02252 
02253     index_t it;
02254     float64_t g1;
02255     float64_t g2;
02256     for (it=0; it<maxiter; ++it)
02257     {
02258         SG_SDEBUG("Iteration %d, a=%f, b=%f, fval=%f\n", it, a, b, fval)
02259 
02260         /* Update Gradient and Hessian (use H' = H + sigma I) */
02261         float64_t h11=sigma; //Numerically ensures strict PD
02262         float64_t h22=h11;
02263         float64_t h21=0;
02264         g1=0;
02265         g2=0;
02266 
02267         for (index_t i=0; i<length; ++i)
02268         {
02269             float64_t fApB=scores[i]*a+b;
02270             float64_t p;
02271             float64_t q;
02272             if (fApB>=0)
02273             {
02274                 p=CMath::exp(-fApB)/(1.0+CMath::exp(-fApB));
02275                 q=1.0/(1.0+CMath::exp(-fApB));
02276             }
02277             else
02278             {
02279                 p=1.0/(1.0+CMath::exp(fApB));
02280                 q=CMath::exp(fApB)/(1.0+CMath::exp(fApB));
02281             }
02282 
02283             float64_t d2=p*q;
02284             h11+=scores[i]*scores[i]*d2;
02285             h22+=d2;
02286             h21+=scores[i]*d2;
02287             float64_t d1=t[i]-p;
02288             g1+=scores[i]*d1;
02289             g2+=d1;
02290         }
02291 
02292         /* Stopping Criteria */
02293         if (CMath::abs(g1)<eps && CMath::abs(g2)<eps)
02294             break;
02295 
02296         /* Finding Newton direction: -inv(H') * g */
02297         float64_t det=h11*h22-h21*h21;
02298         float64_t dA=-(h22*g1-h21*g2)/det;
02299         float64_t dB=-(-h21*g1+h11*g2)/det;
02300         float64_t gd=g1*dA+g2*dB;
02301 
02302         /* Line Search */
02303         float64_t stepsize=1;
02304 
02305         while (stepsize>=minstep)
02306         {
02307             float64_t newA=a+stepsize*dA;
02308             float64_t newB=b+stepsize*dB;
02309 
02310             /* New function value */
02311             float64_t newf=0.0;
02312             for (index_t i=0; i<length; ++i)
02313             {
02314                 float64_t fApB=scores[i]*newA+newB;
02315                 if (fApB>=0)
02316                     newf+=t[i]*fApB+CMath::log(1+CMath::exp(-fApB));
02317                 else
02318                     newf+=(t[i]-1)*fApB+CMath::log(1+CMath::exp(fApB));
02319             }
02320 
02321             /* Check sufficient decrease */
02322             if (newf<fval+0.0001*stepsize*gd)
02323             {
02324                 a=newA;
02325                 b=newB;
02326                 fval=newf;
02327                 break;
02328             }
02329             else
02330                 stepsize=stepsize/2.0;
02331         }
02332 
02333         if (stepsize<minstep)
02334         {
02335             SG_SWARNING("CStatistics::fit_sigmoid(): line search fails, A=%f, "
02336                     "B=%f, g1=%f, g2=%f, dA=%f, dB=%f, gd=%f\n",
02337                     a, b, g1, g2, dA, dB, gd);
02338         }
02339     }
02340 
02341     if (it>=maxiter-1)
02342     {
02343         SG_SWARNING("CStatistics::fit_sigmoid(): reaching maximal iterations,"
02344                 " g1=%f, g2=%f\n", g1, g2);
02345     }
02346 
02347     SG_SDEBUG("fitted sigmoid: a=%f, b=%f\n", a, b)
02348 
02349     CStatistics::SigmoidParamters result;
02350     result.a=a;
02351     result.b=b;
02352 
02353     SG_SDEBUG("leaving CStatistics::fit_sigmoid()\n")
02354     return result;
02355 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation