SHOGUN v3.2.0
EPInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 *
 * Based on ideas from GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION Toolbox
 * Copyright (C) 2005-2013 by Carl Edward Rasmussen & Hannes Nickisch under the
 * FreeBSD License
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 */

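/* Expectation propagation (EP) approximates the non-Gaussian posterior over
 * the latent function values by a Gaussian q(f)=N(mu, Sigma). The
 * approximation is built from local "site" parameters: the site precisions
 * ttau (m_ttau), their square roots (m_sttau) and the site natural means tnu
 * (m_tnu). m_mu and m_Sigma hold the mean and covariance of the current
 * approximation, and m_nlZ the EP estimate of the negative log marginal
 * likelihood, following the GPML toolbox referenced above (cf. Rasmussen &
 * Williams, "Gaussian Processes for Machine Learning", Section 3.6).
 */
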
#include <shogun/machine/gp/EPInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/lib/DynamicArray.h>

#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

// try to use previously allocated memory for SGVector
#define CREATE_SGVECTOR(vec, len, sg_type) \
    { \
        if (!vec.vector || vec.vlen!=len) \
            vec=SGVector<sg_type>(len); \
    }

// try to use previously allocated memory for SGMatrix
#define CREATE_SGMATRIX(mat, rows, cols, sg_type) \
    { \
        if (!mat.matrix || mat.num_rows!=rows || mat.num_cols!=cols) \
            mat=SGMatrix<sg_type>(rows, cols); \
    }

CEPInferenceMethod::CEPInferenceMethod()
{
    init();
}

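// Minimal usage sketch (assuming the usual Shogun GP building blocks such as
// CDenseFeatures<float64_t>, CGaussianKernel, CZeroMean, CProbitLikelihood
// and CBinaryLabels are available and already set up with training data):
//
//   CEPInferenceMethod* inf=new CEPInferenceMethod(new CGaussianKernel(10, 2.0),
//           features, new CZeroMean(), labels, new CProbitLikelihood());
//   float64_t nlZ=inf->get_negative_log_marginal_likelihood();
//   SGVector<float64_t> mu=inf->get_posterior_mean();
//   SG_UNREF(inf);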
CEPInferenceMethod::CEPInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
        : CInferenceMethod(kernel, features, mean, labels, model)
{
    init();
}

CEPInferenceMethod::~CEPInferenceMethod()
{
}

void CEPInferenceMethod::init()
{
    m_max_sweep=15;
    m_min_sweep=2;
    m_tol=1e-4;
}

float64_t CEPInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (update_parameter_hash())
        update();

    return m_nlZ;
}

SGVector<float64_t> CEPInferenceMethod::get_alpha()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_alpha);
}

SGMatrix<float64_t> CEPInferenceMethod::get_cholesky()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_L);
}

SGVector<float64_t> CEPInferenceMethod::get_diagonal_vector()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_sttau);
}

SGVector<float64_t> CEPInferenceMethod::get_posterior_mean()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CEPInferenceMethod::get_posterior_covariance()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}

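/* Run one full EP inference pass: reuse the site parameters from a previous
 * call if the number of training points is unchanged (resetting them to zero
 * if the zero initialisation gives a better marginal likelihood), then sweep
 * over the training points in random order, updating one site at a time,
 * until the negative log marginal likelihood changes by less than m_tol
 * (with at least m_min_sweep and at most m_max_sweep sweeps).
 */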
void CEPInferenceMethod::update()
{
    // update kernel and feature matrix
    CInferenceMethod::update();

    // get number of labels (training examples)
    index_t n=m_labels->get_num_labels();

    // try to use tilde values from previous call
    if (m_ttau.vlen==n)
    {
        update_chol();
        update_approx_cov();
        update_approx_mean();
        update_negative_ml();
    }

    // get mean vector
    SGVector<float64_t> mean=m_mean->get_mean_vector(m_features);

    // get and scale diagonal of the kernel matrix
    SGVector<float64_t> ktrtr_diag=m_ktrtr.get_diagonal_vector();
    ktrtr_diag.scale(CMath::sq(m_scale));

    // marginal likelihood for ttau = tnu = 0
    float64_t nlZ0=-SGVector<float64_t>::sum(m_model->get_log_zeroth_moments(
            mean, ktrtr_diag, m_labels));

    // start from zero site parameters if we have no previous values or the
    // zero initialisation gives a better marginal likelihood
    if (m_ttau.vlen!=n || m_nlZ>nlZ0)
    {
        CREATE_SGVECTOR(m_ttau, n, float64_t);
        m_ttau.zero();

        CREATE_SGVECTOR(m_sttau, n, float64_t);
        m_sttau.zero();

        CREATE_SGVECTOR(m_tnu, n, float64_t);
        m_tnu.zero();

        CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);

        // copy data manually, since we don't have an appropriate method
        for (index_t i=0; i<m_ktrtr.num_rows; i++)
            for (index_t j=0; j<m_ktrtr.num_cols; j++)
                m_Sigma(i,j)=m_ktrtr(i,j)*CMath::sq(m_scale);

        CREATE_SGVECTOR(m_mu, n, float64_t);
        m_mu.zero();

        // set marginal likelihood
        m_nlZ=nlZ0;
    }

    // create vector of randomly permuted indices
    SGVector<index_t> randperm=SGVector<index_t>::randperm_vec(n);

    // cavity tau and nu vectors
    SGVector<float64_t> tau_n(n);
    SGVector<float64_t> nu_n(n);

    // cavity mu and s2 vectors
    SGVector<float64_t> mu_n(n);
    SGVector<float64_t> s2_n(n);

    float64_t nlZ_old=CMath::INFTY;
    uint32_t sweep=0;

    while ((CMath::abs(m_nlZ-nlZ_old)>m_tol && sweep<m_max_sweep) ||
            sweep<m_min_sweep)
    {
        nlZ_old=m_nlZ;
        sweep++;

        // shuffle the permutation
        randperm.permute();

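        /* Single-site EP update (moment matching): remove site i to obtain
         * the cavity distribution N(mu_n[i], s2_n[i]), let the likelihood
         * model compute the first and second moments of cavity times exact
         * likelihood, and choose new site parameters ttau[i], tnu[i] so that
         * the Gaussian approximation matches these moments. Sigma and mu are
         * then kept consistent via a rank-1 update.
         */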
        for (index_t j=0; j<n; j++)
        {
            index_t i=randperm[j];

            // find cavity parameters
            tau_n[i]=1.0/m_Sigma(i,i)-m_ttau[i];
            nu_n[i]=m_mu[i]/m_Sigma(i,i)+mean[i]*tau_n[i]-m_tnu[i];

            // compute cavity mean and variance
            mu_n[i]=nu_n[i]/tau_n[i];
            s2_n[i]=1.0/tau_n[i];

            // get moments
            float64_t mu=m_model->get_first_moment(mu_n, s2_n, m_labels, i);
            float64_t s2=m_model->get_second_moment(mu_n, s2_n, m_labels, i);

            // save old value of ttau
            float64_t ttau_old=m_ttau[i];

            // compute ttau and sqrt(ttau)
            m_ttau[i]=CMath::max(1.0/s2-tau_n[i], 0.0);
            m_sttau[i]=CMath::sqrt(m_ttau[i]);

            // compute tnu
            m_tnu[i]=mu/s2-nu_n[i];

            // compute difference ds2=ttau_new-ttau_old
            float64_t ds2=m_ttau[i]-ttau_old;

            // create eigen representation of Sigma, tnu and mu
            Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
                    m_Sigma.num_cols);
            Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
            Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

            VectorXd eigen_si=eigen_Sigma.col(i);

            // rank-1 update of Sigma
            eigen_Sigma=eigen_Sigma-ds2/(1.0+ds2*eigen_si(i))*eigen_si*
                eigen_si.adjoint();

            // update mu
            eigen_mu=eigen_Sigma*eigen_tnu;
        }

        // update the upper triangular factor (L^T) of the Cholesky
        // decomposition of matrix B, the approximate posterior covariance and
        // mean, and the negative log marginal likelihood
        update_chol();
        update_approx_cov();
        update_approx_mean();
        update_negative_ml();
    }

    if (sweep==m_max_sweep && CMath::abs(m_nlZ-nlZ_old)>m_tol)
    {
        SG_ERROR("Maximum number (%d) of sweeps reached, but the tolerance (%f) "
                "was not reached. You can manually increase the maximum number "
                "of sweeps or the tolerance to fix this problem.\n",
                m_max_sweep, m_tol);
    }

    // update vector alpha
    update_alpha();

    // update matrices to compute derivatives
    update_deriv();
}

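/* Compute the weight vector alpha such that the posterior mean satisfies
 * mu = K * alpha (with K including the scale factor), using the Cholesky
 * factor of B computed in update_chol():
 * alpha = (I - tS^(1/2) * B^(-1) * tS^(1/2) * K) * tnu
 */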
void CEPInferenceMethod::update_alpha()
{
    // create eigen representations of the kernel matrix, L^T, sqrt(ttau) and tnu
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // create shogun and eigen representation of the alpha vector
    CREATE_SGVECTOR(m_alpha, m_tnu.vlen, float64_t);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // solve L*L^T * v = tS^(1/2) * K * tnu
    VectorXd eigen_v=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sttau.cwiseProduct(eigen_K*CMath::sq(m_scale)*eigen_tnu));
    eigen_v=eigen_L.triangularView<Upper>().solve(eigen_v);

    // compute alpha = (I - tS^(1/2) * B^(-1) * tS^(1/2) * K) * tnu =
    // tnu - tS^(1/2) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K * tnu =
    // tnu - tS^(1/2) * v
    eigen_alpha=eigen_tnu-eigen_sttau.cwiseProduct(eigen_v);
}

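/* Compute the upper triangular Cholesky factor L^T of
 * B = tS^(1/2) * K * tS^(1/2) + I. B is used instead of K + tS^(-1) because
 * its eigenvalues are bounded below by one, so the factorisation stays well
 * conditioned even when some site precisions are (close to) zero.
 */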
void CEPInferenceMethod::update_chol()
{
    // create eigen representations of kernel matrix and sqrt(ttau)
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

    // create shogun and eigen representation of the upper triangular factor
    // (L^T) of the Cholesky decomposition of the matrix B
    CREATE_SGMATRIX(m_L, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // compute upper triangular factor L^T of the Cholesky decomposition of the
    // matrix: B = tS^(1/2) * K * tS^(1/2) + I
    LLT<MatrixXd> eigen_chol((eigen_sttau*eigen_sttau.adjoint()).cwiseProduct(
            eigen_K*CMath::sq(m_scale))+
            MatrixXd::Identity(m_L.num_rows, m_L.num_cols));

    eigen_L=eigen_chol.matrixU();
}

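/* Compute the posterior covariance via the matrix inversion lemma:
 * Sigma = (K^(-1) + tS)^(-1) = K - K * tS^(1/2) * B^(-1) * tS^(1/2) * K,
 * evaluated as K - V^T * V with V = L^(-1) * tS^(1/2) * K.
 */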
void CEPInferenceMethod::update_approx_cov()
{
    // create eigen representations of kernel matrix, L^T matrix and sqrt(ttau)
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);

    // create shogun and eigen representation of the approximate covariance
    // matrix
    CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);

    // compute V = L^(-1) * tS^(1/2) * K, using upper triangular factor L^T
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_sttau.asDiagonal()*eigen_K*CMath::sq(m_scale));

    // compute covariance matrix of the posterior:
    // Sigma = K - K * tS^(1/2) * (L * L^T)^(-1) * tS^(1/2) * K =
    // K - (K * tS^(1/2)) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K =
    // K - (tS^(1/2) * K)^T * (L^(-1))^T * L^(-1) * tS^(1/2) * K = K - V^T * V
    eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
}

void CEPInferenceMethod::update_approx_mean()
{
    // create eigen representation of posterior covariance matrix and tnu
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);

    // create shogun and eigen representation of the approximate mean vector
    CREATE_SGVECTOR(m_mu, m_tnu.vlen, float64_t);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

    // compute mean vector of the approximate posterior: mu = Sigma * tnu
    eigen_mu=eigen_Sigma*eigen_tnu;
}

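/* Compute the EP approximation of the negative log marginal likelihood,
 * -log Z_EP, assembled from three parts: the log-determinant and quadratic
 * term of the approximate posterior, the log zeroth moments of the exact
 * likelihoods under the cavity distributions, and correction terms coupling
 * cavity and site parameters. The decomposition mirrors the corresponding
 * computation in the GPML toolbox referenced in the file header.
 */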
void CEPInferenceMethod::update_negative_ml()
{
    // create eigen representation of Sigma, L, mu, tnu, ttau
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);
    Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen);
    Map<VectorXd> eigen_ttau(m_ttau.vector, m_ttau.vlen);

    // get and create eigen representation of the mean vector
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    // compute vector of cavity parameter tau_n
    VectorXd eigen_tau_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(
            eigen_Sigma.diagonal())-eigen_ttau;

    // compute vector of cavity parameter nu_n
    VectorXd eigen_nu_n=eigen_mu.cwiseQuotient(eigen_Sigma.diagonal())-
        eigen_tnu+eigen_m.cwiseProduct(eigen_tau_n);

    // compute cavity mean: mu_n=nu_n/tau_n
    SGVector<float64_t> mu_n(m_ttau.vlen);
    Map<VectorXd> eigen_mu_n(mu_n.vector, mu_n.vlen);

    eigen_mu_n=eigen_nu_n.cwiseQuotient(eigen_tau_n);

    // compute cavity variance: s2_n=1.0/tau_n
    SGVector<float64_t> s2_n(m_ttau.vlen);
    Map<VectorXd> eigen_s2_n(s2_n.vector, s2_n.vlen);

    eigen_s2_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(eigen_tau_n);

    float64_t lZ=SGVector<float64_t>::sum(
            m_model->get_log_zeroth_moments(mu_n, s2_n, m_labels));

    // compute nlZ_part1=sum(log(diag(L)))-sum(lZ)-tnu'*Sigma*tnu/2
    float64_t nlZ_part1=eigen_L.diagonal().array().log().sum()-lZ-
        (eigen_tnu.adjoint()*eigen_Sigma).dot(eigen_tnu)/2.0;

    // compute nlZ_part2=sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2
    float64_t nlZ_part2=(eigen_tnu.array().square()/
        (eigen_tau_n+eigen_ttau).array()).sum()/2.0-(1.0+eigen_ttau.array()/
        eigen_tau_n.array()).log().sum()/2.0;

    // compute nlZ_part3=-(nu_n-m.*tau_n)'*((ttau./tau_n.*(nu_n-m.*tau_n)-2*tnu)
    // ./(ttau+tau_n))/2
    float64_t nlZ_part3=-(eigen_nu_n-eigen_m.cwiseProduct(eigen_tau_n)).dot(
        ((eigen_ttau.array()/eigen_tau_n.array()*(eigen_nu_n.array()-
        eigen_m.array()*eigen_tau_n.array())-2*eigen_tnu.array())/
        (eigen_ttau.array()+eigen_tau_n.array())).matrix())/2.0;

    // compute nlZ=nlZ_part1+nlZ_part2+nlZ_part3
    m_nlZ=nlZ_part1+nlZ_part2+nlZ_part3;
}

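/* Precompute the matrix F used by the derivative methods below:
 * F = alpha * alpha^T - tS^(1/2) * B^(-1) * tS^(1/2), so that the derivative
 * of the negative log marginal likelihood wrt a kernel parameter becomes
 * dnlZ = -sum(F .* dK * scale^2) / 2 (see get_derivative_wrt_kernel()).
 */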
void CEPInferenceMethod::update_deriv()
{
    // create eigen representation of L, sttau, alpha
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // create shogun and eigen representation of F
    m_F=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    // solve L*L^T * V = diag(sqrt(ttau))
    MatrixXd V=eigen_L.triangularView<Upper>().adjoint().solve(
            MatrixXd(eigen_sttau.asDiagonal()));
    V=eigen_L.triangularView<Upper>().solve(V);

    // compute F=alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW))
    eigen_F=eigen_alpha*eigen_alpha.adjoint()-eigen_sttau.asDiagonal()*V;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_inference_method(
        const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
            "the negative log marginal likelihood wrt %s.%s parameter\n",
            get_name(), param->m_name)

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    SGVector<float64_t> result(1);

    // compute derivative wrt kernel scale: dnlZ=-sum(F.*K*scale*2)/2
    result[0]=-(eigen_F.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0;

    return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_likelihood_model(
        const TParameter* param)
{
    SG_NOTIMPLEMENTED
    return SGVector<float64_t>();
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_kernel(
        const TParameter* param)
{
    // create eigen representation of the matrix F
    Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
            param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
                "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute derivative wrt kernel parameter: dnlZ=-sum(F.*dK*scale^2)/2.0
        result[i]=-(eigen_F.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0;
    }

    return result;
}

SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_mean(
        const TParameter* param)
{
    SG_NOTIMPLEMENTED
    return SGVector<float64_t>();
}

#endif /* HAVE_EIGEN3 */