SHOGUN v3.2.0
ExactInferenceMethod.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 *
 * Code adapted from Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 */

#include <shogun/machine/gp/ExactInferenceMethod.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CExactInferenceMethod::CExactInferenceMethod() : CInferenceMethod()
{
}

CExactInferenceMethod::CExactInferenceMethod(CKernel* kern, CFeatures* feat,
        CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
        CInferenceMethod(kern, feat, m, lab, mod)
{
}

CExactInferenceMethod::~CExactInferenceMethod()
{
}

void CExactInferenceMethod::update()
{
    CInferenceMethod::update();
    update_chol();
    update_alpha();
    update_deriv();
    update_mean();
    update_cov();
}
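
/* A minimal usage sketch (illustrative only; it assumes the Shogun 3.2
 * classes CDenseFeatures, CRegressionLabels, CGaussianKernel, CZeroMean and
 * CGaussianLikelihood, with X an SGMatrix<float64_t> of training inputs and
 * y an SGVector<float64_t> of training targets):
 *
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labs=new CRegressionLabels(y);
 *   CExactInferenceMethod* inf=new CExactInferenceMethod(
 *       new CGaussianKernel(10, 2.0), feats, new CZeroMean(), labs,
 *       new CGaussianLikelihood());
 *   float64_t nlZ=inf->get_negative_log_marginal_likelihood();
 *   SGVector<float64_t> mu=inf->get_posterior_mean();
 *   SG_UNREF(inf);
 */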

void CExactInferenceMethod::check_members() const
{
    CInferenceMethod::check_members();

    REQUIRE(m_model->get_model_type()==LT_GAUSSIAN,
        "Exact inference method can only be used with a Gaussian likelihood function\n")
    REQUIRE(m_labels->get_label_type()==LT_REGRESSION,
        "Labels must be of type CRegressionLabels\n")
}

SGVector<float64_t> CExactInferenceMethod::get_diagonal_vector()
{
    if (update_parameter_hash())
        update();

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // compute diagonal vector: sW=1/sigma
    SGVector<float64_t> result(m_features->get_num_vectors());
    result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma);

    return result;
}
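
/* Sketch of the convention assumed here (following the GPML toolbox): sW is
 * sqrt(W), where W = sigma^(-2)*I is the noise precision of the Gaussian
 * likelihood, so every entry simply equals 1/sigma.
 */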

float64_t CExactInferenceMethod::get_negative_log_marginal_likelihood()
{
    if (update_parameter_hash())
        update();

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // create eigen representation of alpha and L
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    // get labels and mean vectors and create eigen representation
    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    // compute negative log of the marginal likelihood:
    // nlZ=(y-m)'*alpha/2+sum(log(diag(L)))+n*log(2*pi*sigma^2)/2
    float64_t result=(eigen_y-eigen_m).dot(eigen_alpha)/2.0+
        eigen_L.diagonal().array().log().sum()+m_L.num_rows*
        CMath::log(2*CMath::PI*CMath::sq(sigma))/2.0;

    return result;
}
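
/* Sketch of the algebra behind the expression above: with
 * Ky = scale^2*K + sigma^2*I and L the upper Cholesky factor stored in m_L,
 * so that L'*L = Ky/sigma^2 (see update_chol), we have
 * log|Ky| = n*log(sigma^2) + 2*sum(log(diag(L))), and alpha = Ky^(-1)*(y-m)
 * (see update_alpha). Hence the value returned equals the standard form
 *   nlZ = (y-m)'*Ky^(-1)*(y-m)/2 + log|Ky|/2 + n*log(2*pi)/2
 */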

SGVector<float64_t> CExactInferenceMethod::get_alpha()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_alpha);
}

SGMatrix<float64_t> CExactInferenceMethod::get_cholesky()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_L);
}

SGVector<float64_t> CExactInferenceMethod::get_posterior_mean()
{
    if (update_parameter_hash())
        update();

    return SGVector<float64_t>(m_mu);
}

SGMatrix<float64_t> CExactInferenceMethod::get_posterior_covariance()
{
    if (update_parameter_hash())
        update();

    return SGMatrix<float64_t>(m_Sigma);
}

void CExactInferenceMethod::update_chol()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    /* check whether to allocate cholesky memory */
    if (!m_L.matrix || m_L.num_rows!=m_ktrtr.num_rows)
        m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);

    /* create views on the kernel and Cholesky matrices and perform the
     * Cholesky factorization */
    Map<MatrixXd> K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> L(m_L.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    LLT<MatrixXd> llt(K*(CMath::sq(m_scale)/CMath::sq(sigma))+
        MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));
    L=llt.matrixU();
}
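
/* In other words (a sketch, using Eigen's LLT convention where matrixU()
 * returns the upper factor): m_L holds the upper-triangular matrix U with
 *   U'*U = (scale^2/sigma^2)*K + I,
 * so that scale^2*K + sigma^2*I = sigma^2*(U'*U).
 */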

void CExactInferenceMethod::update_alpha()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // get labels and mean vector and create eigen representation
    SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels();
    Map<VectorXd> eigen_y(y.vector, y.vlen);
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    m_alpha=SGVector<float64_t>(y.vlen);

    /* create views on the Cholesky factor and alpha, and solve the system
     * (L^T * L) * a = y - m for a, where L is the upper factor stored in m_L */
    Map<VectorXd> a(m_alpha.vector, m_alpha.vlen);
    Map<MatrixXd> L(m_L.matrix, m_L.num_rows, m_L.num_cols);

    a=L.triangularView<Upper>().adjoint().solve(eigen_y-eigen_m);
    a=L.triangularView<Upper>().solve(a);

    // scale by the noise precision: alpha = a / sigma^2
    a/=CMath::sq(sigma);
}
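
/* The two triangular solves plus the final scaling amount to (a sketch of the
 * algebra, with U the upper factor stored in m_L):
 *   alpha = U^(-1)*U^(-T)*(y-m)/sigma^2 = (sigma^2*U'*U)^(-1)*(y-m)
 *         = (scale^2*K + sigma^2*I)^(-1)*(y-m),
 * the usual weight vector of exact GP regression.
 */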

void CExactInferenceMethod::update_mean()
{
    // create eigen representation of kernel matrix and alpha
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    // get mean and create eigen representation of it
    SGVector<float64_t> m=m_mean->get_mean_vector(m_features);
    Map<VectorXd> eigen_m(m.vector, m.vlen);

    m_mu=SGVector<float64_t>(m.vlen);
    Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen);

    // compute mean: mu=K'*alpha+m
    eigen_mu=eigen_K*CMath::sq(m_scale)*eigen_alpha+eigen_m;
}
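
/* With alpha = (scale^2*K + sigma^2*I)^(-1)*(y-m), this is the posterior mean
 * of the latent function at the training inputs (a sketch):
 *   mu = m + scale^2*K*(scale^2*K + sigma^2*I)^(-1)*(y-m)
 */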

void CExactInferenceMethod::update_cov()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // create eigen representation of upper triangular factor L^T and kernel
    // matrix
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);

    m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows,
            m_Sigma.num_cols);

    // compute V = L'^(-1) * K * scale^2 / sigma, using the upper triangular
    // factor L stored in m_L
    MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(
            eigen_K*CMath::sq(m_scale))/sigma;

    // compute covariance matrix of the posterior: Sigma = K - V^T * V
    eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V;
}
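
/* Sketch of the algebra (with U the factor stored in m_L and
 * sigma^2*(U'*U) = scale^2*K + sigma^2*I): V = U^(-T)*(scale^2*K)/sigma gives
 *   V'*V = scale^2*K*(scale^2*K + sigma^2*I)^(-1)*scale^2*K,
 * so Sigma is the posterior covariance of the latent function at the training
 * inputs.
 */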

void CExactInferenceMethod::update_deriv()
{
    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // create eigen representation of the Cholesky factor and alpha
    Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    m_Q=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols);
    Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);

    // solve (L' * L) * Q = I, where L is the upper factor stored in m_L
    eigen_Q=eigen_L.triangularView<Upper>().adjoint().solve(
        MatrixXd::Identity(m_L.num_rows, m_L.num_cols));
    eigen_Q=eigen_L.triangularView<Upper>().solve(eigen_Q);

    // divide Q by sigma^2
    eigen_Q/=CMath::sq(sigma);

    // compute Q=Q-alpha*alpha'
    eigen_Q-=eigen_alpha*eigen_alpha.transpose();
}
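
/* At this point Q = (scale^2*K + sigma^2*I)^(-1) - alpha*alpha' (a sketch;
 * cf. infExact.m in the GPML toolbox). Every covariance-type hyperparameter
 * gradient of the negative log marginal likelihood below has the form
 *   dnlZ/dtheta = trace(Q * dKy/dtheta)/2
 * for the corresponding derivative of Ky = scale^2*K + sigma^2*I.
 */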

SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_inference_method(
        const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of "
            "the negative log marginal likelihood wrt %s.%s parameter\n",
            get_name(), param->m_name)

    Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols);
    Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);

    SGVector<float64_t> result(1);

    // compute derivative wrt kernel scale: dnlZ=sum(Q.*K*scale*2)/2
    result[0]=(eigen_Q.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0;

    return result;
}
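
/* This follows from dKy/dscale = 2*scale*K, so that (a sketch of the
 * derivation, using the matrix Q from update_deriv)
 *   dnlZ/dscale = trace(Q*2*scale*K)/2 = scale*trace(Q*K).
 */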

SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_likelihood_model(
        const TParameter* param)
{
    REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of "
            "the negative log marginal likelihood wrt %s.%s parameter\n",
            m_model->get_name(), param->m_name)

    // get the sigma variable from the Gaussian likelihood model
    CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model);
    float64_t sigma=lik->get_sigma();
    SG_UNREF(lik);

    // create eigen representation of the matrix Q
    Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);

    SGVector<float64_t> result(1);

    // compute derivative wrt likelihood model parameter sigma:
    // dnlZ=sigma^2*trace(Q)
    result[0]=CMath::sq(sigma)*eigen_Q.trace();

    return result;
}
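
/* Sketch of the derivation: in the log(sigma) parameterization used by the
 * GPML toolbox, dKy/dlog(sigma) = 2*sigma^2*I, so
 * trace(Q*dKy/dlog(sigma))/2 = sigma^2*trace(Q), which is the expression
 * returned here.
 */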

SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_kernel(
        const TParameter* param)
{
    // create eigen representation of the matrix Q
    Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
            param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
                "Length of the parameter %s should not be NULL\n", param->m_name)
        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGMatrix<float64_t> dK;

        if (result.vlen==1)
            dK=m_kernel->get_parameter_gradient(param);
        else
            dK=m_kernel->get_parameter_gradient(param, i);

        Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);

        // compute derivative wrt kernel parameter: dnlZ=sum(Q.*dK*scale^2)/2
        result[i]=(eigen_Q.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0;
    }

    return result;
}
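
/* For each kernel hyperparameter theta, dKy/dtheta = scale^2*dK/dtheta, so
 * dnlZ/dtheta = trace(Q*scale^2*dK)/2, which the loop above evaluates
 * element-wise as sum(Q.*dK*scale^2)/2 (a sketch of the derivation).
 */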

SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_mean(
        const TParameter* param)
{
    // create eigen representation of alpha vector
    Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);

    SGVector<float64_t> result;

    if (param->m_datatype.m_ctype==CT_VECTOR ||
            param->m_datatype.m_ctype==CT_SGVECTOR)
    {
        REQUIRE(param->m_datatype.m_length_y,
                "Length of the parameter %s should not be NULL\n", param->m_name)

        result=SGVector<float64_t>(*(param->m_datatype.m_length_y));
    }
    else
    {
        result=SGVector<float64_t>(1);
    }

    for (index_t i=0; i<result.vlen; i++)
    {
        SGVector<float64_t> dmu;

        if (result.vlen==1)
            dmu=m_mean->get_parameter_derivative(m_features, param);
        else
            dmu=m_mean->get_parameter_derivative(m_features, param, i);

        Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen);

        // compute derivative wrt mean parameter: dnlZ=-dmu'*alpha
        result[i]=-eigen_dmu.dot(eigen_alpha);
    }

    return result;
}
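
/* Since nlZ depends on the mean function only through (y-m), for each mean
 * hyperparameter theta (a sketch of the derivation)
 *   dnlZ/dtheta = -(dm/dtheta)'*Ky^(-1)*(y-m) = -dmu'*alpha.
 */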

#endif /* HAVE_EIGEN3 */