SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Roman Votyakov 00008 * Copyright (C) 2012 Jacob Walker 00009 * Copyright (C) 2013 Roman Votyakov 00010 * 00011 * Code adapted from Gaussian Process Machine Learning Toolbox 00012 * http://www.gaussianprocess.org/gpml/code/matlab/doc/ 00013 */ 00014 00015 #include <shogun/machine/gp/ExactInferenceMethod.h> 00016 00017 #ifdef HAVE_EIGEN3 00018 00019 #include <shogun/machine/gp/GaussianLikelihood.h> 00020 #include <shogun/labels/RegressionLabels.h> 00021 #include <shogun/mathematics/Math.h> 00022 #include <shogun/mathematics/eigen3.h> 00023 00024 using namespace shogun; 00025 using namespace Eigen; 00026 00027 CExactInferenceMethod::CExactInferenceMethod() : CInferenceMethod() 00028 { 00029 } 00030 00031 CExactInferenceMethod::CExactInferenceMethod(CKernel* kern, CFeatures* feat, 00032 CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) : 00033 CInferenceMethod(kern, feat, m, lab, mod) 00034 { 00035 } 00036 00037 CExactInferenceMethod::~CExactInferenceMethod() 00038 { 00039 } 00040 00041 void CExactInferenceMethod::update() 00042 { 00043 CInferenceMethod::update(); 00044 update_chol(); 00045 update_alpha(); 00046 update_deriv(); 00047 update_mean(); 00048 update_cov(); 00049 } 00050 00051 void CExactInferenceMethod::check_members() const 00052 { 00053 CInferenceMethod::check_members(); 00054 00055 REQUIRE(m_model->get_model_type()==LT_GAUSSIAN, 00056 "Exact inference method can only use Gaussian likelihood function\n") 00057 REQUIRE(m_labels->get_label_type()==LT_REGRESSION, 00058 "Labels must be type of CRegressionLabels\n") 00059 } 00060 00061 SGVector<float64_t> CExactInferenceMethod::get_diagonal_vector() 00062 { 00063 if (update_parameter_hash()) 00064 update(); 00065 00066 // get the sigma variable from the Gaussian likelihood model 00067 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00068 float64_t sigma=lik->get_sigma(); 00069 SG_UNREF(lik); 00070 00071 // compute diagonal vector: sW=1/sigma 00072 SGVector<float64_t> result(m_features->get_num_vectors()); 00073 result.fill_vector(result.vector, m_features->get_num_vectors(), 1.0/sigma); 00074 00075 return result; 00076 } 00077 00078 float64_t CExactInferenceMethod::get_negative_log_marginal_likelihood() 00079 { 00080 if (update_parameter_hash()) 00081 update(); 00082 00083 // get the sigma variable from the Gaussian likelihood model 00084 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00085 float64_t sigma=lik->get_sigma(); 00086 SG_UNREF(lik); 00087 00088 // create eigen representation of alpha and L 00089 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00090 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00091 00092 // get labels and mean vectors and create eigen representation 00093 SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels(); 00094 Map<VectorXd> eigen_y(y.vector, y.vlen); 00095 SGVector<float64_t> m=m_mean->get_mean_vector(m_features); 00096 Map<VectorXd> eigen_m(m.vector, m.vlen); 00097 00098 // compute negative log of the marginal likelihood: 00099 // nlZ=(y-m)'*alpha/2+sum(log(diag(L)))+n*log(2*pi*sigma^2)/2 00100 float64_t result=(eigen_y-eigen_m).dot(eigen_alpha)/2.0+ 00101 eigen_L.diagonal().array().log().sum()+m_L.num_rows* 00102 CMath::log(2*CMath::PI*CMath::sq(sigma))/2.0; 00103 00104 return result; 00105 } 00106 00107 SGVector<float64_t> CExactInferenceMethod::get_alpha() 00108 { 00109 if (update_parameter_hash()) 00110 update(); 00111 00112 return SGVector<float64_t>(m_alpha); 00113 } 00114 00115 SGMatrix<float64_t> CExactInferenceMethod::get_cholesky() 00116 { 00117 if (update_parameter_hash()) 00118 update(); 00119 00120 return SGMatrix<float64_t>(m_L); 00121 } 00122 00123 SGVector<float64_t> CExactInferenceMethod::get_posterior_mean() 00124 { 00125 if (update_parameter_hash()) 00126 update(); 00127 00128 return SGVector<float64_t>(m_mu); 00129 } 00130 00131 SGMatrix<float64_t> CExactInferenceMethod::get_posterior_covariance() 00132 { 00133 if (update_parameter_hash()) 00134 update(); 00135 00136 return SGMatrix<float64_t>(m_Sigma); 00137 } 00138 00139 void CExactInferenceMethod::update_chol() 00140 { 00141 // get the sigma variable from the Gaussian likelihood model 00142 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00143 float64_t sigma=lik->get_sigma(); 00144 SG_UNREF(lik); 00145 00146 /* check whether to allocate cholesky memory */ 00147 if (!m_L.matrix || m_L.num_rows!=m_ktrtr.num_rows) 00148 m_L=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols); 00149 00150 /* creates views on kernel and cholesky matrix and perform cholesky */ 00151 Map<MatrixXd> K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00152 Map<MatrixXd> L(m_L.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00153 LLT<MatrixXd> llt(K*(CMath::sq(m_scale)/CMath::sq(sigma))+ 00154 MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols)); 00155 L=llt.matrixU(); 00156 } 00157 00158 void CExactInferenceMethod::update_alpha() 00159 { 00160 // get the sigma variable from the Gaussian likelihood model 00161 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00162 float64_t sigma=lik->get_sigma(); 00163 SG_UNREF(lik); 00164 00165 // get labels and mean vector and create eigen representation 00166 SGVector<float64_t> y=((CRegressionLabels*) m_labels)->get_labels(); 00167 Map<VectorXd> eigen_y(y.vector, y.vlen); 00168 SGVector<float64_t> m=m_mean->get_mean_vector(m_features); 00169 Map<VectorXd> eigen_m(m.vector, m.vlen); 00170 00171 m_alpha=SGVector<float64_t>(y.vlen); 00172 00173 /* creates views on cholesky matrix and alpha and solve system 00174 * (L * L^T) * a = y for a */ 00175 Map<VectorXd> a(m_alpha.vector, m_alpha.vlen); 00176 Map<MatrixXd> L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00177 00178 a=L.triangularView<Upper>().adjoint().solve(eigen_y-eigen_m); 00179 a=L.triangularView<Upper>().solve(a); 00180 00181 a/=CMath::sq(sigma); 00182 } 00183 00184 void CExactInferenceMethod::update_mean() 00185 { 00186 // create eigen representataion of kernel matrix and alpha 00187 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00188 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00189 00190 // get mean and create eigen representation of it 00191 SGVector<float64_t> m=m_mean->get_mean_vector(m_features); 00192 Map<VectorXd> eigen_m(m.vector, m.vlen); 00193 00194 m_mu=SGVector<float64_t>(m.vlen); 00195 Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen); 00196 00197 // compute mean: mu=K'*alpha+m 00198 eigen_mu=eigen_K*CMath::sq(m_scale)*eigen_alpha+eigen_m; 00199 } 00200 00201 void CExactInferenceMethod::update_cov() 00202 { 00203 // create eigen representataion of upper triangular factor L^T and kernel 00204 // matrix 00205 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00206 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00207 00208 m_Sigma=SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols); 00209 Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, 00210 m_Sigma.num_cols); 00211 00212 // compute V = L^(-1) * K, using upper triangular factor L^T 00213 MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve( 00214 eigen_K*CMath::sq(m_scale)); 00215 00216 // compute covariance matrix of the posterior: Sigma = K - V^T * V 00217 eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V; 00218 } 00219 00220 void CExactInferenceMethod::update_deriv() 00221 { 00222 // get the sigma variable from the Gaussian likelihood model 00223 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00224 float64_t sigma=lik->get_sigma(); 00225 SG_UNREF(lik); 00226 00227 // create eigen representation of derivative matrix and cholesky 00228 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00229 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00230 00231 m_Q=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols); 00232 Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols); 00233 00234 // solve L * L' * Q = I 00235 eigen_Q=eigen_L.triangularView<Upper>().adjoint().solve( 00236 MatrixXd::Identity(m_L.num_rows, m_L.num_cols)); 00237 eigen_Q=eigen_L.triangularView<Upper>().solve(eigen_Q); 00238 00239 // divide Q by sigma^2 00240 eigen_Q/=CMath::sq(sigma); 00241 00242 // create eigen representation of alpha and compute Q=Q-alpha*alpha' 00243 eigen_Q-=eigen_alpha*eigen_alpha.transpose(); 00244 } 00245 00246 SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_inference_method( 00247 const TParameter* param) 00248 { 00249 REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of " 00250 "the nagative log marginal likelihood wrt %s.%s parameter\n", 00251 get_name(), param->m_name) 00252 00253 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00254 Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols); 00255 00256 SGVector<float64_t> result(1); 00257 00258 // compute derivative wrt kernel scale: dnlZ=sum(Q.*K*scale*2)/2 00259 result[0]=(eigen_Q.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0; 00260 00261 return result; 00262 } 00263 00264 SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_likelihood_model( 00265 const TParameter* param) 00266 { 00267 REQUIRE(!strcmp(param->m_name, "sigma"), "Can't compute derivative of " 00268 "the nagative log marginal likelihood wrt %s.%s parameter\n", 00269 m_model->get_name(), param->m_name) 00270 00271 // get the sigma variable from the Gaussian likelihood model 00272 CGaussianLikelihood* lik=CGaussianLikelihood::obtain_from_generic(m_model); 00273 float64_t sigma=lik->get_sigma(); 00274 SG_UNREF(lik); 00275 00276 // create eigen representation of the matrix Q 00277 Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols); 00278 00279 SGVector<float64_t> result(1); 00280 00281 // compute derivative wrt likelihood model parameter sigma: 00282 // dnlZ=sigma^2*trace(Q) 00283 result[0]=CMath::sq(sigma)*eigen_Q.trace(); 00284 00285 return result; 00286 } 00287 00288 SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_kernel( 00289 const TParameter* param) 00290 { 00291 // create eigen representation of the matrix Q 00292 Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols); 00293 00294 SGVector<float64_t> result; 00295 00296 if (param->m_datatype.m_ctype==CT_VECTOR || 00297 param->m_datatype.m_ctype==CT_SGVECTOR) 00298 { 00299 REQUIRE(param->m_datatype.m_length_y, 00300 "Length of the parameter %s should not be NULL\n", param->m_name) 00301 result=SGVector<float64_t>(*(param->m_datatype.m_length_y)); 00302 } 00303 else 00304 { 00305 result=SGVector<float64_t>(1); 00306 } 00307 00308 for (index_t i=0; i<result.vlen; i++) 00309 { 00310 SGMatrix<float64_t> dK; 00311 00312 if (result.vlen==1) 00313 dK=m_kernel->get_parameter_gradient(param); 00314 else 00315 dK=m_kernel->get_parameter_gradient(param, i); 00316 00317 Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols); 00318 00319 // compute derivative wrt kernel parameter: dnlZ=sum(Q.*dK*scale)/2.0 00320 result[i]=(eigen_Q.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0; 00321 } 00322 00323 return result; 00324 } 00325 00326 SGVector<float64_t> CExactInferenceMethod::get_derivative_wrt_mean( 00327 const TParameter* param) 00328 { 00329 // create eigen representation of alpha vector 00330 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00331 00332 SGVector<float64_t> result; 00333 00334 if (param->m_datatype.m_ctype==CT_VECTOR || 00335 param->m_datatype.m_ctype==CT_SGVECTOR) 00336 { 00337 REQUIRE(param->m_datatype.m_length_y, 00338 "Length of the parameter %s should not be NULL\n", param->m_name) 00339 00340 result=SGVector<float64_t>(*(param->m_datatype.m_length_y)); 00341 } 00342 else 00343 { 00344 result=SGVector<float64_t>(1); 00345 } 00346 00347 for (index_t i=0; i<result.vlen; i++) 00348 { 00349 SGVector<float64_t> dmu; 00350 00351 if (result.vlen==1) 00352 dmu=m_mean->get_parameter_derivative(m_features, param); 00353 else 00354 dmu=m_mean->get_parameter_derivative(m_features, param, i); 00355 00356 Map<VectorXd> eigen_dmu(dmu.vector, dmu.vlen); 00357 00358 // compute derivative wrt mean parameter: dnlZ=-dmu'*alpha 00359 result[i]=-eigen_dmu.dot(eigen_alpha); 00360 } 00361 00362 return result; 00363 } 00364 00365 #endif /* HAVE_EIGEN3 */