SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Roman Votyakov 00008 * 00009 * Based on ideas from GAUSSIAN PROCESS REGRESSION AND CLASSIFICATION Toolbox 00010 * Copyright (C) 2005-2013 by Carl Edward Rasmussen & Hannes Nickisch under the 00011 * FreeBSD License 00012 * http://www.gaussianprocess.org/gpml/code/matlab/doc/ 00013 */ 00014 00015 #include <shogun/machine/gp/EPInferenceMethod.h> 00016 00017 #ifdef HAVE_EIGEN3 00018 00019 #include <shogun/mathematics/Math.h> 00020 #include <shogun/labels/RegressionLabels.h> 00021 #include <shogun/features/CombinedFeatures.h> 00022 #include <shogun/features/DotFeatures.h> 00023 #include <shogun/lib/DynamicArray.h> 00024 00025 #include <shogun/mathematics/eigen3.h> 00026 00027 using namespace shogun; 00028 using namespace Eigen; 00029 00030 // try to use previously allocated memory for SGVector 00031 #define CREATE_SGVECTOR(vec, len, sg_type) \ 00032 { \ 00033 if (!vec.vector || vec.vlen!=len) \ 00034 vec=SGVector<sg_type>(len); \ 00035 } 00036 00037 // try to use previously allocated memory for SGMatrix 00038 #define CREATE_SGMATRIX(mat, rows, cols, sg_type) \ 00039 { \ 00040 if (!mat.matrix || mat.num_rows!=rows || mat.num_cols!=cols) \ 00041 mat=SGMatrix<sg_type>(rows, cols); \ 00042 } 00043 00044 CEPInferenceMethod::CEPInferenceMethod() 00045 { 00046 init(); 00047 } 00048 00049 CEPInferenceMethod::CEPInferenceMethod(CKernel* kernel, CFeatures* features, 00050 CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model) 00051 : CInferenceMethod(kernel, features, mean, labels, model) 00052 { 00053 init(); 00054 } 00055 00056 CEPInferenceMethod::~CEPInferenceMethod() 00057 { 00058 } 00059 00060 void CEPInferenceMethod::init() 00061 { 00062 m_max_sweep=15; 00063 m_min_sweep=2; 00064 m_tol=1e-4; 00065 } 00066 00067 float64_t CEPInferenceMethod::get_negative_log_marginal_likelihood() 00068 { 00069 if (update_parameter_hash()) 00070 update(); 00071 00072 return m_nlZ; 00073 } 00074 00075 SGVector<float64_t> CEPInferenceMethod::get_alpha() 00076 { 00077 if (update_parameter_hash()) 00078 update(); 00079 00080 return SGVector<float64_t>(m_alpha); 00081 } 00082 00083 SGMatrix<float64_t> CEPInferenceMethod::get_cholesky() 00084 { 00085 if (update_parameter_hash()) 00086 update(); 00087 00088 return SGMatrix<float64_t>(m_L); 00089 } 00090 00091 SGVector<float64_t> CEPInferenceMethod::get_diagonal_vector() 00092 { 00093 if (update_parameter_hash()) 00094 update(); 00095 00096 return SGVector<float64_t>(m_sttau); 00097 } 00098 00099 SGVector<float64_t> CEPInferenceMethod::get_posterior_mean() 00100 { 00101 if (update_parameter_hash()) 00102 update(); 00103 00104 return SGVector<float64_t>(m_mu); 00105 } 00106 00107 SGMatrix<float64_t> CEPInferenceMethod::get_posterior_covariance() 00108 { 00109 if (update_parameter_hash()) 00110 update(); 00111 00112 return SGMatrix<float64_t>(m_Sigma); 00113 } 00114 00115 void CEPInferenceMethod::update() 00116 { 00117 // update kernel and feature matrix 00118 CInferenceMethod::update(); 00119 00120 // get number of labels (trainig examples) 00121 index_t n=m_labels->get_num_labels(); 00122 00123 // try to use tilde values from previous call 00124 if (m_ttau.vlen==n) 00125 { 00126 update_chol(); 00127 update_approx_cov(); 00128 update_approx_mean(); 00129 update_negative_ml(); 00130 } 00131 00132 // get mean vector 00133 SGVector<float64_t> mean=m_mean->get_mean_vector(m_features); 00134 00135 // get and scale diagonal of the kernel matrix 00136 SGVector<float64_t> ktrtr_diag=m_ktrtr.get_diagonal_vector(); 00137 ktrtr_diag.scale(CMath::sq(m_scale)); 00138 00139 // marginal likelihood for ttau = tnu = 0 00140 float64_t nlZ0=-SGVector<float64_t>::sum(m_model->get_log_zeroth_moments( 00141 mean, ktrtr_diag, m_labels)); 00142 00143 // use zero values if we have no better guess or it's better 00144 if (m_ttau.vlen!=n || m_nlZ>nlZ0) 00145 { 00146 CREATE_SGVECTOR(m_ttau, n, float64_t); 00147 m_ttau.zero(); 00148 00149 CREATE_SGVECTOR(m_sttau, n, float64_t); 00150 m_sttau.zero(); 00151 00152 CREATE_SGVECTOR(m_tnu, n, float64_t); 00153 m_tnu.zero(); 00154 00155 CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t); 00156 00157 // copy data manually, since we don't have appropriate method 00158 for (index_t i=0; i<m_ktrtr.num_rows; i++) 00159 for (index_t j=0; j<m_ktrtr.num_cols; j++) 00160 m_Sigma(i,j)=m_ktrtr(i,j)*CMath::sq(m_scale); 00161 00162 CREATE_SGVECTOR(m_mu, n, float64_t); 00163 m_mu.zero(); 00164 00165 // set marginal likelihood 00166 m_nlZ=nlZ0; 00167 } 00168 00169 // create vector of the random permutation 00170 SGVector<index_t> randperm=SGVector<index_t>::randperm_vec(n); 00171 00172 // cavity tau and nu vectors 00173 SGVector<float64_t> tau_n(n); 00174 SGVector<float64_t> nu_n(n); 00175 00176 // cavity mu and s2 vectors 00177 SGVector<float64_t> mu_n(n); 00178 SGVector<float64_t> s2_n(n); 00179 00180 float64_t nlZ_old=CMath::INFTY; 00181 uint32_t sweep=0; 00182 00183 while ((CMath::abs(m_nlZ-nlZ_old)>m_tol && sweep<m_max_sweep) || 00184 sweep<m_min_sweep) 00185 { 00186 nlZ_old=m_nlZ; 00187 sweep++; 00188 00189 // shuffle random permutation 00190 randperm.permute(); 00191 00192 for (index_t j=0; j<n; j++) 00193 { 00194 index_t i=randperm[j]; 00195 00196 // find cavity paramters 00197 tau_n[i]=1.0/m_Sigma(i,i)-m_ttau[i]; 00198 nu_n[i]=m_mu[i]/m_Sigma(i,i)+mean[i]*tau_n[i]-m_tnu[i]; 00199 00200 // compute cavity mean and variance 00201 mu_n[i]=nu_n[i]/tau_n[i]; 00202 s2_n[i]=1.0/tau_n[i]; 00203 00204 // get moments 00205 float64_t mu=m_model->get_first_moment(mu_n, s2_n, m_labels, i); 00206 float64_t s2=m_model->get_second_moment(mu_n, s2_n, m_labels, i); 00207 00208 // save old value of ttau 00209 float64_t ttau_old=m_ttau[i]; 00210 00211 // compute ttau and sqrt(ttau) 00212 m_ttau[i]=CMath::max(1.0/s2-tau_n[i], 0.0); 00213 m_sttau[i]=CMath::sqrt(m_ttau[i]); 00214 00215 // compute tnu 00216 m_tnu[i]=mu/s2-nu_n[i]; 00217 00218 // compute difference ds2=ttau_new-ttau_old 00219 float64_t ds2=m_ttau[i]-ttau_old; 00220 00221 // create eigen representation of Sigma, tnu and mu 00222 Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, 00223 m_Sigma.num_cols); 00224 Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen); 00225 Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen); 00226 00227 VectorXd eigen_si=eigen_Sigma.col(i); 00228 00229 // rank-1 update Sigma 00230 eigen_Sigma=eigen_Sigma-ds2/(1.0+ds2*eigen_si(i))*eigen_si* 00231 eigen_si.adjoint(); 00232 00233 // update mu 00234 eigen_mu=eigen_Sigma*eigen_tnu; 00235 } 00236 00237 // update upper triangular factor (L^T) of Cholesky decomposition of 00238 // matrix B, approximate posterior covariance and mean, negative 00239 // marginal likelihood 00240 update_chol(); 00241 update_approx_cov(); 00242 update_approx_mean(); 00243 update_negative_ml(); 00244 } 00245 00246 if (sweep==m_max_sweep && CMath::abs(m_nlZ-nlZ_old)>m_tol) 00247 { 00248 SG_ERROR("Maximum number (%d) of sweeps reached, but tolerance (%f) was " 00249 "not yet reached. You can manually set maximum number of sweeps " 00250 "or tolerance to fix this problem.\n", m_max_sweep, m_tol); 00251 } 00252 00253 // update vector alpha 00254 update_alpha(); 00255 00256 // update matrices to compute derivatives 00257 update_deriv(); 00258 } 00259 00260 void CEPInferenceMethod::update_alpha() 00261 { 00262 // create eigen representations kernel matrix, L^T, sqrt(ttau) and tnu 00263 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00264 Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen); 00265 Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen); 00266 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00267 00268 // create shogun and eigen representation of the alpha vector 00269 CREATE_SGVECTOR(m_alpha, m_tnu.vlen, float64_t); 00270 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00271 00272 // solve LL^T * v = tS^(1/2) * K * tnu 00273 VectorXd eigen_v=eigen_L.triangularView<Upper>().adjoint().solve( 00274 eigen_sttau.cwiseProduct(eigen_K*CMath::sq(m_scale)*eigen_tnu)); 00275 eigen_v=eigen_L.triangularView<Upper>().solve(eigen_v); 00276 00277 // compute alpha = (I - tS^(1/2) * B^(-1) * tS(1/2) * K) * tnu = 00278 // tnu - tS(1/2) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K * tnu = 00279 // tnu - tS(1/2) * v 00280 eigen_alpha=eigen_tnu-eigen_sttau.cwiseProduct(eigen_v); 00281 } 00282 00283 void CEPInferenceMethod::update_chol() 00284 { 00285 // create eigen representations of kernel matrix and sqrt(ttau) 00286 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00287 Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen); 00288 00289 // create shogun and eigen representation of the upper triangular factor 00290 // (L^T) of the Cholesky decomposition of the matrix B 00291 CREATE_SGMATRIX(m_L, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t); 00292 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00293 00294 // compute upper triangular factor L^T of the Cholesky decomposion of the 00295 // matrix: B = tS^(1/2) * K * tS^(1/2) + I 00296 LLT<MatrixXd> eigen_chol((eigen_sttau*eigen_sttau.adjoint()).cwiseProduct( 00297 eigen_K*CMath::sq(m_scale))+ 00298 MatrixXd::Identity(m_L.num_rows, m_L.num_cols)); 00299 00300 eigen_L=eigen_chol.matrixU(); 00301 } 00302 00303 void CEPInferenceMethod::update_approx_cov() 00304 { 00305 // create eigen representations of kernel matrix, L^T matrix and sqrt(ttau) 00306 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00307 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00308 Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen); 00309 00310 // create shogun and eigen representation of the approximate covariance 00311 // matrix 00312 CREATE_SGMATRIX(m_Sigma, m_ktrtr.num_rows, m_ktrtr.num_cols, float64_t); 00313 Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols); 00314 00315 // compute V = L^(-1) * tS^(1/2) * K, using upper triangular factor L^T 00316 MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve( 00317 eigen_sttau.asDiagonal()*eigen_K*CMath::sq(m_scale)); 00318 00319 // compute covariance matrix of the posterior: 00320 // Sigma = K - K * tS^(1/2) * (L * L^T)^(-1) * tS^(1/2) * K = 00321 // K - (K * tS^(1/2)) * (L^T)^(-1) * L^(-1) * tS^(1/2) * K = 00322 // K - (tS^(1/2) * K)^T * (L^(-1))^T * L^(-1) * tS^(1/2) * K = K - V^T * V 00323 eigen_Sigma=eigen_K*CMath::sq(m_scale)-eigen_V.adjoint()*eigen_V; 00324 } 00325 00326 void CEPInferenceMethod::update_approx_mean() 00327 { 00328 // create eigen representation of posterior covariance matrix and tnu 00329 Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols); 00330 Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen); 00331 00332 // create shogun and eigen representation of the approximate mean vector 00333 CREATE_SGVECTOR(m_mu, m_tnu.vlen, float64_t); 00334 Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen); 00335 00336 // compute mean vector of the approximate posterior: mu = Sigma * tnu 00337 eigen_mu=eigen_Sigma*eigen_tnu; 00338 } 00339 00340 void CEPInferenceMethod::update_negative_ml() 00341 { 00342 // create eigen representation of Sigma, L, mu, tnu, ttau 00343 Map<MatrixXd> eigen_Sigma(m_Sigma.matrix, m_Sigma.num_rows, m_Sigma.num_cols); 00344 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00345 Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen); 00346 Map<VectorXd> eigen_tnu(m_tnu.vector, m_tnu.vlen); 00347 Map<VectorXd> eigen_ttau(m_ttau.vector, m_ttau.vlen); 00348 00349 // get and create eigen representation of the mean vector 00350 SGVector<float64_t> m=m_mean->get_mean_vector(m_features); 00351 Map<VectorXd> eigen_m(m.vector, m.vlen); 00352 00353 // compute vector of cavity parameter tau_n 00354 VectorXd eigen_tau_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient( 00355 eigen_Sigma.diagonal())-eigen_ttau; 00356 00357 // compute vector of cavity parameter nu_n 00358 VectorXd eigen_nu_n=eigen_mu.cwiseQuotient(eigen_Sigma.diagonal())- 00359 eigen_tnu+eigen_m.cwiseProduct(eigen_tau_n); 00360 00361 // compute cavity mean: mu_n=nu_n/tau_n 00362 SGVector<float64_t> mu_n(m_ttau.vlen); 00363 Map<VectorXd> eigen_mu_n(mu_n.vector, mu_n.vlen); 00364 00365 eigen_mu_n=eigen_nu_n.cwiseQuotient(eigen_tau_n); 00366 00367 // compute cavity variance: s2_n=1.0/tau_n 00368 SGVector<float64_t> s2_n(m_ttau.vlen); 00369 Map<VectorXd> eigen_s2_n(s2_n.vector, s2_n.vlen); 00370 00371 eigen_s2_n=(VectorXd::Ones(m_ttau.vlen)).cwiseQuotient(eigen_tau_n); 00372 00373 float64_t lZ=SGVector<float64_t>::sum( 00374 m_model->get_log_zeroth_moments(mu_n, s2_n, m_labels)); 00375 00376 // compute nlZ_part1=sum(log(diag(L)))-sum(lZ)-tnu'*Sigma*tnu/2 00377 float64_t nlZ_part1=eigen_L.diagonal().array().log().sum()-lZ- 00378 (eigen_tnu.adjoint()*eigen_Sigma).dot(eigen_tnu)/2.0; 00379 00380 // compute nlZ_part2=sum(tnu.^2./(tau_n+ttau))/2-sum(log(1+ttau./tau_n))/2 00381 float64_t nlZ_part2=(eigen_tnu.array().square()/ 00382 (eigen_tau_n+eigen_ttau).array()).sum()/2.0-(1.0+eigen_ttau.array()/ 00383 eigen_tau_n.array()).log().sum()/2.0; 00384 00385 // compute nlZ_part3=-(nu_n-m.*tau_n)'*((ttau./tau_n.*(nu_n-m.*tau_n)-2*tnu) 00386 // ./(ttau+tau_n))/2 00387 float64_t nlZ_part3=-(eigen_nu_n-eigen_m.cwiseProduct(eigen_tau_n)).dot( 00388 ((eigen_ttau.array()/eigen_tau_n.array()*(eigen_nu_n.array()- 00389 eigen_m.array()*eigen_tau_n.array())-2*eigen_tnu.array())/ 00390 (eigen_ttau.array()+eigen_tau_n.array())).matrix())/2.0; 00391 00392 // compute nlZ=nlZ_part1+nlZ_part2+nlZ_part3 00393 m_nlZ=nlZ_part1+nlZ_part2+nlZ_part3; 00394 } 00395 00396 void CEPInferenceMethod::update_deriv() 00397 { 00398 // create eigen representation of L, sstau, alpha 00399 Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); 00400 Map<VectorXd> eigen_sttau(m_sttau.vector, m_sttau.vlen); 00401 Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); 00402 00403 // create shogun and eigen representation of F 00404 m_F=SGMatrix<float64_t>(m_L.num_rows, m_L.num_cols); 00405 Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols); 00406 00407 // solve L*L^T * V = diag(sqrt(ttau)) 00408 MatrixXd V=eigen_L.triangularView<Upper>().adjoint().solve( 00409 MatrixXd(eigen_sttau.asDiagonal())); 00410 V=eigen_L.triangularView<Upper>().solve(V); 00411 00412 // compute F=alpha*alpha'-repmat(sW,1,n).*solve_chol(L,diag(sW)) 00413 eigen_F=eigen_alpha*eigen_alpha.adjoint()-eigen_sttau.asDiagonal()*V; 00414 } 00415 00416 SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_inference_method( 00417 const TParameter* param) 00418 { 00419 REQUIRE(!strcmp(param->m_name, "scale"), "Can't compute derivative of " 00420 "the nagative log marginal likelihood wrt %s.%s parameter\n", 00421 get_name(), param->m_name) 00422 00423 Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); 00424 Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols); 00425 00426 SGVector<float64_t> result(1); 00427 00428 // compute derivative wrt kernel scale: dnlZ=-sum(F.*K*scale*2)/2 00429 result[0]=-(eigen_F.cwiseProduct(eigen_K)*m_scale*2.0).sum()/2.0; 00430 00431 return result; 00432 } 00433 00434 SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_likelihood_model( 00435 const TParameter* param) 00436 { 00437 SG_NOTIMPLEMENTED 00438 return SGVector<float64_t>(); 00439 } 00440 00441 SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_kernel( 00442 const TParameter* param) 00443 { 00444 // create eigen representation of the matrix Q 00445 Map<MatrixXd> eigen_F(m_F.matrix, m_F.num_rows, m_F.num_cols); 00446 00447 SGVector<float64_t> result; 00448 00449 if (param->m_datatype.m_ctype==CT_VECTOR || 00450 param->m_datatype.m_ctype==CT_SGVECTOR) 00451 { 00452 REQUIRE(param->m_datatype.m_length_y, 00453 "Length of the parameter %s should not be NULL\n", param->m_name) 00454 result=SGVector<float64_t>(*(param->m_datatype.m_length_y)); 00455 } 00456 else 00457 { 00458 result=SGVector<float64_t>(1); 00459 } 00460 00461 for (index_t i=0; i<result.vlen; i++) 00462 { 00463 SGMatrix<float64_t> dK; 00464 00465 if (result.vlen==1) 00466 dK=m_kernel->get_parameter_gradient(param); 00467 else 00468 dK=m_kernel->get_parameter_gradient(param, i); 00469 00470 Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols); 00471 00472 // compute derivative wrt kernel parameter: dnlZ=-sum(F.*dK*scale^2)/2.0 00473 result[i]=-(eigen_F.cwiseProduct(eigen_dK)*CMath::sq(m_scale)).sum()/2.0; 00474 } 00475 00476 return result; 00477 } 00478 00479 SGVector<float64_t> CEPInferenceMethod::get_derivative_wrt_mean( 00480 const TParameter* param) 00481 { 00482 SG_NOTIMPLEMENTED 00483 return SGVector<float64_t>(); 00484 } 00485 00486 #endif /* HAVE_EIGEN3 */