/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 * Written (W) 2013 Heiko Strathmann
 * Copyright (C) 2012 Jacob Walker
 * Copyright (C) 2013 Roman Votyakov
 */

#include <shogun/lib/config.h>

#ifdef HAVE_EIGEN3

#include <shogun/machine/gp/InferenceMethod.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/lib/Lock.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct GRADIENT_THREAD_PARAM
{
    CInferenceMethod* inf;
    CMap<TParameter*, SGVector<float64_t> >* grad;
    CSGObject* obj;
    TParameter* param;
    CLock* lock;
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

CInferenceMethod::CInferenceMethod()
{
    init();
}

CInferenceMethod::CInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}

CInferenceMethod::~CInferenceMethod()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
}

void CInferenceMethod::init()
{
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_scale=1.0;
}

float64_t CInferenceMethod::get_marginal_likelihood_estimate(
        int32_t num_importance_samples, float64_t ridge_size)
{
    /* sample from the Gaussian approximation q(f|y) of the posterior */
    SGMatrix<float64_t> cov=get_posterior_covariance();

    /* add ridge to keep the covariance numerically positive definite */
    for (index_t i=0; i<cov.num_rows; ++i)
        cov(i,i)+=ridge_size;

    SGVector<float64_t> mean=get_posterior_mean();

    CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
    SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

    /* evaluate q(f^i|y), p(f^i|\theta) and p(y|f^i), i.e. the log-pdfs of
     * approximation, prior and likelihood */

    /* log pdf q(f^i|y) */
    SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);

    /* don't need the Gaussian anymore, free memory */
    SG_UNREF(post_approx);
    post_approx=NULL;

    /* log pdf p(f^i|\theta); scale the kernel before and free memory
     * afterwards */
    SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
    memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
            sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
    for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
        scaled_kernel.matrix[i]*=CMath::sq(m_scale);

    /* add ridge */
    for (index_t i=0; i<scaled_kernel.num_rows; ++i)
        scaled_kernel(i,i)+=ridge_size;

    CGaussianDistribution* prior=new CGaussianDistribution(
            m_mean->get_mean_vector(m_features), scaled_kernel);
    SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
    SG_UNREF(prior);
    prior=NULL;

    /* log pdf p(y|f^i) */
    SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
            m_labels, samples);

    /* combine probabilities */
    ASSERT(log_likelihood.vlen==num_importance_samples);
    ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
    ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
    SGVector<float64_t> sum(log_likelihood);
    for (index_t i=0; i<log_likelihood.vlen; ++i)
        sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];

    /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
    return CMath::log_mean_exp(sum);
}
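/* Note on the estimator above (an explanatory sketch added here, not part of
 * the original sources): with samples f^i drawn from the Gaussian posterior
 * approximation q(f|y), the returned value is the standard importance
 * sampling estimate of the log marginal likelihood,
 *
 *   log p(y|\theta) ≈ log (1/N) \sum_{i=1}^N exp( log p(y|f^i)
 *                       + log p(f^i|\theta) - log q(f^i|y) ),
 *
 * which CMath::log_mean_exp evaluates stably by shifting all exponents by
 * their maximum before exponentiating. The ridge added to both covariance
 * matrices only guards the Cholesky factorizations inside
 * CGaussianDistribution against numerical indefiniteness. */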
CMap<TParameter*, SGVector<float64_t> >* CInferenceMethod::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    if (update_parameter_hash())
        update();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        // one thread per parameter; each thread writes its gradient into the
        // shared result map under the lock
        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
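/* A minimal usage sketch for the derivative map (hypothetical names; 'inf'
 * stands for any fully configured CInferenceMethod subclass instance, and the
 * parameter dictionary would normally be assembled by the gradient evaluation
 * framework rather than by hand):
 *
 *   CMap<TParameter*, CSGObject*>* params=...; // parameter -> owning object
 *   CMap<TParameter*, SGVector<float64_t> >* grad=
 *       inf->get_negative_log_marginal_likelihood_derivatives(params);
 *   // ... look up per-parameter gradients in the returned map ...
 *   SG_UNREF(grad); // caller owns the result (SG_REF'd inside)
 */

void* CInferenceMethod::get_derivative_helper(void *p)
{
    GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;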
    CInferenceMethod* inf=thread_param->inf;
    CSGObject* obj=thread_param->obj;
    CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
    TParameter* param=thread_param->param;
    CLock* lock=thread_param->lock;

    REQUIRE(param, "Parameter should not be NULL\n");
    REQUIRE(obj, "Object of the parameter should not be NULL\n");

    SGVector<float64_t> gradient;

    if (obj==inf)
    {
        // try to find derivative wrt InferenceMethod.parameter
        gradient=inf->get_derivative_wrt_inference_method(param);
    }
    else if (obj==inf->m_model)
    {
        // try to find derivative wrt LikelihoodModel.parameter
        gradient=inf->get_derivative_wrt_likelihood_model(param);
    }
    else if (obj==inf->m_kernel)
    {
        // try to find derivative wrt Kernel.parameter
        gradient=inf->get_derivative_wrt_kernel(param);
    }
    else if (obj==inf->m_mean)
    {
        // try to find derivative wrt MeanFunction.parameter
        gradient=inf->get_derivative_wrt_mean(param);
    }
    else
    {
        SG_SERROR("Can't compute derivative of negative log marginal "
                "likelihood wrt %s.%s\n", obj->get_name(), param->m_name);
    }

    lock->lock();
    grad->add(param, gradient);
    lock->unlock();

    return NULL;
}

void CInferenceMethod::update()
{
    check_members();
    update_train_kernel();
}

void CInferenceMethod::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors must match number of labels, which is "
            "%d, but number of training vectors is %d\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}

void CInferenceMethod::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}

#endif /* HAVE_EIGEN3 */
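/* End-to-end usage sketch (an illustrative addition under assumptions: a
 * concrete subclass such as CExactInferenceMethod plus the usual Shogun
 * building blocks; treat this as a sketch, not canonical API documentation):
 *
 *   // X: SGMatrix<float64_t> of training inputs, y: SGVector<float64_t>
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labs=new CRegressionLabels(y);
 *   CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
 *   CZeroMean* mean=new CZeroMean();
 *   CGaussianLikelihood* lik=new CGaussianLikelihood();
 *
 *   CInferenceMethod* inf=new CExactInferenceMethod(kernel, feats, mean,
 *       labs, lik);
 *   float64_t log_ml=inf->get_marginal_likelihood_estimate(1000, 1e-6);
 *   SG_UNREF(inf);
 */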