SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
InferenceMethod.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Roman Votyakov
00008  * Written (W) 2013 Heiko Strathmann
00009  * Copyright (C) 2012 Jacob Walker
00010  * Copyright (C) 2013 Roman Votyakov
00011  */
00012 
00013 #include <shogun/lib/config.h>
00014 
00015 #ifdef HAVE_EIGEN3
00016 
00017 #include <shogun/machine/gp/InferenceMethod.h>
00018 #include <shogun/distributions/classical/GaussianDistribution.h>
00019 #include <shogun/mathematics/Statistics.h>
00020 #include <shogun/lib/Lock.h>
00021 
00022 using namespace shogun;
00023 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
/** Per-thread argument bundle for get_derivative_helper(). One instance is
 * filled per parameter whose derivative is requested; all threads share the
 * same result map and lock.
 */
struct GRADIENT_THREAD_PARAM
{
    CInferenceMethod* inf;                          // inference method to query derivatives from
    CMap<TParameter*, SGVector<float64_t> >* grad;  // shared result map: parameter -> gradient vector
    CSGObject* obj;                                 // object that owns the parameter (inf, model, kernel or mean)
    TParameter* param;                              // parameter to differentiate w.r.t.
    CLock* lock;                                    // guards concurrent inserts into grad
};
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
00034 
/** Default constructor: registers parameters and NULLs all members via init(). */
CInferenceMethod::CInferenceMethod()
{
    init();
}
00039 
/** Constructor wiring up all components of the GP inference problem.
 * Each set_* call ref-counts its argument (see matching SG_UNREF in dtor).
 *
 * @param kernel covariance function
 * @param features training features
 * @param mean mean function
 * @param labels training labels
 * @param model likelihood model
 */
CInferenceMethod::CInferenceMethod(CKernel* kernel, CFeatures* features,
        CMeanFunction* mean, CLabels* labels, CLikelihoodModel* model)
{
    init();

    set_kernel(kernel);
    set_features(features);
    set_labels(labels);
    set_model(model);
    set_mean(mean);
}
00051 
/** Destructor: releases the references taken by the setters/constructor. */
CInferenceMethod::~CInferenceMethod()
{
    SG_UNREF(m_kernel);
    SG_UNREF(m_features);
    SG_UNREF(m_labels);
    SG_UNREF(m_model);
    SG_UNREF(m_mean);
}
00060 
/** Registers members for serialization/model-selection and sets defaults.
 * SG_ADD registers the member's address, so the NULL/1.0 assignments below
 * take effect regardless of registration order (presumably — the macro is
 * defined elsewhere; behavior kept as-is).
 */
void CInferenceMethod::init()
{
    // kernel and scale participate in model selection; scale also exposes a gradient
    SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel", MS_AVAILABLE);
    SG_ADD(&m_scale, "scale", "Kernel scale", MS_AVAILABLE, GRADIENT_AVAILABLE);
    SG_ADD((CSGObject**)&m_model, "likelihood_model", "Likelihood model",
            MS_AVAILABLE);
    SG_ADD((CSGObject**)&m_mean, "mean_function", "Mean function", MS_AVAILABLE);
    // labels/features are data, not tunable parameters
    SG_ADD((CSGObject**)&m_labels, "labels", "Labels", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_features, "features", "Features", MS_NOT_AVAILABLE);

    m_kernel=NULL;
    m_model=NULL;
    m_labels=NULL;
    m_features=NULL;
    m_mean=NULL;
    m_scale=1.0;  // default: unscaled kernel
}
00078 
00079 float64_t CInferenceMethod::get_marginal_likelihood_estimate(
00080         int32_t num_importance_samples, float64_t ridge_size)
00081 {
00082     /* sample from Gaussian approximation to q(f|y) */
00083     SGMatrix<float64_t> cov=get_posterior_covariance();
00084 
00085     /* add ridge */
00086     for (index_t i=0; i<cov.num_rows; ++i)
00087         cov(i,i)+=ridge_size;
00088 
00089     SGVector<float64_t> mean=get_posterior_mean();
00090 
00091     CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
00092     SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);
00093 
00094     /* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
00095      * log pdf of approximation, prior and likelihood */
00096 
00097     /* log pdf q(f^i|y) */
00098     SGVector<float64_t> log_pdf_post_approx=post_approx->log_pdf_multiple(samples);
00099 
00100     /* dont need gaussian anymore, free memory */
00101     SG_UNREF(post_approx);
00102     post_approx=NULL;
00103 
00104     /* log pdf p(f^i|\theta) and free memory afterwise. Scale kernel before */
00105     SGMatrix<float64_t> scaled_kernel(m_ktrtr.num_rows, m_ktrtr.num_cols);
00106     memcpy(scaled_kernel.matrix, m_ktrtr.matrix,
00107             sizeof(float64_t)*m_ktrtr.num_rows*m_ktrtr.num_cols);
00108     for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
00109         scaled_kernel.matrix[i]*=CMath::sq(m_scale);
00110 
00111     /* add ridge */
00112     for (index_t i=0; i<cov.num_rows; ++i)
00113         scaled_kernel(i,i)+=ridge_size;
00114 
00115     CGaussianDistribution* prior=new CGaussianDistribution(
00116             m_mean->get_mean_vector(m_features), scaled_kernel);
00117     SGVector<float64_t> log_pdf_prior=prior->log_pdf_multiple(samples);
00118     SG_UNREF(prior);
00119     prior=NULL;
00120 
00121     /* p(y|f^i) */
00122     SGVector<float64_t> log_likelihood=m_model->get_log_probability_fmatrix(
00123             m_labels, samples);
00124 
00125     /* combine probabilities */
00126     ASSERT(log_likelihood.vlen==num_importance_samples);
00127     ASSERT(log_likelihood.vlen==log_pdf_prior.vlen);
00128     ASSERT(log_likelihood.vlen==log_pdf_post_approx.vlen);
00129     SGVector<float64_t> sum(log_likelihood);
00130     for (index_t i=0; i<log_likelihood.vlen; ++i)
00131         sum[i]=log_likelihood[i]+log_pdf_prior[i]-log_pdf_post_approx[i];
00132 
00133     /* use log-sum-exp (in particular, log-mean-exp) trick to combine values */
00134     return CMath::log_mean_exp(sum);
00135 }
00136 
/** Computes derivatives of the negative log marginal likelihood w.r.t. each
 * parameter in @p params, dispatching one helper invocation per parameter —
 * sequentially for a single parameter, or one pthread per parameter when
 * pthreads are available and there are at least two.
 *
 * @param params map of (parameter, owning object) pairs; must be non-empty
 * @return ref-counted map parameter -> gradient vector (caller must SG_UNREF)
 */
CMap<TParameter*, SGVector<float64_t> >* CInferenceMethod::
get_negative_log_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>* params)
{
    REQUIRE(params->get_num_elements(), "Number of parameters should be greater "
            "than zero\n")

    // recompute cached intermediate results if any hyperparameter changed
    if (update_parameter_hash())
        update();

    // get number of derivatives
    const index_t num_deriv=params->get_num_elements();

    // create map of derivatives
    CMap<TParameter*, SGVector<float64_t> >* result=
        new CMap<TParameter*, SGVector<float64_t> >(num_deriv, num_deriv);

    SG_REF(result);

    // create lock object (serializes inserts into result from worker threads)
    CLock lock;

#ifdef HAVE_PTHREAD
    if (num_deriv<2)
    {
#endif /* HAVE_PTHREAD */
        // sequential path: also the only path when pthreads are unavailable
        for (index_t i=0; i<num_deriv; i++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(i);

            GRADIENT_THREAD_PARAM thread_params;

            thread_params.inf=this;
            thread_params.obj=node->data;
            thread_params.param=node->key;
            thread_params.grad=result;
            thread_params.lock=&lock;

            get_derivative_helper((void*) &thread_params);
        }
#ifdef HAVE_PTHREAD
    }
    else
    {
        // one thread per parameter; each thread gets its own param struct,
        // which must stay alive until the joins below complete
        pthread_t* threads=SG_MALLOC(pthread_t, num_deriv);
        GRADIENT_THREAD_PARAM* thread_params=SG_MALLOC(GRADIENT_THREAD_PARAM,
                num_deriv);

        for (index_t t=0; t<num_deriv; t++)
        {
            CMapNode<TParameter*, CSGObject*>* node=params->get_node_ptr(t);

            thread_params[t].inf=this;
            thread_params[t].obj=node->data;
            thread_params[t].param=node->key;
            thread_params[t].grad=result;
            thread_params[t].lock=&lock;

            pthread_create(&threads[t], NULL, CInferenceMethod::get_derivative_helper,
                    (void*)&thread_params[t]);
        }

        for (index_t t=0; t<num_deriv; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(thread_params);
        SG_FREE(threads);
    }
#endif /* HAVE_PTHREAD */

    return result;
}
00208 
00209 void* CInferenceMethod::get_derivative_helper(void *p)
00210 {
00211     GRADIENT_THREAD_PARAM* thread_param=(GRADIENT_THREAD_PARAM*)p;
00212 
00213     CInferenceMethod* inf=thread_param->inf;
00214     CSGObject* obj=thread_param->obj;
00215     CMap<TParameter*, SGVector<float64_t> >* grad=thread_param->grad;
00216     TParameter* param=thread_param->param;
00217     CLock* lock=thread_param->lock;
00218 
00219     REQUIRE(param, "Parameter should not be NULL\n");
00220     REQUIRE(obj, "Object of the parameter should not be NULL\n");
00221 
00222     SGVector<float64_t> gradient;
00223 
00224     if (obj==inf)
00225     {
00226         // try to find dervative wrt InferenceMethod.parameter
00227         gradient=inf->get_derivative_wrt_inference_method(param);
00228     }
00229     else if (obj==inf->m_model)
00230     {
00231         // try to find derivative wrt LikelihoodModel.parameter
00232         gradient=inf->get_derivative_wrt_likelihood_model(param);
00233     }
00234     else if (obj==inf->m_kernel)
00235     {
00236         // try to find derivative wrt Kernel.parameter
00237         gradient=inf->get_derivative_wrt_kernel(param);
00238     }
00239     else if (obj==inf->m_mean)
00240     {
00241         // try to find derivative wrt MeanFunction.parameter
00242         gradient=inf->get_derivative_wrt_mean(param);
00243     }
00244     else
00245     {
00246         SG_SERROR("Can't compute derivative of negative log marginal "
00247                 "likelihood wrt %s.%s", obj->get_name(), param->m_name);
00248     }
00249 
00250     lock->lock();
00251     grad->add(param, gradient);
00252     lock->unlock();
00253 
00254     return NULL;
00255 }
00256 
/** Recomputes cached quantities: validates members, then rebuilds the
 * training kernel matrix. Subclasses extend this with their own caches. */
void CInferenceMethod::update()
{
    check_members();
    update_train_kernel();
}
00262 
/** Validates that all components required for inference are set and mutually
 * consistent (non-NULL, non-empty, #labels == #feature vectors). Errors out
 * via REQUIRE on the first violation. */
void CInferenceMethod::check_members() const
{
    REQUIRE(m_features, "Training features should not be NULL\n")
    REQUIRE(m_features->get_num_vectors(),
            "Number of training features must be greater than zero\n")
    REQUIRE(m_labels, "Labels should not be NULL\n")
    REQUIRE(m_labels->get_num_labels(),
            "Number of labels must be greater than zero\n")
    REQUIRE(m_labels->get_num_labels()==m_features->get_num_vectors(),
            "Number of training vectors must match number of labels, which is "
            "%d, but number of training vectors is %d\n",
            m_labels->get_num_labels(), m_features->get_num_vectors())
    REQUIRE(m_kernel, "Kernel should not be NULL\n")
    REQUIRE(m_mean, "Mean function should not be NULL\n")
}
00278 
/** Initializes the kernel on the training features and caches the (unscaled)
 * training kernel matrix in m_ktrtr; m_scale is applied by consumers. */
void CInferenceMethod::update_train_kernel()
{
    m_kernel->init(m_features, m_features);
    m_ktrtr=m_kernel->get_kernel_matrix();
}
00284 
00285 #endif /* HAVE_EIGEN3 */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation