SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
KernelTwoSampleTestStatistic.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012-2013 Heiko Strathmann
00008  */
00009 
00010 #include <shogun/statistics/KernelTwoSampleTestStatistic.h>
00011 #include <shogun/features/Features.h>
00012 #include <shogun/kernel/Kernel.h>
00013 #include <shogun/kernel/CustomKernel.h>
00014 
00015 using namespace shogun;
00016 
00017 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic() :
00018         CTwoDistributionsTestStatistic()
00019 {
00020     init();
00021 }
00022 
00023 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,
00024         CFeatures* p_and_q, index_t q_start) :
00025         CTwoDistributionsTestStatistic(p_and_q, q_start)
00026 {
00027     init();
00028 
00029     m_kernel=kernel;
00030     SG_REF(kernel);
00031 }
00032 
00033 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel,
00034         CFeatures* p, CFeatures* q) : CTwoDistributionsTestStatistic(p, q)
00035 {
00036     init();
00037 
00038     m_kernel=kernel;
00039     SG_REF(kernel);
00040 }
00041 
00042 CKernelTwoSampleTestStatistic::~CKernelTwoSampleTestStatistic()
00043 {
00044     SG_UNREF(m_kernel);
00045 }
00046 
00047 void CKernelTwoSampleTestStatistic::init()
00048 {
00049     SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel for two sample test",
00050             MS_AVAILABLE);
00051     m_kernel=NULL;
00052 }
00053 
00054 SGVector<float64_t> CKernelTwoSampleTestStatistic::bootstrap_null()
00055 {
00056     REQUIRE(m_kernel, "%s::bootstrap_null(): No kernel set!\n", get_name());
00057     REQUIRE(m_kernel->get_kernel_type()==K_CUSTOM || m_p_and_q,
00058             "%s::bootstrap_null(): No features and no custom kernel set!\n",
00059             get_name());
00060 
00061     /* compute bootstrap statistics for null distribution */
00062     SGVector<float64_t> results;
00063 
00064     /* only do something if a custom kernel is used: use the power of pre-
00065      * computed kernel matrices
00066      */
00067     if (m_kernel->get_kernel_type()==K_CUSTOM)
00068     {
00069         /* allocate memory */
00070         results=SGVector<float64_t>(m_bootstrap_iterations);
00071 
00072         /* memory for index permutations, (would slow down loop) */
00073 
00074         /* in case of custom kernel, there are no features */
00075         index_t num_data;
00076         if (m_kernel->get_kernel_type()==K_CUSTOM)
00077             num_data=m_kernel->get_num_vec_lhs();
00078         else
00079             num_data=m_p_and_q->get_num_vectors();
00080 
00081         SGVector<index_t> ind_permutation(num_data);
00082         ind_permutation.range_fill();
00083 
00084         /* check if kernel is a custom kernel. In that case, changing features is
00085          * not what we want but just subsetting the kernel itself */
00086         CCustomKernel* custom_kernel=(CCustomKernel*)m_kernel;
00087 
00088         for (index_t i=0; i<m_bootstrap_iterations; ++i)
00089         {
00090             /* idea: merge features of p and q, shuffle, and compute statistic.
00091              * This is done using subsets here. add to custom kernel since
00092              * it has no features to subset. CustomKernel has not to be
00093              * re-initialised after each subset setting */
00094             SGVector<int32_t>::permute_vector(ind_permutation);
00095 
00096             custom_kernel->add_row_subset(ind_permutation);
00097             custom_kernel->add_col_subset(ind_permutation);
00098 
00099             /* compute statistic for this permutation of mixed samples */
00100             results[i]=compute_statistic();
00101 
00102             /* remove subsets */
00103             custom_kernel->remove_row_subset();
00104             custom_kernel->remove_col_subset();
00105         }
00106     }
00107     else
00108     {
00109         /* in this case, just use superclass method */
00110         results=CTwoDistributionsTestStatistic::bootstrap_null();
00111     }
00112 
00113     return results;
00114 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation