// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012-2013 Heiko Strathmann 00008 */ 00009 00010 #include <shogun/statistics/KernelTwoSampleTestStatistic.h> 00011 #include <shogun/features/Features.h> 00012 #include <shogun/kernel/Kernel.h> 00013 #include <shogun/kernel/CustomKernel.h> 00014 00015 using namespace shogun; 00016 00017 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic() : 00018 CTwoDistributionsTestStatistic() 00019 { 00020 init(); 00021 } 00022 00023 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel, 00024 CFeatures* p_and_q, index_t q_start) : 00025 CTwoDistributionsTestStatistic(p_and_q, q_start) 00026 { 00027 init(); 00028 00029 m_kernel=kernel; 00030 SG_REF(kernel); 00031 } 00032 00033 CKernelTwoSampleTestStatistic::CKernelTwoSampleTestStatistic(CKernel* kernel, 00034 CFeatures* p, CFeatures* q) : CTwoDistributionsTestStatistic(p, q) 00035 { 00036 init(); 00037 00038 m_kernel=kernel; 00039 SG_REF(kernel); 00040 } 00041 00042 CKernelTwoSampleTestStatistic::~CKernelTwoSampleTestStatistic() 00043 { 00044 SG_UNREF(m_kernel); 00045 } 00046 00047 void CKernelTwoSampleTestStatistic::init() 00048 { 00049 SG_ADD((CSGObject**)&m_kernel, "kernel", "Kernel for two sample test", 00050 MS_AVAILABLE); 00051 m_kernel=NULL; 00052 } 00053 00054 SGVector<float64_t> CKernelTwoSampleTestStatistic::bootstrap_null() 00055 { 00056 REQUIRE(m_kernel, "%s::bootstrap_null(): No kernel set!\n", get_name()); 00057 REQUIRE(m_kernel->get_kernel_type()==K_CUSTOM || m_p_and_q, 00058 "%s::bootstrap_null(): No features and no custom kernel set!\n", 00059 get_name()); 00060 00061 /* compute bootstrap statistics for null distribution */ 00062 SGVector<float64_t> results; 00063 00064 /* only 
do something if a custom kernel is used: use the power of pre- 00065 * computed kernel matrices 00066 */ 00067 if (m_kernel->get_kernel_type()==K_CUSTOM) 00068 { 00069 /* allocate memory */ 00070 results=SGVector<float64_t>(m_bootstrap_iterations); 00071 00072 /* memory for index permutations, (would slow down loop) */ 00073 00074 /* in case of custom kernel, there are no features */ 00075 index_t num_data; 00076 if (m_kernel->get_kernel_type()==K_CUSTOM) 00077 num_data=m_kernel->get_num_vec_lhs(); 00078 else 00079 num_data=m_p_and_q->get_num_vectors(); 00080 00081 SGVector<index_t> ind_permutation(num_data); 00082 ind_permutation.range_fill(); 00083 00084 /* check if kernel is a custom kernel. In that case, changing features is 00085 * not what we want but just subsetting the kernel itself */ 00086 CCustomKernel* custom_kernel=(CCustomKernel*)m_kernel; 00087 00088 for (index_t i=0; i<m_bootstrap_iterations; ++i) 00089 { 00090 /* idea: merge features of p and q, shuffle, and compute statistic. 00091 * This is done using subsets here. add to custom kernel since 00092 * it has no features to subset. CustomKernel has not to be 00093 * re-initialised after each subset setting */ 00094 SGVector<int32_t>::permute_vector(ind_permutation); 00095 00096 custom_kernel->add_row_subset(ind_permutation); 00097 custom_kernel->add_col_subset(ind_permutation); 00098 00099 /* compute statistic for this permutation of mixed samples */ 00100 results[i]=compute_statistic(); 00101 00102 /* remove subsets */ 00103 custom_kernel->remove_row_subset(); 00104 custom_kernel->remove_col_subset(); 00105 } 00106 } 00107 else 00108 { 00109 /* in this case, just use superclass method */ 00110 results=CTwoDistributionsTestStatistic::bootstrap_null(); 00111 } 00112 00113 return results; 00114 }