SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012-2013 Heiko Strathmann 00008 */ 00009 00010 #include <shogun/statistics/MMDKernelSelectionComb.h> 00011 #include <shogun/statistics/KernelTwoSampleTestStatistic.h> 00012 #include <shogun/kernel/CombinedKernel.h> 00013 00014 using namespace shogun; 00015 00016 CMMDKernelSelectionComb::CMMDKernelSelectionComb() : 00017 CMMDKernelSelection() 00018 { 00019 init(); 00020 } 00021 00022 CMMDKernelSelectionComb::CMMDKernelSelectionComb( 00023 CKernelTwoSampleTestStatistic* mmd) : CMMDKernelSelection(mmd) 00024 { 00025 init(); 00026 } 00027 00028 CMMDKernelSelectionComb::~CMMDKernelSelectionComb() 00029 { 00030 } 00031 00032 void CMMDKernelSelectionComb::init() 00033 { 00034 #ifdef HAVE_LAPACK 00035 SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of " 00036 "iterations for qp solver", MS_NOT_AVAILABLE); 00037 SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver", 00038 MS_NOT_AVAILABLE); 00039 SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimization " 00040 "kernel weights", MS_NOT_AVAILABLE); 00041 00042 /* sensible values for optimization */ 00043 m_opt_max_iterations=10000; 00044 m_opt_epsilon=10E-15; 00045 m_opt_low_cut=10E-7; 00046 #endif 00047 } 00048 00049 #ifdef HAVE_LAPACK 00050 /* no reference counting, use the static context constructor of SGMatrix */ 00051 SGMatrix<float64_t> CMMDKernelSelectionComb::m_Q=SGMatrix<float64_t>(false); 00052 00053 const float64_t* CMMDKernelSelectionComb::get_Q_col(uint32_t i) 00054 { 00055 return &m_Q[m_Q.num_rows*i]; 00056 } 00057 00059 void CMMDKernelSelectionComb::print_state(libqp_state_T state) 00060 { 00061 SG_SDEBUG("CMMDKernelSelectionComb::print_state: libqp state:" 00062 " primal=%f\n", state.QP); 00063 } 00064 00065 CKernel* CMMDKernelSelectionComb::select_kernel() 00066 { 00067 /* cast is safe due to assertion in constructor */ 00068 CCombinedKernel* combined=(CCombinedKernel*)m_mmd->get_kernel(); 00069 00070 /* optimise for kernel weights and set them */ 00071 SGVector<float64_t> weights=compute_measures(); 00072 combined->set_subkernel_weights(weights); 00073 00074 /* note that kernel is SG_REF'ed from getter above */ 00075 return combined; 00076 } 00077 00078 SGVector<float64_t> CMMDKernelSelectionComb::solve_optimization( 00079 SGVector<float64_t> mmds) 00080 { 00081 /* readability */ 00082 index_t num_kernels=mmds.vlen; 00083 00084 /* compute sum of mmds to generate feasible point for convex program */ 00085 float64_t sum_mmds=0; 00086 for (index_t i=0; i<mmds.vlen; ++i) 00087 sum_mmds+=mmds[i]; 00088 00089 /* QP: 0.5*x'*Q*x + f'*x 00090 * subject to 00091 * mmds'*x = b 00092 * LB[i] <= x[i] <= UB[i] for all i=1..n */ 00093 SGVector<float64_t> Q_diag(num_kernels); 00094 SGVector<float64_t> f(num_kernels); 00095 SGVector<float64_t> lb(num_kernels); 00096 SGVector<float64_t> ub(num_kernels); 00097 SGVector<float64_t> weights(num_kernels); 00098 00099 /* init everything, there are two cases possible: i) at least one mmd is 00100 * is positive, ii) all mmds are negative */ 00101 bool one_pos=false; 00102 for (index_t i=0; i<mmds.vlen; ++i) 00103 { 00104 if (mmds[i]>0) 00105 { 00106 SG_DEBUG("found at least one positive MMD\n") 00107 one_pos=true; 00108 break; 00109 } 00110 } 00111 00112 if (!one_pos) 00113 { 00114 SG_WARNING("CMMDKernelSelectionComb::solve_optimization(): all mmd " 00115 "estimates are negative. This is techically possible, although " 00116 "extremely rare. Consider using different kernels. " 00117 "This combination will lead to a bad two-sample test. Since any" 00118 "combination is bad, will now just return equally distributed " 00119 "kernel weights\n"); 00120 00121 /* if no element is positive, we can choose arbritary weights since 00122 * the results will be bad anyway */ 00123 weights.set_const(1.0/num_kernels); 00124 } 00125 else 00126 { 00127 SG_DEBUG("one MMD entry is positive, performing optimisation\n") 00128 /* do optimisation, init vectors */ 00129 for (index_t i=0; i<num_kernels; ++i) 00130 { 00131 Q_diag[i]=m_Q(i,i); 00132 f[i]=0; 00133 lb[i]=0; 00134 ub[i]=CMath::INFTY; 00135 00136 /* initial point has to be feasible, i.e. mmds'*x = b */ 00137 weights[i]=1.0/sum_mmds; 00138 } 00139 00140 /* start libqp solver with desired parameters */ 00141 SG_DEBUG("starting libqp optimization\n") 00142 libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector, 00143 f.vector, mmds.vector, 00144 one_pos ? 1 : -1, 00145 lb.vector, ub.vector, 00146 weights.vector, num_kernels, m_opt_max_iterations, 00147 m_opt_epsilon, &(CMMDKernelSelectionComb::print_state)); 00148 00149 SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter, 00150 qp_exitflag.exitflag); 00151 00152 /* set really small entries to zero and sum up for normalization */ 00153 float64_t sum_weights=0; 00154 for (index_t i=0; i<weights.vlen; ++i) 00155 { 00156 if (weights[i]<m_opt_low_cut) 00157 { 00158 SG_DEBUG("lowcut: weight[%i]=%f<%f setting to zero\n", i, weights[i], 00159 m_opt_low_cut); 00160 weights[i]=0; 00161 } 00162 00163 sum_weights+=weights[i]; 00164 } 00165 00166 /* normalize (allowed since problem is scale invariant) */ 00167 for (index_t i=0; i<weights.vlen; ++i) 00168 weights[i]/=sum_weights; 00169 } 00170 00171 return weights; 00172 } 00173 #else 00174 CKernel* CMMDKernelSelectionComb::select_kernel() 00175 { 00176 SG_ERROR("CMMDKernelSelectionComb::select_kernel(): LAPACK needs to be " 00177 "installed in order to use weight optimisation for combined " 00178 "kernels!\n"); 00179 return NULL; 00180 } 00181 00182 SGVector<float64_t> CMMDKernelSelectionComb::compute_measures() 00183 { 00184 SG_ERROR("CMMDKernelSelectionComb::select_kernel(): LAPACK needs to be " 00185 "installed in order to use weight optimisation for combined " 00186 "kernels!\n"); 00187 return SGVector<float64_t>(); 00188 } 00189 00190 SGVector<float64_t> CMMDKernelSelectionComb::solve_optimization( 00191 SGVector<float64_t> mmds) 00192 { 00193 SG_ERROR("CMMDKernelSelectionComb::solve_optimization(): LAPACK needs to be " 00194 "installed in order to use weight optimisation for combined " 00195 "kernels!\n"); 00196 return SGVector<float64_t>(); 00197 } 00198 #endif