SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
MMDKernelSelectionComb.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012-2013 Heiko Strathmann
00008  */
00009 
00010 #include <shogun/statistics/MMDKernelSelectionComb.h>
00011 #include <shogun/statistics/KernelTwoSampleTestStatistic.h>
00012 #include <shogun/kernel/CombinedKernel.h>
00013 
00014 using namespace shogun;
00015 
00016 CMMDKernelSelectionComb::CMMDKernelSelectionComb() :
00017         CMMDKernelSelection()
00018 {
00019     init();
00020 }
00021 
00022 CMMDKernelSelectionComb::CMMDKernelSelectionComb(
00023         CKernelTwoSampleTestStatistic* mmd) : CMMDKernelSelection(mmd)
00024 {
00025     init();
00026 }
00027 
00028 CMMDKernelSelectionComb::~CMMDKernelSelectionComb()
00029 {
00030 }
00031 
00032 void CMMDKernelSelectionComb::init()
00033 {
00034 #ifdef HAVE_LAPACK
00035     SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of "
00036             "iterations for qp solver", MS_NOT_AVAILABLE);
00037     SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver",
00038             MS_NOT_AVAILABLE);
00039     SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimization "
00040             "kernel weights", MS_NOT_AVAILABLE);
00041 
00042     /* sensible values for optimization */
00043     m_opt_max_iterations=10000;
00044     m_opt_epsilon=10E-15;
00045     m_opt_low_cut=10E-7;
00046 #endif
00047 }
00048 
00049 #ifdef HAVE_LAPACK
00050 /* no reference counting, use the static context constructor of SGMatrix */
00051 SGMatrix<float64_t> CMMDKernelSelectionComb::m_Q=SGMatrix<float64_t>(false);
00052 
00053 const float64_t* CMMDKernelSelectionComb::get_Q_col(uint32_t i)
00054 {
00055     return &m_Q[m_Q.num_rows*i];
00056 }
00057 
00059 void CMMDKernelSelectionComb::print_state(libqp_state_T state)
00060 {
00061     SG_SDEBUG("CMMDKernelSelectionComb::print_state: libqp state:"
00062             " primal=%f\n", state.QP);
00063 }
00064 
00065 CKernel* CMMDKernelSelectionComb::select_kernel()
00066 {
00067     /* cast is safe due to assertion in constructor */
00068     CCombinedKernel* combined=(CCombinedKernel*)m_mmd->get_kernel();
00069 
00070     /* optimise for kernel weights and set them */
00071     SGVector<float64_t> weights=compute_measures();
00072     combined->set_subkernel_weights(weights);
00073 
00074     /* note that kernel is SG_REF'ed from getter above */
00075     return combined;
00076 }
00077 
00078 SGVector<float64_t> CMMDKernelSelectionComb::solve_optimization(
00079         SGVector<float64_t> mmds)
00080 {
00081     /* readability */
00082     index_t num_kernels=mmds.vlen;
00083 
00084     /* compute sum of mmds to generate feasible point for convex program */
00085     float64_t sum_mmds=0;
00086     for (index_t i=0; i<mmds.vlen; ++i)
00087         sum_mmds+=mmds[i];
00088 
00089     /* QP: 0.5*x'*Q*x + f'*x
00090      * subject to
00091      * mmds'*x = b
00092      * LB[i] <= x[i] <= UB[i]   for all i=1..n */
00093     SGVector<float64_t> Q_diag(num_kernels);
00094     SGVector<float64_t> f(num_kernels);
00095     SGVector<float64_t> lb(num_kernels);
00096     SGVector<float64_t> ub(num_kernels);
00097     SGVector<float64_t> weights(num_kernels);
00098 
00099     /* init everything, there are two cases possible: i) at least one mmd is
00100      * is positive, ii) all mmds are negative */
00101     bool one_pos=false;
00102     for (index_t i=0; i<mmds.vlen; ++i)
00103     {
00104         if (mmds[i]>0)
00105         {
00106             SG_DEBUG("found at least one positive MMD\n")
00107             one_pos=true;
00108             break;
00109         }
00110     }
00111 
00112     if (!one_pos)
00113     {
00114         SG_WARNING("CMMDKernelSelectionComb::solve_optimization(): all mmd "
00115                 "estimates are negative. This is techically possible, although "
00116                 "extremely rare. Consider using different kernels. "
00117                 "This combination will lead to a bad two-sample test. Since any"
00118                 "combination is bad, will now just return equally distributed "
00119                 "kernel weights\n");
00120 
00121         /* if no element is positive, we can choose arbritary weights since
00122          * the results will be bad anyway */
00123         weights.set_const(1.0/num_kernels);
00124     }
00125     else
00126     {
00127         SG_DEBUG("one MMD entry is positive, performing optimisation\n")
00128         /* do optimisation, init vectors */
00129         for (index_t i=0; i<num_kernels; ++i)
00130         {
00131             Q_diag[i]=m_Q(i,i);
00132             f[i]=0;
00133             lb[i]=0;
00134             ub[i]=CMath::INFTY;
00135 
00136             /* initial point has to be feasible, i.e. mmds'*x = b */
00137             weights[i]=1.0/sum_mmds;
00138         }
00139 
00140         /* start libqp solver with desired parameters */
00141         SG_DEBUG("starting libqp optimization\n")
00142         libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector,
00143                 f.vector, mmds.vector,
00144                 one_pos ? 1 : -1,
00145                 lb.vector, ub.vector,
00146                 weights.vector, num_kernels, m_opt_max_iterations,
00147                 m_opt_epsilon, &(CMMDKernelSelectionComb::print_state));
00148 
00149         SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter,
00150                 qp_exitflag.exitflag);
00151 
00152         /* set really small entries to zero and sum up for normalization */
00153         float64_t sum_weights=0;
00154         for (index_t i=0; i<weights.vlen; ++i)
00155         {
00156             if (weights[i]<m_opt_low_cut)
00157             {
00158                 SG_DEBUG("lowcut: weight[%i]=%f<%f setting to zero\n", i, weights[i],
00159                         m_opt_low_cut);
00160                 weights[i]=0;
00161             }
00162 
00163             sum_weights+=weights[i];
00164         }
00165 
00166         /* normalize (allowed since problem is scale invariant) */
00167         for (index_t i=0; i<weights.vlen; ++i)
00168             weights[i]/=sum_weights;
00169     }
00170 
00171     return weights;
00172 }
00173 #else
00174 CKernel* CMMDKernelSelectionComb::select_kernel()
00175 {
00176     SG_ERROR("CMMDKernelSelectionComb::select_kernel(): LAPACK needs to be "
00177             "installed in order to use weight optimisation for combined "
00178             "kernels!\n");
00179     return NULL;
00180 }
00181 
00182 SGVector<float64_t> CMMDKernelSelectionComb::compute_measures()
00183 {
00184     SG_ERROR("CMMDKernelSelectionComb::select_kernel(): LAPACK needs to be "
00185             "installed in order to use weight optimisation for combined "
00186             "kernels!\n");
00187     return SGVector<float64_t>();
00188 }
00189 
00190 SGVector<float64_t> CMMDKernelSelectionComb::solve_optimization(
00191         SGVector<float64_t> mmds)
00192 {
00193     SG_ERROR("CMMDKernelSelectionComb::solve_optimization(): LAPACK needs to be "
00194             "installed in order to use weight optimisation for combined "
00195             "kernels!\n");
00196     return SGVector<float64_t>();
00197 }
00198 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation