SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
StringSubsequenceKernel.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2014 Soumyajit De
00008  */
00009 
00010 #include <shogun/kernel/string/StringSubsequenceKernel.h>
00011 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h>
00012 #include <shogun/features/StringFeatures.h>
00013 
00014 using namespace shogun;
00015 
00016 CStringSubsequenceKernel::CStringSubsequenceKernel()
00017 : CStringKernel<char>(0), m_maxlen(1), m_lambda(1.0)
00018 {
00019     set_normalizer(new CSqrtDiagKernelNormalizer());
00020     register_params();
00021 }
00022 
00023 CStringSubsequenceKernel::CStringSubsequenceKernel(int32_t size, int32_t maxlen,
00024         float64_t lambda)
00025 : CStringKernel<char>(size), m_maxlen(maxlen), m_lambda(lambda)
00026 {
00027     set_normalizer(new CSqrtDiagKernelNormalizer());
00028     register_params();
00029 }
00030 
00031 CStringSubsequenceKernel::CStringSubsequenceKernel(CStringFeatures<char>* l,
00032         CStringFeatures<char>* r, int32_t maxlen, float64_t lambda)
00033 : CStringKernel<char>(10), m_maxlen(maxlen), m_lambda(lambda)
00034 {
00035     set_normalizer(new CSqrtDiagKernelNormalizer());
00036     init(l, r);
00037     register_params();
00038 }
00039 
00040 CStringSubsequenceKernel::~CStringSubsequenceKernel()
00041 {
00042     cleanup();
00043 }
00044 
00045 bool CStringSubsequenceKernel::init(CFeatures* l, CFeatures* r)
00046 {
00047     CStringKernel<char>::init(l, r);
00048     return init_normalizer();
00049 }
00050 
00051 void CStringSubsequenceKernel::cleanup()
00052 {
00053     CKernel::cleanup();
00054 }
00055 
00056 float64_t CStringSubsequenceKernel::compute(int32_t idx_a, int32_t idx_b)
00057 {
00058     // sanity check
00059     REQUIRE(lhs, "lhs feature vector is not set!\n")
00060     REQUIRE(rhs, "rhs feature vector is not set!\n")
00061 
00062     int32_t alen, blen;
00063     bool free_avec, free_bvec;
00064 
00065     char* avec=dynamic_cast<CStringFeatures<char>*>(lhs)
00066         ->get_feature_vector(idx_a, alen, free_avec);
00067     char* bvec=dynamic_cast<CStringFeatures<char>*>(rhs)
00068         ->get_feature_vector(idx_b, blen, free_bvec);
00069 
00070     REQUIRE(avec, "Feature vector for lhs is NULL!\n");
00071     REQUIRE(bvec, "Feature vector for rhs is NULL!\n");
00072 
00073     // allocating memory for computing K' (Kp)
00074     float64_t ***Kp=SG_MALLOC(float64_t**, m_maxlen+1);
00075     for (index_t i=0; i<m_maxlen+1; ++i)
00076     {
00077         Kp[i]=SG_MALLOC(float64_t*, alen);
00078         for (index_t j=0; j<alen; ++j)
00079             Kp[i][j]=SG_CALLOC(float64_t, blen);
00080     }
00081 
00082     // initialize for 0 subsequence length for both the strings
00083     for (index_t j=0; j<alen; j++)
00084         for (index_t k=0; k<blen; ++k)
00085             Kp[0][j][k]=1.0;
00086 
00087     // computing of the K' (Kp) function using equations
00088     // shown in Lodhi et. al. See the class documentation for
00089     // definitions of Kp and Kpp
00090     for (index_t i=0; i<m_maxlen; i++)
00091     {
00092         for (index_t j=0; j<alen-1; j++)
00093         {
00094             float64_t Kpp=0.0;
00095             for (index_t k=0; k<blen-1; k++)
00096             {
00097                 Kpp=m_lambda*(Kpp+m_lambda*(avec[j]==bvec[k])
00098                         *Kp[i][j][k]);
00099                 Kp[i+1][j+1][k+1]=m_lambda*Kp[i+1][j][k+1]+Kpp;
00100             }
00101         }
00102     }
00103 
00104     // compute the kernel function
00105     float64_t K=0.0;
00106     for (index_t i=0; i<m_maxlen; i++)
00107     {
00108         for (index_t j=0; j<alen; j++)
00109         {
00110             for (index_t k=0; k<blen; k++)
00111             {
00112                 K+=m_lambda*m_lambda*(avec[j]==bvec[k])
00113                     *Kp[i][j][k];
00114             }
00115         }
00116     }
00117 
00118     // cleanup
00119     dynamic_cast<CStringFeatures<char>*>(lhs)->free_feature_vector(avec, idx_a,
00120             free_avec);
00121     dynamic_cast<CStringFeatures<char>*>(rhs)->free_feature_vector(bvec, idx_b,
00122             free_bvec);
00123 
00124     for (index_t i=0; i<m_maxlen+1; ++i)
00125     {
00126         for (index_t j=0; j<alen; ++j)
00127             SG_FREE(Kp[i][j]);
00128         SG_FREE(Kp[i]);
00129     }
00130     SG_FREE(Kp);
00131 
00132     return K;
00133 }
00134 
00135 void CStringSubsequenceKernel::register_params()
00136 {
00137     SG_ADD(&m_maxlen, "m_maxlen", "maximum length of common subsequences", MS_AVAILABLE);
00138     SG_ADD(&m_lambda, "m_lambda", "gap penalty", MS_AVAILABLE);
00139 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation