SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2014 Soumyajit De 00008 */ 00009 00010 #include <shogun/kernel/string/StringSubsequenceKernel.h> 00011 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h> 00012 #include <shogun/features/StringFeatures.h> 00013 00014 using namespace shogun; 00015 00016 CStringSubsequenceKernel::CStringSubsequenceKernel() 00017 : CStringKernel<char>(0), m_maxlen(1), m_lambda(1.0) 00018 { 00019 set_normalizer(new CSqrtDiagKernelNormalizer()); 00020 register_params(); 00021 } 00022 00023 CStringSubsequenceKernel::CStringSubsequenceKernel(int32_t size, int32_t maxlen, 00024 float64_t lambda) 00025 : CStringKernel<char>(size), m_maxlen(maxlen), m_lambda(lambda) 00026 { 00027 set_normalizer(new CSqrtDiagKernelNormalizer()); 00028 register_params(); 00029 } 00030 00031 CStringSubsequenceKernel::CStringSubsequenceKernel(CStringFeatures<char>* l, 00032 CStringFeatures<char>* r, int32_t maxlen, float64_t lambda) 00033 : CStringKernel<char>(10), m_maxlen(maxlen), m_lambda(lambda) 00034 { 00035 set_normalizer(new CSqrtDiagKernelNormalizer()); 00036 init(l, r); 00037 register_params(); 00038 } 00039 00040 CStringSubsequenceKernel::~CStringSubsequenceKernel() 00041 { 00042 cleanup(); 00043 } 00044 00045 bool CStringSubsequenceKernel::init(CFeatures* l, CFeatures* r) 00046 { 00047 CStringKernel<char>::init(l, r); 00048 return init_normalizer(); 00049 } 00050 00051 void CStringSubsequenceKernel::cleanup() 00052 { 00053 CKernel::cleanup(); 00054 } 00055 00056 float64_t CStringSubsequenceKernel::compute(int32_t idx_a, int32_t idx_b) 00057 { 00058 // sanity check 00059 REQUIRE(lhs, "lhs feature vector is not set!\n") 00060 REQUIRE(rhs, "rhs feature vector is not set!\n") 00061 00062 int32_t alen, blen; 00063 bool free_avec, free_bvec; 00064 00065 char* avec=dynamic_cast<CStringFeatures<char>*>(lhs) 00066 ->get_feature_vector(idx_a, alen, free_avec); 00067 char* bvec=dynamic_cast<CStringFeatures<char>*>(rhs) 00068 ->get_feature_vector(idx_b, blen, free_bvec); 00069 00070 REQUIRE(avec, "Feature vector for lhs is NULL!\n"); 00071 REQUIRE(bvec, "Feature vector for rhs is NULL!\n"); 00072 00073 // allocating memory for computing K' (Kp) 00074 float64_t ***Kp=SG_MALLOC(float64_t**, m_maxlen+1); 00075 for (index_t i=0; i<m_maxlen+1; ++i) 00076 { 00077 Kp[i]=SG_MALLOC(float64_t*, alen); 00078 for (index_t j=0; j<alen; ++j) 00079 Kp[i][j]=SG_CALLOC(float64_t, blen); 00080 } 00081 00082 // initialize for 0 subsequence length for both the strings 00083 for (index_t j=0; j<alen; j++) 00084 for (index_t k=0; k<blen; ++k) 00085 Kp[0][j][k]=1.0; 00086 00087 // computing of the K' (Kp) function using equations 00088 // shown in Lodhi et. al. See the class documentation for 00089 // definitions of Kp and Kpp 00090 for (index_t i=0; i<m_maxlen; i++) 00091 { 00092 for (index_t j=0; j<alen-1; j++) 00093 { 00094 float64_t Kpp=0.0; 00095 for (index_t k=0; k<blen-1; k++) 00096 { 00097 Kpp=m_lambda*(Kpp+m_lambda*(avec[j]==bvec[k]) 00098 *Kp[i][j][k]); 00099 Kp[i+1][j+1][k+1]=m_lambda*Kp[i+1][j][k+1]+Kpp; 00100 } 00101 } 00102 } 00103 00104 // compute the kernel function 00105 float64_t K=0.0; 00106 for (index_t i=0; i<m_maxlen; i++) 00107 { 00108 for (index_t j=0; j<alen; j++) 00109 { 00110 for (index_t k=0; k<blen; k++) 00111 { 00112 K+=m_lambda*m_lambda*(avec[j]==bvec[k]) 00113 *Kp[i][j][k]; 00114 } 00115 } 00116 } 00117 00118 // cleanup 00119 dynamic_cast<CStringFeatures<char>*>(lhs)->free_feature_vector(avec, idx_a, 00120 free_avec); 00121 dynamic_cast<CStringFeatures<char>*>(rhs)->free_feature_vector(bvec, idx_b, 00122 free_bvec); 00123 00124 for (index_t i=0; i<m_maxlen+1; ++i) 00125 { 00126 for (index_t j=0; j<alen; ++j) 00127 SG_FREE(Kp[i][j]); 00128 SG_FREE(Kp[i]); 00129 } 00130 SG_FREE(Kp); 00131 00132 return K; 00133 } 00134 00135 void CStringSubsequenceKernel::register_params() 00136 { 00137 SG_ADD(&m_maxlen, "m_maxlen", "maximum length of common subsequences", MS_AVAILABLE); 00138 SG_ADD(&m_lambda, "m_lambda", "gap penalty", MS_AVAILABLE); 00139 }