SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Heiko Strathmann 00008 * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3 00009 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/kernel/string/DistantSegmentsKernel.h> 00013 #include <string> 00014 00015 using namespace shogun; 00016 00017 CDistantSegmentsKernel::CDistantSegmentsKernel() : CStringKernel<char>(), 00018 m_delta(0), m_theta(0) 00019 { 00020 init(); 00021 } 00022 00023 CDistantSegmentsKernel::CDistantSegmentsKernel(int32_t size, int32_t delta, 00024 int32_t theta) : CStringKernel<char>(size), m_delta(delta), 00025 m_theta(theta) 00026 { 00027 init(); 00028 } 00029 00030 CDistantSegmentsKernel::CDistantSegmentsKernel(CStringFeatures<char>* l, 00031 CStringFeatures<char>* r, int32_t size, int32_t delta, int32_t theta) : 00032 CStringKernel<char>(size), m_delta(delta), m_theta(theta) 00033 { 00034 init(); 00035 CStringKernel<char>::init(l, r); 00036 } 00037 00038 bool CDistantSegmentsKernel::init(CFeatures* l, CFeatures* r) 00039 { 00040 CKernel::init(l, r); 00041 return init_normalizer(); 00042 } 00043 00044 void CDistantSegmentsKernel::init() 00045 { 00046 SG_ADD(&m_delta, "delta", "Delta parameter of the DS-Kernel", MS_AVAILABLE); 00047 SG_ADD(&m_theta, "theta", "Theta parameter of the DS-Kernel", MS_AVAILABLE); 00048 } 00049 00050 float64_t CDistantSegmentsKernel::compute(int32_t idx_a, int32_t idx_b) 00051 { 00052 bool free_a, free_b; 00053 int32_t aLength=0, bLength=0; 00054 char* a=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, aLength, 00055 free_a); 00056 char* b=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, bLength, 00057 free_b); 00058 ASSERT(a && b) 00059 00060 if ((aLength<1)||(bLength<1)) 00061 SG_ERROR("Empty sequences") 00062 00063 float64_t result=compute(a, aLength, b, bLength, m_delta, m_theta); 00064 00065 ((CStringFeatures<char>*) lhs)->free_feature_vector(a, idx_a, free_a); 00066 ((CStringFeatures<char>*) rhs)->free_feature_vector(b, idx_b, free_b); 00067 00068 return result; 00069 } 00070 00071 int32_t CDistantSegmentsKernel::bin(int32_t j, int32_t i) 00072 { 00073 if (i>j) 00074 return 0; 00075 if (i==3 && j>=3) 00076 { 00077 return j*(j-1)*(j-2)/6; 00078 } 00079 else if (i==2 && j>=2) 00080 { 00081 return j*(j-1)/2; 00082 } 00083 return 0; 00084 } 00085 00086 int32_t CDistantSegmentsKernel::compute(char* s, int32_t sLength, char* t, 00087 int32_t tLength, int32_t delta_m, int32_t theta_m) 00088 { 00089 int32_t c=0; 00090 int32_t* i_=SG_MALLOC(int32_t, delta_m+1); 00091 int32_t* l_=SG_MALLOC(int32_t, delta_m+1); 00092 for (int32_t j_s=0; j_s<=(int32_t) sLength-1; j_s++) 00093 { 00094 for (int32_t j_t=0; j_t<=(int32_t) tLength-1; j_t++) 00095 { 00096 if (s[j_s-1+1]==t[j_t-1+1]) 00097 { 00098 int32_t n=CMath::min(CMath::min(sLength-j_s, tLength-j_t), delta_m); 00099 int32_t k=-1; 00100 int32_t i=1; 00101 while (i<=n) 00102 { 00103 k++; 00104 i_[2*k]=i; 00105 i++; 00106 while (i<=n&&s[j_s-1+i]==t[j_t-1+i]) 00107 i++; 00108 i_[2*k+1]=i; 00109 l_[k]=i_[2*k+1]-i_[2*k]+1; 00110 i++; 00111 while (i<=n&&s[j_s-1+i]!=t[j_t-1+i]) 00112 i++; 00113 } 00114 c+=bin(l_[0], 3)-2*bin(l_[0]-theta_m, 3) 00115 +bin(l_[0]-2*theta_m, 3); 00116 int32_t c1=0; 00117 for (int32_t r=1; r<=k; r++) 00118 { 00119 c1+=bin(l_[r], 2)-bin(l_[r]-theta_m, 2); 00120 } 00121 c+=CMath::min(theta_m, i_[1]-i_[0])*c1; 00122 } 00123 } 00124 } 00125 SG_FREE(l_); 00126 SG_FREE(i_); 00127 return c; 00128 }