SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 1999-2008 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/common.h> 00013 #include <shogun/io/SGIO.h> 00014 #include <shogun/kernel/string/LocalityImprovedStringKernel.h> 00015 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h> 00016 #include <shogun/features/StringFeatures.h> 00017 00018 using namespace shogun; 00019 00020 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel() 00021 : CStringKernel<char>() 00022 { 00023 init(); 00024 } 00025 00026 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00027 int32_t size, int32_t l, int32_t id, int32_t od) 00028 : CStringKernel<char>(size) 00029 { 00030 init(); 00031 00032 length=l; 00033 inner_degree=id; 00034 outer_degree=od; 00035 00036 SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od) 00037 } 00038 00039 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel( 00040 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len, 00041 int32_t id, int32_t od) 00042 : CStringKernel<char>() 00043 { 00044 init(); 00045 00046 length=len; 00047 inner_degree=id; 00048 outer_degree=od; 00049 00050 SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od) 00051 00052 init(l, r); 00053 } 00054 00055 CLocalityImprovedStringKernel::~CLocalityImprovedStringKernel() 00056 { 00057 cleanup(); 00058 } 00059 00060 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r) 00061 { 00062 CStringKernel<char>::init(l,r); 00063 return init_normalizer(); 00064 } 00065 00066 float64_t CLocalityImprovedStringKernel::compute(int32_t idx_a, int32_t idx_b) 00067 { 00068 int32_t alen, blen; 00069 bool free_avec, free_bvec; 00070 00071 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00072 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00073 // can only deal with strings of same length 00074 ASSERT(alen==blen && alen>0) 00075 00076 int32_t i,t; 00077 float64_t* match=SG_MALLOC(float64_t, alen); 00078 00079 // initialize match table 1 -> match; 0 -> no match 00080 for (i = 0; i<alen; i++) 00081 match[i] = (avec[i] == bvec[i])? 1 : 0; 00082 00083 float64_t outer_sum = 0; 00084 00085 for (t = 0; t<alen-length; t++) 00086 { 00087 float64_t sum = 0; 00088 for (i = 0; i<length && t+i+length+1<alen; i++) 00089 sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1]; 00090 //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1) 00091 float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1)); 00092 inner_sum = pow(inner_sum, inner_degree + 1); 00093 outer_sum += inner_sum; 00094 } 00095 SG_FREE(match); 00096 00097 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00098 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00099 return pow(outer_sum, outer_degree + 1); 00100 } 00101 00102 void CLocalityImprovedStringKernel::init() 00103 { 00104 set_normalizer(new CSqrtDiagKernelNormalizer()); 00105 00106 length = 0; 00107 inner_degree = 0; 00108 outer_degree = 0; 00109 00110 SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE); 00111 SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE); 00112 SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE); 00113 }