SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
LocalityImprovedStringKernel.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 1999-2008 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/lib/common.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/kernel/string/LocalityImprovedStringKernel.h>
00015 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h>
00016 #include <shogun/features/StringFeatures.h>
00017 
00018 using namespace shogun;
00019 
00020 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel()
00021 : CStringKernel<char>()
00022 {
00023     init();
00024 }
00025 
00026 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel(
00027     int32_t size, int32_t l, int32_t id, int32_t od)
00028 : CStringKernel<char>(size)
00029 {
00030     init();
00031 
00032     length=l;
00033     inner_degree=id;
00034     outer_degree=od;
00035 
00036     SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", l, id, od)
00037 }
00038 
00039 CLocalityImprovedStringKernel::CLocalityImprovedStringKernel(
00040     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t len,
00041     int32_t id, int32_t od)
00042 : CStringKernel<char>()
00043 {
00044     init();
00045 
00046     length=len;
00047     inner_degree=id;
00048     outer_degree=od;
00049 
00050     SG_DEBUG("LIK with parms: l=%d, id=%d, od=%d created!\n", len, id, od)
00051 
00052     init(l, r);
00053 }
00054 
00055 CLocalityImprovedStringKernel::~CLocalityImprovedStringKernel()
00056 {
00057     cleanup();
00058 }
00059 
00060 bool CLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00061 {
00062     CStringKernel<char>::init(l,r);
00063     return init_normalizer();
00064 }
00065 
00066 float64_t CLocalityImprovedStringKernel::compute(int32_t idx_a, int32_t idx_b)
00067 {
00068     int32_t alen, blen;
00069     bool free_avec, free_bvec;
00070 
00071     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00072     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00073     // can only deal with strings of same length
00074     ASSERT(alen==blen && alen>0)
00075 
00076     int32_t i,t;
00077     float64_t* match=SG_MALLOC(float64_t, alen);
00078 
00079     // initialize match table 1 -> match;  0 -> no match
00080     for (i = 0; i<alen; i++)
00081         match[i] = (avec[i] == bvec[i])? 1 : 0;
00082 
00083     float64_t outer_sum = 0;
00084 
00085     for (t = 0; t<alen-length; t++)
00086     {
00087         float64_t sum = 0;
00088         for (i = 0; i<length && t+i+length+1<alen; i++)
00089             sum += (i+1)*match[t+i]+(length-i)*match[t+i+length+1];
00090         //add middle element + normalize with sum_i=0^2l+1 i = (2l+1)(l+1)
00091         float64_t inner_sum = (sum + (length+1)*match[t+length]) / ((2*length+1)*(length+1));
00092         inner_sum = pow(inner_sum, inner_degree + 1);
00093         outer_sum += inner_sum;
00094     }
00095     SG_FREE(match);
00096 
00097     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00098     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00099     return pow(outer_sum, outer_degree + 1);
00100 }
00101 
00102 void CLocalityImprovedStringKernel::init()
00103 {
00104     set_normalizer(new CSqrtDiagKernelNormalizer());
00105 
00106     length = 0;
00107     inner_degree = 0;
00108     outer_degree = 0;
00109 
00110     SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
00111     SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
00112     SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
00113 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation