SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SimpleLocalityImprovedStringKernel.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2013 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  * Copyright (C) 2013 Soeren Sonnenburg
00011  */
00012 
00013 #include <shogun/lib/common.h>
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/kernel/string/SimpleLocalityImprovedStringKernel.h>
00016 #include <shogun/kernel/normalizer/SqrtDiagKernelNormalizer.h>
00017 #include <shogun/features/Features.h>
00018 #include <shogun/features/StringFeatures.h>
00019 
00020 using namespace shogun;
00021 
00022 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel()
00023 : CStringKernel<char>()
00024 {
00025     init();
00026 }
00027 
00028 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00029     int32_t size, int32_t l, int32_t id, int32_t od)
00030 : CStringKernel<char>(size)
00031 {
00032     init();
00033 
00034     length=l;
00035     inner_degree=id;
00036     outer_degree=od;
00037 }
00038 
00039 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00040     CStringFeatures<char>* l, CStringFeatures<char>* r,
00041     int32_t len, int32_t id, int32_t od)
00042 : CStringKernel<char>()
00043 {
00044     init();
00045 
00046     length=len;
00047     inner_degree=id;
00048     outer_degree=od;
00049 
00050     init(l, r);
00051 }
00052 
00053 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00054 {
00055     cleanup();
00056 }
00057 
00058 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00059 {
00060     bool result = CStringKernel<char>::init(l,r);
00061 
00062     if (!result)
00063         return false;
00064     const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00065     const int32_t PYRAL = 2 * length - 1; // total window length
00066     const int32_t pyra_len  = num_features-PYRAL+1;
00067     const int32_t pyra_len2 = (int32_t) pyra_len/2;
00068 
00069     pyramid_weights = SGVector<float64_t>(pyra_len);
00070 
00071     SG_DEBUG("initializing pyramid weights: size=%ld length=%i\n",
00072         num_features, length);
00073 
00074     float64_t PYRAL_pot;
00075     int32_t DEGREE1_1  = (inner_degree & 0x1)==0;
00076     int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00077     int32_t DEGREE1_2  = (inner_degree & 0x2)!=0;
00078     int32_t DEGREE1_3  = (inner_degree & ~0x3)!=0;
00079     int32_t DEGREE1_4  = (inner_degree & 0x4)!=0;
00080     {
00081     float64_t PYRAL_ = PYRAL;
00082     PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00083     if (DEGREE1_1n)
00084     {
00085         PYRAL_ *= PYRAL_;
00086         if (DEGREE1_2)
00087             PYRAL_pot *= PYRAL_;
00088         if (DEGREE1_3)
00089         {
00090             PYRAL_ *= PYRAL_;
00091             if (DEGREE1_4)
00092                 PYRAL_pot *= PYRAL_;
00093         }
00094     }
00095     }
00096 
00097     {
00098     int32_t j;
00099     for (j = 0; j < pyra_len; j++)
00100         pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00101     for (j = 0; j < pyra_len; j++)
00102         pyramid_weights[j] /= PYRAL_pot;
00103     }
00104 
00105     return init_normalizer();
00106 }
00107 
00108 void CSimpleLocalityImprovedStringKernel::cleanup()
00109 {
00110     pyramid_weights = SGVector<float64_t>();
00111     CKernel::cleanup();
00112 }
00113 
00114 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00115          const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00116          const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00117 {
00118     const int32_t PYRAL = 2*NTWIDTH-1; // total window length
00119     float64_t pot;
00120     float64_t sum;
00121     int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00122     int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00123     int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00124     int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00125     int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00126 
00127     ASSERT((DEGREE1 & ~0x7) == 0)
00128     ASSERT((DEGREE2 & ~0x7) == 0)
00129 
00130     register int32_t conv;
00131     register int32_t i;
00132     register int32_t j;
00133 
00134     sum = 0.0;
00135     conv = 0;
00136     for (j = 0; j < PYRAL; j++)
00137         conv += (x1[j] == x2[j]) ? 1 : 0;
00138 
00139     for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00140     {
00141         register float64_t pot2;
00142         if (i>0)
00143             conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00144                 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00145         { /* potencing of conv -- float64_t is faster*/
00146         register float64_t conv2 = conv;
00147         pot2 = (DEGREE1_1) ? 1.0 : conv2;
00148             if (DEGREE1_1n)
00149             {
00150                 conv2 *= conv2;
00151                 if (DEGREE1_2)
00152                     pot2 *= conv2;
00153                 if (DEGREE1_3 && DEGREE1_4)
00154                     pot2 *= conv2*conv2;
00155             }
00156         }
00157         sum += pot2*pyra[i];
00158     }
00159 
00160     pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00161     if ((DEGREE2 & ~0x1) != 0)
00162     {
00163         sum *= sum;
00164         if ((DEGREE2 & 0x2) != 0)
00165             pot *= sum;
00166         if ((DEGREE2 & ~0x3) != 0)
00167         {
00168             sum *= sum;
00169             if ((DEGREE2 & 0x4) != 0)
00170                 pot *= sum;
00171         }
00172     }
00173     return pot;
00174 }
00175 
00176 float64_t CSimpleLocalityImprovedStringKernel::compute(
00177     int32_t idx_a, int32_t idx_b)
00178 {
00179     int32_t alen, blen;
00180     bool free_avec, free_bvec;
00181 
00182     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00183     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00184 
00185     // can only deal with strings of same length
00186     ASSERT(alen==blen)
00187 
00188     float64_t dpt;
00189 
00190     dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00191     dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
00192 
00193     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00194     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00195     return (float64_t) dpt;
00196 }
00197 
00198 void CSimpleLocalityImprovedStringKernel::init()
00199 {
00200     set_normalizer(new CSqrtDiagKernelNormalizer());
00201 
00202     length = 3;
00203     inner_degree = 3;
00204     outer_degree = 1;
00205 
00206     SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
00207     SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
00208     SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
00209     SG_ADD(&pyramid_weights,"pyramid_weights", "Pyramid weights.", MS_AVAILABLE);
00210 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation