SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
OligoStringKernel.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2008 Christian Igel, Tobias Glasmachers
00008  * Copyright (C) 2008 Christian Igel, Tobias Glasmachers
00009  *
00010  * Shogun adjustments (W) 2008-2009,2013 Soeren Sonnenburg
00011  * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00012  * Copyright (C) 2013 Soeren Sonnenburg
00013  */
00014 #ifndef _OLIGOSTRINGKERNEL_H_
00015 #define _OLIGOSTRINGKERNEL_H_
00016 
00017 #include <shogun/kernel/string/StringKernel.h>
00018 
00019 #include <vector>
00020 #include <string>
00021 
00022 namespace shogun
00023 {
/** String kernel operating on char-valued string features; publicly derives
 * from CStringKernel<char>.
 *
 * The kernel is parameterised by an integer `k` and a floating point `width`
 * and keeps a table of float64_t values (`gauss_table`) as protected state.
 *
 * NOTE(review): judging by the parameter name `k_mer_length` used by the
 * helpers below, `k` is presumably the k-mer (oligomer) length and `width`
 * presumably the width of a Gaussian - confirm against the implementation.
 *
 * NOTE(review): the leading five-digit number on every line is a line-number
 * gutter left by the documentation renderer; gaps in that numbering mark
 * places where the original comments were not reproduced.
 */
00042 class COligoStringKernel : public CStringKernel<char>
00043 {
00044     public:
              /** Default constructor (takes no arguments). */
00046         COligoStringKernel();
00047 
              /** Constructs a kernel from a cache size and the two kernel
               * parameters.
               *
               * @param cache_size cache size (units are not visible here -
               *        TODO confirm against the base class)
               * @param k integer kernel parameter (presumably k-mer length)
               * @param width floating point kernel parameter (presumably the
               *        Gaussian width)
               */
00053         COligoStringKernel(int32_t cache_size, int32_t k, float64_t width);
00054 
              /** Constructs a kernel from two char string feature objects and
               * the two kernel parameters.
               *
               * @param l features for the left-hand side (presumably)
               * @param r features for the right-hand side (presumably)
               * @param k integer kernel parameter (presumably k-mer length)
               * @param width floating point kernel parameter (presumably the
               *        Gaussian width)
               */
00061         COligoStringKernel(
00062                 CStringFeatures<char>* l, CStringFeatures<char>* r,
00063                 int32_t k, float64_t width);
00064 
              /** Virtual destructor. */
00066         virtual ~COligoStringKernel();
00067 
              /** Initialises the kernel with two generic feature objects.
               *
               * @param l features for the left-hand side (presumably)
               * @param r features for the right-hand side (presumably)
               * @return bool status; presumably true on success - confirm
               */
00074         virtual bool init(CFeatures* l, CFeatures* r);
00075 
              /** @return the kernel type identifier, always K_OLIGO */
00080         virtual EKernelType get_kernel_type() { return K_OLIGO; }
00081 
              /** @return the kernel name, the literal "OligoStringKernel" */
00086         virtual const char* get_name() const { return "OligoStringKernel"; }
00087 
00088 
              /** Computes a float64_t kernel value for a pair of integers.
               *
               * @param x first index (presumably into the left-hand features)
               * @param y second index (presumably into the right-hand
               *        features)
               * @return the kernel value
               */
00089         virtual float64_t compute(int32_t x, int32_t y);
00090 
              /** Cleanup hook; what is released is defined in the
               * implementation file, not visible here. */
00093         virtual void cleanup();
00094 
00095     protected:
              /** Static helper that encodes a single sequence into a list of
               * (int32_t, float64_t) pairs.
               *
               * @param sequence input sequence (read-only)
               * @param k_mer_length length of the k-mers to consider
               * @param allowed_characters characters accepted in a sequence
               *        (read-only); how other characters are treated is not
               *        visible here
               * @param values non-const reference, presumably the output
               *        list. NOTE(review): the meaning of the pair members
               *        (position vs. encoded value) is not visible here -
               *        confirm
               */
00109         static void encodeOligo(
00110             const std::string& sequence, uint32_t k_mer_length,
00111             const std::string& allowed_characters,
00112             std::vector< std::pair<int32_t, float64_t> >&   values);
00113 
              /** Static helper taking a list of sequences and a list of
               * per-sequence pair lists; presumably the batch counterpart of
               * encodeOligo() - confirm.
               *
               * @param sequences input sequences (read-only)
               * @param k_mer_length length of the k-mers to consider
               * @param allowed_characters characters accepted in a sequence
               *        (read-only)
               * @param encoded_sequences non-const reference, presumably
               *        receives one pair list per input sequence
               */
00121         static void getSequences(
00122             const std::vector<std::string>& sequences,
00123             uint32_t k_mer_length, const std::string& allowed_characters,
00124             std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences);
00125 
              /** Computes a kernel value from two encoded sequences.
               *
               * Non-static, so it may use instance state. NOTE(review):
               * presumably reads gauss_table and expects x and y in the form
               * produced by encodeOligo() - confirm.
               *
               * @param x first encoded sequence (read-only)
               * @param y second encoded sequence (read-only)
               * @param max_distance defaults to -1. NOTE(review): presumably
               *        -1 means "no distance limit" - confirm
               * @return the kernel value
               */
00141         float64_t kernelOligoFast(
00142             const std::vector< std::pair<int32_t, float64_t> >& x,
00143             const std::vector< std::pair<int32_t, float64_t> >& y,
00144             int32_t max_distance = -1);
00145 
              /** Computes a kernel value from two encoded sequences; same
               * inputs as kernelOligoFast() but without the max_distance
               * argument.
               *
               * @param x first encoded sequence (read-only)
               * @param y second encoded sequence (read-only)
               * @return the kernel value
               */
00153         float64_t kernelOligo(
00154                 const std::vector< std::pair<int32_t, float64_t> >& x,
00155                 const std::vector< std::pair<int32_t, float64_t> >& y);
00156 
00157 
00158     private:
              /** Private helper parameterised by a sequence length.
               *
               * NOTE(review): going by the name, this presumably fills
               * gauss_table with precomputed exponential values - confirm.
               *
               * @param sequence_length sequence length the cache is built for
               */
00169         void getExpFunctionCache(uint32_t sequence_length);
00170 
              /** Strict less-than comparison on the `second` (float64_t)
               * member of two pairs; the `first` members are ignored.
               * Both arguments are taken by value.
               *
               * @return true if a.second < b.second, false otherwise
               */
00171         static inline bool cmpOligos_(std::pair<int32_t, float64_t> a,
00172                 std::pair<int32_t, float64_t> b )
00173         {
00174             return (a.second < b.second);
00175         }
00176 
              /** Private, parameterless overload; distinct from the public
               * init(CFeatures*, CFeatures*). NOTE(review): presumably the
               * setup shared by the constructors - confirm. */
00177         void init();
00178 
00179     protected:
              /** integer kernel parameter (presumably the k-mer length) */
00181         int32_t k;
              /** floating point kernel parameter (presumably the Gaussian
               * width) */
00183         float64_t width;
              /** table of float64_t values; presumably the cache built by
               * getExpFunctionCache() - confirm */
00185         SGVector<float64_t> gauss_table;
00186 };
00187 }
00188 #endif // _OLIGOSTRINGKERNEL_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation