SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2008 Christian Igel, Tobias Glasmachers 00008 * Copyright (C) 2008 Christian Igel, Tobias Glasmachers 00009 * 00010 * Shogun adjustments (W) 2008-2009,2013 Soeren Sonnenburg 00011 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00012 * Copyright (C) 2013 Soeren Sonnenburg 00013 */ 00014 #ifndef _OLIGOSTRINGKERNEL_H_ 00015 #define _OLIGOSTRINGKERNEL_H_ 00016 00017 #include <shogun/kernel/string/StringKernel.h> 00018 00019 #include <vector> 00020 #include <string> 00021 00022 namespace shogun 00023 { 00042 class COligoStringKernel : public CStringKernel<char> 00043 { 00044 public: 00046 COligoStringKernel(); 00047 00053 COligoStringKernel(int32_t cache_size, int32_t k, float64_t width); 00054 00061 COligoStringKernel( 00062 CStringFeatures<char>* l, CStringFeatures<char>* r, 00063 int32_t k, float64_t width); 00064 00066 virtual ~COligoStringKernel(); 00067 00074 virtual bool init(CFeatures* l, CFeatures* r); 00075 00080 virtual EKernelType get_kernel_type() { return K_OLIGO; } 00081 00086 virtual const char* get_name() const { return "OligoStringKernel"; } 00087 00088 00089 virtual float64_t compute(int32_t x, int32_t y); 00090 00093 virtual void cleanup(); 00094 00095 protected: 00109 static void encodeOligo( 00110 const std::string& sequence, uint32_t k_mer_length, 00111 const std::string& allowed_characters, 00112 std::vector< std::pair<int32_t, float64_t> >& values); 00113 00121 static void getSequences( 00122 const std::vector<std::string>& sequences, 00123 uint32_t k_mer_length, const std::string& allowed_characters, 00124 std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences); 00125 00141 float64_t kernelOligoFast( 00142 const std::vector< std::pair<int32_t, float64_t> >& x, 00143 const std::vector< std::pair<int32_t, float64_t> >& y, 00144 int32_t max_distance = -1); 00145 00153 float64_t kernelOligo( 00154 const std::vector< std::pair<int32_t, float64_t> >& x, 00155 const std::vector< std::pair<int32_t, float64_t> >& y); 00156 00157 00158 private: 00169 void getExpFunctionCache(uint32_t sequence_length); 00170 00171 static inline bool cmpOligos_(std::pair<int32_t, float64_t> a, 00172 std::pair<int32_t, float64_t> b ) 00173 { 00174 return (a.second < b.second); 00175 } 00176 00177 void init(); 00178 00179 protected: 00181 int32_t k; 00183 float64_t width; 00185 SGVector<float64_t> gauss_table; 00186 }; 00187 } 00188 #endif // _OLIGOSTRINGKERNEL_H_