SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.h
Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017 
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/SGIO.h>
00021 #include <shogun/io/File.h>
00022 #include <shogun/mathematics/Math.h>
00023 #include <shogun/features/FeatureTypes.h>
00024 #include <shogun/base/SGObject.h>
00025 #include <shogun/features/Features.h>
00026 #include <shogun/kernel/normalizer/KernelNormalizer.h>
00027 
00028 namespace shogun
00029 {
00030     class CFile;
00031     class CFeatures;
00032     class CKernelNormalizer;
00033 
/**
 * Element type of the kernel cache buffer and of CKernel::kernel_matrix:
 * float32_t when USE_SHORTREAL_KERNELCACHE is defined, float64_t otherwise.
 */
00034 #ifdef USE_SHORTREAL_KERNELCACHE
00035 
00036     typedef float32_t KERNELCACHE_ELEM;
00037 #else
00038 
00039     typedef float64_t KERNELCACHE_ELEM;
00040 #endif
00041 
/** 64-bit integer type used for kernel cache sizes (KERNEL_CACHE::buffsize, resize_kernel_cache()). */
00043 typedef int64_t KERNELCACHE_IDX;
00044 
00045 
/**
 * Optimization mode of a kernel, stored in CKernel::opt_type and accessed
 * through get_optimization_type() / set_optimization_type().
 *
 * NOTE(review): judging by the enumerator names this presumably selects a
 * speed-versus-memory trade-off; how each mode is honoured is not visible
 * in this header - confirm against the kernel implementations.
 */
00047 enum EOptimizationType
00048 {
00049     FASTBUTMEMHUNGRY,
00050     SLOWBUTMEMEFFICIENT
00051 };
00052 
/**
 * Identifier of a concrete kernel type; this is the return type of the
 * pure virtual CKernel::get_kernel_type().
 *
 * Every enumerator has an explicitly assigned value.
 * NOTE(review): presumably these numbers are relied upon outside this header
 * (e.g. stored or compared), so existing values should not be renumbered -
 * confirm before changing any of them.
 */
00054 enum EKernelType
00055 {
00056     K_UNKNOWN = 0,
00057     K_LINEAR = 10,
00058     K_POLY = 20,
00059     K_GAUSSIAN = 30,
00060     K_GAUSSIANSHIFT = 32,
00061     K_GAUSSIANMATCH = 33,
00062     K_HISTOGRAM = 40,
00063     K_SALZBERG = 41,
00064     K_LOCALITYIMPROVED = 50,
00065     K_SIMPLELOCALITYIMPROVED = 60,
00066     K_FIXEDDEGREE = 70,
00067     K_WEIGHTEDDEGREE =    80,
00068     K_WEIGHTEDDEGREEPOS = 81,
00069     K_WEIGHTEDDEGREERBF = 82,
00070     K_WEIGHTEDCOMMWORDSTRING = 90,
00071     K_POLYMATCH = 100,
00072     K_ALIGNMENT = 110,
00073     K_COMMWORDSTRING = 120,
00074     K_COMMULONGSTRING = 121,
00075     K_SPECTRUMRBF = 122,
00076     K_SPECTRUMMISMATCHRBF = 123,
00077     K_COMBINED = 140,
00078     K_AUC = 150,
00079     K_CUSTOM = 160,
00080     K_SIGMOID = 170,
00081     K_CHI2 = 180,
00082     K_DIAG = 190,
00083     K_CONST = 200,
00084     K_DISTANCE = 220,
00085     K_LOCALALIGNMENT = 230,
00086     K_PYRAMIDCHI2 = 240,
00087     K_OLIGO = 250,
00088     K_MATCHWORD = 260,
00089     K_TPPK = 270,
00090     K_REGULATORYMODULES = 280,
00091     K_SPARSESPATIALSAMPLE = 290,
00092     K_HISTOGRAMINTERSECTION = 300,
00093     K_WAVELET = 310,
00094     K_WAVE = 320,
00095     K_CAUCHY = 330,
00096     K_TSTUDENT = 340,
00097     K_RATIONAL_QUADRATIC = 350,
00098     K_MULTIQUADRIC = 360,
00099     K_EXPONENTIAL = 370,
00100     K_SPHERICAL = 380,
00101     K_SPLINE = 390,
00102     K_ANOVA = 400,
00103     K_POWER = 410,
00104     K_LOG = 420,
00105     K_CIRCULAR = 430,
00106     K_INVERSEMULTIQUADRIC = 440,
00107     K_DISTANTSEGMENTS = 450,
00108     K_BESSEL = 460,
00109     K_JENSENSHANNON = 470,
00110     K_DIRECTOR = 480,
00111     K_PRODUCT = 490,
00112     K_LINEARARD = 500,
00113     K_GAUSSIANARD = 510,
00114     K_STREAMING = 520
00115 };
00116 
/**
 * Capability flags of a kernel. The values are distinct bits (1, 2, 4) so
 * several can be OR-ed together in CKernel::properties; they are set with
 * set_property(), cleared with unset_property() and queried with
 * has_property().
 */
00118 enum EKernelProperty
00119 {
00120     KP_NONE = 0, // no special capability
00121     KP_LINADD = 1,  // kernels that can be optimized by maintaining a normal vector and applying updates w + dw
00122     KP_KERNCOMBINATION = 2, // kernels that are in fact a linear combination of subkernels, K=\sum_i b_i*K_i
00123     KP_BATCHEVALUATION = 4  // kernels that can generate normals on the fly in linadd and process whole batches more quickly / memory-efficiently than single examples
00124 };
00125 
00126 class CSVM;
00127 
00153 class CKernel : public CSGObject
00154 {
00155     friend class CVarianceKernelNormalizer;
00156     friend class CSqrtDiagKernelNormalizer;
00157     friend class CAvgDiagKernelNormalizer;
00158     friend class CRidgeKernelNormalizer;
00159     friend class CFirstElementKernelNormalizer;
00160     friend class CMultitaskKernelNormalizer;
00161     friend class CMultitaskKernelMklNormalizer;
00162     friend class CMultitaskKernelMaskNormalizer;
00163     friend class CMultitaskKernelMaskPairNormalizer;
00164     friend class CTanimotoKernelNormalizer;
00165     friend class CDiceKernelNormalizer;
00166     friend class CZeroMeanCenterKernelNormalizer;
00167 
00168     friend class CStreamingKernel;
00169 
00170     public:
00171 
00175         CKernel();
00176 
00177 
00182         CKernel(int32_t size);
00183 
00190         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00191 
00192         virtual ~CKernel();
00193 
/**
 * Evaluate the normalized kernel for one pair of indices.
 *
 * Both indices are range-checked with REQUIRE; the raw value from the pure
 * virtual compute() is then passed through the attached normalizer.
 * Note that normalizer is dereferenced without a null check.
 *
 * @param idx_a first index, must satisfy 0 <= idx_a < num_lhs
 * @param idx_b second index, must satisfy 0 <= idx_b < num_rhs
 * @return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b)
 */
00201         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00202         {
00203             REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
00204                 "%s::kernel(): index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00205                 get_name(), idx_a,num_lhs, idx_b,num_rhs);
00206 
00207             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00208         }
00209 
/**
 * Convenience overload: returns the kernel matrix with float64_t entries by
 * forwarding to the templated get_kernel_matrix<T>() with T = float64_t.
 *
 * @return result of get_kernel_matrix<float64_t>()
 */
00214         SGMatrix<float64_t> get_kernel_matrix()
00215         {
00216             return get_kernel_matrix<float64_t>();
00217         }
00218 
/**
 * Compute the diagonal kernel(i, i) of the kernel matrix.
 *
 * Requires that both lhs and rhs are set and that they report the same
 * number of vectors.
 *
 * @param preallocated optional output vector; if its data pointer is null a
 *        new vector of lhs->get_num_vectors() entries is allocated,
 *        otherwise its length must equal lhs->get_num_vectors()
 * @return the (possibly newly allocated) vector with entry i = kernel(i, i)
 */
00225         SGVector<float64_t> get_kernel_diagonal(SGVector<float64_t>
00226                 preallocated=SGVector<float64_t>())
00227         {
00228             REQUIRE(lhs, "CKernel::get_kernel_diagonal(): Left-handside "
00229                     "features missing!\n");
00230 
00231             REQUIRE(rhs, "CKernel::get_kernel_diagonal(): Right-handside "
00232                         "features missing!\n");
00233 
00234             REQUIRE(lhs->get_num_vectors()==rhs->get_num_vectors(),
00235                     "CKernel::get_kernel_diagonal(): Left- and right-"
00236                     "handside features must be equal sized\n");
00237 
00238             /* allocate space if necessary */
00239             if (!preallocated.vector)
00240                 preallocated=SGVector<float64_t>(lhs->get_num_vectors());
00241             else
00242             {
00243                 REQUIRE(preallocated.vlen==lhs->get_num_vectors(),
00244                         "%s::get_kernel_diagonal(): Preallocated vector has"
00245                         " wrong size!\n", get_name());
00246             }
00247 
00248             /* each entry goes through kernel(), i.e. it is range-checked and normalized */
00249             for (index_t i=0; i<preallocated.vlen; ++i)
00250                 preallocated[i]=kernel(i, i);
00251 
00252             return preallocated;
00253         }
00253 
/**
 * Get column j of the kernel matrix.
 *
 * Column j consists of kernel(i, j) for every first index i, and kernel()
 * only accepts i < num_lhs, so the column has num_lhs entries. The vector
 * is therefore sized and filled by num_lhs (not num_rhs), which matters
 * whenever the kernel is not square.
 *
 * @param j column index, must satisfy 0 <= j < num_rhs (checked by kernel())
 * @return vector of length num_lhs whose i-th entry is kernel(i, j)
 */
00259         virtual SGVector<float64_t> get_kernel_col(int32_t j)
00260         {
00261 
00262             SGVector<float64_t> col = SGVector<float64_t>(num_lhs);
00263 
00264             for (int32_t i=0; i<num_lhs; i++)
00265                 col[i] = kernel(i,j);
00266 
00267             return col;
00268         }
00269 
00270 
/**
 * Get row i of the kernel matrix.
 *
 * Row i consists of kernel(i, j) for every second index j, and kernel()
 * only accepts j < num_rhs, so the row has num_rhs entries. The vector is
 * therefore sized and filled by num_rhs (not num_lhs), which matters
 * whenever the kernel is not square.
 *
 * @param i row index, must satisfy 0 <= i < num_lhs (checked by kernel())
 * @return vector of length num_rhs whose j-th entry is kernel(i, j)
 */
00276         virtual SGVector<float64_t> get_kernel_row(int32_t i)
00277         {
00278             SGVector<float64_t> row = SGVector<float64_t>(num_rhs);
00279 
00280             for (int32_t j=0; j<num_rhs; j++)
00281                 row[j] = kernel(i,j);
00282 
00283             return row;
00284         }
00285 
00290         template <class T> SGMatrix<T> get_kernel_matrix();
00291 
00292 
00303         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00304 
00309         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00310 
00315         virtual CKernelNormalizer* get_normalizer();
00316 
00320         virtual bool init_normalizer();
00321 
00328         virtual void cleanup();
00329 
00334         void load(CFile* loader);
00335 
00340         void save(CFile* writer);
00341 
/**
 * Return the left-hand side features pointer after applying SG_REF to it.
 * NOTE(review): SG_REF presumably increments the reference count, in which
 * case the caller has to release the returned object - confirm.
 */
00346         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00347 
/**
 * Return the right-hand side features pointer after applying SG_REF to it.
 * NOTE(review): SG_REF presumably increments the reference count, in which
 * case the caller has to release the returned object - confirm.
 */
00352         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00353 
/**
 * @return the stored num_lhs value (upper bound for the first index of
 *         kernel()); virtual, so subclasses may report a different count
 */
00358         virtual int32_t get_num_vec_lhs()
00359         {
00360             return num_lhs;
00361         }
00362 
/**
 * @return the stored num_rhs value (upper bound for the second index of
 *         kernel()); virtual, so subclasses may report a different count
 */
00367         virtual int32_t get_num_vec_rhs()
00368         {
00369             return num_rhs;
00370         }
00371 
/** @return true only if both the lhs and the rhs feature pointers are non-null */
00376         virtual bool has_features()
00377         {
00378             return lhs && rhs;
00379         }
00380 
/** @return the stored lhs_equals_rhs flag (it is not recomputed here) */
00385         inline bool get_lhs_equals_rhs()
00386         {
00387             return lhs_equals_rhs;
00388         }
00389 
00391         virtual void remove_lhs_and_rhs();
00392 
00394         virtual void remove_lhs();
00395 
00397         virtual void remove_rhs();
00398 
00406         virtual EKernelType get_kernel_type()=0 ;
00407 
00414         virtual EFeatureType get_feature_type()=0;
00415 
00422         virtual EFeatureClass get_feature_class()=0;
00423 
/**
 * Store a new cache size and, when built with USE_SVMLIGHT, immediately
 * resize the kernel cache to it via cache_reset().
 *
 * @param size new value for cache_size
 *        (NOTE(review): unit is not visible here, presumably MB - confirm)
 */
00428         inline void set_cache_size(int32_t size)
00429         {
00430             cache_size = size;
00431 #ifdef USE_SVMLIGHT
00432             cache_reset();
00433 #endif //USE_SVMLIGHT
00434         }
00435 
/** @return the currently stored cache_size */
00440         inline int32_t get_cache_size() { return cache_size; }
00441 
00442 #ifdef USE_SVMLIGHT
00443 
/** Re-apply the stored cache_size by calling resize_kernel_cache(cache_size). */
00444         inline void cache_reset() { resize_kernel_cache(cache_size); }
00445 
/** @return kernel_cache.max_elems, the limit that kernel_cache.elems is compared against */
00450         inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00451 
/** @return kernel_cache.activenum */
00456         inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00457 
00465         void get_kernel_row(
00466             int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00467             bool full_line=false);
00468 
00473         void cache_kernel_row(int32_t x);
00474 
00480         void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00481 
00483         void kernel_cache_reset_lru();
00484 
00491         void kernel_cache_shrink(
00492             int32_t totdoc, int32_t num_shrink, int32_t *after);
00493 
00499         void resize_kernel_cache(KERNELCACHE_IDX size,
00500             bool regression_hack=false);
00501 
/**
 * Set the kernel cache time stamp; kernel_cache_touch() writes this value
 * into the LRU table.
 *
 * @param t new value for kernel_cache.time
 */
00506         inline void set_time(int32_t t)
00507         {
00508             kernel_cache.time=t;
00509         }
00510 
/**
 * Mark a cached entry as recently used.
 *
 * If kernel_cache.index[cacheidx] is not -1, the LRU slot it refers to is
 * stamped with the current kernel_cache.time. cacheidx itself is not
 * bounds-checked.
 * NOTE(review): "not cached" is tested as != -1 here but as >= 0 in
 * kernel_cache_check(); the two agree only if -1 is the sole negative value
 * ever stored in index[] - confirm.
 *
 * @param cacheidx position in kernel_cache.index to look up
 * @return 1 if the entry was present and touched, 0 otherwise
 */
00516         inline int32_t kernel_cache_touch(int32_t cacheidx)
00517         {
00518             if(kernel_cache.index[cacheidx] != -1)
00519             {
00520                 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00521                 return(1);
00522             }
00523             return(0);
00524         }
00525 
/**
 * Test whether kernel_cache.index holds a non-negative entry for cacheidx.
 * cacheidx is not bounds-checked.
 *
 * @param cacheidx position in kernel_cache.index to look up
 * @return 1 if kernel_cache.index[cacheidx] >= 0, 0 otherwise
 */
00531         inline int32_t kernel_cache_check(int32_t cacheidx)
00532         {
00533             return(kernel_cache.index[cacheidx] >= 0);
00534         }
00535 
/** @return 1 if kernel_cache.elems is still below kernel_cache.max_elems, 0 otherwise */
00540         inline int32_t kernel_cache_space_available()
00541         {
00542             return(kernel_cache.elems < kernel_cache.max_elems);
00543         }
00544 
00550         void kernel_cache_init(int32_t size, bool regression_hack=false);
00551 
00553         void kernel_cache_cleanup();
00554 
00555 #endif //USE_SVMLIGHT
00556 
00558         void list_kernel();
00559 
/**
 * @param p property flag(s) to test
 * @return true if at least one bit of p is set in properties
 */
00565         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00566 
00570         virtual void clear_normal();
00571 
00577         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00578 
/** @return the stored optimization type (opt_type) */
00583         inline EOptimizationType get_optimization_type() { return opt_type; }
00584 
/**
 * Store a new optimization type; the base implementation only assigns
 * opt_type, subclasses may override.
 *
 * @param t optimization type to store
 */
00589         virtual void set_optimization_type(EOptimizationType t) { opt_type=t;}
00590 
/** @return the optimization_initialized flag, as last set via set_is_initialized() */
00595         inline bool get_is_initialized() { return optimization_initialized; }
00596 
00604         virtual bool init_optimization(
00605             int32_t count, int32_t *IDX, float64_t *weights);
00606 
00611         virtual bool delete_optimization();
00612 
00618         bool init_optimization_svm(CSVM * svm) ;
00619 
00625         virtual float64_t compute_optimized(int32_t vector_idx);
00626 
00635         virtual void compute_batch(
00636             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00637             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00638             float64_t factor=1.0);
00639 
/**
 * @return the stored combined_kernel_weight
 *         (NOTE(review): presumably this kernel's weight inside a combined
 *         kernel - confirm against the combined-kernel implementation)
 */
00644         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00645 
/**
 * Store a new combined_kernel_weight; no validation is performed.
 *
 * @param nw new weight
 */
00650         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00651 
00656         virtual int32_t get_num_subkernels();
00657 
00663         virtual void compute_by_subkernel(
00664             int32_t vector_idx, float64_t * subkernel_contrib);
00665 
00671         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00672 
00677         virtual SGVector<float64_t> get_subkernel_weights();
00678 
00683         virtual void set_subkernel_weights(SGVector<float64_t> weights);
00684 
/**
 * Default gradient hook: this base implementation cannot differentiate with
 * respect to any parameter, so it reports an error naming param->m_name.
 * param is dereferenced without a null check and index is not used here.
 * NOTE(review): SG_ERROR presumably does not return normally; the empty
 * matrix below is what is returned if it does - confirm.
 *
 * @param param parameter the derivative is requested for
 * @param index optional index, defaults to -1
 * @return an empty SGMatrix<float64_t>
 */
00692         virtual SGMatrix<float64_t> get_parameter_gradient(
00693                 const TParameter* param, index_t index=-1)
00694         {
00695             SG_ERROR("Can't compute derivative wrt %s parameter\n", param->m_name)
00696             return SGMatrix<float64_t>();
00697         }
00698 
00704         static CKernel* obtain_from_generic(CSGObject* kernel);
00705     protected:
/**
 * Switch on the bit(s) of p in properties; other bits are left unchanged.
 *
 * @param p property flag(s) to set
 */
00710         inline void set_property(EKernelProperty p)
00711         {
00712             properties |= p;
00713         }
00714 
/**
 * Switch off the bit(s) of p in properties; other bits are left unchanged.
 *
 * @param p property flag(s) to clear
 */
00719         inline void unset_property(EKernelProperty p)
00720         {
00721             properties &= ~static_cast<uint64_t>(p);
00722         }
00723 
/**
 * Record whether the optimization has been initialized.
 *
 * @param p_init new value for optimization_initialized
 */
00728         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00729 
00740         virtual float64_t compute(int32_t x, int32_t y)=0;
00741 
/**
 * Convert a linear element offset into a starting row index.
 *
 * Non-symmetric case: rows hold n elements each, so the row is offs / n
 * (integer division; n must not be 0).
 * Symmetric case: the row is floor(n - sqrt(n^2 - offs)), evaluated in
 * floating point.
 * NOTE(review): presumably this is used to split the work of
 * get_kernel_matrix_helper() between threads - confirm.
 *
 * @param offs linear offset of the element
 * @param n number of elements per row
 * @param symmetric whether the symmetric formula is to be used
 * @return row index derived from offs
 */
00748         int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00749         {
00750             if (!symmetric)
00751                 return (int32_t) (offs/int64_t(n));
00752 
00753             return (int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00754         }
00759 
00764         template <class T> static void* get_kernel_matrix_helper(void* p);
00765 
00774         virtual void load_serializable_post() throw (ShogunException);
00775 
00784         virtual void save_serializable_pre() throw (ShogunException);
00785 
00794         virtual void save_serializable_post() throw (ShogunException);
00795 
00800         virtual void register_params();
00801 
00802     private:
00805         void init();
00806 
00807 
00808 #ifdef USE_SVMLIGHT
00809 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00810 
/**
 * Bookkeeping for the kernel cache (only built with USE_SVMLIGHT).
 * Field notes state only what the inline accessors of CKernel show; fields
 * whose role is not visible in this header are marked for confirmation.
 */
00811         struct KERNEL_CACHE {
00813             int32_t   *index;          // per cacheidx: slot used to address lru[], or a negative value (-1) when not cached
00815             int32_t   *invindex;       // NOTE(review): presumably the inverse mapping of index - confirm
00817             int32_t   *active2totdoc;  // NOTE(review): presumably maps active indices to document indices - confirm
00819             int32_t   *totdoc2active;  // NOTE(review): presumably maps document indices to active indices - confirm
00821             int32_t   *lru;            // per slot: value of 'time' at the last kernel_cache_touch()
00823             int32_t   *occu;           // NOTE(review): presumably per-slot occupancy flags - confirm
00825             int32_t   elems;           // current element count; space is available while elems < max_elems
00827             int32_t   max_elems;       // upper limit for elems
00829             int32_t   time;            // current time stamp, set via CKernel::set_time()
00831             int32_t   activenum;       // returned by CKernel::get_activenum_cache()
00832 
00834             KERNELCACHE_ELEM  *buffer;   // storage for the cached kernel values
00836             KERNELCACHE_IDX   buffsize;  // NOTE(review): presumably the size of buffer in elements - confirm
00837         };
00838 
/**
 * Plain bundle of arguments for a worker routine.
 * NOTE(review): presumably this is what the void* argument of
 * cache_multiple_kernel_row_helper() points to; neither that nor the exact
 * meaning of the individual fields is visible in this header - confirm.
 */
00840         struct S_KTHREAD_PARAM
00841         {
00843             CKernel* kernel;                 // kernel object the worker operates on
00845             KERNEL_CACHE* kernel_cache;      // cache bookkeeping structure to use
00847             KERNELCACHE_ELEM** cache;        // array of pointers to cache element buffers
00849             int32_t* uncached_rows;          // NOTE(review): presumably indices of rows not yet cached - confirm
00851             int32_t num_uncached;            // NOTE(review): presumably the length of uncached_rows - confirm
00853             uint8_t* needs_computation;      // NOTE(review): presumably per-entry flags - confirm
00855             int32_t start;                   // NOTE(review): presumably first index of this worker's range - confirm
00857             int32_t end;                     // NOTE(review): presumably end of this worker's range; inclusive or exclusive unknown - confirm
00859             int32_t num_vectors;             // NOTE(review): presumably the total number of vectors - confirm
00860         };
00861 #endif // DOXYGEN_SHOULD_SKIP_THIS
00862 
00864         static void* cache_multiple_kernel_row_helper(void* p);
00865 
00867         void   kernel_cache_free(int32_t cacheidx);
00868         int32_t   kernel_cache_malloc();
00869         int32_t   kernel_cache_free_lru();
00870         KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
00871 #endif //USE_SVMLIGHT
00872 
00873 
00874     protected:
00876         int32_t cache_size;
00877 
00878 #ifdef USE_SVMLIGHT
00879 
00880         KERNEL_CACHE kernel_cache;
00881 #endif //USE_SVMLIGHT
00882 
00885         KERNELCACHE_ELEM* kernel_matrix;
00886 
00888         CFeatures* lhs;
00890         CFeatures* rhs;
00891 
00893         bool lhs_equals_rhs;
00894 
00896         int32_t num_lhs;
00898         int32_t num_rhs;
00899 
00901         float64_t combined_kernel_weight;
00902 
00904         bool optimization_initialized;
00908         EOptimizationType opt_type;
00909 
00911         uint64_t  properties;
00912 
00915         CKernelNormalizer* normalizer;
00916 };
00917 
00918 }
00919 #endif /* _KERNEL_H___ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation