SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
KMeans.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2007-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _KMEANS_H__
00013 #define _KMEANS_H__
00014 
00015 #include <stdio.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/features/DenseFeatures.h>
00019 #include <shogun/distance/Distance.h>
00020 #include <shogun/machine/DistanceMachine.h>
00021 
00022 namespace shogun
00023 {
00024 class CDistanceMachine;
00025 
/**
 * Selects the training procedure used by a CKMeans instance.
 *
 * No explicit values are assigned, so KMM_MINI_BATCH is 0 and KMM_LLOYD is 1.
 * The CKMeans constructors that give this argument a default use KMM_LLOYD.
 *
 * NOTE(review): the names suggest mini-batch k-means and Lloyd's algorithm
 * respectively; the training code is not part of this header -- confirm there.
 */
00027 enum EKMeansMethod
00028 {
00029     KMM_MINI_BATCH,
00030     KMM_LLOYD
00031 };
00032 
/**
 * K-means clustering machine, publicly derived from CDistanceMachine.
 *
 * Visible facts: the class reports CT_KMEANS as its classifier type, "KMeans"
 * as its name, and does not require labels for training
 * (train_require_labels() returns false).
 *
 * Except for the three inline one-liners, every member function is only
 * declared here. The per-member descriptions below are therefore inferred from
 * names and signatures and should be confirmed against the implementation
 * file.
 */
00048 class CKMeans : public CDistanceMachine
00049 {
00050     public:
              /** Default constructor (declaration only). */
00052         CKMeans();
00053 
              /**
               * Constructor taking an explicit training method.
               *
               * Because the third parameter is an exact EKMeansMethod match,
               * a call such as CKMeans(k, d, KMM_LLOYD) resolves to this
               * overload rather than to the bool/EKMeansMethod overload below.
               *
               * @param k presumably the number of clusters -- TODO confirm
               * @param d distance object handed to the machine
               * @param f training method to use
               */
00060         CKMeans(int32_t k, CDistance* d, EKMeansMethod f);
00061 
              /**
               * Constructor with optional k-means++ flag and training method.
               * A two-argument call CKMeans(k, d) resolves here, giving
               * kmeanspp=false and f=KMM_LLOYD.
               *
               * @param k presumably the number of clusters -- TODO confirm
               * @param d distance object handed to the machine
               * @param kmeanspp presumably enables k-means++ initialisation
               * @param f training method to use (defaults to KMM_LLOYD)
               */
00069         CKMeans(int32_t k, CDistance* d, bool kmeanspp=false, EKMeansMethod f=KMM_LLOYD);
00070 
              /**
               * Constructor taking a matrix of centers.
               *
               * @param k_i presumably the number of clusters -- TODO confirm
               * @param d_i distance object handed to the machine
               * @param centers_i presumably the initial cluster centers;
               *        layout (rows vs. columns) is not visible here
               * @param f training method to use (defaults to KMM_LLOYD)
               */
00077         CKMeans(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i, EKMeansMethod f=KMM_LLOYD);
              /** Virtual destructor (declaration only). */
00078         virtual ~CKMeans();
00079 
00080 
              /**
               * Macro defined outside this header, invoked with PT_MULTICLASS.
               * NOTE(review): presumably declares the machine's problem type;
               * confirm against the macro definition.
               */
00081         MACHINE_PROBLEM_TYPE(PT_MULTICLASS)
00082 
00083         
              /** @return CT_KMEANS, the classifier type tag of this machine */
00087         virtual EMachineType get_classifier_type() { return CT_KMEANS; }
00088 
              /**
               * Load from a C stdio stream.
               *
               * @param srcfile stream to read from
               * @return bool; presumably true on success -- TODO confirm
               */
00094         virtual bool load(FILE* srcfile);
00095 
              /**
               * Save to a C stdio stream.
               *
               * @param dstfile stream to write to
               * @return bool; presumably true on success -- TODO confirm
               */
00101         virtual bool save(FILE* dstfile);
00102 
              /** Setter for k. @param p_k new value, presumably the cluster count */
00107         void set_k(int32_t p_k);
00108 
              /** Getter for k (non-const member function). */
00113         int32_t get_k();
00114 
              /** Setter for the k-means++ flag. @param kmpp new flag value */
00119         void set_use_kmeanspp(bool kmpp);
00120 
              /** Getter for the k-means++ flag. */
00125         bool get_use_kmeanspp() const;
00126 
              /** Setter for the fixed-centers flag. @param fixed new flag value */
00131         void set_fixed_centers(bool fixed);
00132 
              /** Getter for the fixed-centers flag (non-const member function). */
00137         bool get_fixed_centers();
00138 
              /** Setter for the iteration limit. @param iter new limit */
00143         void set_max_iter(int32_t iter);
00144 
              /**
               * Getter for the iteration limit.
               *
               * NOTE(review): declared to return float64_t although
               * set_max_iter() takes int32_t and the max_iter member is
               * int32_t. Looks like an oversight, but changing it requires
               * touching the out-of-line definition as well.
               */
00149         float64_t get_max_iter();
00150 
              /** @return vector of float64_t; presumably one radius per cluster */
00155         SGVector<float64_t> get_radiuses();
00156 
              /** @return matrix of float64_t; presumably the cluster centers */
00161         SGMatrix<float64_t> get_cluster_centers();
00162 
              /** Getter for the dimensions value (non-const member function). */
00167         int32_t get_dimensions();
00168 
              /** @return the literal name "KMeans" */
00170         virtual const char* get_name() const { return "KMeans"; }
00171 
              /**
               * Public overload taking a matrix of centers. A private overload
               * with a different parameter list is declared further down.
               *
               * @param centers presumably the centers to start training from
               */
00176         virtual void set_initial_centers(SGMatrix<float64_t> centers);
00177         
              /** Setter for the training method. @param f method to use */
00182         void set_train_method(EKMeansMethod f);
00183 
              /** Getter for the training method. */
00188         EKMeansMethod get_train_method() const;
00189 
              /** Setter for the mini-batch batch size. @param b new batch size */
00194         void set_mbKMeans_batch_size(int32_t b);
00195 
              /** Getter for the mini-batch batch size. */
00200         int32_t get_mbKMeans_batch_size() const;
00201 
              /** Setter for the mini-batch iteration count. @param t new count */
00206         void set_mbKMeans_iter(int32_t t);
00207 
              /** Getter for the mini-batch iteration count. */
00212         int32_t get_mbKMeans_iter() const;
00213 
              /**
               * Sets both mini-batch parameters in one call.
               *
               * @param b presumably the batch size (cf. set_mbKMeans_batch_size)
               * @param t presumably the iteration count (cf. set_mbKMeans_iter)
               */
00219         void set_mbKMeans_params(int32_t b, int32_t t);
00220 
00221     private:
              /**
               * Training entry point.
               *
               * @param data features to train on; defaults to NULL
               * @return bool; presumably true on success -- TODO confirm
               */
00230         virtual bool train_machine(CFeatures* data=NULL);
00231 
              /** Virtual hook, declaration only; purpose not visible in this header. */
00233         virtual void store_model_features();
00234 
              /** @return false: training this machine does not require labels */
00235         virtual bool train_require_labels() const { return false; }
00236 
              /** @return matrix of float64_t; presumably k-means++ initial centers */
00241         SGMatrix<float64_t> kmeanspp();
              /** Private helper, declaration only; presumably shared constructor setup. */
00242         void init();
00243 
              /**
               * Private helper taking a weight vector, an int32_t vector and a size.
               * NOTE(review): parameter meanings are not visible here -- confirm
               * against the implementation.
               */
00248         void set_random_centers(SGVector<float64_t> weights_set, SGVector<int32_t> ClList, int32_t XSize);
              /**
               * Private overload of set_initial_centers with the same parameter
               * list as set_random_centers above.
               */
00249         void set_initial_centers(SGVector<float64_t> weights_set, 
00250                     SGVector<int32_t> ClList, int32_t XSize);
              /** Private helper, declaration only. */
00251         void compute_cluster_variances();
00252 
              // Second private section: data members only.
00253     private:
              /** Iteration limit (see set_max_iter). */
00255         int32_t max_iter;
00256 
              /** Fixed-centers flag (see set_fixed_centers). */
00258         bool fixed_centers;
00259 
              /** k; presumably the number of clusters. */
00261         int32_t k;
00262 
              /** Dimensions value (see get_dimensions). */
00264         int32_t dimensions;
00265 
              /** Presumably the cluster radii returned by get_radiuses -- TODO confirm. */
00267         SGVector<float64_t> R;
00268 
              /** Presumably the user-supplied initial centers -- TODO confirm. */
00270         SGMatrix<float64_t> mus_initial;
00271         
              /** k-means++ flag (see set_use_kmeanspp). */
00273         bool use_kmeanspp;
00274     
              /** Mini-batch batch size (see set_mbKMeans_batch_size). */
00276         int32_t batch_size;
00277 
              /** Mini-batch iteration count (see set_mbKMeans_iter). */
00279         int32_t minib_iter;
00280 
              /** Presumably the current cluster centers -- TODO confirm. */
00282         SGMatrix<float64_t> mus;
00283 
              /** Training method (see set_train_method). */
00285         EKMeansMethod train_method;
00286 };
00287 }
00288 #endif
00289 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation