// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Written (W) 2007-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _KMEANS_H__ 00013 #define _KMEANS_H__ 00014 00015 #include <stdio.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/features/DenseFeatures.h> 00019 #include <shogun/distance/Distance.h> 00020 #include <shogun/machine/DistanceMachine.h> 00021 00022 namespace shogun 00023 { 00024 class CDistanceMachine; 00025 00027 enum EKMeansMethod 00028 { 00029 KMM_MINI_BATCH, 00030 KMM_LLOYD 00031 }; 00032 00048 class CKMeans : public CDistanceMachine 00049 { 00050 public: 00052 CKMeans(); 00053 00060 CKMeans(int32_t k, CDistance* d, EKMeansMethod f); 00061 00069 CKMeans(int32_t k, CDistance* d, bool kmeanspp=false, EKMeansMethod f=KMM_LLOYD); 00070 00077 CKMeans(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i, EKMeansMethod f=KMM_LLOYD); 00078 virtual ~CKMeans(); 00079 00080 00081 MACHINE_PROBLEM_TYPE(PT_MULTICLASS) 00082 00083 00087 virtual EMachineType get_classifier_type() { return CT_KMEANS; } 00088 00094 virtual bool load(FILE* srcfile); 00095 00101 virtual bool save(FILE* dstfile); 00102 00107 void set_k(int32_t p_k); 00108 00113 int32_t get_k(); 00114 00119 void set_use_kmeanspp(bool kmpp); 00120 00125 bool get_use_kmeanspp() const; 00126 00131 void set_fixed_centers(bool fixed); 00132 00137 bool get_fixed_centers(); 00138 00143 void set_max_iter(int32_t iter); 00144 00149 float64_t get_max_iter(); 00150 00155 SGVector<float64_t> get_radiuses(); 00156 00161 SGMatrix<float64_t> get_cluster_centers(); 00162 00167 int32_t 
get_dimensions(); 00168 00170 virtual const char* get_name() const { return "KMeans"; } 00171 00176 virtual void set_initial_centers(SGMatrix<float64_t> centers); 00177 00182 void set_train_method(EKMeansMethod f); 00183 00188 EKMeansMethod get_train_method() const; 00189 00194 void set_mbKMeans_batch_size(int32_t b); 00195 00200 int32_t get_mbKMeans_batch_size() const; 00201 00206 void set_mbKMeans_iter(int32_t t); 00207 00212 int32_t get_mbKMeans_iter() const; 00213 00219 void set_mbKMeans_params(int32_t b, int32_t t); 00220 00221 private: 00230 virtual bool train_machine(CFeatures* data=NULL); 00231 00233 virtual void store_model_features(); 00234 00235 virtual bool train_require_labels() const { return false; } 00236 00241 SGMatrix<float64_t> kmeanspp(); 00242 void init(); 00243 00248 void set_random_centers(SGVector<float64_t> weights_set, SGVector<int32_t> ClList, int32_t XSize); 00249 void set_initial_centers(SGVector<float64_t> weights_set, 00250 SGVector<int32_t> ClList, int32_t XSize); 00251 void compute_cluster_variances(); 00252 00253 private: 00255 int32_t max_iter; 00256 00258 bool fixed_centers; 00259 00261 int32_t k; 00262 00264 int32_t dimensions; 00265 00267 SGVector<float64_t> R; 00268 00270 SGMatrix<float64_t> mus_initial; 00271 00273 bool use_kmeanspp; 00274 00276 int32_t batch_size; 00277 00279 int32_t minib_iter; 00280 00282 SGMatrix<float64_t> mus; 00283 00285 EKMeansMethod train_method; 00286 }; 00287 } 00288 #endif 00289