SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2014 Parijat Mazumdar 00008 */ 00009 00010 #include "shogun/clustering/KMeansLloydImpl.h" 00011 #include <shogun/distance/Distance.h> 00012 #include <shogun/features/DenseFeatures.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/io/SGIO.h> 00015 00016 using namespace shogun; 00017 00018 namespace shogun 00019 { 00020 void CKMeansLloydImpl::Lloyd_KMeans(int32_t k, CDistance* distance, int32_t max_iter, SGMatrix<float64_t> mus, 00021 SGVector<int32_t> ClList, SGVector<float64_t> weights_set, bool fixed_centers) 00022 { 00023 CDenseFeatures<float64_t>* lhs= 00024 CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs()); 00025 int32_t XSize=lhs->get_num_vectors(); 00026 int32_t dimensions=lhs->get_num_features(); 00027 00028 CDenseFeatures<float64_t>* rhs_mus=new CDenseFeatures<float64_t>(0); 00029 CFeatures* rhs_cache=distance->replace_rhs(rhs_mus); 00030 00031 SGVector<float64_t> dists=SGVector<float64_t>(k*XSize); 00032 dists.zero(); 00033 00034 int32_t changed=1; 00035 int32_t iter=0; 00036 int32_t vlen=0; 00037 bool vfree=false; 00038 float64_t* vec=NULL; 00039 00040 while (changed && (iter<max_iter)) 00041 { 00042 iter++; 00043 if (iter==max_iter-1) 00044 SG_SWARNING("kmeans clustering changed throughout %d iterations stopping...\n", max_iter-1) 00045 00046 if (iter%1000 == 0) 00047 SG_SINFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed) 00048 changed=0; 00049 00050 if (!fixed_centers) 00051 { 00052 /* mus=zeros(dimensions, k) ; */ 00053 mus.zero(); 00054 for (int32_t i=0; i<XSize; i++) 00055 { 00056 int32_t Cl=ClList[i]; 00057 00058 vec=lhs->get_feature_vector(i, vlen, vfree); 00059 00060 for (int32_t j=0; j<dimensions; j++) 00061 mus.matrix[Cl*dimensions+j] += vec[j]; 00062 00063 lhs->free_feature_vector(vec, i, vfree); 00064 } 00065 00066 for (int32_t i=0; i<k; i++) 00067 { 00068 if (weights_set[i]!=0.0) 00069 { 00070 for (int32_t j=0; j<dimensions; j++) 00071 mus.matrix[i*dimensions+j] /= weights_set[i]; 00072 } 00073 } 00074 } 00075 rhs_mus->copy_feature_matrix(mus); 00076 for (int32_t i=0; i<XSize; i++) 00077 { 00078 /* ks=ceil(rand(1,XSize)*XSize) ; */ 00079 const int32_t Pat=CMath::random(0, XSize-1); 00080 const int32_t ClList_Pat=ClList[Pat]; 00081 int32_t imini, j; 00082 float64_t mini; 00083 00084 /* compute the distance of this point to all centers */ 00085 for(int32_t idx_k=0;idx_k<k;idx_k++) 00086 dists[idx_k]=distance->distance(Pat,idx_k); 00087 00088 /* [mini,imini]=min(dists(:,i)) ; */ 00089 imini=0 ; mini=dists[0]; 00090 for (j=1; j<k; j++) 00091 if (dists[j]<mini) 00092 { 00093 mini=dists[j]; 00094 imini=j; 00095 } 00096 00097 if (imini!=ClList_Pat) 00098 { 00099 changed++; 00100 00101 /* weights_set(imini) = weights_set(imini) + 1.0 ; */ 00102 weights_set[imini]+= 1.0; 00103 /* weights_set(j) = weights_set(j) - 1.0 ; */ 00104 weights_set[ClList_Pat]-= 1.0; 00105 00106 vec=lhs->get_feature_vector(Pat, vlen, vfree); 00107 00108 for (j=0; j<dimensions; j++) 00109 { 00110 mus.matrix[imini*dimensions+j]-= 00111 (vec[j]-mus.matrix[imini*dimensions+j]) / weights_set[imini]; 00112 } 00113 00114 lhs->free_feature_vector(vec, Pat, vfree); 00115 00116 /* mu_new = mu_old - (x - mu_old)/(n-1) */ 00117 /* if weights_set(j)~=0 */ 00118 if (weights_set[ClList_Pat]!=0.0) 00119 { 00120 vec=lhs->get_feature_vector(Pat, vlen, vfree); 00121 00122 for (j=0; j<dimensions; j++) 00123 { 00124 mus.matrix[ClList_Pat*dimensions+j]-= 00125 (vec[j]-mus.matrix[ClList_Pat*dimensions+j]) / weights_set[ClList_Pat]; 00126 } 00127 lhs->free_feature_vector(vec, Pat, vfree); 00128 } 00129 else 00130 { 00131 /* mus(:,j)=zeros(dimensions,1) ; */ 00132 for (j=0; j<dimensions; j++) 00133 mus.matrix[ClList_Pat*dimensions+j]=0; 00134 } 00135 00136 /* ClList(i)= imini ; */ 00137 ClList[Pat] = imini; 00138 } 00139 } 00140 } 00141 distance->replace_rhs(rhs_cache); 00142 delete rhs_mus; 00143 SG_UNREF(lhs); 00144 } 00145 }