SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
KMeansLloydImpl.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2014 Parijat Mazumdar
00008  */
00009 
00010 #include "shogun/clustering/KMeansLloydImpl.h"
00011 #include <shogun/distance/Distance.h>
00012 #include <shogun/features/DenseFeatures.h>
00013 #include <shogun/mathematics/Math.h>
00014 #include <shogun/io/SGIO.h>
00015 
00016 using namespace shogun;
00017 
00018 namespace shogun
00019 {
00020 void CKMeansLloydImpl::Lloyd_KMeans(int32_t k, CDistance* distance, int32_t max_iter, SGMatrix<float64_t> mus, 
00021         SGVector<int32_t> ClList, SGVector<float64_t> weights_set, bool fixed_centers)
00022 {
00023     CDenseFeatures<float64_t>* lhs=
00024         CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
00025     int32_t XSize=lhs->get_num_vectors();
00026     int32_t dimensions=lhs->get_num_features();
00027 
00028     CDenseFeatures<float64_t>* rhs_mus=new CDenseFeatures<float64_t>(0);
00029     CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
00030 
00031     SGVector<float64_t> dists=SGVector<float64_t>(k*XSize);
00032     dists.zero();
00033 
00034     int32_t changed=1;
00035     int32_t iter=0;
00036     int32_t vlen=0;
00037     bool vfree=false;
00038     float64_t* vec=NULL;
00039 
00040     while (changed && (iter<max_iter))
00041     {
00042         iter++;
00043         if (iter==max_iter-1)
00044             SG_SWARNING("kmeans clustering changed throughout %d iterations stopping...\n", max_iter-1)
00045 
00046         if (iter%1000 == 0)
00047             SG_SINFO("Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter, max_iter, changed)
00048         changed=0;
00049 
00050         if (!fixed_centers)
00051         {
00052             /* mus=zeros(dimensions, k) ; */
00053             mus.zero();
00054             for (int32_t i=0; i<XSize; i++)
00055             {
00056                 int32_t Cl=ClList[i];
00057 
00058                 vec=lhs->get_feature_vector(i, vlen, vfree);
00059 
00060                 for (int32_t j=0; j<dimensions; j++)
00061                     mus.matrix[Cl*dimensions+j] += vec[j];
00062 
00063                 lhs->free_feature_vector(vec, i, vfree);
00064             }
00065 
00066             for (int32_t i=0; i<k; i++)
00067             {
00068                 if (weights_set[i]!=0.0)
00069                 {
00070                     for (int32_t j=0; j<dimensions; j++)
00071                         mus.matrix[i*dimensions+j] /= weights_set[i];
00072                 }
00073             }
00074         }
00075         rhs_mus->copy_feature_matrix(mus);
00076         for (int32_t i=0; i<XSize; i++)
00077         {
00078             /* ks=ceil(rand(1,XSize)*XSize) ; */
00079             const int32_t Pat=CMath::random(0, XSize-1);
00080             const int32_t ClList_Pat=ClList[Pat];
00081             int32_t imini, j;
00082             float64_t mini;
00083 
00084             /* compute the distance of this point to all centers */
00085             for(int32_t idx_k=0;idx_k<k;idx_k++)
00086                 dists[idx_k]=distance->distance(Pat,idx_k);
00087 
00088             /* [mini,imini]=min(dists(:,i)) ; */
00089             imini=0 ; mini=dists[0];
00090             for (j=1; j<k; j++)
00091                 if (dists[j]<mini)
00092                 {
00093                     mini=dists[j];
00094                     imini=j;
00095                 }
00096 
00097             if (imini!=ClList_Pat)
00098             {
00099                 changed++;
00100 
00101                 /* weights_set(imini) = weights_set(imini) + 1.0 ; */
00102                 weights_set[imini]+= 1.0;
00103                 /* weights_set(j)     = weights_set(j)     - 1.0 ; */
00104                 weights_set[ClList_Pat]-= 1.0;
00105 
00106                 vec=lhs->get_feature_vector(Pat, vlen, vfree);
00107 
00108                 for (j=0; j<dimensions; j++)
00109                 {
00110                     mus.matrix[imini*dimensions+j]-=
00111                         (vec[j]-mus.matrix[imini*dimensions+j]) / weights_set[imini];
00112                 }
00113 
00114                 lhs->free_feature_vector(vec, Pat, vfree);
00115 
00116                 /* mu_new = mu_old - (x - mu_old)/(n-1) */
00117                 /* if weights_set(j)~=0 */
00118                 if (weights_set[ClList_Pat]!=0.0)
00119                 {
00120                     vec=lhs->get_feature_vector(Pat, vlen, vfree);
00121     
00122                     for (j=0; j<dimensions; j++)
00123                     {
00124                         mus.matrix[ClList_Pat*dimensions+j]-=
00125                                 (vec[j]-mus.matrix[ClList_Pat*dimensions+j]) / weights_set[ClList_Pat];
00126                     }
00127                     lhs->free_feature_vector(vec, Pat, vfree);
00128                 }
00129                 else
00130                 {
00131                     /*  mus(:,j)=zeros(dimensions,1) ; */
00132                     for (j=0; j<dimensions; j++)
00133                         mus.matrix[ClList_Pat*dimensions+j]=0;
00134                 }
00135 
00136                 /* ClList(i)= imini ; */
00137                 ClList[Pat] = imini;
00138             }
00139         }
00140     }
00141     distance->replace_rhs(rhs_cache);
00142     delete rhs_mus;
00143     SG_UNREF(lhs);
00144 }
00145 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation