SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
KMeansMiniBatchImpl.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2014 Parijat Mazumdar
00008  */
00009 
00010 #include <shogun/clustering/KMeansMiniBatchImpl.h>
00011 #include <shogun/mathematics/Math.h>
00012 #include <shogun/distance/Distance.h>
00013 #include <shogun/features/DenseFeatures.h>
00014 
00015 using namespace shogun;
00016 
00017 namespace shogun
00018 {
00019 void CKMeansMiniBatchImpl::minibatch_KMeans(int32_t k, CDistance* distance, int32_t batch_size, int32_t minib_iter, SGMatrix<float64_t> mus)
00020 {
00021     REQUIRE(batch_size>0,
00022         "batch size not set to positive value. Current batch size %d \n", batch_size);
00023     REQUIRE(minib_iter>0,
00024         "number of iterations not set to positive value. Current iterations %d \n", minib_iter);
00025 
00026     CDenseFeatures<float64_t>* lhs=
00027         CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs());
00028     CDenseFeatures<float64_t>* rhs_mus=new CDenseFeatures<float64_t>(0);
00029     CFeatures* rhs_cache=distance->replace_rhs(rhs_mus);
00030     rhs_mus->set_feature_matrix(mus);
00031     int32_t XSize=lhs->get_num_vectors();
00032     int32_t dims=lhs->get_num_features();
00033 
00034     SGVector<float64_t> v=SGVector<float64_t>(k);
00035     v.zero();
00036 
00037     for (int32_t i=0; i<minib_iter; i++)
00038     {
00039         SGVector<int32_t> M=mbchoose_rand(batch_size,XSize);
00040         SGVector<int32_t> ncent=SGVector<int32_t>(batch_size);
00041         for (int32_t j=0; j<batch_size; j++)
00042         {
00043             SGVector<float64_t> dists=SGVector<float64_t>(k);
00044             for (int32_t p=0; p<k; p++)
00045                 dists[p]=distance->distance(M[j],p);
00046 
00047             int32_t imin=0;
00048             float64_t min=dists[0];
00049             for (int32_t p=1; p<k; p++)
00050             {
00051                 if (dists[p]<min)
00052                 {
00053                     imin=p;
00054                     min=dists[p];
00055                 }
00056             }
00057             ncent[j]=imin;
00058         }
00059         for (int32_t j=0; j<batch_size; j++)
00060         {
00061             int32_t near=ncent[j];
00062             SGVector<float64_t> c_alive=rhs_mus->get_feature_vector(near);
00063             SGVector<float64_t> x=lhs->get_feature_vector(M[j]);
00064             v[near]+=1.0;
00065             float64_t eta=1.0/v[near];
00066             for (int32_t c=0; c<dims; c++)
00067             {
00068                 c_alive[c]=(1.0-eta)*c_alive[c]+eta*x[c];
00069             }
00070         }
00071     }
00072     SG_UNREF(lhs);
00073     distance->replace_rhs(rhs_cache);
00074     delete rhs_mus;
00075 }
00076 
00077 SGVector<int32_t> CKMeansMiniBatchImpl::mbchoose_rand(int32_t b, int32_t num)
00078 {
00079     SGVector<int32_t> chosen=SGVector<int32_t>(num);
00080     SGVector<int32_t> ret=SGVector<int32_t>(b);
00081     chosen.zero();
00082     int32_t ch=0;
00083     while (ch<b)
00084     {
00085         const int32_t n=CMath::random(0,num-1);
00086         if (chosen[n]==0)
00087         {
00088             chosen[n]+=1;
00089             ret[ch]=n;
00090             ch++;
00091         }
00092     }
00093     return ret;
00094 }
00095 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation