SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2014 Parijat Mazumdar 00008 */ 00009 00010 #include <shogun/clustering/KMeansMiniBatchImpl.h> 00011 #include <shogun/mathematics/Math.h> 00012 #include <shogun/distance/Distance.h> 00013 #include <shogun/features/DenseFeatures.h> 00014 00015 using namespace shogun; 00016 00017 namespace shogun 00018 { 00019 void CKMeansMiniBatchImpl::minibatch_KMeans(int32_t k, CDistance* distance, int32_t batch_size, int32_t minib_iter, SGMatrix<float64_t> mus) 00020 { 00021 REQUIRE(batch_size>0, 00022 "batch size not set to positive value. Current batch size %d \n", batch_size); 00023 REQUIRE(minib_iter>0, 00024 "number of iterations not set to positive value. Current iterations %d \n", minib_iter); 00025 00026 CDenseFeatures<float64_t>* lhs= 00027 CDenseFeatures<float64_t>::obtain_from_generic(distance->get_lhs()); 00028 CDenseFeatures<float64_t>* rhs_mus=new CDenseFeatures<float64_t>(0); 00029 CFeatures* rhs_cache=distance->replace_rhs(rhs_mus); 00030 rhs_mus->set_feature_matrix(mus); 00031 int32_t XSize=lhs->get_num_vectors(); 00032 int32_t dims=lhs->get_num_features(); 00033 00034 SGVector<float64_t> v=SGVector<float64_t>(k); 00035 v.zero(); 00036 00037 for (int32_t i=0; i<minib_iter; i++) 00038 { 00039 SGVector<int32_t> M=mbchoose_rand(batch_size,XSize); 00040 SGVector<int32_t> ncent=SGVector<int32_t>(batch_size); 00041 for (int32_t j=0; j<batch_size; j++) 00042 { 00043 SGVector<float64_t> dists=SGVector<float64_t>(k); 00044 for (int32_t p=0; p<k; p++) 00045 dists[p]=distance->distance(M[j],p); 00046 00047 int32_t imin=0; 00048 float64_t min=dists[0]; 00049 for (int32_t p=1; p<k; p++) 00050 { 00051 if (dists[p]<min) 00052 { 00053 imin=p; 00054 min=dists[p]; 00055 } 00056 } 00057 ncent[j]=imin; 00058 } 00059 for (int32_t j=0; j<batch_size; j++) 00060 { 00061 int32_t near=ncent[j]; 00062 SGVector<float64_t> c_alive=rhs_mus->get_feature_vector(near); 00063 SGVector<float64_t> x=lhs->get_feature_vector(M[j]); 00064 v[near]+=1.0; 00065 float64_t eta=1.0/v[near]; 00066 for (int32_t c=0; c<dims; c++) 00067 { 00068 c_alive[c]=(1.0-eta)*c_alive[c]+eta*x[c]; 00069 } 00070 } 00071 } 00072 SG_UNREF(lhs); 00073 distance->replace_rhs(rhs_cache); 00074 delete rhs_mus; 00075 } 00076 00077 SGVector<int32_t> CKMeansMiniBatchImpl::mbchoose_rand(int32_t b, int32_t num) 00078 { 00079 SGVector<int32_t> chosen=SGVector<int32_t>(num); 00080 SGVector<int32_t> ret=SGVector<int32_t>(b); 00081 chosen.zero(); 00082 int32_t ch=0; 00083 while (ch<b) 00084 { 00085 const int32_t n=CMath::random(0,num-1); 00086 if (chosen[n]==0) 00087 { 00088 chosen[n]+=1; 00089 ret[ch]=n; 00090 ch++; 00091 } 00092 } 00093 return ret; 00094 } 00095 }