SHOGUN
v3.2.0
|
00001 /* 00002 * this program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2006-2009 Christian Gehl 00008 * Written (W) 2006-2009 Soeren Sonnenburg 00009 * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/config.h> 00013 #include <shogun/lib/common.h> 00014 #include <shogun/io/SGIO.h> 00015 #include <shogun/io/File.h> 00016 #include <shogun/lib/Time.h> 00017 #include <shogun/lib/Signal.h> 00018 #include <shogun/base/Parallel.h> 00019 #include <shogun/base/Parameter.h> 00020 00021 #include <shogun/distance/Distance.h> 00022 #include <shogun/features/Features.h> 00023 00024 #include <string.h> 00025 #include <unistd.h> 00026 00027 #ifdef HAVE_PTHREAD 00028 #include <pthread.h> 00029 #endif 00030 00031 using namespace shogun; 00032 00034 template <class T> struct D_THREAD_PARAM 00035 { 00037 CDistance* distance; 00039 int32_t start; 00041 int32_t end; 00043 int32_t total_start; 00045 int32_t total_end; 00047 int32_t m; 00049 int32_t n; 00051 T* result; 00053 bool symmetric; 00055 bool verbose; 00056 }; 00057 00058 CDistance::CDistance() : CSGObject() 00059 { 00060 init(); 00061 } 00062 00063 00064 CDistance::CDistance(CFeatures* p_lhs, CFeatures* p_rhs) : CSGObject() 00065 { 00066 init(); 00067 init(p_lhs, p_rhs); 00068 } 00069 00070 CDistance::~CDistance() 00071 { 00072 SG_FREE(precomputed_matrix); 00073 precomputed_matrix=NULL; 00074 00075 remove_lhs_and_rhs(); 00076 } 00077 00078 bool CDistance::init(CFeatures* l, CFeatures* r) 00079 { 00080 //make sure features were indeed supplied 00081 ASSERT(l) 00082 ASSERT(r) 00083 00084 //make sure features are compatible 00085 ASSERT(l->get_feature_class()==r->get_feature_class()) 00086 ASSERT(l->get_feature_type()==r->get_feature_type()) 00087 00088 //remove references to previous features 00089 remove_lhs_and_rhs(); 00090 00091 //increase reference counts 00092 SG_REF(l); 00093 SG_REF(r); 00094 00095 lhs=l; 00096 rhs=r; 00097 00098 num_lhs=l->get_num_vectors(); 00099 num_rhs=r->get_num_vectors(); 00100 00101 SG_FREE(precomputed_matrix); 00102 precomputed_matrix=NULL ; 00103 00104 return true; 00105 } 00106 00107 void CDistance::load(CFile* loader) 00108 { 00109 SG_SET_LOCALE_C; 00110 SG_RESET_LOCALE; 00111 } 00112 00113 void CDistance::save(CFile* writer) 00114 { 00115 SG_SET_LOCALE_C; 00116 SG_RESET_LOCALE; 00117 } 00118 00119 void CDistance::remove_lhs_and_rhs() 00120 { 00121 SG_UNREF(rhs); 00122 rhs = NULL; 00123 num_rhs=0; 00124 00125 SG_UNREF(lhs); 00126 lhs = NULL; 00127 num_lhs=0; 00128 } 00129 00130 void CDistance::remove_lhs() 00131 { 00132 SG_UNREF(lhs); 00133 lhs = NULL; 00134 num_lhs=0; 00135 } 00136 00138 void CDistance::remove_rhs() 00139 { 00140 SG_UNREF(rhs); 00141 rhs = NULL; 00142 num_rhs=0; 00143 } 00144 00145 CFeatures* CDistance::replace_rhs(CFeatures* r) 00146 { 00147 //make sure features were indeed supplied 00148 ASSERT(r) 00149 00150 //make sure features are compatible 00151 ASSERT(lhs->get_feature_class()==r->get_feature_class()) 00152 ASSERT(lhs->get_feature_type()==r->get_feature_type()) 00153 00154 //remove references to previous rhs features 00155 CFeatures* tmp=rhs; 00156 00157 rhs=r; 00158 num_rhs=r->get_num_vectors(); 00159 00160 SG_FREE(precomputed_matrix); 00161 precomputed_matrix=NULL ; 00162 00163 // return old features including reference count 00164 return tmp; 00165 } 00166 00167 CFeatures* CDistance::replace_lhs(CFeatures* l) 00168 { 00169 //make sure features were indeed supplied 00170 ASSERT(l) 00171 00172 //make sure features are compatible 00173 ASSERT(rhs->get_feature_class()==l->get_feature_class()) 00174 ASSERT(rhs->get_feature_type()==l->get_feature_type()) 00175 00176 //remove references to previous rhs features 00177 CFeatures* tmp=lhs; 00178 00179 lhs=l; 00180 num_lhs=l->get_num_vectors(); 00181 00182 SG_FREE(precomputed_matrix); 00183 precomputed_matrix=NULL ; 00184 00185 // return old features including reference count 00186 return tmp; 00187 } 00188 00189 float64_t CDistance::distance(int32_t idx_a, int32_t idx_b) 00190 { 00191 REQUIRE(idx_a >= 0 && idx_b >= 0, "In CDistance::distance(int32_t,int32_t), idx_a and " 00192 "idx_b must be positive, %d and %d given instead\n", idx_a, idx_b) 00193 00194 ASSERT(lhs) 00195 ASSERT(rhs) 00196 00197 if (lhs==rhs) 00198 { 00199 int32_t num_vectors = lhs->get_num_vectors(); 00200 00201 if (idx_a>=num_vectors) 00202 idx_a=2*num_vectors-1-idx_a; 00203 00204 if (idx_b>=num_vectors) 00205 idx_b=2*num_vectors-1-idx_b; 00206 } 00207 00208 REQUIRE(idx_a < lhs->get_num_vectors() && idx_b < rhs->get_num_vectors(), 00209 "In CDistance::distance(int32_t,int32_t), idx_a and idx_b must be less than " 00210 "the number of vectors, but %d >= %d or %d >= %d\n", 00211 idx_a, lhs->get_num_vectors(), idx_b, rhs->get_num_vectors()) 00212 00213 if (precompute_matrix && (precomputed_matrix==NULL) && (lhs==rhs)) 00214 do_precompute_matrix() ; 00215 00216 if (precompute_matrix && (precomputed_matrix!=NULL)) 00217 { 00218 if (idx_a>=idx_b) 00219 return precomputed_matrix[idx_a*(idx_a+1)/2+idx_b] ; 00220 else 00221 return precomputed_matrix[idx_b*(idx_b+1)/2+idx_a] ; 00222 } 00223 00224 return compute(idx_a, idx_b); 00225 } 00226 00227 void CDistance::do_precompute_matrix() 00228 { 00229 int32_t num_left=lhs->get_num_vectors(); 00230 int32_t num_right=rhs->get_num_vectors(); 00231 SG_INFO("precomputing distance matrix (%ix%i)\n", num_left, num_right) 00232 00233 ASSERT(num_left==num_right) 00234 ASSERT(lhs==rhs) 00235 int32_t num=num_left; 00236 00237 SG_FREE(precomputed_matrix); 00238 precomputed_matrix=SG_MALLOC(float32_t, num*(num+1)/2); 00239 00240 for (int32_t i=0; i<num; i++) 00241 { 00242 SG_PROGRESS(i*i,0,num*num) 00243 for (int32_t j=0; j<=i; j++) 00244 precomputed_matrix[i*(i+1)/2+j] = compute(i,j) ; 00245 } 00246 00247 SG_PROGRESS(num*num,0,num*num) 00248 SG_DONE() 00249 } 00250 00251 void CDistance::init() 00252 { 00253 precomputed_matrix = NULL; 00254 precompute_matrix = false; 00255 lhs = NULL; 00256 rhs = NULL; 00257 num_lhs=0; 00258 num_rhs=0; 00259 00260 m_parameters->add((CSGObject**) &lhs, "lhs", 00261 "Feature vectors to occur on left hand side."); 00262 m_parameters->add((CSGObject**) &rhs, "rhs", 00263 "Feature vectors to occur on right hand side."); 00264 } 00265 00266 template <class T> void* CDistance::get_distance_matrix_helper(void* p) 00267 { 00268 D_THREAD_PARAM<T>* params= (D_THREAD_PARAM<T>*) p; 00269 int32_t i_start=params->start; 00270 int32_t i_end=params->end; 00271 CDistance* k=params->distance; 00272 T* result=params->result; 00273 bool symmetric=params->symmetric; 00274 int32_t n=params->n; 00275 int32_t m=params->m; 00276 bool verbose=params->verbose; 00277 int64_t total_start=params->total_start; 00278 int64_t total_end=params->total_end; 00279 int64_t total=total_start; 00280 00281 for (int32_t i=i_start; i<i_end; i++) 00282 { 00283 int32_t j_start=0; 00284 00285 if (symmetric) 00286 j_start=i; 00287 00288 for (int32_t j=j_start; j<n; j++) 00289 { 00290 float64_t v=k->distance(i,j); 00291 result[i+j*m]=v; 00292 00293 if (symmetric && i!=j) 00294 result[j+i*m]=v; 00295 00296 if (verbose) 00297 { 00298 total++; 00299 00300 if (symmetric && i!=j) 00301 total++; 00302 00303 if (total%100 == 0) 00304 SG_OBJ_PROGRESS(k, total, total_start, total_end) 00305 00306 if (CSignal::cancel_computations()) 00307 break; 00308 } 00309 } 00310 00311 } 00312 00313 return NULL; 00314 } 00315 00316 template <class T> 00317 SGMatrix<T> CDistance::get_distance_matrix() 00318 { 00319 T* result = NULL; 00320 00321 REQUIRE(has_features(), "no features assigned to distance\n") 00322 00323 int32_t m=get_num_vec_lhs(); 00324 int32_t n=get_num_vec_rhs(); 00325 00326 int64_t total_num = int64_t(m)*n; 00327 00328 // if lhs == rhs and sizes match assume k(i,j)=k(j,i) 00329 bool symmetric= (lhs && lhs==rhs && m==n); 00330 00331 SG_DEBUG("returning distance matrix of size %dx%d\n", m, n) 00332 00333 result=SG_MALLOC(T, total_num); 00334 00335 int32_t num_threads=parallel->get_num_threads(); 00336 if (num_threads < 2) 00337 { 00338 D_THREAD_PARAM<T> params; 00339 params.distance=this; 00340 params.result=result; 00341 params.start=0; 00342 params.end=m; 00343 params.total_start=0; 00344 params.total_end=total_num; 00345 params.n=n; 00346 params.m=m; 00347 params.symmetric=symmetric; 00348 params.verbose=true; 00349 get_distance_matrix_helper<T>((void*) ¶ms); 00350 } 00351 else 00352 { 00353 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00354 D_THREAD_PARAM<T>* params = SG_MALLOC(D_THREAD_PARAM<T>, num_threads); 00355 int64_t step= total_num/num_threads; 00356 00357 int32_t t; 00358 00359 num_threads--; 00360 for (t=0; t<num_threads; t++) 00361 { 00362 params[t].distance = this; 00363 params[t].result = result; 00364 params[t].start = compute_row_start(t*step, n, symmetric); 00365 params[t].end = compute_row_start((t+1)*step, n, symmetric); 00366 params[t].total_start=t*step; 00367 params[t].total_end=(t+1)*step; 00368 params[t].n=n; 00369 params[t].m=m; 00370 params[t].symmetric=symmetric; 00371 params[t].verbose=false; 00372 00373 int code=pthread_create(&threads[t], NULL, 00374 CDistance::get_distance_matrix_helper<T>, (void*)¶ms[t]); 00375 00376 if (code != 0) 00377 { 00378 SG_WARNING("Thread creation failed (thread %d of %d) " 00379 "with error:'%s'\n",t, num_threads, strerror(code)); 00380 num_threads=t; 00381 break; 00382 } 00383 } 00384 00385 params[t].distance = this; 00386 params[t].result = result; 00387 params[t].start = compute_row_start(t*step, n, symmetric); 00388 params[t].end = m; 00389 params[t].total_start=t*step; 00390 params[t].total_end=total_num; 00391 params[t].n=n; 00392 params[t].m=m; 00393 params[t].symmetric=symmetric; 00394 params[t].verbose=true; 00395 get_distance_matrix_helper<T>(¶ms[t]); 00396 00397 for (t=0; t<num_threads; t++) 00398 { 00399 if (pthread_join(threads[t], NULL) != 0) 00400 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads) 00401 } 00402 00403 SG_FREE(params); 00404 SG_FREE(threads); 00405 } 00406 00407 SG_DONE() 00408 00409 return SGMatrix<T>(result,m,n,true); 00410 } 00411 00412 template SGMatrix<float64_t> CDistance::get_distance_matrix<float64_t>(); 00413 template SGMatrix<float32_t> CDistance::get_distance_matrix<float32_t>(); 00414 00415 template void* CDistance::get_distance_matrix_helper<float64_t>(void* p); 00416 template void* CDistance::get_distance_matrix_helper<float32_t>(void* p);