SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Christian Gehl 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST 00009 */ 00010 00011 #include <shogun/machine/DistanceMachine.h> 00012 #include <shogun/base/Parameter.h> 00013 00014 using namespace shogun; 00015 00016 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00017 struct D_THREAD_PARAM 00018 { 00019 CDistance* d; 00020 float64_t* r; 00021 int32_t idx_r_start; 00022 int32_t idx_start; 00023 int32_t idx_stop; 00024 int32_t idx_comp; 00025 }; 00026 #endif // DOXYGEN_SHOULD_SKIP_THIS 00027 00028 CDistanceMachine::CDistanceMachine() 00029 : CMachine() 00030 { 00031 init(); 00032 } 00033 00034 CDistanceMachine::~CDistanceMachine() 00035 { 00036 SG_UNREF(distance); 00037 } 00038 00039 void CDistanceMachine::init() 00040 { 00041 /* all distance machines should store their models, i.e. cluster centers 00042 * At least, it has to be ensured, that after calling train(), or in the 00043 * call of apply() in the cases where there is no train method, the lhs 00044 * of the underlying distance is set to cluster centers */ 00045 set_store_model_features(true); 00046 00047 distance=NULL; 00048 m_parameters->add((CSGObject**)&distance, "distance", "Distance to use"); 00049 } 00050 00051 void CDistanceMachine::distances_lhs(float64_t* result,int32_t idx_a1,int32_t idx_a2,int32_t idx_b) 00052 { 00053 int32_t num_threads=parallel->get_num_threads(); 00054 ASSERT(num_threads>0) 00055 00056 ASSERT(result) 00057 00058 if (num_threads < 2) 00059 { 00060 D_THREAD_PARAM param; 00061 param.d=distance; 00062 param.r=result; 00063 param.idx_r_start=idx_a1; 00064 param.idx_start=idx_a1; 00065 param.idx_stop=idx_a2+1; 00066 param.idx_comp=idx_b; 00067 00068 run_distance_thread_lhs((void*) ¶m); 00069 } 00070 #ifdef HAVE_PTHREAD 00071 else 00072 { 00073 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00074 D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads); 00075 int32_t num_vec=idx_a2-idx_a1+1; 00076 int32_t step= num_vec/num_threads; 00077 int32_t t; 00078 00079 pthread_attr_t attr; 00080 pthread_attr_init(&attr); 00081 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00082 00083 for (t=0; t<num_threads-1; t++) 00084 { 00085 params[t].d = distance; 00086 params[t].r = result; 00087 params[t].idx_r_start=t*step; 00088 params[t].idx_start = (t*step)+idx_a1; 00089 params[t].idx_stop = ((t+1)*step)+idx_a1; 00090 params[t].idx_comp=idx_b; 00091 00092 pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_lhs, (void*)¶ms[t]); 00093 } 00094 params[t].d = distance; 00095 params[t].r = result; 00096 params[t].idx_r_start=t*step; 00097 params[t].idx_start = (t*step)+idx_a1; 00098 params[t].idx_stop = idx_a2+1; 00099 params[t].idx_comp=idx_b; 00100 00101 run_distance_thread_lhs(¶ms[t]); 00102 00103 for (t=0; t<num_threads-1; t++) 00104 pthread_join(threads[t], NULL); 00105 00106 pthread_attr_destroy(&attr); 00107 SG_FREE(params); 00108 SG_FREE(threads); 00109 } 00110 #endif 00111 } 00112 00113 void CDistanceMachine::distances_rhs(float64_t* result,int32_t idx_b1,int32_t idx_b2,int32_t idx_a) 00114 { 00115 int32_t num_threads=parallel->get_num_threads(); 00116 ASSERT(num_threads>0) 00117 00118 ASSERT(result) 00119 00120 if (num_threads < 2) 00121 { 00122 D_THREAD_PARAM param; 00123 param.d=distance; 00124 param.r=result; 00125 param.idx_r_start=idx_b1; 00126 param.idx_start=idx_b1; 00127 param.idx_stop=idx_b2+1; 00128 param.idx_comp=idx_a; 00129 00130 run_distance_thread_rhs((void*) ¶m); 00131 } 00132 #ifndef WIN32 00133 else 00134 { 00135 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00136 D_THREAD_PARAM* params = SG_MALLOC(D_THREAD_PARAM, num_threads); 00137 int32_t num_vec=idx_b2-idx_b1+1; 00138 int32_t step= num_vec/num_threads; 00139 int32_t t; 00140 00141 pthread_attr_t attr; 00142 pthread_attr_init(&attr); 00143 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00144 00145 for (t=0; t<num_threads-1; t++) 00146 { 00147 params[t].d = distance; 00148 params[t].r = result; 00149 params[t].idx_r_start=t*step; 00150 params[t].idx_start = (t*step)+idx_b1; 00151 params[t].idx_stop = ((t+1)*step)+idx_b1; 00152 params[t].idx_comp=idx_a; 00153 00154 pthread_create(&threads[t], &attr, CDistanceMachine::run_distance_thread_rhs, (void*)¶ms[t]); 00155 } 00156 params[t].d = distance; 00157 params[t].r = result; 00158 params[t].idx_r_start=t*step; 00159 params[t].idx_start = (t*step)+idx_b1; 00160 params[t].idx_stop = idx_b2+1; 00161 params[t].idx_comp=idx_a; 00162 00163 run_distance_thread_rhs(¶ms[t]); 00164 00165 for (t=0; t<num_threads-1; t++) 00166 pthread_join(threads[t], NULL); 00167 00168 pthread_attr_destroy(&attr); 00169 SG_FREE(params); 00170 SG_FREE(threads); 00171 } 00172 #endif 00173 } 00174 00175 void* CDistanceMachine::run_distance_thread_lhs(void* p) 00176 { 00177 D_THREAD_PARAM* params= (D_THREAD_PARAM*) p; 00178 CDistance* distance=params->d; 00179 float64_t* res=params->r; 00180 int32_t idx_res_start=params->idx_r_start; 00181 int32_t idx_act=params->idx_start; 00182 int32_t idx_stop=params->idx_stop; 00183 int32_t idx_c=params->idx_comp; 00184 00185 for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++) 00186 res[i] =distance->distance(idx_act,idx_c); 00187 00188 return NULL; 00189 } 00190 00191 void* CDistanceMachine::run_distance_thread_rhs(void* p) 00192 { 00193 D_THREAD_PARAM* params= (D_THREAD_PARAM*) p; 00194 CDistance* distance=params->d; 00195 float64_t* res=params->r; 00196 int32_t idx_res_start=params->idx_r_start; 00197 int32_t idx_act=params->idx_start; 00198 int32_t idx_stop=params->idx_stop; 00199 int32_t idx_c=params->idx_comp; 00200 00201 for (int32_t i=idx_res_start; idx_act<idx_stop; i++,idx_act++) 00202 res[i] =distance->distance(idx_c,idx_act); 00203 00204 return NULL; 00205 } 00206 00207 CMulticlassLabels* CDistanceMachine::apply_multiclass(CFeatures* data) 00208 { 00209 if (data) 00210 { 00211 /* set distance features to given ones and apply to all */ 00212 CFeatures* lhs=distance->get_lhs(); 00213 distance->init(lhs, data); 00214 SG_UNREF(lhs); 00215 00216 /* build result labels and classify all elements of procedure */ 00217 CMulticlassLabels* result=new CMulticlassLabels(data->get_num_vectors()); 00218 for (index_t i=0; i<data->get_num_vectors(); ++i) 00219 result->set_label(i, apply_one(i)); 00220 return result; 00221 } 00222 else 00223 { 00224 /* call apply on complete right hand side */ 00225 CFeatures* all=distance->get_rhs(); 00226 CMulticlassLabels* result = apply_multiclass(all); 00227 SG_UNREF(all); 00228 return result; 00229 } 00230 return NULL; 00231 } 00232 00233 float64_t CDistanceMachine::apply_one(int32_t num) 00234 { 00235 /* number of clusters */ 00236 CFeatures* lhs=distance->get_lhs(); 00237 int32_t num_clusters=lhs->get_num_vectors(); 00238 SG_UNREF(lhs); 00239 00240 /* (multiple threads) calculate distances to all cluster centers */ 00241 float64_t* dists=SG_MALLOC(float64_t, num_clusters); 00242 distances_lhs(dists, 0, num_clusters-1, num); 00243 00244 /* find cluster index with smallest distance */ 00245 float64_t result=dists[0]; 00246 index_t best_index=0; 00247 for (index_t i=1; i<num_clusters; ++i) 00248 { 00249 if (dists[i]<result) 00250 { 00251 result=dists[i]; 00252 best_index=i; 00253 } 00254 } 00255 00256 SG_FREE(dists); 00257 00258 /* implicit cast */ 00259 return best_index; 00260 } 00261 00262 void CDistanceMachine::set_distance(CDistance* d) 00263 { 00264 SG_REF(d); 00265 SG_UNREF(distance); 00266 distance=d; 00267 } 00268 00269 CDistance* CDistanceMachine::get_distance() const 00270 { 00271 SG_REF(distance); 00272 return distance; 00273 } 00274 00275 void CDistanceMachine::store_model_features() 00276 { 00277 SG_ERROR("store_model_features not yet implemented for %s!\n", 00278 get_name()); 00279 } 00280