SHOGUN
v3.2.0
|
00001 /* 00002 * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS 00003 * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED 00004 * 00005 * this program is free software; you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation; either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * Written (W) 1999-2009 Soeren Sonnenburg 00011 * Written (W) 1999-2008 Gunnar Raetsch 00012 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00013 */ 00014 00015 #include <shogun/lib/config.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/io/File.h> 00019 #include <shogun/lib/Time.h> 00020 #include <shogun/lib/Signal.h> 00021 00022 #include <shogun/base/Parallel.h> 00023 00024 #include <shogun/kernel/Kernel.h> 00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h> 00026 #include <shogun/features/Features.h> 00027 #include <shogun/base/Parameter.h> 00028 00029 #include <shogun/classifier/svm/SVM.h> 00030 00031 #include <string.h> 00032 #include <unistd.h> 00033 #include <math.h> 00034 00035 #ifdef HAVE_PTHREAD 00036 #include <pthread.h> 00037 #endif 00038 00039 using namespace shogun; 00040 00041 CKernel::CKernel() : CSGObject() 00042 { 00043 init(); 00044 register_params(); 00045 } 00046 00047 CKernel::CKernel(int32_t size) : CSGObject() 00048 { 00049 init(); 00050 00051 if (size<10) 00052 size=10; 00053 00054 cache_size=size; 00055 register_params(); 00056 } 00057 00058 00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject() 00060 { 00061 init(); 00062 00063 if (size<10) 00064 size=10; 00065 00066 cache_size=size; 00067 00068 set_normalizer(new CIdentityKernelNormalizer()); 00069 init(p_lhs, p_rhs); 00070 register_params(); 00071 } 00072 00073 CKernel::~CKernel() 00074 { 00075 if (get_is_initialized()) 00076 SG_ERROR("Kernel still initialized on destruction.\n") 00077 00078 remove_lhs_and_rhs(); 00079 SG_UNREF(normalizer); 00080 00081 SG_INFO("Kernel deleted (%p).\n", this) 00082 } 00083 00084 #ifdef USE_SVMLIGHT 00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack) 00086 { 00087 if (size<10) 00088 size=10; 00089 00090 kernel_cache_cleanup(); 00091 cache_size=size; 00092 00093 if (has_features() && get_num_vec_lhs()) 00094 kernel_cache_init(cache_size, regression_hack); 00095 } 00096 #endif //USE_SVMLIGHT 00097 00098 bool CKernel::init(CFeatures* l, CFeatures* r) 00099 { 00100 /* make sure that features are not deleted if same ones are used */ 00101 SG_REF(l); 00102 SG_REF(r); 00103 00104 //make sure features were indeed supplied 00105 REQUIRE(l, "CKernel::init(%p, %p): Left hand side features required!\n", l, r) 00106 REQUIRE(r, "CKernel::init(%p, %p): Right hand side features required!\n", l, r) 00107 00108 //make sure features are compatible 00109 ASSERT(l->get_feature_class()==r->get_feature_class()) 00110 ASSERT(l->get_feature_type()==r->get_feature_type()) 00111 00112 //remove references to previous features 00113 remove_lhs_and_rhs(); 00114 00115 //increase reference counts 00116 SG_REF(l); 00117 if (l==r) 00118 lhs_equals_rhs=true; 00119 else // l!=r 00120 SG_REF(r); 00121 00122 lhs=l; 00123 rhs=r; 00124 00125 ASSERT(!num_lhs || num_lhs==l->get_num_vectors()) 00126 ASSERT(!num_rhs || num_rhs==l->get_num_vectors()) 00127 00128 num_lhs=l->get_num_vectors(); 00129 num_rhs=r->get_num_vectors(); 00130 00131 /* unref "safety" refs from beginning */ 00132 SG_UNREF(r); 00133 SG_UNREF(l); 00134 00135 SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r) 00136 return true; 00137 } 00138 00139 bool CKernel::set_normalizer(CKernelNormalizer* n) 00140 { 00141 SG_REF(n); 00142 if (lhs && rhs) 00143 n->init(this); 00144 00145 SG_UNREF(normalizer); 00146 normalizer=n; 00147 00148 return (normalizer!=NULL); 00149 } 00150 00151 CKernelNormalizer* CKernel::get_normalizer() 00152 { 00153 SG_REF(normalizer) 00154 return normalizer; 00155 } 00156 00157 bool CKernel::init_normalizer() 00158 { 00159 return normalizer->init(this); 00160 } 00161 00162 void CKernel::cleanup() 00163 { 00164 remove_lhs_and_rhs(); 00165 } 00166 00167 #ifdef USE_SVMLIGHT 00168 /****************************** Cache handling *******************************/ 00169 00170 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack) 00171 { 00172 int32_t totdoc=get_num_vec_lhs(); 00173 if (totdoc<=0) 00174 { 00175 SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n", 00176 get_num_vec_lhs(), get_num_vec_rhs()); 00177 } 00178 uint64_t buffer_size=0; 00179 int32_t i; 00180 00181 //in regression the additional constraints are made by doubling the training data 00182 if (regression_hack) 00183 totdoc*=2; 00184 00185 buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM); 00186 if (buffer_size>((uint64_t) totdoc)*totdoc) 00187 buffer_size=((uint64_t) totdoc)*totdoc; 00188 00189 SG_INFO("using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name()) 00190 00191 //make sure it fits in the *signed* KERNELCACHE_IDX type 00192 ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1))) 00193 00194 kernel_cache.index = SG_MALLOC(int32_t, totdoc); 00195 kernel_cache.occu = SG_MALLOC(int32_t, totdoc); 00196 kernel_cache.lru = SG_MALLOC(int32_t, totdoc); 00197 kernel_cache.invindex = SG_MALLOC(int32_t, totdoc); 00198 kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc); 00199 kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc); 00200 kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size); 00201 kernel_cache.buffsize=buffer_size; 00202 kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc); 00203 00204 if(kernel_cache.max_elems>totdoc) { 00205 kernel_cache.max_elems=totdoc; 00206 } 00207 00208 kernel_cache.elems=0; // initialize cache 00209 for(i=0;i<totdoc;i++) { 00210 kernel_cache.index[i]=-1; 00211 kernel_cache.lru[i]=0; 00212 } 00213 for(i=0;i<totdoc;i++) { 00214 kernel_cache.occu[i]=0; 00215 kernel_cache.invindex[i]=-1; 00216 } 00217 00218 kernel_cache.activenum=totdoc;; 00219 for(i=0;i<totdoc;i++) { 00220 kernel_cache.active2totdoc[i]=i; 00221 kernel_cache.totdoc2active[i]=i; 00222 } 00223 00224 kernel_cache.time=0; 00225 } 00226 00227 void CKernel::get_kernel_row( 00228 int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line) 00229 { 00230 int32_t i,j; 00231 KERNELCACHE_IDX start; 00232 00233 int32_t num_vectors = get_num_vec_lhs(); 00234 if (docnum>=num_vectors) 00235 docnum=2*num_vectors-1-docnum; 00236 00237 /* is cached? */ 00238 if(kernel_cache.index[docnum] != -1) 00239 { 00240 kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */ 00241 start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum]; 00242 00243 if (full_line) 00244 { 00245 for(j=0;j<get_num_vec_lhs();j++) 00246 { 00247 if(kernel_cache.totdoc2active[j] >= 0) 00248 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]]; 00249 else 00250 buffer[j]=(float64_t) kernel(docnum, j); 00251 } 00252 } 00253 else 00254 { 00255 for(i=0;(j=active2dnum[i])>=0;i++) 00256 { 00257 if(kernel_cache.totdoc2active[j] >= 0) 00258 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]]; 00259 else 00260 { 00261 int32_t k=j; 00262 if (k>=num_vectors) 00263 k=2*num_vectors-1-k; 00264 buffer[j]=(float64_t) kernel(docnum, k); 00265 } 00266 } 00267 } 00268 } 00269 else 00270 { 00271 if (full_line) 00272 { 00273 for(j=0;j<get_num_vec_lhs();j++) 00274 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j); 00275 } 00276 else 00277 { 00278 for(i=0;(j=active2dnum[i])>=0;i++) 00279 { 00280 int32_t k=j; 00281 if (k>=num_vectors) 00282 k=2*num_vectors-1-k; 00283 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k); 00284 } 00285 } 00286 } 00287 } 00288 00289 00290 // Fills cache for the row m 00291 void CKernel::cache_kernel_row(int32_t m) 00292 { 00293 register int32_t j,k,l; 00294 register KERNELCACHE_ELEM *cache; 00295 00296 int32_t num_vectors = get_num_vec_lhs(); 00297 00298 if (m>=num_vectors) 00299 m=2*num_vectors-1-m; 00300 00301 if(!kernel_cache_check(m)) // not cached yet 00302 { 00303 cache = kernel_cache_clean_and_malloc(m); 00304 if(cache) { 00305 l=kernel_cache.totdoc2active[m]; 00306 00307 for(j=0;j<kernel_cache.activenum;j++) // fill cache 00308 { 00309 k=kernel_cache.active2totdoc[j]; 00310 00311 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) { 00312 cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum) 00313 *kernel_cache.index[k]+l]; 00314 } 00315 else 00316 { 00317 if (k>=num_vectors) 00318 k=2*num_vectors-1-k; 00319 00320 cache[j]=kernel(m, k); 00321 } 00322 } 00323 } 00324 else 00325 perror("Error: Kernel cache full! => increase cache size"); 00326 } 00327 } 00328 00329 00330 void* CKernel::cache_multiple_kernel_row_helper(void* p) 00331 { 00332 int32_t j,k,l; 00333 S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p; 00334 00335 for (int32_t i=params->start; i<params->end; i++) 00336 { 00337 KERNELCACHE_ELEM* cache=params->cache[i]; 00338 int32_t m = params->uncached_rows[i]; 00339 l=params->kernel_cache->totdoc2active[m]; 00340 00341 for(j=0;j<params->kernel_cache->activenum;j++) // fill cache 00342 { 00343 k=params->kernel_cache->active2totdoc[j]; 00344 00345 if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) { 00346 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum) 00347 *params->kernel_cache->index[k]+l]; 00348 } 00349 else 00350 { 00351 if (k>=params->num_vectors) 00352 k=2*params->num_vectors-1-k; 00353 00354 cache[j]=params->kernel->kernel(m, k); 00355 } 00356 } 00357 00358 //now line m is cached 00359 params->needs_computation[m]=0; 00360 } 00361 return NULL; 00362 } 00363 00364 // Fills cache for the rows in key 00365 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows) 00366 { 00367 #ifdef HAVE_PTHREAD 00368 int32_t nthreads=parallel->get_num_threads(); 00369 00370 if (nthreads<2) 00371 { 00372 #endif 00373 for(int32_t i=0;i<num_rows;i++) 00374 cache_kernel_row(rows[i]); 00375 #ifdef HAVE_PTHREAD 00376 } 00377 else 00378 { 00379 // fill up kernel cache 00380 int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows); 00381 KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows); 00382 pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1); 00383 S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1); 00384 int32_t num_threads=nthreads-1; 00385 int32_t num_vec=get_num_vec_lhs(); 00386 ASSERT(num_vec>0) 00387 uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec); 00388 00389 int32_t step=0; 00390 int32_t num=0; 00391 int32_t end=0; 00392 00393 // allocate cachelines if necessary 00394 for (int32_t i=0; i<num_rows; i++) 00395 { 00396 int32_t idx=rows[i]; 00397 if (idx>=num_vec) 00398 idx=2*num_vec-1-idx; 00399 00400 if (kernel_cache_check(idx)) 00401 continue; 00402 00403 needs_computation[idx]=1; 00404 uncached_rows[num]=idx; 00405 cache[num]= kernel_cache_clean_and_malloc(idx); 00406 00407 if (!cache[num]) 00408 SG_ERROR("Kernel cache full! => increase cache size\n") 00409 00410 num++; 00411 } 00412 00413 if (num>0) 00414 { 00415 step= num/nthreads; 00416 00417 if (step<1) 00418 { 00419 num_threads=num-1; 00420 step=1; 00421 } 00422 00423 for (int32_t t=0; t<num_threads; t++) 00424 { 00425 params[t].kernel = this; 00426 params[t].kernel_cache = &kernel_cache; 00427 params[t].cache = cache; 00428 params[t].uncached_rows = uncached_rows; 00429 params[t].needs_computation = needs_computation; 00430 params[t].num_uncached = num; 00431 params[t].start = t*step; 00432 params[t].end = (t+1)*step; 00433 params[t].num_vectors = get_num_vec_lhs(); 00434 end=params[t].end; 00435 00436 int code=pthread_create(&threads[t], NULL, 00437 CKernel::cache_multiple_kernel_row_helper, (void*)¶ms[t]); 00438 00439 if (code != 0) 00440 { 00441 SG_WARNING("Thread creation failed (thread %d of %d) " 00442 "with error:'%s'\n",t, num_threads, strerror(code)); 00443 num_threads=t; 00444 end=t*step; 00445 break; 00446 } 00447 } 00448 } 00449 else 00450 num_threads=-1; 00451 00452 00453 S_KTHREAD_PARAM last_param; 00454 last_param.kernel = this; 00455 last_param.kernel_cache = &kernel_cache; 00456 last_param.cache = cache; 00457 last_param.uncached_rows = uncached_rows; 00458 last_param.needs_computation = needs_computation; 00459 last_param.start = end; 00460 last_param.num_uncached = num; 00461 last_param.end = num; 00462 last_param.num_vectors = get_num_vec_lhs(); 00463 00464 cache_multiple_kernel_row_helper(&last_param); 00465 00466 00467 for (int32_t t=0; t<num_threads; t++) 00468 { 00469 if (pthread_join(threads[t], NULL) != 0) 00470 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads) 00471 } 00472 00473 SG_FREE(needs_computation); 00474 SG_FREE(params); 00475 SG_FREE(threads); 00476 SG_FREE(cache); 00477 SG_FREE(uncached_rows); 00478 } 00479 #endif 00480 } 00481 00482 // remove numshrink columns in the cache 00483 // which correspond to examples marked 00484 void CKernel::kernel_cache_shrink( 00485 int32_t totdoc, int32_t numshrink, int32_t *after) 00486 { 00487 ASSERT(totdoc > 0); 00488 register int32_t i,j,jj,scount; // 0 in after. 00489 KERNELCACHE_IDX from=0,to=0; 00490 int32_t *keep; 00491 00492 keep=SG_MALLOC(int32_t, totdoc); 00493 for(j=0;j<totdoc;j++) { 00494 keep[j]=1; 00495 } 00496 scount=0; 00497 for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) { 00498 j=kernel_cache.active2totdoc[jj]; 00499 if(!after[j]) { 00500 scount++; 00501 keep[j]=0; 00502 } 00503 } 00504 00505 for(i=0;i<kernel_cache.max_elems;i++) { 00506 for(jj=0;jj<kernel_cache.activenum;jj++) { 00507 j=kernel_cache.active2totdoc[jj]; 00508 if(!keep[j]) { 00509 from++; 00510 } 00511 else { 00512 kernel_cache.buffer[to]=kernel_cache.buffer[from]; 00513 to++; 00514 from++; 00515 } 00516 } 00517 } 00518 00519 kernel_cache.activenum=0; 00520 for(j=0;j<totdoc;j++) { 00521 if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) { 00522 kernel_cache.active2totdoc[kernel_cache.activenum]=j; 00523 kernel_cache.totdoc2active[j]=kernel_cache.activenum; 00524 kernel_cache.activenum++; 00525 } 00526 else { 00527 kernel_cache.totdoc2active[j]=-1; 00528 } 00529 } 00530 00531 kernel_cache.max_elems= (int32_t) kernel_cache.buffsize; 00532 00533 if (kernel_cache.activenum>0) 00534 kernel_cache.buffsize/=kernel_cache.activenum; 00535 00536 if(kernel_cache.max_elems>totdoc) 00537 kernel_cache.max_elems=totdoc; 00538 00539 SG_FREE(keep); 00540 00541 } 00542 00543 void CKernel::kernel_cache_reset_lru() 00544 { 00545 int32_t maxlru=0,k; 00546 00547 for(k=0;k<kernel_cache.max_elems;k++) { 00548 if(maxlru < kernel_cache.lru[k]) 00549 maxlru=kernel_cache.lru[k]; 00550 } 00551 for(k=0;k<kernel_cache.max_elems;k++) { 00552 kernel_cache.lru[k]-=maxlru; 00553 } 00554 } 00555 00556 void CKernel::kernel_cache_cleanup() 00557 { 00558 SG_FREE(kernel_cache.index); 00559 SG_FREE(kernel_cache.occu); 00560 SG_FREE(kernel_cache.lru); 00561 SG_FREE(kernel_cache.invindex); 00562 SG_FREE(kernel_cache.active2totdoc); 00563 SG_FREE(kernel_cache.totdoc2active); 00564 SG_FREE(kernel_cache.buffer); 00565 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE)); 00566 } 00567 00568 int32_t CKernel::kernel_cache_malloc() 00569 { 00570 int32_t i; 00571 00572 if(kernel_cache_space_available()) { 00573 for(i=0;i<kernel_cache.max_elems;i++) { 00574 if(!kernel_cache.occu[i]) { 00575 kernel_cache.occu[i]=1; 00576 kernel_cache.elems++; 00577 return(i); 00578 } 00579 } 00580 } 00581 return(-1); 00582 } 00583 00584 void CKernel::kernel_cache_free(int32_t cacheidx) 00585 { 00586 kernel_cache.occu[cacheidx]=0; 00587 kernel_cache.elems--; 00588 } 00589 00590 // remove least recently used cache 00591 // element 00592 int32_t CKernel::kernel_cache_free_lru() 00593 { 00594 register int32_t k,least_elem=-1,least_time; 00595 00596 least_time=kernel_cache.time+1; 00597 for(k=0;k<kernel_cache.max_elems;k++) { 00598 if(kernel_cache.invindex[k] != -1) { 00599 if(kernel_cache.lru[k]<least_time) { 00600 least_time=kernel_cache.lru[k]; 00601 least_elem=k; 00602 } 00603 } 00604 } 00605 00606 if(least_elem != -1) { 00607 kernel_cache_free(least_elem); 00608 kernel_cache.index[kernel_cache.invindex[least_elem]]=-1; 00609 kernel_cache.invindex[least_elem]=-1; 00610 return(1); 00611 } 00612 return(0); 00613 } 00614 00615 // Get a free cache entry. In case cache is full, the lru 00616 // element is removed. 00617 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx) 00618 { 00619 int32_t result; 00620 if((result = kernel_cache_malloc()) == -1) { 00621 if(kernel_cache_free_lru()) { 00622 result = kernel_cache_malloc(); 00623 } 00624 } 00625 kernel_cache.index[cacheidx]=result; 00626 if(result == -1) { 00627 return(0); 00628 } 00629 kernel_cache.invindex[result]=cacheidx; 00630 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru 00631 return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]]; 00632 } 00633 #endif //USE_SVMLIGHT 00634 00635 void CKernel::load(CFile* loader) 00636 { 00637 SG_SET_LOCALE_C; 00638 SG_RESET_LOCALE; 00639 } 00640 00641 void CKernel::save(CFile* writer) 00642 { 00643 SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>(); 00644 SG_SET_LOCALE_C; 00645 writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols); 00646 SG_RESET_LOCALE; 00647 } 00648 00649 void CKernel::remove_lhs_and_rhs() 00650 { 00651 SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n") 00652 if (rhs!=lhs) 00653 SG_UNREF(rhs); 00654 rhs = NULL; 00655 num_rhs=0; 00656 00657 SG_UNREF(lhs); 00658 lhs = NULL; 00659 num_lhs=0; 00660 lhs_equals_rhs=false; 00661 00662 #ifdef USE_SVMLIGHT 00663 cache_reset(); 00664 #endif //USE_SVMLIGHT 00665 SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n") 00666 } 00667 00668 void CKernel::remove_lhs() 00669 { 00670 if (rhs==lhs) 00671 rhs=NULL; 00672 SG_UNREF(lhs); 00673 lhs = NULL; 00674 num_lhs=0; 00675 lhs_equals_rhs=false; 00676 #ifdef USE_SVMLIGHT 00677 cache_reset(); 00678 #endif //USE_SVMLIGHT 00679 } 00680 00682 void CKernel::remove_rhs() 00683 { 00684 if (rhs!=lhs) 00685 SG_UNREF(rhs); 00686 rhs = NULL; 00687 num_rhs=0; 00688 lhs_equals_rhs=false; 00689 00690 #ifdef USE_SVMLIGHT 00691 cache_reset(); 00692 #endif //USE_SVMLIGHT 00693 } 00694 00695 #define ENUM_CASE(n) case n: SG_INFO(#n " ") break; 00696 00697 void CKernel::list_kernel() 00698 { 00699 SG_INFO("%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(), 00700 get_combined_kernel_weight(), 00701 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : 00702 "SLOWBUTMEMEFFICIENT"); 00703 00704 switch (get_kernel_type()) 00705 { 00706 ENUM_CASE(K_UNKNOWN) 00707 ENUM_CASE(K_LINEAR) 00708 ENUM_CASE(K_POLY) 00709 ENUM_CASE(K_GAUSSIAN) 00710 ENUM_CASE(K_GAUSSIANSHIFT) 00711 ENUM_CASE(K_GAUSSIANMATCH) 00712 ENUM_CASE(K_HISTOGRAM) 00713 ENUM_CASE(K_SALZBERG) 00714 ENUM_CASE(K_LOCALITYIMPROVED) 00715 ENUM_CASE(K_SIMPLELOCALITYIMPROVED) 00716 ENUM_CASE(K_FIXEDDEGREE) 00717 ENUM_CASE(K_WEIGHTEDDEGREE) 00718 ENUM_CASE(K_WEIGHTEDDEGREEPOS) 00719 ENUM_CASE(K_WEIGHTEDDEGREERBF) 00720 ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING) 00721 ENUM_CASE(K_POLYMATCH) 00722 ENUM_CASE(K_ALIGNMENT) 00723 ENUM_CASE(K_COMMWORDSTRING) 00724 ENUM_CASE(K_COMMULONGSTRING) 00725 ENUM_CASE(K_SPECTRUMRBF) 00726 ENUM_CASE(K_COMBINED) 00727 ENUM_CASE(K_AUC) 00728 ENUM_CASE(K_CUSTOM) 00729 ENUM_CASE(K_SIGMOID) 00730 ENUM_CASE(K_CHI2) 00731 ENUM_CASE(K_DIAG) 00732 ENUM_CASE(K_CONST) 00733 ENUM_CASE(K_DISTANCE) 00734 ENUM_CASE(K_LOCALALIGNMENT) 00735 ENUM_CASE(K_PYRAMIDCHI2) 00736 ENUM_CASE(K_OLIGO) 00737 ENUM_CASE(K_MATCHWORD) 00738 ENUM_CASE(K_TPPK) 00739 ENUM_CASE(K_REGULATORYMODULES) 00740 ENUM_CASE(K_SPARSESPATIALSAMPLE) 00741 ENUM_CASE(K_HISTOGRAMINTERSECTION) 00742 ENUM_CASE(K_WAVELET) 00743 ENUM_CASE(K_WAVE) 00744 ENUM_CASE(K_CAUCHY) 00745 ENUM_CASE(K_TSTUDENT) 00746 ENUM_CASE(K_MULTIQUADRIC) 00747 ENUM_CASE(K_EXPONENTIAL) 00748 ENUM_CASE(K_RATIONAL_QUADRATIC) 00749 ENUM_CASE(K_POWER) 00750 ENUM_CASE(K_SPHERICAL) 00751 ENUM_CASE(K_LOG) 00752 ENUM_CASE(K_SPLINE) 00753 ENUM_CASE(K_ANOVA) 00754 ENUM_CASE(K_CIRCULAR) 00755 ENUM_CASE(K_INVERSEMULTIQUADRIC) 00756 ENUM_CASE(K_SPECTRUMMISMATCHRBF) 00757 ENUM_CASE(K_DISTANTSEGMENTS) 00758 ENUM_CASE(K_BESSEL) 00759 ENUM_CASE(K_JENSENSHANNON) 00760 ENUM_CASE(K_DIRECTOR) 00761 ENUM_CASE(K_PRODUCT) 00762 ENUM_CASE(K_LINEARARD) 00763 ENUM_CASE(K_GAUSSIANARD) 00764 ENUM_CASE(K_STREAMING) 00765 } 00766 00767 switch (get_feature_class()) 00768 { 00769 ENUM_CASE(C_UNKNOWN) 00770 ENUM_CASE(C_DENSE) 00771 ENUM_CASE(C_SPARSE) 00772 ENUM_CASE(C_STRING) 00773 ENUM_CASE(C_STREAMING_DENSE) 00774 ENUM_CASE(C_STREAMING_SPARSE) 00775 ENUM_CASE(C_STREAMING_STRING) 00776 ENUM_CASE(C_STREAMING_VW) 00777 ENUM_CASE(C_COMBINED) 00778 ENUM_CASE(C_COMBINED_DOT) 00779 ENUM_CASE(C_WD) 00780 ENUM_CASE(C_SPEC) 00781 ENUM_CASE(C_WEIGHTEDSPEC) 00782 ENUM_CASE(C_POLY) 00783 ENUM_CASE(C_BINNED_DOT) 00784 ENUM_CASE(C_DIRECTOR_DOT) 00785 ENUM_CASE(C_LATENT) 00786 ENUM_CASE(C_MATRIX) 00787 ENUM_CASE(C_FACTOR_GRAPH) 00788 ENUM_CASE(C_ANY) 00789 } 00790 00791 switch (get_feature_type()) 00792 { 00793 ENUM_CASE(F_UNKNOWN) 00794 ENUM_CASE(F_BOOL) 00795 ENUM_CASE(F_CHAR) 00796 ENUM_CASE(F_BYTE) 00797 ENUM_CASE(F_SHORT) 00798 ENUM_CASE(F_WORD) 00799 ENUM_CASE(F_INT) 00800 ENUM_CASE(F_UINT) 00801 ENUM_CASE(F_LONG) 00802 ENUM_CASE(F_ULONG) 00803 ENUM_CASE(F_SHORTREAL) 00804 ENUM_CASE(F_DREAL) 00805 ENUM_CASE(F_LONGREAL) 00806 ENUM_CASE(F_ANY) 00807 } 00808 SG_INFO("\n") 00809 } 00810 #undef ENUM_CASE 00811 00812 bool CKernel::init_optimization( 00813 int32_t count, int32_t *IDX, float64_t * weights) 00814 { 00815 SG_ERROR("kernel does not support linadd optimization\n") 00816 return false ; 00817 } 00818 00819 bool CKernel::delete_optimization() 00820 { 00821 SG_ERROR("kernel does not support linadd optimization\n") 00822 return false; 00823 } 00824 00825 float64_t CKernel::compute_optimized(int32_t vector_idx) 00826 { 00827 SG_ERROR("kernel does not support linadd optimization\n") 00828 return 0; 00829 } 00830 00831 void CKernel::compute_batch( 00832 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec, 00833 int32_t* IDX, float64_t* weights, float64_t factor) 00834 { 00835 SG_ERROR("kernel does not support batch computation\n") 00836 } 00837 00838 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight) 00839 { 00840 SG_ERROR("kernel does not support linadd optimization, add_to_normal not implemented\n") 00841 } 00842 00843 void CKernel::clear_normal() 00844 { 00845 SG_ERROR("kernel does not support linadd optimization, clear_normal not implemented\n") 00846 } 00847 00848 int32_t CKernel::get_num_subkernels() 00849 { 00850 return 1; 00851 } 00852 00853 void CKernel::compute_by_subkernel( 00854 int32_t vector_idx, float64_t * subkernel_contrib) 00855 { 00856 SG_ERROR("kernel compute_by_subkernel not implemented\n") 00857 } 00858 00859 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights) 00860 { 00861 num_weights=1 ; 00862 return &combined_kernel_weight ; 00863 } 00864 00865 SGVector<float64_t> CKernel::get_subkernel_weights() 00866 { 00867 int num_weights = 1; 00868 const float64_t* weight = get_subkernel_weights(num_weights); 00869 return SGVector<float64_t>(const_cast<float64_t*>(weight),1,false); 00870 } 00871 00872 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights) 00873 { 00874 ASSERT(weights.vector) 00875 if (weights.vlen!=1) 00876 SG_ERROR("number of subkernel weights should be one ...\n") 00877 00878 combined_kernel_weight = weights.vector[0] ; 00879 } 00880 00881 CKernel* CKernel::obtain_from_generic(CSGObject* kernel) 00882 { 00883 if (kernel) 00884 { 00885 CKernel* casted=dynamic_cast<CKernel*>(kernel); 00886 REQUIRE(casted, "CKernel::obtain_from_generic(): Error, provided object" 00887 " of class \"%s\" is not a subclass of CKernel!\n", 00888 kernel->get_name()); 00889 return casted; 00890 } 00891 else 00892 return NULL; 00893 } 00894 00895 bool CKernel::init_optimization_svm(CSVM * svm) 00896 { 00897 int32_t num_suppvec=svm->get_num_support_vectors(); 00898 int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec); 00899 float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec); 00900 00901 for (int32_t i=0; i<num_suppvec; i++) 00902 { 00903 sv_idx[i] = svm->get_support_vector(i); 00904 sv_weight[i] = svm->get_alpha(i); 00905 } 00906 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight); 00907 00908 SG_FREE(sv_idx); 00909 SG_FREE(sv_weight); 00910 return ret; 00911 } 00912 00913 void CKernel::load_serializable_post() throw (ShogunException) 00914 { 00915 CSGObject::load_serializable_post(); 00916 if (lhs_equals_rhs) 00917 rhs=lhs; 00918 } 00919 00920 void CKernel::save_serializable_pre() throw (ShogunException) 00921 { 00922 CSGObject::save_serializable_pre(); 00923 00924 if (lhs_equals_rhs) 00925 rhs=NULL; 00926 } 00927 00928 void CKernel::save_serializable_post() throw (ShogunException) 00929 { 00930 CSGObject::save_serializable_post(); 00931 00932 if (lhs_equals_rhs) 00933 rhs=lhs; 00934 } 00935 00936 void CKernel::register_params() { 00937 SG_ADD(&cache_size, "cache_size", 00938 "Cache size in MB.", MS_NOT_AVAILABLE); 00939 SG_ADD((CSGObject**) &lhs, "lhs", 00940 "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE); 00941 SG_ADD((CSGObject**) &rhs, "rhs", 00942 "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE); 00943 SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs", 00944 "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE); 00945 SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.", 00946 MS_NOT_AVAILABLE); 00947 SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.", 00948 MS_NOT_AVAILABLE); 00949 SG_ADD(&combined_kernel_weight, "combined_kernel_weight", 00950 "Combined kernel weight.", MS_AVAILABLE); 00951 SG_ADD(&optimization_initialized, "optimization_initialized", 00952 "Optimization is initialized.", MS_NOT_AVAILABLE); 00953 SG_ADD((machine_int_t*) &opt_type, "opt_type", 00954 "Optimization type.", MS_NOT_AVAILABLE); 00955 SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE); 00956 SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.", 00957 MS_AVAILABLE); 00958 } 00959 00960 00961 void CKernel::init() 00962 { 00963 cache_size=10; 00964 kernel_matrix=NULL; 00965 lhs=NULL; 00966 rhs=NULL; 00967 num_lhs=0; 00968 num_rhs=0; 00969 lhs_equals_rhs=false; 00970 combined_kernel_weight=1; 00971 optimization_initialized=false; 00972 opt_type=FASTBUTMEMHUNGRY; 00973 properties=KP_NONE; 00974 normalizer=NULL; 00975 00976 #ifdef USE_SVMLIGHT 00977 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE)); 00978 #endif //USE_SVMLIGHT 00979 00980 set_normalizer(new CIdentityKernelNormalizer()); 00981 } 00982 00983 namespace shogun 00984 { 00986 template <class T> struct K_THREAD_PARAM 00987 { 00989 CKernel* kernel; 00991 int32_t start; 00993 int32_t end; 00995 int64_t total_start; 00997 int64_t total_end; 00999 int32_t m; 01001 int32_t n; 01003 T* result; 01005 bool symmetric; 01007 bool verbose; 01008 }; 01009 } 01010 01011 template <class T> void* CKernel::get_kernel_matrix_helper(void* p) 01012 { 01013 K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p; 01014 int32_t i_start=params->start; 01015 int32_t i_end=params->end; 01016 CKernel* k=params->kernel; 01017 T* result=params->result; 01018 bool symmetric=params->symmetric; 01019 int32_t n=params->n; 01020 int32_t m=params->m; 01021 bool verbose=params->verbose; 01022 int64_t total_start=params->total_start; 01023 int64_t total_end=params->total_end; 01024 int64_t total=total_start; 01025 01026 for (int32_t i=i_start; i<i_end; i++) 01027 { 01028 int32_t j_start=0; 01029 01030 if (symmetric) 01031 j_start=i; 01032 01033 for (int32_t j=j_start; j<n; j++) 01034 { 01035 float64_t v=k->kernel(i,j); 01036 result[i+j*m]=v; 01037 01038 if (symmetric && i!=j) 01039 result[j+i*m]=v; 01040 01041 if (verbose) 01042 { 01043 total++; 01044 01045 if (symmetric && i!=j) 01046 total++; 01047 01048 if (total%100 == 0) 01049 SG_OBJ_PROGRESS(k, total, total_start, total_end) 01050 01051 if (CSignal::cancel_computations()) 01052 break; 01053 } 01054 } 01055 01056 } 01057 01058 return NULL; 01059 } 01060 01061 template <class T> 01062 SGMatrix<T> CKernel::get_kernel_matrix() 01063 { 01064 T* result = NULL; 01065 01066 REQUIRE(has_features(), "no features assigned to kernel\n") 01067 01068 int32_t m=get_num_vec_lhs(); 01069 int32_t n=get_num_vec_rhs(); 01070 01071 int64_t total_num = int64_t(m)*n; 01072 01073 // if lhs == rhs and sizes match assume k(i,j)=k(j,i) 01074 bool symmetric= (lhs && lhs==rhs && m==n); 01075 01076 SG_DEBUG("returning kernel matrix of size %dx%d\n", m, n) 01077 01078 result=SG_MALLOC(T, total_num); 01079 01080 int32_t num_threads=parallel->get_num_threads(); 01081 if (num_threads < 2) 01082 { 01083 K_THREAD_PARAM<T> params; 01084 params.kernel=this; 01085 params.result=result; 01086 params.start=0; 01087 params.end=m; 01088 params.total_start=0; 01089 params.total_end=total_num; 01090 params.n=n; 01091 params.m=m; 01092 params.symmetric=symmetric; 01093 params.verbose=true; 01094 get_kernel_matrix_helper<T>((void*) ¶ms); 01095 } 01096 else 01097 { 01098 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 01099 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads); 01100 int64_t step= total_num/num_threads; 01101 01102 int32_t t; 01103 01104 num_threads--; 01105 for (t=0; t<num_threads; t++) 01106 { 01107 params[t].kernel = this; 01108 params[t].result = result; 01109 params[t].start = compute_row_start(t*step, n, symmetric); 01110 params[t].end = compute_row_start((t+1)*step, n, symmetric); 01111 params[t].total_start=t*step; 01112 params[t].total_end=(t+1)*step; 01113 params[t].n=n; 01114 params[t].m=m; 01115 params[t].symmetric=symmetric; 01116 params[t].verbose=false; 01117 01118 int code=pthread_create(&threads[t], NULL, 01119 CKernel::get_kernel_matrix_helper<T>, (void*)¶ms[t]); 01120 01121 if (code != 0) 01122 { 01123 SG_WARNING("Thread creation failed (thread %d of %d) " 01124 "with error:'%s'\n",t, num_threads, strerror(code)); 01125 num_threads=t; 01126 break; 01127 } 01128 } 01129 01130 params[t].kernel = this; 01131 params[t].result = result; 01132 params[t].start = compute_row_start(t*step, n, symmetric); 01133 params[t].end = m; 01134 params[t].total_start=t*step; 01135 params[t].total_end=total_num; 01136 params[t].n=n; 01137 params[t].m=m; 01138 params[t].symmetric=symmetric; 01139 params[t].verbose=true; 01140 get_kernel_matrix_helper<T>(¶ms[t]); 01141 01142 for (t=0; t<num_threads; t++) 01143 { 01144 if (pthread_join(threads[t], NULL) != 0) 01145 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads) 01146 } 01147 01148 SG_FREE(params); 01149 SG_FREE(threads); 01150 } 01151 01152 SG_DONE() 01153 01154 return SGMatrix<T>(result,m,n,true); 01155 } 01156 01157 template SGMatrix<float64_t> CKernel::get_kernel_matrix<float64_t>(); 01158 template SGMatrix<float32_t> CKernel::get_kernel_matrix<float32_t>(); 01159 01160 template void* CKernel::get_kernel_matrix_helper<float64_t>(void* p); 01161 template void* CKernel::get_kernel_matrix_helper<float32_t>(void* p);