SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.cpp
Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 #include <shogun/base/Parallel.h>
00023 
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028 
00029 #include <shogun/classifier/svm/SVM.h>
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044     register_params();
00045 }
00046 
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049     init();
00050 
00051     if (size<10)
00052         size=10;
00053 
00054     cache_size=size;
00055     register_params();
00056 }
00057 
00058 
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061     init();
00062 
00063     if (size<10)
00064         size=10;
00065 
00066     cache_size=size;
00067 
00068     set_normalizer(new CIdentityKernelNormalizer());
00069     init(p_lhs, p_rhs);
00070     register_params();
00071 }
00072 
00073 CKernel::~CKernel()
00074 {
00075     if (get_is_initialized())
00076         SG_ERROR("Kernel still initialized on destruction.\n")
00077 
00078     remove_lhs_and_rhs();
00079     SG_UNREF(normalizer);
00080 
00081     SG_INFO("Kernel deleted (%p).\n", this)
00082 }
00083 
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087     if (size<10)
00088         size=10;
00089 
00090     kernel_cache_cleanup();
00091     cache_size=size;
00092 
00093     if (has_features() && get_num_vec_lhs())
00094         kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097 
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100     /* make sure that features are not deleted if same ones are used */
00101     SG_REF(l);
00102     SG_REF(r);
00103 
00104     //make sure features were indeed supplied
00105     REQUIRE(l, "CKernel::init(%p, %p): Left hand side features required!\n", l, r)
00106     REQUIRE(r, "CKernel::init(%p, %p): Right hand side features required!\n", l, r)
00107 
00108     //make sure features are compatible
00109     ASSERT(l->get_feature_class()==r->get_feature_class())
00110     ASSERT(l->get_feature_type()==r->get_feature_type())
00111 
00112     //remove references to previous features
00113     remove_lhs_and_rhs();
00114 
00115     //increase reference counts
00116     SG_REF(l);
00117     if (l==r)
00118         lhs_equals_rhs=true;
00119     else // l!=r
00120         SG_REF(r);
00121 
00122     lhs=l;
00123     rhs=r;
00124 
00125     ASSERT(!num_lhs || num_lhs==l->get_num_vectors())
00126     ASSERT(!num_rhs || num_rhs==l->get_num_vectors())
00127 
00128     num_lhs=l->get_num_vectors();
00129     num_rhs=r->get_num_vectors();
00130 
00131     /* unref "safety" refs from beginning */
00132     SG_UNREF(r);
00133     SG_UNREF(l);
00134 
00135     SG_DEBUG("leaving CKernel::init(%p, %p)\n", l, r)
00136     return true;
00137 }
00138 
00139 bool CKernel::set_normalizer(CKernelNormalizer* n)
00140 {
00141     SG_REF(n);
00142     if (lhs && rhs)
00143         n->init(this);
00144 
00145     SG_UNREF(normalizer);
00146     normalizer=n;
00147 
00148     return (normalizer!=NULL);
00149 }
00150 
00151 CKernelNormalizer* CKernel::get_normalizer()
00152 {
00153     SG_REF(normalizer)
00154     return normalizer;
00155 }
00156 
00157 bool CKernel::init_normalizer()
00158 {
00159     return normalizer->init(this);
00160 }
00161 
00162 void CKernel::cleanup()
00163 {
00164     remove_lhs_and_rhs();
00165 }
00166 
00167 #ifdef USE_SVMLIGHT
00168 /****************************** Cache handling *******************************/
00169 
00170 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00171 {
00172     int32_t totdoc=get_num_vec_lhs();
00173     if (totdoc<=0)
00174     {
00175         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00176                 get_num_vec_lhs(), get_num_vec_rhs());
00177     }
00178     uint64_t buffer_size=0;
00179     int32_t i;
00180 
00181     //in regression the additional constraints are made by doubling the training data
00182     if (regression_hack)
00183         totdoc*=2;
00184 
00185     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00186     if (buffer_size>((uint64_t) totdoc)*totdoc)
00187         buffer_size=((uint64_t) totdoc)*totdoc;
00188 
00189     SG_INFO("using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name())
00190 
00191     //make sure it fits in the *signed* KERNELCACHE_IDX type
00192     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)))
00193 
00194     kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00195     kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00196     kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00197     kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00198     kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00199     kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00200     kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00201     kernel_cache.buffsize=buffer_size;
00202     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00203 
00204     if(kernel_cache.max_elems>totdoc) {
00205         kernel_cache.max_elems=totdoc;
00206     }
00207 
00208     kernel_cache.elems=0;   // initialize cache
00209     for(i=0;i<totdoc;i++) {
00210         kernel_cache.index[i]=-1;
00211         kernel_cache.lru[i]=0;
00212     }
00213     for(i=0;i<totdoc;i++) {
00214         kernel_cache.occu[i]=0;
00215         kernel_cache.invindex[i]=-1;
00216     }
00217 
00218     kernel_cache.activenum=totdoc;;
00219     for(i=0;i<totdoc;i++) {
00220         kernel_cache.active2totdoc[i]=i;
00221         kernel_cache.totdoc2active[i]=i;
00222     }
00223 
00224     kernel_cache.time=0;
00225 }
00226 
00227 void CKernel::get_kernel_row(
00228     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00229 {
00230     int32_t i,j;
00231     KERNELCACHE_IDX start;
00232 
00233     int32_t num_vectors = get_num_vec_lhs();
00234     if (docnum>=num_vectors)
00235         docnum=2*num_vectors-1-docnum;
00236 
00237     /* is cached? */
00238     if(kernel_cache.index[docnum] != -1)
00239     {
00240         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00241         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00242 
00243         if (full_line)
00244         {
00245             for(j=0;j<get_num_vec_lhs();j++)
00246             {
00247                 if(kernel_cache.totdoc2active[j] >= 0)
00248                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00249                 else
00250                     buffer[j]=(float64_t) kernel(docnum, j);
00251             }
00252         }
00253         else
00254         {
00255             for(i=0;(j=active2dnum[i])>=0;i++)
00256             {
00257                 if(kernel_cache.totdoc2active[j] >= 0)
00258                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00259                 else
00260                 {
00261                     int32_t k=j;
00262                     if (k>=num_vectors)
00263                         k=2*num_vectors-1-k;
00264                     buffer[j]=(float64_t) kernel(docnum, k);
00265                 }
00266             }
00267         }
00268     }
00269     else
00270     {
00271         if (full_line)
00272         {
00273             for(j=0;j<get_num_vec_lhs();j++)
00274                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00275         }
00276         else
00277         {
00278             for(i=0;(j=active2dnum[i])>=0;i++)
00279             {
00280                 int32_t k=j;
00281                 if (k>=num_vectors)
00282                     k=2*num_vectors-1-k;
00283                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00284             }
00285         }
00286     }
00287 }
00288 
00289 
00290 // Fills cache for the row m
00291 void CKernel::cache_kernel_row(int32_t m)
00292 {
00293     register int32_t j,k,l;
00294     register KERNELCACHE_ELEM *cache;
00295 
00296     int32_t num_vectors = get_num_vec_lhs();
00297 
00298     if (m>=num_vectors)
00299         m=2*num_vectors-1-m;
00300 
00301     if(!kernel_cache_check(m))   // not cached yet
00302     {
00303         cache = kernel_cache_clean_and_malloc(m);
00304         if(cache) {
00305             l=kernel_cache.totdoc2active[m];
00306 
00307             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00308             {
00309                 k=kernel_cache.active2totdoc[j];
00310 
00311                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00312                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00313                         *kernel_cache.index[k]+l];
00314                 }
00315                 else
00316                 {
00317                     if (k>=num_vectors)
00318                         k=2*num_vectors-1-k;
00319 
00320                     cache[j]=kernel(m, k);
00321                 }
00322             }
00323         }
00324         else
00325             perror("Error: Kernel cache full! => increase cache size");
00326     }
00327 }
00328 
00329 
00330 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00331 {
00332     int32_t j,k,l;
00333     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00334 
00335     for (int32_t i=params->start; i<params->end; i++)
00336     {
00337         KERNELCACHE_ELEM* cache=params->cache[i];
00338         int32_t m = params->uncached_rows[i];
00339         l=params->kernel_cache->totdoc2active[m];
00340 
00341         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00342         {
00343             k=params->kernel_cache->active2totdoc[j];
00344 
00345             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00346                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00347                     *params->kernel_cache->index[k]+l];
00348             }
00349             else
00350                 {
00351                     if (k>=params->num_vectors)
00352                         k=2*params->num_vectors-1-k;
00353 
00354                     cache[j]=params->kernel->kernel(m, k);
00355                 }
00356         }
00357 
00358         //now line m is cached
00359         params->needs_computation[m]=0;
00360     }
00361     return NULL;
00362 }
00363 
00364 // Fills cache for the rows in key
00365 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00366 {
00367 #ifdef HAVE_PTHREAD
00368     int32_t nthreads=parallel->get_num_threads();
00369 
00370     if (nthreads<2)
00371     {
00372 #endif
00373         for(int32_t i=0;i<num_rows;i++)
00374             cache_kernel_row(rows[i]);
00375 #ifdef HAVE_PTHREAD
00376     }
00377     else
00378     {
00379         // fill up kernel cache
00380         int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
00381         KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
00382         pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1);
00383         S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1);
00384         int32_t num_threads=nthreads-1;
00385         int32_t num_vec=get_num_vec_lhs();
00386         ASSERT(num_vec>0)
00387         uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);
00388 
00389         int32_t step=0;
00390         int32_t num=0;
00391         int32_t end=0;
00392 
00393         // allocate cachelines if necessary
00394         for (int32_t i=0; i<num_rows; i++)
00395         {
00396             int32_t idx=rows[i];
00397             if (idx>=num_vec)
00398                 idx=2*num_vec-1-idx;
00399 
00400             if (kernel_cache_check(idx))
00401                 continue;
00402 
00403             needs_computation[idx]=1;
00404             uncached_rows[num]=idx;
00405             cache[num]= kernel_cache_clean_and_malloc(idx);
00406 
00407             if (!cache[num])
00408                 SG_ERROR("Kernel cache full! => increase cache size\n")
00409 
00410             num++;
00411         }
00412 
00413         if (num>0)
00414         {
00415             step= num/nthreads;
00416 
00417             if (step<1)
00418             {
00419                 num_threads=num-1;
00420                 step=1;
00421             }
00422 
00423             for (int32_t t=0; t<num_threads; t++)
00424             {
00425                 params[t].kernel = this;
00426                 params[t].kernel_cache = &kernel_cache;
00427                 params[t].cache = cache;
00428                 params[t].uncached_rows = uncached_rows;
00429                 params[t].needs_computation = needs_computation;
00430                 params[t].num_uncached = num;
00431                 params[t].start = t*step;
00432                 params[t].end = (t+1)*step;
00433                 params[t].num_vectors = get_num_vec_lhs();
00434                 end=params[t].end;
00435 
00436                 int code=pthread_create(&threads[t], NULL,
00437                         CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]);
00438 
00439                 if (code != 0)
00440                 {
00441                     SG_WARNING("Thread creation failed (thread %d of %d) "
00442                             "with error:'%s'\n",t, num_threads, strerror(code));
00443                     num_threads=t;
00444                     end=t*step;
00445                     break;
00446                 }
00447             }
00448         }
00449         else
00450             num_threads=-1;
00451 
00452 
00453         S_KTHREAD_PARAM last_param;
00454         last_param.kernel = this;
00455         last_param.kernel_cache = &kernel_cache;
00456         last_param.cache = cache;
00457         last_param.uncached_rows = uncached_rows;
00458         last_param.needs_computation = needs_computation;
00459         last_param.start = end;
00460         last_param.num_uncached = num;
00461         last_param.end = num;
00462         last_param.num_vectors = get_num_vec_lhs();
00463 
00464         cache_multiple_kernel_row_helper(&last_param);
00465 
00466 
00467         for (int32_t t=0; t<num_threads; t++)
00468         {
00469             if (pthread_join(threads[t], NULL) != 0)
00470                 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
00471         }
00472 
00473         SG_FREE(needs_computation);
00474         SG_FREE(params);
00475         SG_FREE(threads);
00476         SG_FREE(cache);
00477         SG_FREE(uncached_rows);
00478     }
00479 #endif
00480 }
00481 
00482 // remove numshrink columns in the cache
00483 // which correspond to examples marked
00484 void CKernel::kernel_cache_shrink(
00485     int32_t totdoc, int32_t numshrink, int32_t *after)
00486 {
00487     ASSERT(totdoc > 0);
00488     register int32_t i,j,jj,scount;     // 0 in after.
00489     KERNELCACHE_IDX from=0,to=0;
00490     int32_t *keep;
00491 
00492     keep=SG_MALLOC(int32_t, totdoc);
00493     for(j=0;j<totdoc;j++) {
00494         keep[j]=1;
00495     }
00496     scount=0;
00497     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00498         j=kernel_cache.active2totdoc[jj];
00499         if(!after[j]) {
00500             scount++;
00501             keep[j]=0;
00502         }
00503     }
00504 
00505     for(i=0;i<kernel_cache.max_elems;i++) {
00506         for(jj=0;jj<kernel_cache.activenum;jj++) {
00507             j=kernel_cache.active2totdoc[jj];
00508             if(!keep[j]) {
00509                 from++;
00510             }
00511             else {
00512                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00513                 to++;
00514                 from++;
00515             }
00516         }
00517     }
00518 
00519     kernel_cache.activenum=0;
00520     for(j=0;j<totdoc;j++) {
00521         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00522             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00523             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00524             kernel_cache.activenum++;
00525         }
00526         else {
00527             kernel_cache.totdoc2active[j]=-1;
00528         }
00529     }
00530 
00531     kernel_cache.max_elems= (int32_t) kernel_cache.buffsize;
00532 
00533     if (kernel_cache.activenum>0)
00534         kernel_cache.buffsize/=kernel_cache.activenum;
00535 
00536     if(kernel_cache.max_elems>totdoc)
00537         kernel_cache.max_elems=totdoc;
00538 
00539     SG_FREE(keep);
00540 
00541 }
00542 
00543 void CKernel::kernel_cache_reset_lru()
00544 {
00545     int32_t maxlru=0,k;
00546 
00547     for(k=0;k<kernel_cache.max_elems;k++) {
00548         if(maxlru < kernel_cache.lru[k])
00549             maxlru=kernel_cache.lru[k];
00550     }
00551     for(k=0;k<kernel_cache.max_elems;k++) {
00552         kernel_cache.lru[k]-=maxlru;
00553     }
00554 }
00555 
00556 void CKernel::kernel_cache_cleanup()
00557 {
00558     SG_FREE(kernel_cache.index);
00559     SG_FREE(kernel_cache.occu);
00560     SG_FREE(kernel_cache.lru);
00561     SG_FREE(kernel_cache.invindex);
00562     SG_FREE(kernel_cache.active2totdoc);
00563     SG_FREE(kernel_cache.totdoc2active);
00564     SG_FREE(kernel_cache.buffer);
00565     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00566 }
00567 
00568 int32_t CKernel::kernel_cache_malloc()
00569 {
00570   int32_t i;
00571 
00572   if(kernel_cache_space_available()) {
00573     for(i=0;i<kernel_cache.max_elems;i++) {
00574       if(!kernel_cache.occu[i]) {
00575     kernel_cache.occu[i]=1;
00576     kernel_cache.elems++;
00577     return(i);
00578       }
00579     }
00580   }
00581   return(-1);
00582 }
00583 
00584 void CKernel::kernel_cache_free(int32_t cacheidx)
00585 {
00586     kernel_cache.occu[cacheidx]=0;
00587     kernel_cache.elems--;
00588 }
00589 
00590 // remove least recently used cache
00591 // element
00592 int32_t CKernel::kernel_cache_free_lru()
00593 {
00594   register int32_t k,least_elem=-1,least_time;
00595 
00596   least_time=kernel_cache.time+1;
00597   for(k=0;k<kernel_cache.max_elems;k++) {
00598     if(kernel_cache.invindex[k] != -1) {
00599       if(kernel_cache.lru[k]<least_time) {
00600     least_time=kernel_cache.lru[k];
00601     least_elem=k;
00602       }
00603     }
00604   }
00605 
00606   if(least_elem != -1) {
00607     kernel_cache_free(least_elem);
00608     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00609     kernel_cache.invindex[least_elem]=-1;
00610     return(1);
00611   }
00612   return(0);
00613 }
00614 
00615 // Get a free cache entry. In case cache is full, the lru
00616 // element is removed.
00617 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00618 {
00619     int32_t result;
00620     if((result = kernel_cache_malloc()) == -1) {
00621         if(kernel_cache_free_lru()) {
00622             result = kernel_cache_malloc();
00623         }
00624     }
00625     kernel_cache.index[cacheidx]=result;
00626     if(result == -1) {
00627         return(0);
00628     }
00629     kernel_cache.invindex[result]=cacheidx;
00630     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00631     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00632 }
00633 #endif //USE_SVMLIGHT
00634 
00635 void CKernel::load(CFile* loader)
00636 {
00637     SG_SET_LOCALE_C;
00638     SG_RESET_LOCALE;
00639 }
00640 
00641 void CKernel::save(CFile* writer)
00642 {
00643     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00644     SG_SET_LOCALE_C;
00645     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00646     SG_RESET_LOCALE;
00647 }
00648 
00649 void CKernel::remove_lhs_and_rhs()
00650 {
00651     SG_DEBUG("entering CKernel::remove_lhs_and_rhs\n")
00652     if (rhs!=lhs)
00653         SG_UNREF(rhs);
00654     rhs = NULL;
00655     num_rhs=0;
00656 
00657     SG_UNREF(lhs);
00658     lhs = NULL;
00659     num_lhs=0;
00660     lhs_equals_rhs=false;
00661 
00662 #ifdef USE_SVMLIGHT
00663     cache_reset();
00664 #endif //USE_SVMLIGHT
00665     SG_DEBUG("leaving CKernel::remove_lhs_and_rhs\n")
00666 }
00667 
00668 void CKernel::remove_lhs()
00669 {
00670     if (rhs==lhs)
00671         rhs=NULL;
00672     SG_UNREF(lhs);
00673     lhs = NULL;
00674     num_lhs=0;
00675     lhs_equals_rhs=false;
00676 #ifdef USE_SVMLIGHT
00677     cache_reset();
00678 #endif //USE_SVMLIGHT
00679 }
00680 
00682 void CKernel::remove_rhs()
00683 {
00684     if (rhs!=lhs)
00685         SG_UNREF(rhs);
00686     rhs = NULL;
00687     num_rhs=0;
00688     lhs_equals_rhs=false;
00689 
00690 #ifdef USE_SVMLIGHT
00691     cache_reset();
00692 #endif //USE_SVMLIGHT
00693 }
00694 
00695 #define ENUM_CASE(n) case n: SG_INFO(#n " ") break;
00696 
00697 void CKernel::list_kernel()
00698 {
00699     SG_INFO("%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00700             get_combined_kernel_weight(),
00701             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00702             "SLOWBUTMEMEFFICIENT");
00703 
00704     switch (get_kernel_type())
00705     {
00706         ENUM_CASE(K_UNKNOWN)
00707         ENUM_CASE(K_LINEAR)
00708         ENUM_CASE(K_POLY)
00709         ENUM_CASE(K_GAUSSIAN)
00710         ENUM_CASE(K_GAUSSIANSHIFT)
00711         ENUM_CASE(K_GAUSSIANMATCH)
00712         ENUM_CASE(K_HISTOGRAM)
00713         ENUM_CASE(K_SALZBERG)
00714         ENUM_CASE(K_LOCALITYIMPROVED)
00715         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00716         ENUM_CASE(K_FIXEDDEGREE)
00717         ENUM_CASE(K_WEIGHTEDDEGREE)
00718         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00719         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00720         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00721         ENUM_CASE(K_POLYMATCH)
00722         ENUM_CASE(K_ALIGNMENT)
00723         ENUM_CASE(K_COMMWORDSTRING)
00724         ENUM_CASE(K_COMMULONGSTRING)
00725         ENUM_CASE(K_SPECTRUMRBF)
00726         ENUM_CASE(K_COMBINED)
00727         ENUM_CASE(K_AUC)
00728         ENUM_CASE(K_CUSTOM)
00729         ENUM_CASE(K_SIGMOID)
00730         ENUM_CASE(K_CHI2)
00731         ENUM_CASE(K_DIAG)
00732         ENUM_CASE(K_CONST)
00733         ENUM_CASE(K_DISTANCE)
00734         ENUM_CASE(K_LOCALALIGNMENT)
00735         ENUM_CASE(K_PYRAMIDCHI2)
00736         ENUM_CASE(K_OLIGO)
00737         ENUM_CASE(K_MATCHWORD)
00738         ENUM_CASE(K_TPPK)
00739         ENUM_CASE(K_REGULATORYMODULES)
00740         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00741         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00742         ENUM_CASE(K_WAVELET)
00743         ENUM_CASE(K_WAVE)
00744         ENUM_CASE(K_CAUCHY)
00745         ENUM_CASE(K_TSTUDENT)
00746         ENUM_CASE(K_MULTIQUADRIC)
00747         ENUM_CASE(K_EXPONENTIAL)
00748         ENUM_CASE(K_RATIONAL_QUADRATIC)
00749         ENUM_CASE(K_POWER)
00750         ENUM_CASE(K_SPHERICAL)
00751         ENUM_CASE(K_LOG)
00752         ENUM_CASE(K_SPLINE)
00753         ENUM_CASE(K_ANOVA)
00754         ENUM_CASE(K_CIRCULAR)
00755         ENUM_CASE(K_INVERSEMULTIQUADRIC)
00756         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00757         ENUM_CASE(K_DISTANTSEGMENTS)
00758         ENUM_CASE(K_BESSEL)
00759         ENUM_CASE(K_JENSENSHANNON)
00760         ENUM_CASE(K_DIRECTOR)
00761         ENUM_CASE(K_PRODUCT)
00762         ENUM_CASE(K_LINEARARD)
00763         ENUM_CASE(K_GAUSSIANARD)
00764         ENUM_CASE(K_STREAMING)
00765     }
00766 
00767     switch (get_feature_class())
00768     {
00769         ENUM_CASE(C_UNKNOWN)
00770         ENUM_CASE(C_DENSE)
00771         ENUM_CASE(C_SPARSE)
00772         ENUM_CASE(C_STRING)
00773         ENUM_CASE(C_STREAMING_DENSE)
00774         ENUM_CASE(C_STREAMING_SPARSE)
00775         ENUM_CASE(C_STREAMING_STRING)
00776         ENUM_CASE(C_STREAMING_VW)
00777         ENUM_CASE(C_COMBINED)
00778         ENUM_CASE(C_COMBINED_DOT)
00779         ENUM_CASE(C_WD)
00780         ENUM_CASE(C_SPEC)
00781         ENUM_CASE(C_WEIGHTEDSPEC)
00782         ENUM_CASE(C_POLY)
00783         ENUM_CASE(C_BINNED_DOT)
00784         ENUM_CASE(C_DIRECTOR_DOT)
00785         ENUM_CASE(C_LATENT)
00786         ENUM_CASE(C_MATRIX)
00787         ENUM_CASE(C_FACTOR_GRAPH)
00788         ENUM_CASE(C_ANY)
00789     }
00790 
00791     switch (get_feature_type())
00792     {
00793         ENUM_CASE(F_UNKNOWN)
00794         ENUM_CASE(F_BOOL)
00795         ENUM_CASE(F_CHAR)
00796         ENUM_CASE(F_BYTE)
00797         ENUM_CASE(F_SHORT)
00798         ENUM_CASE(F_WORD)
00799         ENUM_CASE(F_INT)
00800         ENUM_CASE(F_UINT)
00801         ENUM_CASE(F_LONG)
00802         ENUM_CASE(F_ULONG)
00803         ENUM_CASE(F_SHORTREAL)
00804         ENUM_CASE(F_DREAL)
00805         ENUM_CASE(F_LONGREAL)
00806         ENUM_CASE(F_ANY)
00807     }
00808     SG_INFO("\n")
00809 }
00810 #undef ENUM_CASE
00811 
00812 bool CKernel::init_optimization(
00813     int32_t count, int32_t *IDX, float64_t * weights)
00814 {
00815    SG_ERROR("kernel does not support linadd optimization\n")
00816     return false ;
00817 }
00818 
00819 bool CKernel::delete_optimization()
00820 {
00821    SG_ERROR("kernel does not support linadd optimization\n")
00822     return false;
00823 }
00824 
00825 float64_t CKernel::compute_optimized(int32_t vector_idx)
00826 {
00827    SG_ERROR("kernel does not support linadd optimization\n")
00828     return 0;
00829 }
00830 
00831 void CKernel::compute_batch(
00832     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00833     int32_t* IDX, float64_t* weights, float64_t factor)
00834 {
00835    SG_ERROR("kernel does not support batch computation\n")
00836 }
00837 
00838 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00839 {
00840    SG_ERROR("kernel does not support linadd optimization, add_to_normal not implemented\n")
00841 }
00842 
00843 void CKernel::clear_normal()
00844 {
00845    SG_ERROR("kernel does not support linadd optimization, clear_normal not implemented\n")
00846 }
00847 
00848 int32_t CKernel::get_num_subkernels()
00849 {
00850     return 1;
00851 }
00852 
00853 void CKernel::compute_by_subkernel(
00854     int32_t vector_idx, float64_t * subkernel_contrib)
00855 {
00856    SG_ERROR("kernel compute_by_subkernel not implemented\n")
00857 }
00858 
00859 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00860 {
00861     num_weights=1 ;
00862     return &combined_kernel_weight ;
00863 }
00864 
00865 SGVector<float64_t> CKernel::get_subkernel_weights()
00866 {
00867     int num_weights = 1;
00868     const float64_t* weight = get_subkernel_weights(num_weights);
00869     return SGVector<float64_t>(const_cast<float64_t*>(weight),1,false);
00870 }
00871 
00872 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights)
00873 {
00874     ASSERT(weights.vector)
00875     if (weights.vlen!=1)
00876       SG_ERROR("number of subkernel weights should be one ...\n")
00877 
00878     combined_kernel_weight = weights.vector[0] ;
00879 }
00880 
00881 CKernel* CKernel::obtain_from_generic(CSGObject* kernel)
00882 {
00883     if (kernel)
00884     {
00885         CKernel* casted=dynamic_cast<CKernel*>(kernel);
00886         REQUIRE(casted, "CKernel::obtain_from_generic(): Error, provided object"
00887                 " of class \"%s\" is not a subclass of CKernel!\n",
00888                 kernel->get_name());
00889         return casted;
00890     }
00891     else
00892         return NULL;
00893 }
00894 
00895 bool CKernel::init_optimization_svm(CSVM * svm)
00896 {
00897     int32_t num_suppvec=svm->get_num_support_vectors();
00898     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00899     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00900 
00901     for (int32_t i=0; i<num_suppvec; i++)
00902     {
00903         sv_idx[i]    = svm->get_support_vector(i);
00904         sv_weight[i] = svm->get_alpha(i);
00905     }
00906     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00907 
00908     SG_FREE(sv_idx);
00909     SG_FREE(sv_weight);
00910     return ret;
00911 }
00912 
00913 void CKernel::load_serializable_post() throw (ShogunException)
00914 {
00915     CSGObject::load_serializable_post();
00916     if (lhs_equals_rhs)
00917         rhs=lhs;
00918 }
00919 
00920 void CKernel::save_serializable_pre() throw (ShogunException)
00921 {
00922     CSGObject::save_serializable_pre();
00923 
00924     if (lhs_equals_rhs)
00925         rhs=NULL;
00926 }
00927 
00928 void CKernel::save_serializable_post() throw (ShogunException)
00929 {
00930     CSGObject::save_serializable_post();
00931 
00932     if (lhs_equals_rhs)
00933         rhs=lhs;
00934 }
00935 
00936 void CKernel::register_params()   {
00937     SG_ADD(&cache_size, "cache_size",
00938         "Cache size in MB.", MS_NOT_AVAILABLE);
00939     SG_ADD((CSGObject**) &lhs, "lhs",
00940       "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
00941     SG_ADD((CSGObject**) &rhs, "rhs",
00942       "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
00943     SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
00944         "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
00945     SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
00946         MS_NOT_AVAILABLE);
00947     SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
00948         MS_NOT_AVAILABLE);
00949     SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
00950             "Combined kernel weight.", MS_AVAILABLE);
00951     SG_ADD(&optimization_initialized, "optimization_initialized",
00952           "Optimization is initialized.", MS_NOT_AVAILABLE);
00953     SG_ADD((machine_int_t*) &opt_type, "opt_type",
00954           "Optimization type.", MS_NOT_AVAILABLE);
00955     SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
00956     SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
00957         MS_AVAILABLE);
00958 }
00959 
00960 
00961 void CKernel::init()
00962 {
00963     cache_size=10;
00964     kernel_matrix=NULL;
00965     lhs=NULL;
00966     rhs=NULL;
00967     num_lhs=0;
00968     num_rhs=0;
00969     lhs_equals_rhs=false;
00970     combined_kernel_weight=1;
00971     optimization_initialized=false;
00972     opt_type=FASTBUTMEMHUNGRY;
00973     properties=KP_NONE;
00974     normalizer=NULL;
00975 
00976 #ifdef USE_SVMLIGHT
00977     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00978 #endif //USE_SVMLIGHT
00979 
00980     set_normalizer(new CIdentityKernelNormalizer());
00981 }
00982 
00983 namespace shogun
00984 {
00986 template <class T> struct K_THREAD_PARAM
00987 {
00989     CKernel* kernel;
00991     int32_t start;
00993     int32_t end;
00995     int64_t total_start;
00997     int64_t total_end;
00999     int32_t m;
01001     int32_t n;
01003     T* result;
01005     bool symmetric;
01007     bool verbose;
01008 };
01009 }
01010 
01011 template <class T> void* CKernel::get_kernel_matrix_helper(void* p)
01012 {
01013     K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
01014     int32_t i_start=params->start;
01015     int32_t i_end=params->end;
01016     CKernel* k=params->kernel;
01017     T* result=params->result;
01018     bool symmetric=params->symmetric;
01019     int32_t n=params->n;
01020     int32_t m=params->m;
01021     bool verbose=params->verbose;
01022     int64_t total_start=params->total_start;
01023     int64_t total_end=params->total_end;
01024     int64_t total=total_start;
01025 
01026     for (int32_t i=i_start; i<i_end; i++)
01027     {
01028         int32_t j_start=0;
01029 
01030         if (symmetric)
01031             j_start=i;
01032 
01033         for (int32_t j=j_start; j<n; j++)
01034         {
01035             float64_t v=k->kernel(i,j);
01036             result[i+j*m]=v;
01037 
01038             if (symmetric && i!=j)
01039                 result[j+i*m]=v;
01040 
01041             if (verbose)
01042             {
01043                 total++;
01044 
01045                 if (symmetric && i!=j)
01046                     total++;
01047 
01048                 if (total%100 == 0)
01049                     SG_OBJ_PROGRESS(k, total, total_start, total_end)
01050 
01051                 if (CSignal::cancel_computations())
01052                     break;
01053             }
01054         }
01055 
01056     }
01057 
01058     return NULL;
01059 }
01060 
01061 template <class T>
01062 SGMatrix<T> CKernel::get_kernel_matrix()
01063 {
01064     T* result = NULL;
01065 
01066     REQUIRE(has_features(), "no features assigned to kernel\n")
01067 
01068     int32_t m=get_num_vec_lhs();
01069     int32_t n=get_num_vec_rhs();
01070 
01071     int64_t total_num = int64_t(m)*n;
01072 
01073     // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
01074     bool symmetric= (lhs && lhs==rhs && m==n);
01075 
01076     SG_DEBUG("returning kernel matrix of size %dx%d\n", m, n)
01077 
01078     result=SG_MALLOC(T, total_num);
01079 
01080     int32_t num_threads=parallel->get_num_threads();
01081     if (num_threads < 2)
01082     {
01083         K_THREAD_PARAM<T> params;
01084         params.kernel=this;
01085         params.result=result;
01086         params.start=0;
01087         params.end=m;
01088         params.total_start=0;
01089         params.total_end=total_num;
01090         params.n=n;
01091         params.m=m;
01092         params.symmetric=symmetric;
01093         params.verbose=true;
01094         get_kernel_matrix_helper<T>((void*) &params);
01095     }
01096     else
01097     {
01098         pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
01099         K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
01100         int64_t step= total_num/num_threads;
01101 
01102         int32_t t;
01103 
01104         num_threads--;
01105         for (t=0; t<num_threads; t++)
01106         {
01107             params[t].kernel = this;
01108             params[t].result = result;
01109             params[t].start = compute_row_start(t*step, n, symmetric);
01110             params[t].end = compute_row_start((t+1)*step, n, symmetric);
01111             params[t].total_start=t*step;
01112             params[t].total_end=(t+1)*step;
01113             params[t].n=n;
01114             params[t].m=m;
01115             params[t].symmetric=symmetric;
01116             params[t].verbose=false;
01117 
01118             int code=pthread_create(&threads[t], NULL,
01119                     CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
01120 
01121             if (code != 0)
01122             {
01123                 SG_WARNING("Thread creation failed (thread %d of %d) "
01124                         "with error:'%s'\n",t, num_threads, strerror(code));
01125                 num_threads=t;
01126                 break;
01127             }
01128         }
01129 
01130         params[t].kernel = this;
01131         params[t].result = result;
01132         params[t].start = compute_row_start(t*step, n, symmetric);
01133         params[t].end = m;
01134         params[t].total_start=t*step;
01135         params[t].total_end=total_num;
01136         params[t].n=n;
01137         params[t].m=m;
01138         params[t].symmetric=symmetric;
01139         params[t].verbose=true;
01140         get_kernel_matrix_helper<T>(&params[t]);
01141 
01142         for (t=0; t<num_threads; t++)
01143         {
01144             if (pthread_join(threads[t], NULL) != 0)
01145                 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads)
01146         }
01147 
01148         SG_FREE(params);
01149         SG_FREE(threads);
01150     }
01151 
01152     SG_DONE()
01153 
01154     return SGMatrix<T>(result,m,n,true);
01155 }
01156 
01157 template SGMatrix<float64_t> CKernel::get_kernel_matrix<float64_t>();
01158 template SGMatrix<float32_t> CKernel::get_kernel_matrix<float32_t>();
01159 
01160 template void* CKernel::get_kernel_matrix_helper<float64_t>(void* p);
01161 template void* CKernel::get_kernel_matrix_helper<float32_t>(void* p);
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation