/***********************************************************************/
/*                                                                     */
/*  SVMLight.cpp                                                       */
/*                                                                     */
/*  Author: Thorsten Joachims                                          */
/*  Date: 19.07.99                                                     */
/*                                                                     */
/*  Copyright (c) 1999  Universitaet Dortmund - All rights reserved    */
/*                                                                     */
/*  This software is available for non-commercial use only. It must    */
/*  not be modified and distributed without prior permission of the    */
/*  author. The author is not responsible for implications from the    */
/*  use of this software.                                              */
/*                                                                     */
/*  THIS INCLUDES THE FOLLOWING ADDITIONS                              */
/*  Generic Kernel Interfacing: Soeren Sonnenburg                      */
/*  Parallelizations: Soeren Sonnenburg                                */
/*  Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,       */
/*                            Alexander Zien, Marius Kloft, Chen Guohua*/
/*  Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                  */
/*                                                                     */
/***********************************************************************/
#include <shogun/lib/config.h>

#ifdef USE_SVMLIGHT

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/lapack.h>

#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/lib/external/pr_loqo.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/kernel/CombinedKernel.h>

#include <unistd.h>

#include <shogun/base/Parallel.h>
#include <shogun/labels/BinaryLabels.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* last_lin;
    int32_t* active;
    int32_t* docs;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_SVMLIGHT
{
    float64_t* lin;
    float64_t* W;
    int32_t start, end;
    int32_t* active2dnum;
    int32_t* docs;
    CKernel* kernel;
};

struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* aicache;
    float64_t* a;
    float64_t* a_old;
    int32_t* changed2dnum;
    int32_t* inactive2dnum;
    int32_t* label;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_KERNEL
{
    float64_t* Kval;
    int32_t *KI, *KJ;
    int32_t start, end;
    CSVMLight* svmlight;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

void* CSVMLight::update_linear_component_linadd_helper(void* p)
{
    S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;

    int32_t jj=0, j=0;

    for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
        params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);

    return NULL;
}

void* CSVMLight::compute_kernel_helper(void* p)
{
    S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;

    int32_t jj=0;
    for (jj=params->start;jj<params->end;jj++)
        params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]);

    return NULL;
}

CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}

CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}

void CSVMLight::init()
{
    // optimizer settings
    primal=NULL;
    dual=NULL;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    model_b=0;
    verbosity=1;
    opt_precision=DEF_PRECISION;

    // svm variables
    W=NULL;
    model=SG_MALLOC(MODEL, 1);
    learn_parm=SG_MALLOC(LEARN_PARM, 1);
    model->supvec=NULL;
    model->alpha=NULL;
    model->index=NULL;

    // MKL stuff
    mymaxdiff=1;
    mkl_converged=false;
}

CSVMLight::~CSVMLight()
{
    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    SG_FREE(model);
    SG_FREE(learn_parm);

    // MKL stuff
    SG_FREE(W);

    // optimizer variables
    SG_FREE(dual);
    SG_FREE(primal);
}
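/* Usage sketch (editor's addition, not part of the original file): one
 * plausible way to drive this class through the Shogun 3.x C++ API. The
 * feature/kernel classes and parameter values below are illustrative
 * assumptions; error handling and SG_UNREF cleanup are omitted.
 *
 *   #include <shogun/features/DenseFeatures.h>
 *   #include <shogun/kernel/GaussianKernel.h>
 *
 *   SGMatrix<float64_t> feats_mat(num_dims, num_vecs); // fill with data
 *   SGVector<float64_t> labs_vec(num_vecs);            // fill with +1/-1
 *   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(feats_mat);
 *   CBinaryLabels* labs = new CBinaryLabels(labs_vec);
 *   CGaussianKernel* kern = new CGaussianKernel(10, 2.0); // cache size, width
 *   CSVMLight* svm = new CSVMLight(1.0, kern, labs);      // C=1.0
 *   svm->train(feats);                 // ends up in train_machine() below
 *   CBinaryLabels* out = svm->apply_binary(feats);
 */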
bool CSVMLight::train_machine(CFeatures* data)
{
    // certain setup params
    mkl_converged=false;
    verbosity=1;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    strcpy (learn_parm->predfile, "");
    learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1;
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR("SVM_light can not proceed without kernel!\n")

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR("SVM_light can not proceed without initialized kernel!\n")

    ASSERT(m_labels && m_labels->get_num_labels())
    ASSERT(m_labels->get_label_type() == LT_BINARY)
    ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels())

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        kernel->clear_normal();

    // output some info
    SG_DEBUG("threads = %i\n", parallel->get_num_threads())
    SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize)
    SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit)
    SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD))
    SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION))
    SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION))
    SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT")
"FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" ) 00238 SG_DEBUG("get_solver_type() = %i\n", get_solver_type()) 00239 SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled()) 00240 SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled()) 00241 SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels()) 00242 00243 use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) || 00244 (get_linadd_enabled() && kernel->has_property(KP_LINADD))); 00245 00246 SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache) 00247 00248 if (kernel->get_kernel_type() == K_COMBINED) 00249 { 00250 00251 for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++) 00252 { 00253 CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx); 00254 // allocate kernel cache but clean up beforehand 00255 kn->resize_kernel_cache(kn->get_cache_size()); 00256 SG_UNREF(kn); 00257 } 00258 } 00259 00260 kernel->resize_kernel_cache(kernel->get_cache_size()); 00261 00262 // train the svm 00263 svm_learn(); 00264 00265 // brain damaged svm light work around 00266 create_new_model(model->sv_num-1); 00267 set_bias(-model->b); 00268 for (int32_t i=0; i<model->sv_num-1; i++) 00269 { 00270 set_alpha(i, model->alpha[i+1]); 00271 set_support_vector(i, model->supvec[i+1]); 00272 } 00273 00274 // in case of LINADD enabled kernels cleanup! 00275 if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) 00276 { 00277 kernel->clear_normal() ; 00278 kernel->delete_optimization() ; 00279 } 00280 00281 if (use_kernel_cache) 00282 kernel->kernel_cache_cleanup(); 00283 00284 return true ; 00285 } 00286 00287 int32_t CSVMLight::get_runtime() 00288 { 00289 clock_t start; 00290 start = clock(); 00291 return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC)); 00292 } 00293 00294 00295 void CSVMLight::svm_learn() 00296 { 00297 int32_t *inconsistent, i; 00298 int32_t misclassified,upsupvecnum; 00299 float64_t maxdiff, *lin, *c, *a; 00300 int32_t iterations; 00301 int32_t trainpos=0, trainneg=0 ; 00302 ASSERT(m_labels) 00303 SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels(); 00304 int32_t totdoc=lab.vlen; 00305 ASSERT(lab.vector && lab.vlen) 00306 int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen); 00307 00308 int32_t* docs=SG_MALLOC(int32_t, totdoc); 00309 SG_FREE(W); 00310 W=NULL; 00311 count = 0 ; 00312 00313 if (kernel->has_property(KP_KERNCOMBINATION) && callback) 00314 { 00315 W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels()); 00316 for (i=0; i<totdoc*kernel->get_num_subkernels(); i++) 00317 W[i]=0; 00318 } 00319 00320 for (i=0; i<totdoc; i++) 00321 docs[i]=i; 00322 00323 float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */ 00324 float64_t *a_fullset; /* buffer for storing alpha on full sample in loo */ 00325 TIMING timing_profile; 00326 SHRINK_STATE shrink_state; 00327 00328 timing_profile.time_kernel=0; 00329 timing_profile.time_opti=0; 00330 timing_profile.time_shrink=0; 00331 timing_profile.time_update=0; 00332 timing_profile.time_model=0; 00333 timing_profile.time_check=0; 00334 timing_profile.time_select=0; 00335 00336 /* make sure -n value is reasonable */ 00337 if((learn_parm->svm_newvarsinqp < 2) 00338 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { 00339 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; 00340 } 00341 00342 init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK); 00343 00344 inconsistent = SG_MALLOC(int32_t, totdoc); 00345 c = SG_MALLOC(float64_t, 
    a = SG_MALLOC(float64_t, totdoc);
    a_fullset = SG_MALLOC(float64_t, totdoc);
    xi_fullset = SG_MALLOC(float64_t, totdoc);
    lin = SG_MALLOC(float64_t, totdoc);
    if (m_linear_term.vlen>0)
        learn_parm->eps=get_linear_term_array();
    else
    {
        learn_parm->eps=SG_MALLOC(float64_t, totdoc); /* equivalent regression epsilon for classification */
        SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    model->supvec = SG_MALLOC(int32_t, totdoc+2);
    model->alpha = SG_MALLOC(float64_t, totdoc+2);
    model->index = SG_MALLOC(int32_t, totdoc+2);

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0; /* element 0 reserved and empty for now */
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;

    for (i=0;i<totdoc;i++) { /* various inits */
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        if(label[i] > 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if(label[i] < 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }

    /* compute starting state for initial alpha values */
    SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors())

    if(m_alpha.vector && get_num_support_vectors()) {
        if(verbosity>=1) {
            SG_INFO("Computing starting state...")
        }

        float64_t* alpha = SG_MALLOC(float64_t, totdoc);

        for (i=0; i<totdoc; i++)
            alpha[i]=0;

        for (i=0; i<get_num_support_vectors(); i++)
            alpha[get_support_vector(i)]=get_alpha(i);

        int32_t* index = SG_MALLOC(int32_t, totdoc);
        int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
        float64_t* aicache = SG_MALLOC(float64_t, totdoc);
        for (i=0;i<totdoc;i++) { /* create full index and clip alphas */
            index[i]=1;
            alpha[i]=fabs(alpha[i]);
            if(alpha[i]<0) alpha[i]=0;
            if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
        }

        if (use_kernel_cache)
        {
            if (callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
                {
                    CKernel* kn = k->get_kernel(k_idx);
                    for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV
                        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV
                        if((alpha[i]==learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    SG_UNREF(kn);
                }
            }
            else
            {
                for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
                    if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);

                for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
                    if((alpha[i]==learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);
            }
        }
        compute_index(index,totdoc,index2dnum);
        update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                lin,aicache,NULL);
        calculate_svm_model(docs,label,lin,alpha,a,c,
                index2dnum,index2dnum);
        for (i=0;i<totdoc;i++) { /* copy initial alphas */
            a[i]=alpha[i];
        }

        SG_FREE(index);
        SG_FREE(index2dnum);
        SG_FREE(aicache);
        SG_FREE(alpha);

        if(verbosity>=1)
            SG_DONE()
    }
    SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg)
    SG_DEBUG("Optimizing...\n")

    /* train the svm */
    iterations=optimize_to_convergence(docs,label,totdoc,
            &shrink_state,inconsistent,a,lin,
            c,&timing_profile,
            &maxdiff,(int32_t)-1,
            (int32_t)1);

    if(verbosity>=1) {
        if(verbosity==1)
        {
            SG_DONE()
            SG_DEBUG("(%ld iterations)", iterations)
        }

        misclassified=0;
        for (i=0;(i<totdoc);i++) { /* get final statistic */
            if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
                misclassified,maxdiff);

        SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b)
        if (maxdiff>epsilon)
            SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon)

        upsupvecnum=0;
        for (i=1;i<model->sv_num;i++)
        {
            if(fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO("Number of SV: %d (including %d at upper bound)\n",
                model->sv_num-1,upsupvecnum);
    }

    shrink_state_cleanup(&shrink_state);
    SG_FREE(label);
    SG_FREE(inconsistent);
    SG_FREE(c);
    SG_FREE(a);
    SG_FREE(a_fullset);
    SG_FREE(xi_fullset);
    SG_FREE(lin);
    SG_FREE(learn_parm->eps);
    SG_FREE(learn_parm->svm_cost);
    SG_FREE(docs);
}

int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
        SHRINK_STATE *shrink_state,
        int32_t *inconsistent,
        float64_t *a, float64_t *lin, float64_t *c,
        TIMING *timing_profile, float64_t *maxdiff,
        int32_t heldout, int32_t retrain)
/* docs:         Training vectors (x-part) */
/* label:        Training labels/value (y-part, zero if test example for
                 transduction) */
/* totdoc:       Number of examples in docs/label */
/* learn_parm:   Learning parameters */
/* kernel_parm:  Kernel parameters */
/* kernel_cache: Initialized/partly filled cache, if using a kernel.
                 NULL if linear. */
/* shrink_state:  State of active variables */
/* inconsistent:  examples thrown out as inconsistent */
/* a:             alphas */
/* lin:           linear component of gradient */
/* c:             right hand side of inequalities (margin) */
/* maxdiff:       returns maximum violation of KT-conditions */
/* heldout:       marks held-out example for leave-one-out (or -1) */
/* retrain:       selects training mode (1=regular / 2=holdout) */
{
    int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
    int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
    int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
    int32_t *working2dnum,*selexam;
    int32_t activenum;
    float64_t criterion, eq;
    float64_t *a_old;
    int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
    float64_t epsilon_crit_org;
    float64_t bestmaxdiff;
    float64_t worstmaxdiff;
    int32_t bestmaxdiffiter,terminate;
    bool reactivated=false;

    float64_t *selcrit; /* buffer for sorting */
    float64_t *aicache; /* buffer to keep one row of hessian */
    QP qp;              /* buffer for one quadratic program */

    epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
    if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
        learn_parm->epsilon_crit=2.0;
        /* caching makes no sense for linear kernel */
    }
    learn_parm->epsilon_shrink=2;
    (*maxdiff)=1;

    SG_DEBUG("totdoc:%d\n",totdoc)
    chosen = SG_MALLOC(int32_t, totdoc);
    last_suboptimal_at = SG_MALLOC(int32_t, totdoc);
    key = SG_MALLOC(int32_t, totdoc+11);
    selcrit = SG_MALLOC(float64_t, totdoc);
    selexam = SG_MALLOC(int32_t, totdoc);
    a_old = SG_MALLOC(float64_t, totdoc);
    aicache = SG_MALLOC(float64_t, totdoc);
    working2dnum = SG_MALLOC(int32_t, totdoc+11);
    active2dnum = SG_MALLOC(int32_t, totdoc+11);
    qp.opt_ce = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_ce0 = SG_MALLOC(float64_t, 1);
    qp.opt_g = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
    qp.opt_g0 = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_xinit = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_low = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_up = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

    choosenum=0;
    inconsistentnum=0;
    if(!retrain) retrain=1;
    iteration=1;
    bestmaxdiffiter=1;
    bestmaxdiff=999999999;
    worstmaxdiff=1e-10;
    terminate=0;

    kernel->set_time(iteration); /* for lru cache */

    for (i=0;i<totdoc;i++) { /* various inits */
        chosen[i]=0;
        a_old[i]=a[i];
        last_suboptimal_at[i]=1;
        if(inconsistent[i])
            inconsistentnum++;
    }
    activenum=compute_index(shrink_state->active,totdoc,active2dnum);
    inactivenum=totdoc-activenum;
    clear_index(working2dnum);

    /* repeat this loop until we have convergence */
    CTime start_time;
    mkl_converged=false;

#ifdef CYGWIN
    for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
#else
    CSignal::clear_cancel();
    for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
#endif

        if(use_kernel_cache)
            kernel->set_time(iteration); /* for lru cache */

        if(verbosity>=2) t0=get_runtime();
        if(verbosity>=3) {
            SG_DEBUG("\nSelecting working set... ")
        }

        if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
            learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

        i=0;
        for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
            if((chosen[j]>=(learn_parm->svm_maxqpsize/
                            CMath::min(learn_parm->svm_maxqpsize,
                                learn_parm->svm_newvarsinqp)))
                    || (inconsistent[j])
                    || (j == heldout)) {
                chosen[j]=0;
                choosenum--;
            }
            else {
                chosen[j]++;
                working2dnum[i++]=j;
            }
        }
        working2dnum[i]=-1;

        if(retrain == 2) {
            choosenum=0;
            for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
                chosen[j]=0;
            }
            clear_index(working2dnum);
            for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
                if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
                    chosen[i]=99999;
                    choosenum++;
                    a[i]=0;
                }
            }
            if(learn_parm->biased_hyperplane) {
                eq=0;
                for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
                    eq+=a[i]*label[i];
                }
                for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
                    if((eq*label[i] > 0) && (a[i] > 0)) {
                        chosen[i]=88888;
                        choosenum++;
                        if((eq*label[i]) > a[i]) {
                            eq-=(a[i]*label[i]);
                            a[i]=0;
                        }
                        else {
                            a[i]-=(eq*label[i]);
                            eq=0;
                        }
                    }
                }
            }
            compute_index(chosen,totdoc,working2dnum);
        }
        else
        {   /* select working set according to steepest gradient */
            if(iteration % 101)
            {
                already_chosen=0;
                if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
                        (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                {
                    /* select part of the working set from cache */
                    already_chosen=select_next_qp_subproblem_grad(
                            label,a,lin,c,totdoc,
                            (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
                                    learn_parm->svm_newvarsinqp)/2),
                            inconsistent,active2dnum,
                            working2dnum,selcrit,selexam,1,
                            key,chosen);
                    choosenum+=already_chosen;
                }
                choosenum+=select_next_qp_subproblem_grad(
                        label,a,lin,c,totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp-already_chosen),
                        inconsistent,active2dnum,
                        working2dnum,selcrit,selexam,0,key,
                        chosen);
            }
            else { /* once in a while, select a somewhat random working set
                      to break out of infinite loops caused by numerical
                      inaccuracies in the core qp-solver */
                choosenum+=select_next_qp_subproblem_rand(
                        label,a,lin,c,totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp),
                        inconsistent,active2dnum,
                        working2dnum,selcrit,selexam,key,
                        chosen,iteration);
            }
        }

        if(verbosity>=2) {
            SG_INFO(" %ld vectors chosen\n",choosenum)
        }

        if(verbosity>=2) t1=get_runtime();

        if (use_kernel_cache)
        {
            // in case of MKL w/o linadd cache each kernel independently
            // else if linadd is disabled cache single kernel
            if ( callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
                {
                    CKernel* kn = k->get_kernel(k_idx);
                    kn->cache_multiple_kernel_rows(working2dnum, choosenum);
                    SG_UNREF(kn);
                }
            }
            else
                kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
        }

        if(verbosity>=2) t2=get_runtime();

        if(retrain != 2) {
            optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
                    totdoc,working2dnum,choosenum,a,lin,c,
                    aicache,&qp,&epsilon_crit_org);
        }

        if(verbosity>=2) t3=get_runtime();
        update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                lin,aicache,c);

        if(verbosity>=2) t4=get_runtime();
        supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);

        if(verbosity>=2) t5=get_runtime();

        for (jj=0;(i=working2dnum[jj])>=0;jj++) {
            a_old[i]=a[i];
        }

        retrain=check_optimality(label,a,lin,c,totdoc,
                maxdiff,epsilon_crit_org,&misclassified,
                inconsistent,active2dnum,last_suboptimal_at,
                iteration);

        if(verbosity>=2) {
            t6=get_runtime();
            timing_profile->time_select+=t1-t0;
            timing_profile->time_kernel+=t2-t1;
            timing_profile->time_opti+=t3-t2;
            timing_profile->time_update+=t4-t3;
            timing_profile->time_model+=t5-t4;
            timing_profile->time_check+=t6-t5;
        }

        /* checking whether optimizer got stuck */
        if((*maxdiff) < bestmaxdiff) {
            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;
        }
        if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
            /* no progress for a long time? */
            terminate=1;
            retrain=0;
            SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n")
            SG_DEBUG("(iteration: %d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter)
        }

        noshrink= (get_shrinking_enabled()) ? 0 : 1;

        if ((!callback) && (!retrain) && (inactivenum>0) &&
                ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
        {
            t1=get_runtime();
            SG_DEBUG("reactivating inactive examples\n")

            reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                    iteration,inconsistent,
                    docs,aicache,
                    maxdiff);
            reactivated=true;
            SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org)
            /* Update to new active variables. */
            activenum=compute_index(shrink_state->active,totdoc,active2dnum);
            inactivenum=totdoc-activenum;
            /* reset watchdog */
            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;

            /* termination criterion */
            noshrink=1;
            retrain=0;
            if((*maxdiff) > learn_parm->epsilon_crit)
            {
                SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit)
                retrain=1;
            }
            timing_profile->time_shrink+=get_runtime()-t1;
            if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                    || (verbosity>=2)) {
                SG_DONE()
                SG_DEBUG("Number of inactive variables = %ld\n", inactivenum)
            }
        }

        if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
            learn_parm->epsilon_crit=(*maxdiff);
        if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
            learn_parm->epsilon_crit/=4.0;
            retrain=1;
            noshrink=1;
        }
        if(learn_parm->epsilon_crit<epsilon_crit_org)
            learn_parm->epsilon_crit=epsilon_crit_org;

        if(verbosity>=2) {
            SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
                    supvecnum,model->at_upper_bound,(*maxdiff));
        }
        mymaxdiff=*maxdiff;

        // don't shrink w/ mkl
        if (((iteration % 10) == 0) && (!noshrink) && !callback)
        {
            activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
                    CMath::max((int32_t)(activenum/10),
                        CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
                    a,inconsistent, c, lin, label);

            inactivenum=totdoc-activenum;

            if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
                    && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {

                kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
                            (int32_t) (kernel->get_activenum_cache()-supvecnum)),
                        shrink_state->active);
            }
        }

        if (bestmaxdiff>worstmaxdiff)
            worstmaxdiff=bestmaxdiff;

        SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6)

        /* Terminate loop */
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
            terminate = 1;
            retrain = 0;
        }

    } /* end of loop */

    SG_DEBUG("inactive:%d\n", inactivenum)

    if (inactivenum && !reactivated && !callback)
    {
        SG_DEBUG("reactivating inactive examples\n")
        reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                iteration,inconsistent,
                docs,aicache,
                maxdiff);
        SG_DEBUG("done reactivating inactive examples\n")
        /* Update to new active variables. */
        activenum=compute_index(shrink_state->active,totdoc,active2dnum);
        inactivenum=totdoc-activenum;
        /* reset watchdog */
        bestmaxdiff=(*maxdiff);
        bestmaxdiffiter=iteration;
    }

    // use this for our purposes!
    criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
    CSVM::set_objective(criterion);

    SG_FREE(chosen);
    SG_FREE(last_suboptimal_at);
    SG_FREE(key);
    SG_FREE(selcrit);
    SG_FREE(selexam);
    SG_FREE(a_old);
    SG_FREE(aicache);
    SG_FREE(working2dnum);
    SG_FREE(active2dnum);
    SG_FREE(qp.opt_ce);
    SG_FREE(qp.opt_ce0);
    SG_FREE(qp.opt_g);
    SG_FREE(qp.opt_g0);
    SG_FREE(qp.opt_xinit);
    SG_FREE(qp.opt_low);
    SG_FREE(qp.opt_up);

    learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */

    return(iteration);
}
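/* Added explanation (editor's note, not in the original sources): with
 * lin[i] = sum_j a_j y_j k(i,j), the sum computed below is
 *
 *   W(a) = sum_i (eps_i - y_i c_i) a_i + 1/2 sum_i a_i y_i lin_i.
 *
 * For plain classification eps[i] = -1 and c[i] = 0 (see svm_learn), so this
 * reduces to the negated SVM dual
 *   -sum_i a_i + 1/2 sum_{i,j} a_i a_j y_i y_j k(i,j).
 */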
float64_t CSVMLight::compute_objective_function(
    float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
    int32_t totdoc)
/* Return value of objective function. */
/* Works only relative to the active variables! */
{
    /* calculate value of objective function */
    float64_t criterion=0;

    for (int32_t i=0;i<totdoc;i++)
        criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];

    return(criterion);
}

void CSVMLight::clear_index(int32_t *index)
/* initializes and empties index */
{
    index[0]=-1;
}

void CSVMLight::add_to_index(int32_t *index, int32_t elem)
/* appends elem to the index (which is terminated by -1) */
{
    register int32_t i;
    for (i=0;index[i] != -1;i++);
    index[i]=elem;
    index[i+1]=-1;
}

int32_t CSVMLight::compute_index(
    int32_t *binfeature, int32_t range, int32_t *index)
/* create an inverted index of binfeature */
{
    register int32_t i,ii;

    ii=0;
    for (i=0;i<range;i++) {
        if(binfeature[i]) {
            index[ii]=i;
            ii++;
        }
    }
    for (i=0;i<4;i++) {
        index[ii+i]=-1;
    }
    return(ii);
}

void CSVMLight::optimize_svm(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
    int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
    float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
/* Do optimization on the working set. */
{
    int32_t i;
    float64_t *a_v;

    //compute_matrices_for_optimization_parallel(docs,label,
    //        exclude_from_eq_const,eq_target,chosen,
    //        active2dnum,working2dnum,a,lin,c,
    //        varnum,totdoc,aicache,qp);

    compute_matrices_for_optimization(docs,label,
            exclude_from_eq_const,eq_target,chosen,
            active2dnum,working2dnum,a,lin,c,
            varnum,totdoc,aicache,qp);

    if(verbosity>=3) {
        SG_DEBUG("Running optimizer...")
    }
    /* call the qp-subsolver */
    a_v=optimize_qp(qp,epsilon_crit_target,
            learn_parm->svm_maxqpsize,
            &(model->b),                /* in case the optimizer gives us */
            learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
                                        /* b is calculated in calculate_model. */
    if(verbosity>=3) {
        SG_DONE()
    }

    for (i=0;i<varnum;i++)
        a[working2dnum[i]]=a_v[i];
}

void CSVMLight::compute_matrices_for_optimization_parallel(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                chosen, active2dnum, key, a, lin, c,
                varnum, totdoc, aicache, qp);
    }
#ifdef HAVE_PTHREAD
    else
    {
        register int32_t ki,kj,i,j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
        for (j=1;j<model->sv_num;j++) { /* start at 1 */
            if((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0]+=model->alpha[j];
            }
        }
        if(learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0; /* eq-constraint will be ignored */

        /* init linear part of objective function */
        for (i=0;i<varnum;i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1)
        int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
        int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
        int32_t Knum=0;
        float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
        for (i=0;i<varnum;i++) {
            ki=key[i];
            KI[Knum]=docs[ki];
            KJ[Knum]=docs[ki];
            Knum++;
            for (j=i+1;j<varnum;j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki];
                KJ[Knum]=docs[kj];
                Knum++;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2)

        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
        int32_t step= Knum/parallel->get_num_threads();
        //SG_DEBUG("\nkernel-step size: %i\n", step)
        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI;
            params[t].KJ=KJ;
            params[t].Kval=Kval;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i],KJ[i]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);

        Knum=0;
        for (i=0;i<varnum;i++) {
            ki=key[i];

            /* Compute the matrix for equality constraints */
            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum];
            Knum++;
            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1;j<varnum;j++) {
                kj=key[j];
                kernel_temp=Kval[Knum];
                Knum++;
                /* compute linear part of objective function */
                qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
                /* compute quadratic part of objective function */
                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
            }

            if(verbosity>=3) {
                if(i % 20 == 0) {
                    SG_DEBUG("%ld..",i)
                }
            }
        }

        SG_FREE(KI);
        SG_FREE(KJ);
        SG_FREE(Kval);

        for (i=0;i<varnum;i++) {
            /* assure starting at feasible point */
            qp->opt_xinit[i]=a[key[i]];
            /* set linear part of objective function */
            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if(verbosity>=3) {
            SG_DONE()
        }
    }
#endif
}

void CSVMLight::compute_matrices_for_optimization(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
    register int32_t ki,kj,i,j;
    register float64_t kernel_temp;

    qp->opt_n=varnum;
    qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
    for (j=1;j<model->sv_num;j++) { /* start at 1 */
        if((!chosen[model->supvec[j]])
                && (!exclude_from_eq_const[(model->supvec[j])])) {
            qp->opt_ce0[0]+=model->alpha[j];
        }
    }
    if(learn_parm->biased_hyperplane)
        qp->opt_m=1;
    else
        qp->opt_m=0; /* eq-constraint will be ignored */

    /* init linear part of objective function */
    for (i=0;i<varnum;i++) {
        qp->opt_g0[i]=lin[key[i]];
    }

    for (i=0;i<varnum;i++) {
        ki=key[i];

        /* Compute the matrix for equality constraints */
        qp->opt_ce[i]=label[ki];
        qp->opt_low[i]=0;
        qp->opt_up[i]=learn_parm->svm_cost[ki];

        kernel_temp=compute_kernel(docs[ki], docs[ki]);
        /* compute linear part of objective function */
        qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
        /* compute quadratic part of objective function */
        qp->opt_g[varnum*i+i]=kernel_temp;

        for (j=i+1;j<varnum;j++) {
            kj=key[j];
            kernel_temp=compute_kernel(docs[ki], docs[kj]);

            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
            qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
            qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
        }

        if(verbosity>=3) {
            if(i % 20 == 0) {
                SG_DEBUG("%ld..",i)
            }
        }
    }

    for (i=0;i<varnum;i++) {
        /* assure starting at feasible point */
        qp->opt_xinit[i]=a[key[i]];
        /* set linear part of objective function */
        qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
    }

    if(verbosity>=3) {
        SG_DONE()
    }
}

int32_t CSVMLight::calculate_svm_model(
    int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
    float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
/* Compute decision function based on current values */
/* of alpha. */
{
    int32_t i,ii,pos,b_calculated=0,first_low,first_high;
    float64_t ex_c,b_temp,b_low,b_high;

    if(verbosity>=3) {
        SG_DEBUG("Calculating model...")
    }

    if(!learn_parm->biased_hyperplane) {
        model->b=0;
        b_calculated=1;
    }

    for (ii=0;(i=working2dnum[ii])>=0;ii++) {
        if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
            pos=model->index[i];
            model->index[i]=-1;
            (model->sv_num)--;
            model->supvec[pos]=model->supvec[model->sv_num];
            model->alpha[pos]=model->alpha[model->sv_num];
            model->index[model->supvec[pos]]=pos;
        }
        else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
            model->supvec[model->sv_num]=docs[i];
            model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
            model->index[i]=model->sv_num;
            (model->sv_num)++;
        }
        else if(a_old[i]==a[i]) { /* nothing to do */
        }
        else { /* just update alpha */
            model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
        }

        ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
        if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
            (model->at_upper_bound)--;
        }
        else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
            (model->at_upper_bound)++;
        }

        if((!b_calculated)
                && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
            model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
            b_calculated=1;
        }
    }

    /* No alpha in the working set is free (i.e. not at a bound), so b could
       not be calculated in the usual way. The following handles this special
       case. */
    if(learn_parm->biased_hyperplane
            && (!b_calculated)
            && (model->sv_num-1 == model->at_upper_bound)) {
        first_low=1;
        first_high=1;
        b_low=0;
        b_high=0;
        for (ii=0;(i=active2dnum[ii])>=0;ii++) {
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if(a[i]<ex_c) {
                if(label[i]>0) {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
            else {
                if(label[i]<0) {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
        }
        if(first_high) {
            model->b=-b_low;
        }
        else if(first_low) {
            model->b=-b_high;
        }
        else {
            model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
        }
    }

    if(verbosity>=3) {
        SG_DONE()
    }

    return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}

int32_t CSVMLight::check_optimality(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
    int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
    int32_t iteration)
/* Check KT-conditions */
{
    int32_t i,ii,retrain;
    float64_t dist,ex_c,target;
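    /* Added explanation (editor's note, not in the original sources): with
     * dist = y_i(lin_i - b) and target = -(eps_i - y_i c_i) (which is 1 for
     * plain classification), the KT conditions checked below are
     *   a_i = 0      =>  dist >= target  (correct with margin)
     *   0 < a_i < C  =>  dist == target  (on the margin)
     *   a_i = C      =>  dist <= target  (inside the margin / misclassified)
     * maxdiff records the largest violation; last_suboptimal_at feeds the
     * shrinking heuristic. */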
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
    else
        learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
    retrain=0;
    (*maxdiff)=0;
    (*misclassified)=0;
    for (ii=0;(i=active2dnum[ii])>=0;ii++) {
        if((!inconsistent[i]) && label[i]) {
            dist=(lin[i]-model->b)*(float64_t)label[i]; /* 'distance' from hyperplane */
            target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if(dist <= 0) {
                (*misclassified)++; /* does not work due to deactivation of var */
            }
            if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
                if((dist-target)>(*maxdiff)) /* largest violation */
                    (*maxdiff)=dist-target;
            }
            else if((a[i]<ex_c) && (dist < target)) {
                if((target-dist)>(*maxdiff)) /* largest violation */
                    (*maxdiff)=target-dist;
            }
            /* Count how long a variable was at lower/upper bound (and optimal). */
            /* Variables which were at the bound and optimal for a long */
            /* time are unlikely to become support vectors. In case our */
            /* cache is filled up, those variables are excluded to save */
            /* kernel evaluations. (See chapter 'Shrinking'.) */
            if((a[i]>(learn_parm->epsilon_a))
                    && (a[i]<ex_c)) {
                last_suboptimal_at[i]=iteration; /* not at bound */
            }
            else if((a[i]<=(learn_parm->epsilon_a))
                    && (dist < (target+learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration; /* not likely optimal */
            }
            else if((a[i]>=ex_c)
                    && (dist > (target-learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration; /* not likely optimal */
            }
        }
    }

    /* termination criterion */
    if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
        retrain=1;
    }
    return(retrain);
}
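/* Added note (editor's addition, not in the original sources): after the
 * working-set alphas change, lin[j] = sum_i a_i y_i k(i,j) must be refreshed
 * for all active j. update_linear_component() below does this either via
 * cached kernel rows (lin[j] += (a_i - a_i_old) y_i K[i][j]) or, for
 * KP_LINADD kernels, by folding the alpha deltas into the kernel's normal
 * vector once and then calling compute_optimized(j) per active example,
 * optionally multithreaded. */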
void CSVMLight::update_linear_component(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache, float64_t* c)
/* keep track of the linear component lin of the gradient etc. by updating */
/* based on the change of the variables in the current working set */
{
    register int32_t i=0,ii=0,j=0,jj=0;

    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        if (callback)
        {
            update_linear_component_mkl_linadd(docs, label, active2dnum, a,
                    a_old, working2dnum, totdoc, lin, aicache);
        }
        else
        {
            kernel->clear_normal();

            int32_t num_working=0;
            for (ii=0;(i=working2dnum[ii])>=0;ii++) {
                if(a[i] != a_old[i]) {
                    kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
                    num_working++;
                }
            }

            if (num_working>0)
            {
                if (parallel->get_num_threads() < 2)
                {
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                }
#ifdef HAVE_PTHREAD
                else
                {
                    int32_t num_elem = 0;
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++;

                    pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
                    S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
                    int32_t start = 0;
                    int32_t step = num_elem/parallel->get_num_threads();
                    int32_t end = step;

                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                    {
                        params[t].kernel = kernel;
                        params[t].lin = lin;
                        params[t].docs = docs;
                        params[t].active2dnum=active2dnum;
                        params[t].start = start;
                        params[t].end = end;
                        start=end;
                        end+=step;
                        pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]);
                    }

                    for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                    void* ret;
                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                        pthread_join(threads[t], &ret);

                    SG_FREE(params);
                    SG_FREE(threads);
                }
#endif
            }
        }
    }
    else
    {
        if (callback)
        {
            update_linear_component_mkl(docs, label, active2dnum,
                    a, a_old, working2dnum, totdoc, lin, aicache);
        }
        else {
            for (jj=0;(i=working2dnum[jj])>=0;jj++) {
                if(a[i] != a_old[i]) {
                    kernel->get_kernel_row(i,active2dnum,aicache);
                    for (ii=0;(j=active2dnum[ii])>=0;ii++)
                        lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }
        }
    }
}

void CSVMLight::update_linear_component_mkl(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache)
{
    //int inner_iters=0;
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels)

    if ((kernel->get_kernel_type()==K_COMBINED) &&
            (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())) // for combined kernel
    {
        CCombinedKernel* k = (CCombinedKernel*) kernel;

        int32_t n = 0, i, j;

        for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
        {
            CKernel* kn = k->get_kernel(k_idx);
            for (i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    kn->get_kernel_row(i,NULL,aicache, true);
                    for (j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }

            SG_UNREF(kn);
            n++;
        }
    }
    else // hope the kernel is fast ...
    {
        float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
        float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

        // backup and set to zero
        for (int32_t i=0; i<num_kernels; i++)
        {
            w_backup[i] = old_beta[i];
            w1[i]=0.0;
        }
        for (int32_t n=0; n<num_kernels; n++)
        {
            w1[n]=1.0;
            kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

            for (int32_t i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    for (int32_t j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
                }
            }
            w1[n]=0.0;
        }

        // restore old weights
        kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

        SG_FREE(w_backup);
        SG_FREE(w1);
    }

    call_mkl_callback(a, label, lin);
}

void CSVMLight::update_linear_component_mkl_linadd(
    int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
    float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
    float64_t *aicache)
{
    //int inner_iters=0;

    // kernel with LP_LINADD property is assumed to have
    // compute_by_subkernel functions
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels)

    float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
    float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

    // backup and set to one
    for (int32_t i=0; i<num_kernels; i++)
    {
        w_backup[i] = old_beta[i];
        w1[i]=1.0;
    }
    // set the kernel weights
    kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

    // create normal update (with changed alphas only)
    kernel->clear_normal();
    for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
        if(a[i] != a_old[i]) {
            kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
        }
    }

    if (parallel->get_num_threads() < 2)
    {
        // determine contributions of different kernels
        for (int32_t i=0; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);
    }
#ifdef HAVE_PTHREAD
    else
    {
        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
        int32_t step= num/parallel->get_num_threads();

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].kernel = kernel;
            params[t].W = W;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
        }

        for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);
    }
#endif
    // restore old weights
    kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

    call_mkl_callback(a, label, lin);
}

void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
{
    S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;

    int32_t num_kernels=params->kernel->get_num_subkernels();

    // determine contributions of different kernels
    for (int32_t i=params->start; i<params->end; i++)
        params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));

    return NULL;
}

void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_kernels = kernel->get_num_subkernels();

    float64_t suma=0;
    float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
#ifdef HAVE_LAPACK
    int nk = (int) num_kernels; /* calling external lib */
    double* alphay = SG_MALLOC(double, num);

    for (int32_t i=0; i<num; i++)
    {
        alphay[i]=a[i]*label[i];
        suma+=a[i];
    }

    for (int32_t i=0; i<num_kernels; i++)
        sumw[i]=0;

    cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
            num_kernels, alphay, 1, 1.0, (double*) sumw, 1);

    SG_FREE(alphay);
#else
    for (int32_t i=0; i<num; i++)
        suma += a[i];

    for (int32_t d=0; d<num_kernels; d++)
    {
        sumw[d]=0;
        for (int32_t i=0; i<num; i++)
            sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
    }
#endif

    if (callback)
        mkl_converged=callback(mkl, sumw, suma);

    const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);

    // update lin
#ifdef HAVE_LAPACK
    cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
            nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
    for (int32_t i=0; i<num; i++)
        lin[i]=0;
    for (int32_t d=0; d<num_kernels; d++)
        if (new_beta[d]!=0)
            for (int32_t i=0; i<num; i++)
                lin[i] += new_beta[d]*W[i*num_kernels+d];
#endif

    SG_FREE(sumw);
}
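/* Added note (editor's addition, not in the original sources): W is stored
 * row-per-example with W[i*num_kernels+d] = sum_j a_j y_j k_d(x_i,x_j).
 * call_mkl_callback() above therefore hands the MKL solver
 *   suma    = sum_i a_i
 *   sumw[d] = 1/2 sum_i a_i y_i W[i*num_kernels+d] = 1/2 a^T Y K_d Y a,
 * i.e. the per-subkernel quadratic terms, and afterwards refreshes
 * lin = W * beta using the new subkernel weights beta. */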
/*************************** Working set selection ***************************/

int32_t CSVMLight::select_next_qp_subproblem_grad(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
    int32_t *working2dnum, float64_t *selcrit, int32_t *select,
    int32_t cache_only, int32_t *key, int32_t *chosen)
/* Use the feasible direction approach to select the next
   qp-subproblem (see chapter 'Selecting a good working set'). If
   'cache_only' is true, then the variables are selected only among
   those for which the kernel evaluations are cached. */
{
    int32_t choosenum,i,j,k,activedoc,inum,valid;
    float64_t s;

    for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
    choosenum=0;
    activedoc=0;
    for (i=0;(j=active2dnum[i])>=0;i++) {
        s=-label[j];
        if(cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid = 1;
        }
        else
            valid=1;
        if(valid
                && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!chosen[j])
                && (label[j])
                && (!inconsistent[j]))
        {
            selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
    for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
                                           /* out of cache */
    }

    activedoc=0;
    for (i=0;(j=active2dnum[i])>=0;i++) {
        s=label[j];
        if(cache_only)
        {
            if (use_kernel_cache)
                valid=(kernel->kernel_cache_check(j));
            else
                valid = 1;
        }
        else
            valid=1;
        if(valid
                && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
                && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
                        && (s>0)))
                && (!chosen[j])
                && (label[j])
                && (!inconsistent[j]))
        {
            selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
            /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
            key[activedoc]=j;
            activedoc++;
        }
    }
    select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
    for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
        i=key[select[k]];
        chosen[i]=1;
        working2dnum[inum+choosenum]=i;
        choosenum+=1;
        if (use_kernel_cache)
            kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
                                           /* out of cache */
    }
    working2dnum[inum+choosenum]=-1; /* complete index */
    return(choosenum);
}
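/* Added note (editor's addition, not in the original sources): the two passes
 * above implement Joachims' steepest-feasible-direction selection. selcrit is
 * the component of the dual gradient along the candidate direction,
 *   selcrit_j = +/- y_j * (eps_j - y_j c_j + y_j lin_j),
 * where the first pass considers directions that increase a_j on examples
 * not stuck at a bound for that sign, and the second pass the opposite sign;
 * select_top_n() then keeps the qp_size/2 highest-scoring candidates from
 * each pass. */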
int32_t CSVMLight::select_next_qp_subproblem_rand(
	int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
	int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
	int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
	int32_t *chosen, int32_t iteration)
/* Use the feasible direction approach to select the next
   qp-subproblem (see section 'Selecting a good working set'). Chooses
   a feasible direction at (pseudo) random to help jump over numerical
   problems. */
{
	int32_t choosenum,i,j,k,activedoc,inum;
	float64_t s;

	for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
	choosenum=0;
	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=-label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
		   && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
				 && (s>0)))
		   && (!inconsistent[j])
		   && (label[j])
		   && (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
			                               /* out of cache */
	}

	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
		   && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
				 && (s>0)))
		   && (!inconsistent[j])
		   && (label[j])
		   && (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
			                               /* out of cache */
	}
	working2dnum[inum+choosenum]=-1; /* complete index */
	return(choosenum);
}

void CSVMLight::select_top_n(
	float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
{
	register int32_t i,j;

	for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
		for (j=i;j>=0;j--) {
			if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
				select[j]=select[j-1];
			}
			else {
				select[j]=i;
				j=-1;
			}
		}
	}
	if(n>0) {
		for (i=n;i<range;i++) {
			if(selcrit[i]>selcrit[select[n-1]]) {
				for (j=n-1;j>=0;j--) {
					if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
						select[j]=select[j-1];
					}
					else {
						select[j]=i;
						j=-1;
					}
				}
			}
		}
	}
}
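/* select_top_n maintains `select` as the indices of the n largest selcrit
 * entries via insertion into a sorted prefix. Functionally this matches a
 * partial sort over indices; a minimal equivalent sketch (illustrative
 * only, kept out of the build): */
#if 0
#include <algorithm>
#include <numeric>
#include <vector>

std::vector<int> top_n_indices(const std::vector<double>& selcrit, int n)
{
	std::vector<int> idx(selcrit.size());
	std::iota(idx.begin(), idx.end(), 0);  // 0,1,...,range-1
	n = std::min<int>(n, (int)idx.size());
	std::partial_sort(idx.begin(), idx.begin()+n, idx.end(),
		[&](int a, int b) { return selcrit[a] > selcrit[b]; });
	idx.resize(n);                         // descending by selcrit
	return idx;
}
#endif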
/******************************** Shrinking *********************************/

void CSVMLight::init_shrink_state(
	SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
{
	int32_t i;

	shrink_state->deactnum=0;
	shrink_state->active = SG_MALLOC(int32_t, totdoc);
	shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
	shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
	shrink_state->maxhistory=maxhistory;
	shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
	shrink_state->last_a = SG_MALLOC(float64_t, totdoc);

	for (i=0;i<totdoc;i++) {
		shrink_state->active[i]=1;
		shrink_state->inactive_since[i]=0;
		shrink_state->last_a[i]=0;
		shrink_state->last_lin[i]=0;
	}
}

void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
	SG_FREE(shrink_state->active);
	SG_FREE(shrink_state->inactive_since);
	if(shrink_state->deactnum > 0)
		SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
	SG_FREE((shrink_state->a_history));
	SG_FREE((shrink_state->last_a));
	SG_FREE((shrink_state->last_lin));
}

int32_t CSVMLight::shrink_problem(
	SHRINK_STATE *shrink_state, int32_t *active2dnum,
	int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
	int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
	float64_t* lin, int* label)
/* Shrink some variables away. Do the shrinking only if at least
   minshrink variables can be removed. */
{
	int32_t i,ii,change,activenum,lastiter;
	float64_t *a_old=NULL;

	activenum=0;
	change=0;
	for (ii=0;active2dnum[ii]>=0;ii++) {
		i=active2dnum[ii];
		activenum++;
		lastiter=last_suboptimal_at[i];

		if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
		   || (inconsistent[i])) {
			change++;
		}
	}
	if((change>=minshrink) /* shrink only if sufficiently many candidates */
	   && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
		/* Shrink problem by removing those variables which are */
		/* optimal at a bound for a minimum number of iterations */
		if(verbosity>=2) {
			SG_INFO(" Shrinking...")
		}

		if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case: save alphas */

			a_old=SG_MALLOC(float64_t, totdoc);
			shrink_state->a_history[shrink_state->deactnum]=a_old;
			for (i=0;i<totdoc;i++) {
				a_old[i]=a[i];
			}
		}
		for (ii=0;active2dnum[ii]>=0;ii++) {
			i=active2dnum[ii];
			lastiter=last_suboptimal_at[i];
			if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
			   || (inconsistent[i])) {
				shrink_state->active[i]=0;
				shrink_state->inactive_since[i]=shrink_state->deactnum;
			}
		}
		activenum=compute_index(shrink_state->active,totdoc,active2dnum);
		shrink_state->deactnum++;
		if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
			shrink_state->deactnum=0;

		if(verbosity>=2) {
			SG_DONE()
			SG_DEBUG("Number of inactive variables = %d\n", totdoc-activenum)
		}
	}
	return(activenum);
}

void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* last_lin = params->last_lin;
	int32_t* active = params->active;
	int32_t* docs = params->docs;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t i=start;i<end;i++)
	{
		if (!active[i])
			lin[i] = last_lin[i]+k->compute_optimized(docs[i]);

		last_lin[i]=lin[i];
	}

	return NULL;
}
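/* Under KP_LINADD the kernel maintains an explicit normal vector w to which
 * add_to_normal(doc, da*y) adds, for a linear kernel, (da*y)*x_doc, so that
 * compute_optimized(i) essentially returns <w, x_i>. The helper above thus
 * refreshes lin[] of shrunk examples as "last value + contribution of the
 * alphas changed since deactivation". A minimal dense sketch of the same
 * idea, with hypothetical toy data and kept out of the build: */
#if 0
#include <cstdio>

int main()
{
	const int dim=2, num=3;
	double x[num][dim]={{1,0},{0,1},{1,1}}; // toy documents
	double w[dim]={0,0};                    // the "normal"
	double last_lin[num]={0,0,0};

	// suppose alpha of example 2 changed by da=0.5 with label y=+1:
	for (int d=0; d<dim; d++)
		w[d] += 0.5*1.0*x[2][d];            // add_to_normal

	for (int i=0; i<num; i++)               // compute_optimized per example
	{
		double lin_i=last_lin[i];
		for (int d=0; d<dim; d++)
			lin_i += w[d]*x[i][d];
		printf("lin[%d]=%f\n", i, lin_i);
	}
	return 0;
}
#endif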
void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
	ASSERT(params)
	ASSERT(params->kernel)
	ASSERT(params->lin)
	ASSERT(params->aicache)
	ASSERT(params->a)
	ASSERT(params->a_old)
	ASSERT(params->changed2dnum)
	ASSERT(params->inactive2dnum)
	ASSERT(params->label)

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* aicache = params->aicache;
	float64_t* a = params->a;
	float64_t* a_old = params->a_old;
	int32_t* changed2dnum = params->changed2dnum;
	int32_t* inactive2dnum = params->inactive2dnum;
	int32_t* label = params->label;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t ii=start;ii<end;ii++)
	{
		int32_t i=changed2dnum[ii];
		int32_t j=0;
		ASSERT(i>=0)

		k->get_kernel_row(i,inactive2dnum,aicache);
		for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
			lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
	}
	return NULL;
}

void CSVMLight::reactivate_inactive_examples(
	int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
	float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
	int32_t* docs, float64_t *aicache, float64_t *maxdiff)
/* Make all variables active again which had been removed by
   shrinking. */
/* Computes lin for those variables from scratch. */
{
	register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
	int32_t *changed,*inactive;
	register float64_t *a_old,dist;
	float64_t ex_c,target;

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
		a_old=shrink_state->last_a;

		if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
		{
			SG_DEBUG(" clear normal - linadd\n")
			kernel->clear_normal();

			int32_t num_modified=0;
			for (i=0;i<totdoc;i++) {
				if(a[i] != a_old[i]) {
					kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
					a_old[i]=a[i];
					num_modified++;
				}
			}

			if (num_modified>0)
			{
				int32_t num_threads=parallel->get_num_threads();
				ASSERT(num_threads>0)
				if (num_threads < 2)
				{
					S_THREAD_PARAM_REACTIVATE_LINADD params;
					params.kernel=kernel;
					params.lin=lin;
					params.last_lin=shrink_state->last_lin;
					params.docs=docs;
					params.active=shrink_state->active;
					params.start=0;
					params.end=totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params);
				}
#ifdef HAVE_PTHREAD
				else
				{
					pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
					int32_t step= totdoc/num_threads;

					for (t=0; t<num_threads-1; t++)
					{
						params[t].kernel=kernel;
						params[t].lin=lin;
						params[t].last_lin=shrink_state->last_lin;
						params[t].docs=docs;
						params[t].active=shrink_state->active;
						params[t].start = t*step;
						params[t].end = (t+1)*step;
						pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
					}

					params[t].kernel=kernel;
					params[t].lin=lin;
					params[t].last_lin=shrink_state->last_lin;
					params[t].docs=docs;
					params[t].active=shrink_state->active;
					params[t].start = t*step;
					params[t].end = totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params[t]);

					for (t=0; t<num_threads-1; t++)
						pthread_join(threads[t], NULL);

					SG_FREE(threads);
					SG_FREE(params);
				}
#endif
			}
		}
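		/* The else-branch below handles kernels with KP_BATCHEVALUATION:
		 * instead of adding each changed alpha to the normal one by one,
		 * it collects the deltas (a[i]-a_old[i])*y[i] of all changed
		 * alphas and evaluates their contribution to every inactive
		 * example in a single compute_batch() call, which lets the
		 * kernel parallelize and reuse intermediate results. */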
		else
		{
			float64_t *alphas = SG_MALLOC(float64_t, totdoc);
			int32_t *idx = SG_MALLOC(int32_t, totdoc);
			int32_t num_suppvec=0;

			for (i=0; i<totdoc; i++)
			{
				if(a[i] != a_old[i])
				{
					alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
					idx[num_suppvec] = i;
					a_old[i] = a[i];
					num_suppvec++;
				}
			}

			if (num_suppvec>0)
			{
				int32_t num_inactive=0;
				int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact, we only need a subset

				j=0;
				for (i=0;i<totdoc;i++)
				{
					if(!shrink_state->active[i])
					{
						inactive_idx[j++] = i;
						num_inactive++;
					}
				}

				if (num_inactive>0)
				{
					float64_t* dest = SG_MALLOC(float64_t, num_inactive);
					memset(dest, 0, sizeof(float64_t)*num_inactive);

					kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);

					j=0;
					for (i=0;i<totdoc;i++) {
						if(!shrink_state->active[i]) {
							lin[i] = shrink_state->last_lin[i] + dest[j++];
						}
						shrink_state->last_lin[i]=lin[i];
					}

					SG_FREE(dest);
				}
				else
				{
					for (i=0;i<totdoc;i++)
						shrink_state->last_lin[i]=lin[i];
				}
				SG_FREE(inactive_idx);
			}
			SG_FREE(alphas);
			SG_FREE(idx);
		}

		kernel->delete_optimization();
	}
	else
	{
		changed=SG_MALLOC(int32_t, totdoc);
		changed2dnum=SG_MALLOC(int32_t, totdoc+11);
		inactive=SG_MALLOC(int32_t, totdoc);
		inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
		for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
		{
			if(verbosity>=2) {
				SG_INFO("%d..",t)
			}
			a_old=shrink_state->a_history[t];
			for (i=0;i<totdoc;i++) {
				inactive[i]=((!shrink_state->active[i])
							 && (shrink_state->inactive_since[i] == t));
				changed[i]= (a[i] != a_old[i]);
			}
			compute_index(inactive,totdoc,inactive2dnum);
			compute_index(changed,totdoc,changed2dnum);
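			/* For every snapshot t (newest first), lin[] of the examples
			 * deactivated at time t is brought up to date by adding the
			 * kernel rows of all alphas that changed since that snapshot:
			 * lin[j] += (a[i]-a_history[t][i]) * y[i] * K(i,j). The work
			 * below is split over threads by partitioning the changed
			 * examples. */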
			//TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
			int32_t num_threads=1;
			ASSERT(num_threads>0)

			if (num_threads < 2)
			{
				for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
					kernel->get_kernel_row(i,inactive2dnum,aicache);
					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}
#ifdef HAVE_PTHREAD
			else
			{
				//find number of the changed ones
				int32_t num_changed=0;
				for (ii=0;changed2dnum[ii]>=0;ii++)
					num_changed++;

				if (num_changed>0)
				{
					pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
					int32_t step= num_changed/num_threads;

					// alloc num_threads many tmp buffers
					float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
					float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);

					int32_t thr;
					for (thr=0; thr<num_threads-1; thr++)
					{
						params[thr].kernel=kernel;
						params[thr].lin=&tmp_lin[thr*totdoc];
						params[thr].aicache=&tmp_aicache[thr*totdoc];
						params[thr].a=a;
						params[thr].a_old=a_old;
						params[thr].changed2dnum=changed2dnum;
						params[thr].inactive2dnum=inactive2dnum;
						params[thr].label=label;
						params[thr].start = thr*step;
						params[thr].end = (thr+1)*step;
						pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
					}

					params[thr].kernel=kernel;
					params[thr].lin=&tmp_lin[thr*totdoc];
					params[thr].aicache=&tmp_aicache[thr*totdoc];
					params[thr].a=a;
					params[thr].a_old=a_old;
					params[thr].changed2dnum=changed2dnum;
					params[thr].inactive2dnum=inactive2dnum;
					params[thr].label=label;
					params[thr].start = thr*step;
					params[thr].end = num_changed;
					reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);

					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=tmp_lin[totdoc*thr+j];

					for (thr=0; thr<num_threads-1; thr++)
					{
						pthread_join(threads[thr], NULL);

						//add up results
						for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
							lin[j]+=tmp_lin[totdoc*thr+j];
					}

					SG_FREE(tmp_lin);
					SG_FREE(tmp_aicache);
					SG_FREE(threads);
					SG_FREE(params);
				}
			}
#endif
		}
		SG_FREE(changed);
		SG_FREE(changed2dnum);
		SG_FREE(inactive);
		SG_FREE(inactive2dnum);
	}
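	/* With lin[] up to date for all examples again, re-check the KKT
	 * conditions: dist = y_i*(lin_i - b) is compared against
	 * target = -(eps_i - y_i*c_i). The largest violation is reported in
	 * *maxdiff, and every variable that is not provably optimal at its
	 * bound is marked active again. */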
	(*maxdiff)=0;
	for (i=0;i<totdoc;i++) {
		shrink_state->inactive_since[i]=shrink_state->deactnum-1;
		if(!inconsistent[i]) {
			dist=(lin[i]-model->b)*(float64_t)label[i];
			target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
			ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
			if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
				if((dist-target)>(*maxdiff)) /* largest violation */
					(*maxdiff)=dist-target;
			}
			else if((a[i]<ex_c) && (dist < target)) {
				if((target-dist)>(*maxdiff)) /* largest violation */
					(*maxdiff)=target-dist;
			}
			if((a[i]>(0+learn_parm->epsilon_a))
			   && (a[i]<ex_c)) {
				shrink_state->active[i]=1; /* not at bound */
			}
			else if((a[i]<=(0+learn_parm->epsilon_a)) &&
					(dist < (target+learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if((a[i]>=ex_c)
					&& (dist > (target-learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if(learn_parm->sharedslack) { /* make all active when sharedslack */
				shrink_state->active[i]=1;
			}
		}
	}

	if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
		for (i=0;i<totdoc;i++) {
			(shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
		}
		for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
			SG_FREE(shrink_state->a_history[t]);
			shrink_state->a_history[t]=0;
		}
	}
}
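/* optimize_qp below solves the restricted QP over the working set
 *
 *     minimize    0.5 * x' G x + g0' x
 *     subject to  ce' x + ce0 = 0,   low <= x <= up
 *
 * with pr_loqo, where G (qp->opt_g) is the working-set kernel sub-matrix,
 * g0 the linear part and x the alphas of the selected variables. If a run
 * fails, the precision/iteration parameters are relaxed and the solver is
 * restarted. A minimal sketch of such a retry loop around a generic solver
 * (hypothetical solve() stub, kept out of the build): */
#if 0
#include <cmath>

enum { STILL_RUNNING_=0, OPTIMAL_SOLUTION_=1 };

// hypothetical stand-in for pr_loqo
int solve(double sigdig, int iter, double margin);

int solve_with_restarts(double precision, int iter, double margin)
{
	int result=STILL_RUNNING_;
	while (margin<=0.9999999 && result!=OPTIMAL_SOLUTION_)
	{
		result=solve(-log10(precision), iter, margin);
		if (result!=OPTIMAL_SOLUTION_)
		{
			iter+=2000;              // allow more iterations,
			precision*=10.0;         // ask for fewer significant digits,
			margin=(margin+1.0)/2.0; // and start more conservatively
		}
	}
	return result;
}
#endif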
/* start the optimizer and return the optimal values */
float64_t* CSVMLight::optimize_qp(
	QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
	int32_t& svm_maxqpsize)
{
	register int32_t i, j, result;
	float64_t margin, obj_before, obj_after;
	float64_t sigdig, dist, epsilon_loqo;
	int32_t iter;

	if(!primal) { /* allocate memory at first call */
		primal=SG_MALLOC(float64_t, nx*3);
		dual=SG_MALLOC(float64_t, nx*2+1);
	}

	obj_before=0; /* calculate objective before optimization */
	for(i=0;i<qp->opt_n;i++) {
		obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
		obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	result=STILL_RUNNING;
	qp->opt_ce0[0]*=(-1.0);
	/* Run pr_loqo. If a run fails, try again with parameters which lead */
	/* to a slower, but more robust setting. */
	for(margin=init_margin,iter=init_iter;
		(margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {

		opt_precision=CMath::max(opt_precision, DEF_PRECISION);
		sigdig=-log10(opt_precision);

		result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
					   (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
					   (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
					   (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
					   (float64_t *)primal,(float64_t *)dual,
					   (int32_t)(verbosity-2),
					   (float64_t)sigdig,(int32_t)iter,
					   (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);

		if(CMath::is_nan(dual[0])) { /* check for choldc problem */
			if(verbosity>=2) {
				SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n")
			}
			if(init_margin<0.80) { /* become more conservative in general */
				init_margin=(4.0*margin+1.0)/5.0;
			}
			margin=(margin+1.0)/2.0;
			(opt_precision)*=10.0; /* reduce precision */
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO.\n")
			}
		}
		else if(result!=OPTIMAL_SOLUTION) {
			iter+=2000;
			init_iter+=10;
			(opt_precision)*=10.0; /* reduce precision */
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO due to (%d).\n",result)
			}
		}
	}

	if(qp->opt_m) /* Thanks to Alex Smola for this hint */
		model_b=dual[0];
	else
		model_b=0;

	/* Check the precision of the alphas. If results of current optimization */
	/* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
	epsilon_loqo=1E-10;
	for(i=0;i<qp->opt_n;i++) {
		dist=-model_b*qp->opt_ce[i];
		dist+=(qp->opt_g0[i]+1.0);
		for(j=0;j<i;j++) {
			dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
		}
		for(j=i;j<qp->opt_n;j++) {
			dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
		}
		/* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]) */
		if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
			epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
		}
		else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
			epsilon_loqo=primal[i]*2.0;
		}
	}

	for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */
		if(primal[i]<=(0+epsilon_loqo)) {
			primal[i]=0;
		}
		else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
			primal[i]=qp->opt_up[i];
		}
	}

	obj_after=0; /* calculate objective after optimization */
	for(i=0;i<qp->opt_n;i++) {
		obj_after+=(qp->opt_g0[i]*primal[i]);
		obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	/* if optimizer returned NAN values, reset and retry with smaller */
	/* working set. */
	if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
		for(i=0;i<qp->opt_n;i++) {
			primal[i]=qp->opt_xinit[i];
		}
		model_b=0;
		if(svm_maxqpsize>2) {
			svm_maxqpsize--; /* decrease size of qp-subproblems */
		}
	}

	if(obj_after >= obj_before) { /* check whether there was progress */
		(opt_precision)/=100.0;
		precision_violations++;
		if(verbosity>=2) {
			SG_SDEBUG("Increasing Precision of PR_LOQO.\n")
		}
	}

	// TODO: add parameter for this (e.g. for MTL experiments)
	if(precision_violations > 5000) {
		(*epsilon_crit)*=10.0;
		precision_violations=0;
		SG_SINFO("Relaxing epsilon on KT-Conditions.\n")
	}

	(*threshold)=model_b;

	if(result!=OPTIMAL_SOLUTION) {
		SG_SERROR("PR_LOQO did not converge.\n")
		return(qp->opt_xinit);
	}
	else {
		return(primal);
	}
}

#endif //USE_SVMLIGHT
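/* For reference, a minimal end-to-end training sketch against the Shogun
 * 3.x API as commonly documented (illustrative only; constructor and
 * method signatures should be checked against the library headers): */
#if 0
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/SVMLight.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// 2 features x 4 examples, labels in {-1,+1}
	SGMatrix<float64_t> X(2,4);
	X(0,0)=-1; X(1,0)=-1;  X(0,1)=-1; X(1,1)=1;
	X(0,2)=1;  X(1,2)=-1;  X(0,3)=1;  X(1,3)=1;
	SGVector<float64_t> y(4);
	y[0]=-1; y[1]=-1; y[2]=1; y[3]=1;

	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
	CBinaryLabels* labels=new CBinaryLabels(y);
	CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
	kernel->init(feats, feats);

	CSVMLight* svm=new CSVMLight(1.0, kernel, labels); // C, kernel, labels
	svm->train();

	CBinaryLabels* out=svm->apply_binary(feats);
	SG_UNREF(out);
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
#endif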