SVMLight.cpp
/***********************************************************************/
/*                                                                     */
/*   SVMLight.cpp                                                      */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 19.07.99                                                    */
/*                                                                     */
/*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
/*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
/*   Parallelizations: Soeren Sonnenburg                               */
/*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,      */
/*                          Alexander Zien, Marius Kloft, Chen Guohua  */
/*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
/*                                                                     */
/***********************************************************************/
#include <shogun/lib/config.h>

#ifdef USE_SVMLIGHT

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/lapack.h>

#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/lib/external/pr_loqo.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/kernel/CombinedKernel.h>

#include <unistd.h>

#include <shogun/base/Parallel.h>
#include <shogun/labels/BinaryLabels.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* last_lin;
    int32_t* active;
    int32_t* docs;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_SVMLIGHT
{
    float64_t* lin;
    float64_t* W;
    int32_t start, end;
    int32_t* active2dnum;
    int32_t* docs;
    CKernel* kernel;
};

struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* aicache;
    float64_t* a;
    float64_t* a_old;
    int32_t* changed2dnum;
    int32_t* inactive2dnum;
    int32_t* label;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_KERNEL
{
    float64_t* Kval;
    int32_t *KI, *KJ;
    int32_t start, end;
    CSVMLight* svmlight;
};

#endif // DOXYGEN_SHOULD_SKIP_THIS

void* CSVMLight::update_linear_component_linadd_helper(void* p)
{
    S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;

    int32_t jj=0, j=0;

    for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
        params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);

    return NULL;
}

void* CSVMLight::compute_kernel_helper(void* p)
{
    S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;

    int32_t jj=0;
    for (jj=params->start;jj<params->end;jj++)
        params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]);

    return NULL;
}
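
/* Note: the two helpers above are pthread entry points. Each thread walks
 * its half-open index range [params->start, params->end) independently, so
 * the only shared state is the output array (lin resp. Kval), which the
 * threads write at disjoint positions. */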

CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}

CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}
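
/* Minimal usage sketch (illustrative only -- the feature/kernel classes and
 * parameter values below are assumptions, not mandated by this file):
 *
 *   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(mat);
 *   CGaussianKernel* kern = new CGaussianKernel(10, 2.0); // cache MB, width
 *   CBinaryLabels* labs = new CBinaryLabels(lab_vec);     // +1/-1 labels
 *   CSVMLight* svm = new CSVMLight(1.0, kern, labs);      // C=1.0
 *   svm->train(feats);
 *   CBinaryLabels* out = svm->apply_binary(feats);
 */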

void CSVMLight::init()
{
    //optimizer settings
    primal=NULL;
    dual=NULL;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    model_b=0;
    verbosity=1;
    opt_precision=DEF_PRECISION;

    // svm variables
    W=NULL;
    model=SG_MALLOC(MODEL, 1);
    learn_parm=SG_MALLOC(LEARN_PARM, 1);
    model->supvec=NULL;
    model->alpha=NULL;
    model->index=NULL;

    // MKL stuff
    mymaxdiff=1;
    mkl_converged=false;
}

CSVMLight::~CSVMLight()
{
  SG_FREE(model->supvec);
  SG_FREE(model->alpha);
  SG_FREE(model->index);
  SG_FREE(model);
  SG_FREE(learn_parm);

  // MKL stuff
  SG_FREE(W);

  // optimizer variables
  SG_FREE(dual);
  SG_FREE(primal);
}

bool CSVMLight::train_machine(CFeatures* data)
{
    //certain setup params
    mkl_converged=false;
    verbosity=1;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    strcpy (learn_parm->predfile, "");
    learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1;
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR("SVM_light cannot proceed without kernel!\n")

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR("SVM_light cannot proceed without initialized kernel!\n")

    ASSERT(m_labels && m_labels->get_num_labels())
    ASSERT(m_labels->get_label_type() == LT_BINARY)
    ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels())

    // cleanup in case of LINADD-enabled kernels
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        kernel->clear_normal();

    // output some info
    SG_DEBUG("threads = %i\n", parallel->get_num_threads())
    SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize)
    SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit)
    SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD))
    SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION))
    SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION))
    SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT" )
    SG_DEBUG("get_solver_type() = %i\n", get_solver_type())
    SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled())
    SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled())
    SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels())

    use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
                         (get_linadd_enabled() && kernel->has_property(KP_LINADD)));

    SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache)

    if (kernel->get_kernel_type() == K_COMBINED)
    {
        for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
        {
            CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
            // allocate kernel cache but clean up beforehand
            kn->resize_kernel_cache(kn->get_cache_size());
            SG_UNREF(kn);
        }
    }

    kernel->resize_kernel_cache(kernel->get_cache_size());

    // train the svm
    svm_learn();

    // work around SVMLight's 1-based model arrays (element 0 is unused)
    create_new_model(model->sv_num-1);
    set_bias(-model->b);
    for (int32_t i=0; i<model->sv_num-1; i++)
    {
        set_alpha(i, model->alpha[i+1]);
        set_support_vector(i, model->supvec[i+1]);
    }

    // cleanup in case of LINADD-enabled kernels
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        kernel->clear_normal();
        kernel->delete_optimization();
    }

    if (use_kernel_cache)
        kernel->kernel_cache_cleanup();

    return true;
}

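/* get_runtime() reports process CPU time in hundredths of a second (clock()
 * rescaled by 100/CLOCKS_PER_SEC). It only feeds the verbosity>=2 timing
 * profile; the wall-clock training-time limit uses CTime in
 * optimize_to_convergence() instead. */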
int32_t CSVMLight::get_runtime()
{
  clock_t start;
  start = clock();
  return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
}


void CSVMLight::svm_learn()
{
    int32_t *inconsistent, i;
    int32_t misclassified,upsupvecnum;
    float64_t maxdiff, *lin, *c, *a;
    int32_t iterations;
    int32_t trainpos=0, trainneg=0;
    ASSERT(m_labels)
    SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
    int32_t totdoc=lab.vlen;
    ASSERT(lab.vector && lab.vlen)
    int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);

    int32_t* docs=SG_MALLOC(int32_t, totdoc);
    SG_FREE(W);
    W=NULL;
    count = 0;

    if (kernel->has_property(KP_KERNCOMBINATION) && callback)
    {
        W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
        for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
            W[i]=0;
    }

    for (i=0; i<totdoc; i++)
        docs[i]=i;

    float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
    float64_t *a_fullset;  /* buffer for storing alpha on full sample in loo */
    TIMING timing_profile;
    SHRINK_STATE shrink_state;

    timing_profile.time_kernel=0;
    timing_profile.time_opti=0;
    timing_profile.time_shrink=0;
    timing_profile.time_update=0;
    timing_profile.time_model=0;
    timing_profile.time_check=0;
    timing_profile.time_select=0;

    /* make sure -n value is reasonable */
    if((learn_parm->svm_newvarsinqp < 2)
            || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
        learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
    }

    init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);

    inconsistent = SG_MALLOC(int32_t, totdoc);
    c = SG_MALLOC(float64_t, totdoc);
    a = SG_MALLOC(float64_t, totdoc);
    a_fullset = SG_MALLOC(float64_t, totdoc);
    xi_fullset = SG_MALLOC(float64_t, totdoc);
    lin = SG_MALLOC(float64_t, totdoc);
    if (m_linear_term.vlen>0)
        learn_parm->eps=get_linear_term_array();
    else
    {
        learn_parm->eps=SG_MALLOC(float64_t, totdoc);      /* equivalent regression epsilon for classification */
        SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    model->supvec = SG_MALLOC(int32_t, totdoc+2);
    model->alpha = SG_MALLOC(float64_t, totdoc+2);
    model->index = SG_MALLOC(int32_t, totdoc+2);

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0;  /* element 0 reserved and empty for now */
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;

    for (i=0;i<totdoc;i++) {    /* various inits */
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        if(label[i] > 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if(label[i] < 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }
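
    /* At this point svm_cost[i] holds the upper bound for alpha[i]:
     * svm_c*svm_costratio (i.e. C2) for positive examples, svm_c (i.e. C1)
     * for negative ones, and 0 for unlabeled examples, which therefore can
     * never become support vectors. */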

    /* compute starting state for initial alpha values */
    SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors())

    if(m_alpha.vector && get_num_support_vectors()) {
        if(verbosity>=1) {
            SG_INFO("Computing starting state...")
        }

        float64_t* alpha = SG_MALLOC(float64_t, totdoc);

        for (i=0; i<totdoc; i++)
            alpha[i]=0;

        for (i=0; i<get_num_support_vectors(); i++)
            alpha[get_support_vector(i)]=get_alpha(i);

        int32_t* index = SG_MALLOC(int32_t, totdoc);
        int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
        float64_t* aicache = SG_MALLOC(float64_t, totdoc);
        for (i=0;i<totdoc;i++) {    /* create full index and clip alphas */
            index[i]=1;
            alpha[i]=fabs(alpha[i]);
            if(alpha[i]<0) alpha[i]=0;
            if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
        }

        if (use_kernel_cache)
        {
            if (callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
                {
                    CKernel* kn = k->get_kernel(k_idx);
                    for (i=0;i<totdoc;i++)     // fill kernel cache with unbounded SV
                        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    for (i=0;i<totdoc;i++)     // fill rest of kernel cache with bounded SV
                        if((alpha[i]==learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    SG_UNREF(kn);
                }
            }
            else
            {
                for (i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
                    if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);

                for (i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
                    if((alpha[i]==learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);
            }
        }
        compute_index(index,totdoc,index2dnum);
        update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                    lin,aicache,NULL);
        calculate_svm_model(docs,label,lin,alpha,a,c,
                      index2dnum,index2dnum);
        for (i=0;i<totdoc;i++) {    /* copy initial alphas */
            a[i]=alpha[i];
        }

        SG_FREE(index);
        SG_FREE(index2dnum);
        SG_FREE(aicache);
        SG_FREE(alpha);

        if(verbosity>=1)
            SG_DONE()
    }

    SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg)
    SG_DEBUG("Optimizing...\n")

    /* train the svm */
    iterations=optimize_to_convergence(docs,label,totdoc,
                     &shrink_state,inconsistent,a,lin,
                     c,&timing_profile,
                     &maxdiff,(int32_t)-1,
                     (int32_t)1);

    if(verbosity>=1) {
        if(verbosity==1)
        {
            SG_DONE()
            SG_DEBUG("(%ld iterations)", iterations)
        }

        misclassified=0;
        for (i=0;(i<totdoc);i++) { /* get final statistic */
            if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
                misclassified,maxdiff);

        SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b)
        if (maxdiff>epsilon)
            SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon)

        upsupvecnum=0;
        for (i=1;i<model->sv_num;i++)
        {
            if(fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO("Number of SV: %d (including %d at upper bound)\n",
                model->sv_num-1,upsupvecnum);
    }

    shrink_state_cleanup(&shrink_state);
    SG_FREE(label);
    SG_FREE(inconsistent);
    SG_FREE(c);
    SG_FREE(a);
    SG_FREE(a_fullset);
    SG_FREE(xi_fullset);
    SG_FREE(lin);
    SG_FREE(learn_parm->eps);
    SG_FREE(learn_parm->svm_cost);
    SG_FREE(docs);
}
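
/* After svm_learn() the decision function is
 *   f(x) = sum_{i=1}^{sv_num-1} model->alpha[i]*k(x_{supvec[i]}, x) - model->b
 * where model->alpha[i] already carries the label sign (alpha_i = a_i*y_i);
 * train_machine() accounts for the sign convention by storing the bias as
 * -model->b. */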

int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
                 SHRINK_STATE *shrink_state,
                 int32_t *inconsistent,
                 float64_t *a, float64_t *lin, float64_t *c,
                 TIMING *timing_profile, float64_t *maxdiff,
                 int32_t heldout, int32_t retrain)
     /* docs: Training vectors (x-part) */
     /* label: Training labels/value (y-part, zero if test example for
                  transduction) */
     /* totdoc: Number of examples in docs/label */
     /* learn_parm: Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* inconsistent: examples thrown out as inconsistent */
     /* a: alphas */
     /* lin: linear component of gradient */
     /* c: right hand side of inequalities (margin) */
     /* maxdiff: returns maximum violation of KT-conditions */
     /* heldout: marks held-out example for leave-one-out (or -1) */
     /* retrain: selects training mode (1=regular / 2=holdout) */
{

  int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
  int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
  int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
  int32_t *working2dnum,*selexam;
  int32_t activenum;
  float64_t criterion, eq;
  float64_t *a_old;
  int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  float64_t epsilon_crit_org;
  float64_t bestmaxdiff;
  float64_t worstmaxdiff;
  int32_t   bestmaxdiffiter,terminate;
  bool reactivated=false;

  float64_t *selcrit;  /* buffer for sorting */
  float64_t *aicache;  /* buffer to keep one row of hessian */
  QP qp;            /* buffer for one quadratic program */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
      learn_parm->epsilon_crit=2.0;
      /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  SG_DEBUG("totdoc:%d\n",totdoc)
  chosen = SG_MALLOC(int32_t, totdoc);
  last_suboptimal_at = SG_MALLOC(int32_t, totdoc);
  key = SG_MALLOC(int32_t, totdoc+11);
  selcrit = SG_MALLOC(float64_t, totdoc);
  selexam = SG_MALLOC(int32_t, totdoc);
  a_old = SG_MALLOC(float64_t, totdoc);
  aicache = SG_MALLOC(float64_t, totdoc);
  working2dnum = SG_MALLOC(int32_t, totdoc+11);
  active2dnum = SG_MALLOC(int32_t, totdoc+11);
  qp.opt_ce = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_ce0 = SG_MALLOC(float64_t, 1);
  qp.opt_g = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
  qp.opt_g0 = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_xinit = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_low = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
  qp.opt_up = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

  choosenum=0;
  inconsistentnum=0;
  if(!retrain) retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  worstmaxdiff=1e-10;
  terminate=0;

  kernel->set_time(iteration);  /* for lru cache */

  for (i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    a_old[i]=a[i];
    last_suboptimal_at[i]=1;
    if(inconsistent[i])
      inconsistentnum++;
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* repeat this loop until we have convergence */
  CTime start_time;
  mkl_converged=false;

#ifdef CYGWIN
  for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
#else
  CSignal::clear_cancel();
  for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
#endif

      if(use_kernel_cache)
          kernel->set_time(iteration);  /* for lru cache */

      if(verbosity>=2) t0=get_runtime();
      if(verbosity>=3) {
          SG_DEBUG("\nSelecting working set... ")
      }

      if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
          learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

      i=0;
      for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
          if((chosen[j]>=(learn_parm->svm_maxqpsize/
                          CMath::min(learn_parm->svm_maxqpsize,
                                     learn_parm->svm_newvarsinqp)))
             || (inconsistent[j])
             || (j == heldout)) {
              chosen[j]=0;
              choosenum--;
          }
          else {
              chosen[j]++;
              working2dnum[i++]=j;
          }
      }
      working2dnum[i]=-1;

      if(retrain == 2) {
          choosenum=0;
          for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
              chosen[j]=0;
          }
          clear_index(working2dnum);
          for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
              if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
                  chosen[i]=99999;
                  choosenum++;
                  a[i]=0;
              }
          }
          if(learn_parm->biased_hyperplane) {
              eq=0;
              for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
                  eq+=a[i]*label[i];
              }
              for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
                  if((eq*label[i] > 0) && (a[i] > 0)) {
                      chosen[i]=88888;
                      choosenum++;
                      if((eq*label[i]) > a[i]) {
                          eq-=(a[i]*label[i]);
                          a[i]=0;
                      }
                      else {
                          a[i]-=(eq*label[i]);
                          eq=0;
                      }
                  }
              }
          }
          compute_index(chosen,totdoc,working2dnum);
      }
      else
      {   /* select working set according to steepest gradient */
          if(iteration % 101)
          {
              already_chosen=0;
              if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
                      (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
              {
                  /* select part of the working set from cache */
                  already_chosen=select_next_qp_subproblem_grad(
                      label,a,lin,c,totdoc,
                      (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
                                        learn_parm->svm_newvarsinqp)/2),
                      inconsistent,active2dnum,
                      working2dnum,selcrit,selexam,1,
                      key,chosen);
                  choosenum+=already_chosen;
              }
              choosenum+=select_next_qp_subproblem_grad(
                  label,a,lin,c,totdoc,
                  CMath::min(learn_parm->svm_maxqpsize-choosenum,
                             learn_parm->svm_newvarsinqp-already_chosen),
                  inconsistent,active2dnum,
                  working2dnum,selcrit,selexam,0,key,
                  chosen);
          }
          else { /* once in a while, select a somewhat random working set
                    to break out of infinite loops caused by numerical
                    inaccuracies in the core qp-solver */
              choosenum+=select_next_qp_subproblem_rand(
                  label,a,lin,c,totdoc,
                  CMath::min(learn_parm->svm_maxqpsize-choosenum,
                             learn_parm->svm_newvarsinqp),
                  inconsistent,active2dnum,
                  working2dnum,selcrit,selexam,key,
                  chosen,iteration);
          }
      }

      if(verbosity>=2) {
          SG_INFO(" %ld vectors chosen\n",choosenum)
      }

      if(verbosity>=2) t1=get_runtime();

      if (use_kernel_cache)
      {
          // in case of MKL w/o linadd cache each kernel independently
          // else if linadd is disabled cache single kernel
          if ( callback &&
                  (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
             )
          {
              CCombinedKernel* k = (CCombinedKernel*) kernel;
              for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
              {
                  CKernel* kn = k->get_kernel(k_idx);
                  kn->cache_multiple_kernel_rows(working2dnum, choosenum);
                  SG_UNREF(kn);
              }
          }
          else
              kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
      }

      if(verbosity>=2) t2=get_runtime();

      if(retrain != 2) {
          optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
                       totdoc,working2dnum,choosenum,a,lin,c,
                       aicache,&qp,&epsilon_crit_org);
      }

      if(verbosity>=2) t3=get_runtime();
      update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                              lin,aicache,c);

      if(verbosity>=2) t4=get_runtime();
      supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);

      if(verbosity>=2) t5=get_runtime();

      for (jj=0;(i=working2dnum[jj])>=0;jj++) {
          a_old[i]=a[i];
      }

      retrain=check_optimality(label,a,lin,c,totdoc,
                               maxdiff,epsilon_crit_org,&misclassified,
                               inconsistent,active2dnum,last_suboptimal_at,
                               iteration);

      if(verbosity>=2) {
          t6=get_runtime();
          timing_profile->time_select+=t1-t0;
          timing_profile->time_kernel+=t2-t1;
          timing_profile->time_opti+=t3-t2;
          timing_profile->time_update+=t4-t3;
          timing_profile->time_model+=t5-t4;
          timing_profile->time_check+=t6-t5;
      }

      /* checking whether optimizer got stuck */
      if((*maxdiff) < bestmaxdiff) {
          bestmaxdiff=(*maxdiff);
          bestmaxdiffiter=iteration;
      }
      if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
          /* long time without progress? */
          terminate=1;
          retrain=0;
          SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n")
          SG_DEBUG("(iteration: %d, bestmaxdiffiter: %d, learn_parm->maxiter: %d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter )
      }

      noshrink= (get_shrinking_enabled()) ? 0 : 1;

      if ((!callback) && (!retrain) && (inactivenum>0) &&
              ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
      {
          t1=get_runtime();
          SG_DEBUG("reactivating inactive examples\n")

          reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                                       iteration,inconsistent,
                                       docs,aicache,
                                       maxdiff);
          reactivated=true;
          SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org)
          /* Update to new active variables. */
          activenum=compute_index(shrink_state->active,totdoc,active2dnum);
          inactivenum=totdoc-activenum;
          /* reset watchdog */
          bestmaxdiff=(*maxdiff);
          bestmaxdiffiter=iteration;

          /* termination criterion */
          noshrink=1;
          retrain=0;
          if((*maxdiff) > learn_parm->epsilon_crit)
          {
              SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit)
              retrain=1;
          }
          timing_profile->time_shrink+=get_runtime()-t1;
          if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
             || (verbosity>=2)) {
              SG_DONE()
              SG_DEBUG("Number of inactive variables = %ld\n", inactivenum)
          }
      }

      if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
          learn_parm->epsilon_crit=(*maxdiff);
      if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
          learn_parm->epsilon_crit/=4.0;
          retrain=1;
          noshrink=1;
      }
      if(learn_parm->epsilon_crit<epsilon_crit_org)
          learn_parm->epsilon_crit=epsilon_crit_org;

      if(verbosity>=2) {
          SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
                       supvecnum,model->at_upper_bound,(*maxdiff));
      }
      mymaxdiff=*maxdiff;

      //don't shrink w/ mkl
      if (((iteration % 10) == 0) && (!noshrink) && !callback)
      {
          activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
                  CMath::max((int32_t)(activenum/10),
                      CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
                  a,inconsistent, c, lin, label);

          inactivenum=totdoc-activenum;

          if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
                  && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {

              kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
                          (int32_t) (kernel->get_activenum_cache()-supvecnum)),
                      shrink_state->active);
          }
      }

      if (bestmaxdiff>worstmaxdiff)
          worstmaxdiff=bestmaxdiff;

      SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6)

      /* Terminate loop */
      if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
          terminate = 1;
          retrain = 0;
      }

  } /* end of loop */
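
  /* The loop above exits when check_optimality() reports no KT violation
   * larger than epsilon_crit (retrain==0), when the watchdog sees no
   * progress for learn_parm->maxiter iterations (terminate==1), when
   * m_max_train_time is exceeded, or when the user cancels via CSignal.
   * With an MKL callback it additionally keeps iterating until
   * mkl_converged is set. */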

  SG_DEBUG("inactive:%d\n", inactivenum)

  if (inactivenum && !reactivated && !callback)
  {
      SG_DEBUG("reactivating inactive examples\n")
      reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
          iteration,inconsistent,
          docs,aicache,
          maxdiff);
      SG_DEBUG("done reactivating inactive examples\n")
      /* Update to new active variables. */
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=totdoc-activenum;
      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
  }

  //use this for our purposes!
  criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
  CSVM::set_objective(criterion);

  SG_FREE(chosen);
  SG_FREE(last_suboptimal_at);
  SG_FREE(key);
  SG_FREE(selcrit);
  SG_FREE(selexam);
  SG_FREE(a_old);
  SG_FREE(aicache);
  SG_FREE(working2dnum);
  SG_FREE(active2dnum);
  SG_FREE(qp.opt_ce);
  SG_FREE(qp.opt_ce0);
  SG_FREE(qp.opt_g);
  SG_FREE(qp.opt_g0);
  SG_FREE(qp.opt_xinit);
  SG_FREE(qp.opt_low);
  SG_FREE(qp.opt_up);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */

  return(iteration);
}
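
/* optimize_to_convergence() implements the standard SVMlight decomposition
 * scheme: select a working set of at most svm_maxqpsize variables (by
 * steepest gradient, or randomized every 101st iteration), solve the induced
 * QP subproblem, update the gradient component lin[], recompute the model
 * and bias, check the KT conditions and periodically shrink the active set. */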

float64_t CSVMLight::compute_objective_function(
    float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
    int32_t totdoc)
     /* Return value of objective function. */
     /* Works only relative to the active variables! */
{
  /* calculate value of objective function */
  float64_t criterion=0;

  for (int32_t i=0;i<totdoc;i++)
      criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];

  return(criterion);
}
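
/* With the classification defaults set in svm_learn() (eps[i]=-1, c[i]=0)
 * and lin[i] = sum_j a_j y_j k(i,j), the sum above is the negated dual
 * objective:
 *   criterion = sum_i (eps_i - y_i c_i) a_i + 0.5 sum_i a_i y_i lin_i
 *             = -sum_i a_i + 0.5 sum_{i,j} y_i y_j a_i a_j k(i,j)
 */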


void CSVMLight::clear_index(int32_t *index)
              /* initializes and empties index */
{
  index[0]=-1;
}

void CSVMLight::add_to_index(int32_t *index, int32_t elem)
     /* appends elem to the end of index */
{
  register int32_t i;
  for (i=0;index[i] != -1;i++);
  index[i]=elem;
  index[i+1]=-1;
}

int32_t CSVMLight::compute_index(
    int32_t *binfeature, int32_t range, int32_t *index)
     /* create an inverted index of binfeature */
{
  register int32_t i,ii;

  ii=0;
  for (i=0;i<range;i++) {
    if(binfeature[i]) {
      index[ii]=i;
      ii++;
    }
  }
  for (i=0;i<4;i++) {
    index[ii+i]=-1;
  }
  return(ii);
}
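
/* Example: binfeature={0,1,1,0,1}, range=5 yields index={1,2,4,-1,...} and a
 * return value of 3. The trailing -1 entries are the sentinel that the
 * 'for (jj=0;(j=index[jj])>=0;jj++)' loops elsewhere stop on. */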


void CSVMLight::optimize_svm(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
    int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
    float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
     /* Do optimization on the working set. */
{
    int32_t i;
    float64_t *a_v;

    //compute_matrices_for_optimization_parallel(docs,label,
    //                                         exclude_from_eq_const,eq_target,chosen,
    //                                         active2dnum,working2dnum,a,lin,c,
    //                                         varnum,totdoc,aicache,qp);

    compute_matrices_for_optimization(docs,label,
                                      exclude_from_eq_const,eq_target,chosen,
                                      active2dnum,working2dnum,a,lin,c,
                                      varnum,totdoc,aicache,qp);

    if(verbosity>=3) {
     SG_DEBUG("Running optimizer...")
    }
    /* call the qp-subsolver */
    a_v=optimize_qp(qp,epsilon_crit_target,
            learn_parm->svm_maxqpsize,
            &(model->b),                /* in case the optimizer gives us */
            learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
        /* b is calculated in calculate_model. */
    if(verbosity>=3) {
     SG_DONE()
    }

    for (i=0;i<varnum;i++)
      a[working2dnum[i]]=a_v[i];
}

void CSVMLight::compute_matrices_for_optimization_parallel(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                                                   chosen, active2dnum, key, a, lin, c,
                                                   varnum, totdoc, aicache, qp);
    }
#ifdef HAVE_PTHREAD
    else
    {
        register int32_t ki,kj,i,j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
        for (j=1;j<model->sv_num;j++) { /* start at 1 */
            if((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0]+=model->alpha[j];
            }
        }
        if(learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0;  /* eq-constraint will be ignored */

        /* init linear part of objective function */
        for (i=0;i<varnum;i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1)
        int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
        int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
        int32_t Knum=0;
        float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
        for (i=0;i<varnum;i++) {
            ki=key[i];
            KI[Knum]=docs[ki];
            KJ[Knum]=docs[ki];
            Knum++;
            for (j=i+1;j<varnum;j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki];
                KJ[Knum]=docs[kj];
                Knum++;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2)

        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
        int32_t step= Knum/parallel->get_num_threads();
        //SG_DEBUG("\nkernel-step size: %i\n", step)
        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI;
            params[t].KJ=KJ;
            params[t].Kval=Kval;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i],KJ[i]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);

        Knum=0;
        for (i=0;i<varnum;i++) {
            ki=key[i];

            /* Compute the matrix for equality constraints */
            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum];
            Knum++;
            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1;j<varnum;j++) {
                kj=key[j];
                kernel_temp=Kval[Knum];
                Knum++;
                /* compute linear part of objective function */
                qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
                /* compute quadratic part of objective function */
                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
            }

            if(verbosity>=3) {
                if(i % 20 == 0) {
                    SG_DEBUG("%ld..",i)
                }
            }
        }

        SG_FREE(KI);
        SG_FREE(KJ);
        SG_FREE(Kval);

        for (i=0;i<varnum;i++) {
            /* assure starting at feasible point */
            qp->opt_xinit[i]=a[key[i]];
            /* set linear part of objective function */
            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if(verbosity>=3) {
            SG_DONE()
        }
    }
#endif
}

void CSVMLight::compute_matrices_for_optimization(
    int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
    float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
    float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
    float64_t *aicache, QP *qp)
{
  register int32_t ki,kj,i,j;
  register float64_t kernel_temp;

  qp->opt_n=varnum;
  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
  for (j=1;j<model->sv_num;j++) { /* start at 1 */
    if((!chosen[model->supvec[j]])
       && (!exclude_from_eq_const[(model->supvec[j])])) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for (i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for (i=0;i<varnum;i++) {
      ki=key[i];

      /* Compute the matrix for equality constraints */
      qp->opt_ce[i]=label[ki];
      qp->opt_low[i]=0;
      qp->opt_up[i]=learn_parm->svm_cost[ki];

      kernel_temp=compute_kernel(docs[ki], docs[ki]);
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
      /* compute quadratic part of objective function */
      qp->opt_g[varnum*i+i]=kernel_temp;

      for (j=i+1;j<varnum;j++) {
          kj=key[j];
          kernel_temp=compute_kernel(docs[ki], docs[kj]);

          /* compute linear part of objective function */
          qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
          qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
          /* compute quadratic part of objective function */
          qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
          qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j];
      }

      if(verbosity>=3) {
          if(i % 20 == 0) {
              SG_DEBUG("%ld..",i)
          }
      }
  }

  for (i=0;i<varnum;i++) {
      /* assure starting at feasible point */
      qp->opt_xinit[i]=a[key[i]];
      /* set linear part of objective function */
      qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
  }

  if(verbosity>=3) {
      SG_DONE()
  }
}
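
/* The filled QP structure describes the subproblem over the varnum chosen
 * variables x, as consumed by optimize_qp()/pr_loqo:
 *
 *   minimize    0.5*x'*opt_g*x + opt_g0'*x
 *   subject to  opt_ce'*x + opt_ce0 = 0   (dropped when opt_m==0)
 *               opt_low <= x <= opt_up
 *
 * with opt_g[varnum*i+j] = y_i*y_j*k(i,j) over the working set. */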


int32_t CSVMLight::calculate_svm_model(
    int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
    float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
     /* Compute decision function based on current values */
     /* of alpha. */
{
  int32_t i,ii,pos,b_calculated=0,first_low,first_high;
  float64_t ex_c,b_temp,b_low,b_high;

  if(verbosity>=3) {
   SG_DEBUG("Calculating model...")
  }

  if(!learn_parm->biased_hyperplane) {
    model->b=0;
    b_calculated=1;
  }

  for (ii=0;(i=working2dnum[ii])>=0;ii++) {
    if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
      pos=model->index[i];
      model->index[i]=-1;
      (model->sv_num)--;
      model->supvec[pos]=model->supvec[model->sv_num];
      model->alpha[pos]=model->alpha[model->sv_num];
      model->index[model->supvec[pos]]=pos;
    }
    else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
      model->index[i]=model->sv_num;
      (model->sv_num)++;
    }
    else if(a_old[i]==a[i]) { /* nothing to do */
    }
    else {  /* just update alpha */
      model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
    }

    ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
    if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
      (model->at_upper_bound)--;
    }
    else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
      (model->at_upper_bound)++;
    }

    if((!b_calculated)
       && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */
      model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
      b_calculated=1;
    }
  }

  /* No alpha in the working set not at bounds, so b was not
     calculated in the usual way. The following handles this special
     case. */
  if(learn_parm->biased_hyperplane
     && (!b_calculated)
     && (model->sv_num-1 == model->at_upper_bound)) {
    first_low=1;
    first_high=1;
    b_low=0;
    b_high=0;
    for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(a[i]<ex_c) {
        if(label[i]>0)  {
          b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
      else {
        if(label[i]<0)  {
          b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
    }
    if(first_high) {
      model->b=-b_low;
    }
    else if(first_low) {
      model->b=-b_high;
    }
    else {
      model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
    }
  }

  if(verbosity>=3) {
   SG_DONE()
  }

  return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}
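
/* Normally b is obtained from any free support vector (epsilon_a < a_i < C_i),
 * for which the KT conditions give b = y_i*eps_i - c_i + lin_i exactly. The
 * special case above (all SVs at the upper bound) instead brackets b between
 * the tightest bounds b_low/b_high implied by the bounded variables and takes
 * the midpoint. */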

int32_t CSVMLight::check_optimality(
    int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
    float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
    int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
    int32_t iteration)
     /* Check KT-conditions */
{
  int32_t i,ii,retrain;
  float64_t dist,ex_c,target;

  if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
      learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  else
      learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for (ii=0;(i=active2dnum[ii])>=0;ii++) {
      if((!inconsistent[i]) && label[i]) {
          dist=(lin[i]-model->b)*(float64_t)label[i];/* 'distance' from
                                                     hyperplane*/
          target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
          ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
          if(dist <= 0) {
              (*misclassified)++;  /* does not work due to deactivation of var */
          }
          if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
              if((dist-target)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=dist-target;
          }
          else if((a[i]<ex_c) && (dist < target)) {
              if((target-dist)>(*maxdiff))  /* largest violation */
                  (*maxdiff)=target-dist;
          }
          /* Count how long a variable was at lower/upper bound (and optimal). */
          /* Variables which were at the bound and optimal for a long */
          /* time are unlikely to become support vectors. In case our */
          /* cache is filled up, those variables are excluded to save */
          /* kernel evaluations. (See chapter 'Shrinking'.) */
          if((a[i]>(learn_parm->epsilon_a))
             && (a[i]<ex_c)) {
              last_suboptimal_at[i]=iteration;         /* not at bound */
          }
          else if((a[i]<=(learn_parm->epsilon_a))
                  && (dist < (target+learn_parm->epsilon_shrink))) {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
          else if((a[i]>=ex_c)
                  && (dist > (target-learn_parm->epsilon_shrink)))  {
              last_suboptimal_at[i]=iteration;         /* not likely optimal */
          }
      }
  }

  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
      retrain=1;
  }
  return(retrain);
}
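
/* The KT conditions checked above, with dist = y_i*(lin_i - b) and
 * target = -(eps_i - y_i*c_i) (target = 1 for plain classification):
 *   a_i == 0        requires  dist >= target,
 *   0 < a_i < C_i   requires  dist == target,
 *   a_i == C_i      requires  dist <= target.
 * *maxdiff reports the largest violation found; it drives both the
 * termination test and the adaptive shrinking threshold epsilon_shrink. */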
01420 
01421 void CSVMLight::update_linear_component(
01422     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01423     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01424     float64_t *aicache, float64_t* c)
01425      /* keep track of the linear component */
01426      /* lin of the gradient etc. by updating */
01427      /* based on the change of the variables */
01428      /* in the current working set */
01429 {
01430     register int32_t i=0,ii=0,j=0,jj=0;
01431 
01432     if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
01433     {
01434         if (callback)
01435         {
01436             update_linear_component_mkl_linadd(docs, label, active2dnum, a,
01437                     a_old, working2dnum, totdoc, lin, aicache);
01438         }
01439         else
01440         {
01441             kernel->clear_normal();
01442 
01443             int32_t num_working=0;
01444             for (ii=0;(i=working2dnum[ii])>=0;ii++) {
01445                 if(a[i] != a_old[i]) {
01446                     kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01447                     num_working++;
01448                 }
01449             }
01450 
01451             if (num_working>0)
01452             {
01453                 if (parallel->get_num_threads() < 2)
01454                 {
01455                     for (jj=0;(j=active2dnum[jj])>=0;jj++) {
01456                         lin[j]+=kernel->compute_optimized(docs[j]);
01457                     }
01458                 }
01459 #ifdef HAVE_PTHREAD
01460                 else
01461                 {
01462                     int32_t num_elem = 0 ;
01463                     for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++ ;
01464 
01465                     pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
01466                     S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
01467                     int32_t start = 0 ;
01468                     int32_t step = num_elem/parallel->get_num_threads();
01469                     int32_t end = step ;
01470 
01471                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01472                     {
01473                         params[t].kernel = kernel ;
01474                         params[t].lin = lin ;
01475                         params[t].docs = docs ;
01476                         params[t].active2dnum=active2dnum ;
01477                         params[t].start = start ;
01478                         params[t].end = end ;
01479                         start=end ;
01480                         end+=step ;
01481                         pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]) ;
01482                     }
01483 
01484                     for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
01485                         lin[j]+=kernel->compute_optimized(docs[j]);
01486                     }
01487                     void* ret;
01488                     for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01489                         pthread_join(threads[t], &ret) ;
01490 
01491                     SG_FREE(params);
01492                     SG_FREE(threads);
01493                 }
01494 #endif
01495             }
01496         }
01497     }
01498     else
01499     {
01500         if (callback)
01501         {
01502             update_linear_component_mkl(docs, label, active2dnum,
01503                     a, a_old, working2dnum, totdoc, lin, aicache);
01504         }
01505         else {
01506             for (jj=0;(i=working2dnum[jj])>=0;jj++) {
01507                 if(a[i] != a_old[i]) {
01508                     kernel->get_kernel_row(i,active2dnum,aicache);
01509                     for (ii=0;(j=active2dnum[ii])>=0;ii++)
01510                         lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01511                 }
01512             }
01513         }
01514     }
01515 }
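/* With KP_LINADD the gradient cache lin[] is refreshed via an explicit
 * normal vector: clear_normal() resets w, the alpha deltas
 * (a[i]-a_old[i])*y[i] are folded into w once, and every active example then
 * costs a single compute_optimized() call instead of a full kernel row --
 * O(#changed*dim + #active*dim) rather than O(#changed * #active) kernel
 * evaluations. A sketch of the same idea for a dense linear kernel,
 * illustrative only (hypothetical names, excluded from compilation): */
#if 0
#include <algorithm>
#include <vector>

void linadd_update(const std::vector<std::vector<double> >& x,
                   std::vector<double>& w, std::vector<double>& lin,
                   const std::vector<double>& a,
                   const std::vector<double>& a_old,
                   const std::vector<int>& y)
{
    std::fill(w.begin(), w.end(), 0.0);            /* clear_normal()      */
    for (size_t i = 0; i < x.size(); ++i)          /* add_to_normal()     */
        if (a[i] != a_old[i])
            for (size_t d = 0; d < w.size(); ++d)
                w[d] += (a[i] - a_old[i]) * y[i] * x[i][d];

    for (size_t j = 0; j < x.size(); ++j)          /* compute_optimized() */
    {
        double dot = 0.0;
        for (size_t d = 0; d < w.size(); ++d)
            dot += w[d] * x[j][d];
        lin[j] += dot;       /* add only the contribution of the deltas */
    }
}
#endif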
01516 
01517 
01518 void CSVMLight::update_linear_component_mkl(
01519     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01520     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01521     float64_t *aicache)
01522 {
01523     //int inner_iters=0;
01524     int32_t num = kernel->get_num_vec_rhs();
01525     int32_t num_weights = -1;
01526     int32_t num_kernels = kernel->get_num_subkernels() ;
01527     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01528     ASSERT(num_weights==num_kernels)
01529 
01530     if ((kernel->get_kernel_type()==K_COMBINED) &&
01531              (!((CCombinedKernel*) kernel)->get_append_subkernel_weights()))// for combined kernel
01532     {
01533         CCombinedKernel* k = (CCombinedKernel*) kernel;
01534 
01535         int32_t n = 0, i, j ;
01536 
01537         for (index_t k_idx=0; k_idx<k->get_num_kernels(); k_idx++)
01538         {
01539             CKernel* kn = k->get_kernel(k_idx);
01540             for (i=0;i<num;i++)
01541             {
01542                 if(a[i] != a_old[i])
01543                 {
01544                     kn->get_kernel_row(i,NULL,aicache, true);
01545                     for (j=0;j<num;j++)
01546                         W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
01547                 }
01548             }
01549 
01550             SG_UNREF(kn);
01551             n++ ;
01552         }
01553     }
01554     else // hope the kernel is fast ...
01555     {
01556         float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
01557         float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
01558 
01559         // backup and set to zero
01560         for (int32_t i=0; i<num_kernels; i++)
01561         {
01562             w_backup[i] = old_beta[i] ;
01563             w1[i]=0.0 ;
01564         }
01565         for (int32_t n=0; n<num_kernels; n++)
01566         {
01567             w1[n]=1.0 ;
01568             kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));
01569 
01570             for (int32_t i=0;i<num;i++)
01571             {
01572                 if(a[i] != a_old[i])
01573                 {
01574                     for (int32_t j=0;j<num;j++)
01575                         W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
01576                 }
01577             }
01578             w1[n]=0.0 ;
01579         }
01580 
01581         // restore old weights
01582         kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
01583 
01584         SG_FREE(w_backup);
01585         SG_FREE(w1);
01586     }
01587 
01588     call_mkl_callback(a, label, lin);
01589 }
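/* Both MKL update paths fill the same matrix W, flattened per example as
 * W[j*num_kernels+n], which incrementally maintains
 * W(j,n) = sum_i a_i*y_i*k_n(x_i,x_j); the unit-weight trick (w1) isolates
 * subkernel n when only the combined kernel can be evaluated. Given W and
 * subkernel weights beta, the gradient cache is then recovered per example
 * as in this sketch, illustrative only (hypothetical names, excluded from
 * compilation): */
#if 0
#include <vector>

/* lin[j] = sum_n beta[n] * W(j,n), with W stored one row per example */
double mkl_lin(const std::vector<double>& W, const std::vector<double>& beta,
               int j, int num_kernels)
{
    double lin_j = 0.0;
    for (int n = 0; n < num_kernels; ++n)
        lin_j += beta[n] * W[j*num_kernels + n];
    return lin_j;
}
#endif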
01590 
01591 
01592 void CSVMLight::update_linear_component_mkl_linadd(
01593     int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
01594     float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
01595     float64_t *aicache)
01596 {
01597     //int inner_iters=0;
01598 
01599     // a kernel with the KP_LINADD property is assumed to provide
01600     // compute_by_subkernel functions
01601     int32_t num = kernel->get_num_vec_rhs();
01602     int32_t num_weights = -1;
01603     int32_t num_kernels = kernel->get_num_subkernels() ;
01604     const float64_t* old_beta   = kernel->get_subkernel_weights(num_weights);
01605     ASSERT(num_weights==num_kernels)
01606 
01607     float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
01608     float64_t* w1 = SG_MALLOC(float64_t, num_kernels);
01609 
01610     // backup and set to one
01611     for (int32_t i=0; i<num_kernels; i++)
01612     {
01613         w_backup[i] = old_beta[i] ;
01614         w1[i]=1.0 ;
01615     }
01616     // set the kernel weights
01617     kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));
01618 
01619     // create normal update (with changed alphas only)
01620     kernel->clear_normal();
01621     for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
01622         if(a[i] != a_old[i]) {
01623             kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
01624         }
01625     }
01626 
01627     if (parallel->get_num_threads() < 2)
01628     {
01629         // determine contributions of different kernels
01630         for (int32_t i=0; i<num; i++)
01631             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01632     }
01633 #ifdef HAVE_PTHREAD
01634     else
01635     {
01636         pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
01637         S_THREAD_PARAM_SVMLIGHT* params = SG_MALLOC(S_THREAD_PARAM_SVMLIGHT, parallel->get_num_threads()-1);
01638         int32_t step= num/parallel->get_num_threads();
01639 
01640         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01641         {
01642             params[t].kernel = kernel;
01643             params[t].W = W;
01644             params[t].start = t*step;
01645             params[t].end = (t+1)*step;
01646             pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
01647         }
01648 
01649         for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
01650             kernel->compute_by_subkernel(i,&W[i*num_kernels]);
01651 
01652         for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
01653             pthread_join(threads[t], NULL);
01654 
01655         SG_FREE(params);
01656         SG_FREE(threads);
01657     }
01658 #endif
01659 
01660     // restore old weights
01661     kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));
01662 
01663     call_mkl_callback(a, label, lin);
01664 }
01665 
01666 void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
01667 {
01668     S_THREAD_PARAM_SVMLIGHT* params = (S_THREAD_PARAM_SVMLIGHT*) p;
01669 
01670     int32_t num_kernels=params->kernel->get_num_subkernels();
01671 
01672     // determine contributions of different kernels
01673     for (int32_t i=params->start; i<params->end; i++)
01674         params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));
01675 
01676     return NULL ;
01677 }
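/* The pthread branches in this file all share one fan-out pattern:
 * num/num_threads elements per chunk, num_threads-1 worker threads, and the
 * calling thread processing the remainder itself before joining. A generic
 * sketch of that pattern, illustrative only (hypothetical names, excluded
 * from compilation): */
#if 0
#include <pthread.h>
#include <vector>

struct Range { int start, end; };

static void* work(void* p)   /* stands in for the *_helper() functions */
{
    Range* r = (Range*)p;
    for (int i = r->start; i < r->end; ++i)
        (void)i;             /* ... per-element computation goes here ... */
    return NULL;
}

void fan_out(int num, int num_threads)
{
    std::vector<pthread_t> threads(num_threads-1);
    std::vector<Range> params(num_threads);
    int step = num/num_threads;

    for (int t = 0; t < num_threads-1; t++)
    {
        params[t].start = t*step;
        params[t].end   = (t+1)*step;
        pthread_create(&threads[t], NULL, work, (void*)&params[t]);
    }
    params[num_threads-1].start = (num_threads-1)*step;
    params[num_threads-1].end   = num;  /* tail runs in the calling thread */
    work((void*)&params[num_threads-1]);

    for (int t = 0; t < num_threads-1; t++)
        pthread_join(threads[t], NULL);
}
#endif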
01678 
01679 void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
01680 {
01681     int32_t num = kernel->get_num_vec_rhs();
01682     int32_t num_kernels = kernel->get_num_subkernels() ;
01683 
01684     float64_t suma=0;
01685     float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
01686 #ifdef HAVE_LAPACK
01687     int nk = (int) num_kernels; /* calling external lib */
01688     double* alphay  = SG_MALLOC(double, num);
01689 
01690     for (int32_t i=0; i<num; i++)
01691     {
01692         alphay[i]=a[i]*label[i];
01693         suma+=a[i];
01694     }
01695 
01696     for (int32_t i=0; i<num_kernels; i++)
01697         sumw[i]=0;
01698 
01699     cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
01700             num_kernels, alphay, 1, 1.0, (double*) sumw, 1);
01701 
01702     SG_FREE(alphay);
01703 #else
01704     for (int32_t i=0; i<num; i++)
01705         suma += a[i];
01706 
01707     for (int32_t d=0; d<num_kernels; d++)
01708     {
01709         sumw[d]=0;
01710         for (int32_t i=0; i<num; i++)
01711             sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
01712     }
01713 #endif
01714 
01715     if (callback)
01716         mkl_converged=callback(mkl, sumw, suma);
01717 
01718 
01719     const float64_t* new_beta   = kernel->get_subkernel_weights(num_kernels);
01720 
01721     // update lin
01722 #ifdef HAVE_LAPACK
01723     cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
01724         nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
01725 #else
01726     for (int32_t i=0; i<num; i++)
01727         lin[i]=0 ;
01728     for (int32_t d=0; d<num_kernels; d++)
01729         if (new_beta[d]!=0)
01730             for (int32_t i=0; i<num; i++)
01731                 lin[i] += new_beta[d]*W[i*num_kernels+d] ;
01732 #endif
01733 
01734     SG_FREE(sumw);
01735 }
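/* The two cblas_dgemv calls are ordinary dense matrix-vector products over
 * W (column-major with lda=num_kernels, so BLAS entry A(n,i) is
 * W[i*num_kernels+n]): the first forms sumw[n] = 0.5*sum_i a[i]*y[i]*W(i,n),
 * the per-subkernel quadratic term handed to the MKL callback, and the
 * second rebuilds lin[i] = sum_n beta[n]*W(i,n) for the new weights (see
 * the mkl_lin() sketch above). The first product spelled out, illustrative
 * only (hypothetical names, excluded from compilation): */
#if 0
#include <vector>

void mkl_sumw(const std::vector<double>& W, const std::vector<double>& a,
              const std::vector<int>& y, int num, int num_kernels,
              std::vector<double>& sumw)
{
    for (int n = 0; n < num_kernels; ++n)
        sumw[n] = 0.0;
    for (int i = 0; i < num; ++i)
        for (int n = 0; n < num_kernels; ++n)
            sumw[n] += 0.5 * a[i] * y[i] * W[i*num_kernels + n];
}
#endif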
01736 
01737 
01738 /*************************** Working set selection ***************************/
01739 
01740 int32_t CSVMLight::select_next_qp_subproblem_grad(
01741     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01742     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01743     int32_t *working2dnum, float64_t *selcrit, int32_t *select,
01744     int32_t cache_only, int32_t *key, int32_t *chosen)
01745     /* Use the feasible direction approach to select the next
01746        qp-subproblem (see chapter 'Selecting a good working set'). If
01747        'cache_only' is true, then the variables are selected only among
01748        those for which the kernel evaluations are cached. */
01749 {
01750     int32_t choosenum,i,j,k,activedoc,inum,valid;
01751     float64_t s;
01752 
01753     for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01754     choosenum=0;
01755     activedoc=0;
01756     for (i=0;(j=active2dnum[i])>=0;i++) {
01757         s=-label[j];
01758         if(cache_only)
01759         {
01760             if (use_kernel_cache)
01761                 valid=(kernel->kernel_cache_check(j));
01762             else
01763                 valid = 1 ;
01764         }
01765         else
01766             valid=1;
01767         if(valid
01768            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01769            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01770                  && (s>0)))
01771            && (!chosen[j])
01772            && (label[j])
01773            && (!inconsistent[j]))
01774         {
01775             selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01776             key[activedoc]=j;
01777             activedoc++;
01778         }
01779     }
01780     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01781     for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01782         i=key[select[k]];
01783         chosen[i]=1;
01784         working2dnum[inum+choosenum]=i;
01785         choosenum+=1;
01786         if (use_kernel_cache)
01787             kernel->kernel_cache_touch(i);
01788         /* make sure it does not get kicked */
01789         /* out of cache */
01790     }
01791 
01792     activedoc=0;
01793     for (i=0;(j=active2dnum[i])>=0;i++) {
01794         s=label[j];
01795         if(cache_only)
01796         {
01797             if (use_kernel_cache)
01798                 valid=(kernel->kernel_cache_check(j));
01799             else
01800                 valid = 1 ;
01801         }
01802         else
01803             valid=1;
01804         if(valid
01805            && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01806            && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01807                  && (s>0)))
01808            && (!chosen[j])
01809            && (label[j])
01810            && (!inconsistent[j]))
01811         {
01812             selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]);
01813             /*  selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */
01814             key[activedoc]=j;
01815             activedoc++;
01816         }
01817     }
01818     select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01819     for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01820         i=key[select[k]];
01821         chosen[i]=1;
01822         working2dnum[inum+choosenum]=i;
01823         choosenum+=1;
01824         if (use_kernel_cache)
01825             kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01826         /* out of cache */
01827     }
01828     working2dnum[inum+choosenum]=-1; /* complete index */
01829     return(choosenum);
01830 }
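/* selcrit is the label-signed dual gradient
 * y_j*(eps_j - y_j*c_j + y_j*lin_j): the first pass ranks variables that may
 * still move down (direction s=-y_j feasible), the second those that may
 * move up, and select_top_n() keeps the qp_size/2 steepest of each --
 * feasible-direction selection as in the 'good working set' chapter. A
 * miniature of one pass, illustrative only (hypothetical names, excluded
 * from compilation): */
#if 0
#include <algorithm>
#include <utility>
#include <vector>

/* rank candidates by criterion (largest first) and append the best
 * `half` of them to the working set, as each pass above does */
void pick_half(const double* selcrit, const int* key, int n_cand,
               int half, std::vector<int>& working_set)
{
    std::vector<std::pair<double,int> > c(n_cand);
    for (int k = 0; k < n_cand; ++k)
        c[k] = std::make_pair(-selcrit[k], key[k]); /* negate: descending */
    std::sort(c.begin(), c.end());
    for (int k = 0; k < half && k < n_cand; ++k)
        working_set.push_back(c[k].second);
}
#endif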
01831 
01832 int32_t CSVMLight::select_next_qp_subproblem_rand(
01833     int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
01834     int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
01835     int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
01836     int32_t *chosen, int32_t iteration)
01837 /* Use the feasible direction approach to select the next
01838    qp-subproblem (see section 'Selecting a good working set'). Chooses
01839    a feasible direction at (pseudo) random to help jump over numerical
01840    problems. */
01841 {
01842   int32_t choosenum,i,j,k,activedoc,inum;
01843   float64_t s;
01844 
01845   for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
01846   choosenum=0;
01847   activedoc=0;
01848   for (i=0;(j=active2dnum[i])>=0;i++) {
01849     s=-label[j];
01850     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01851        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01852          && (s>0)))
01853        && (!inconsistent[j])
01854        && (label[j])
01855        && (!chosen[j])) {
01856       selcrit[activedoc]=(j+iteration) % totdoc;
01857       key[activedoc]=j;
01858       activedoc++;
01859     }
01860   }
01861   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01862   for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
01863     i=key[select[k]];
01864     chosen[i]=1;
01865     working2dnum[inum+choosenum]=i;
01866     choosenum+=1;
01867     if (use_kernel_cache)
01868         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01869                                         /* out of cache */
01870   }
01871 
01872   activedoc=0;
01873   for (i=0;(j=active2dnum[i])>=0;i++) {
01874     s=label[j];
01875     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
01876        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
01877          && (s>0)))
01878        && (!inconsistent[j])
01879        && (label[j])
01880        && (!chosen[j])) {
01881       selcrit[activedoc]=(j+iteration) % totdoc;
01882       key[activedoc]=j;
01883       activedoc++;
01884     }
01885   }
01886   select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
01887   for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
01888     i=key[select[k]];
01889     chosen[i]=1;
01890     working2dnum[inum+choosenum]=i;
01891     choosenum+=1;
01892     if (use_kernel_cache)
01893         kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
01894                                         /* out of cache */
01895   }
01896   working2dnum[inum+choosenum]=-1; /* complete index */
01897   return(choosenum);
01898 }
01899 
01900 
01901 
01902 void CSVMLight::select_top_n(
01903     float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
01904 {
01905   register int32_t i,j;
01906 
01907   for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
01908     for (j=i;j>=0;j--) {
01909       if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01910         select[j]=select[j-1];
01911       }
01912       else {
01913         select[j]=i;
01914         j=-1;
01915       }
01916     }
01917   }
01918   if(n>0) {
01919     for (i=n;i<range;i++) {
01920       if(selcrit[i]>selcrit[select[n-1]]) {
01921         for (j=n-1;j>=0;j--) {
01922           if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
01923             select[j]=select[j-1];
01924           }
01925           else {
01926             select[j]=i;
01927             j=-1;
01928           }
01929         }
01930       }
01931     }
01932   }
01933 }
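/* select_top_n() is a hand-rolled partial insertion sort, O(range*n),
 * leaving the indices of the n largest criteria in select[0..n-1], best
 * first. With the STL available, the same contract is a std::partial_sort
 * over an index array; a sketch, illustrative only (hypothetical names,
 * excluded from compilation): */
#if 0
#include <algorithm>
#include <vector>

struct ByCrit
{
    const double* crit;
    explicit ByCrit(const double* c) : crit(c) {}
    bool operator()(int l, int r) const { return crit[l] > crit[r]; }
};

void select_top_n_stl(const double* selcrit, int range, int* select, int n)
{
    if (n > range)
        n = range;
    std::vector<int> idx(range);
    for (int i = 0; i < range; ++i)
        idx[i] = i;
    std::partial_sort(idx.begin(), idx.begin()+n, idx.end(), ByCrit(selcrit));
    std::copy(idx.begin(), idx.begin()+n, select);
}
#endif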
01934 
01935 
01936 /******************************** Shrinking  *********************************/
01937 
01938 void CSVMLight::init_shrink_state(
01939     SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
01940 {
01941   int32_t i;
01942 
01943   shrink_state->deactnum=0;
01944   shrink_state->active = SG_MALLOC(int32_t, totdoc);
01945   shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
01946   shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
01947   shrink_state->maxhistory=maxhistory;
01948   shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
01949   shrink_state->last_a = SG_MALLOC(float64_t, totdoc);
01950 
01951   for (i=0;i<totdoc;i++) {
01952     shrink_state->active[i]=1;
01953     shrink_state->inactive_since[i]=0;
01954     shrink_state->last_a[i]=0;
01955     shrink_state->last_lin[i]=0;
01956   }
01957 }
01958 
01959 void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
01960 {
01961   SG_FREE(shrink_state->active);
01962   SG_FREE(shrink_state->inactive_since);
01963   if(shrink_state->deactnum > 0)
01964     SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
01965   SG_FREE((shrink_state->a_history));
01966   SG_FREE((shrink_state->last_a));
01967   SG_FREE((shrink_state->last_lin));
01968 }
01969 
01970 int32_t CSVMLight::shrink_problem(
01971     SHRINK_STATE *shrink_state, int32_t *active2dnum,
01972     int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
01973     int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
01974     float64_t* lin, int* label)
01975      /* Shrink some variables away.  Do the shrinking only if at least
01976         minshrink variables can be removed. */
01977 {
01978   int32_t i,ii,change,activenum,lastiter;
01979   float64_t *a_old=NULL;
01980 
01981   activenum=0;
01982   change=0;
01983   for (ii=0;active2dnum[ii]>=0;ii++) {
01984       i=active2dnum[ii];
01985       activenum++;
01986       lastiter=last_suboptimal_at[i];
01987 
01988       if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
01989          || (inconsistent[i])) {
01990           change++;
01991       }
01992   }
01993   if((change>=minshrink) /* shrink only if sufficiently many candidates */
01994      && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
01995       /* Shrink problem by removing those variables which are */
01996       /* optimal at a bound for a minimum number of iterations */
01997       if(verbosity>=2) {
01998           SG_INFO(" Shrinking...")
01999       }
02000 
02001       if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case: save alphas */
02002 
02003           a_old=SG_MALLOC(float64_t, totdoc);
02004           shrink_state->a_history[shrink_state->deactnum]=a_old;
02005           for (i=0;i<totdoc;i++) {
02006               a_old[i]=a[i];
02007           }
02008       }
02009       for (ii=0;active2dnum[ii]>=0;ii++) {
02010           i=active2dnum[ii];
02011           lastiter=last_suboptimal_at[i];
02012           if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
02013              || (inconsistent[i])) {
02014               shrink_state->active[i]=0;
02015               shrink_state->inactive_since[i]=shrink_state->deactnum;
02016           }
02017       }
02018       activenum=compute_index(shrink_state->active,totdoc,active2dnum);
02019       shrink_state->deactnum++;
02020       if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
02021           shrink_state->deactnum=0;
02022 
02023       if(verbosity>=2) {
02024           SG_DONE()
02025           SG_DEBUG("Number of inactive variables = %ld\n", totdoc-activenum)
02026       }
02027   }
02028   return(activenum);
02029 }
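/* The shrink heuristic: a variable leaves the active set once it has looked
 * optimal-at-bound for more than svm_iter_to_shrink consecutive iterations
 * (the KT check above refreshes last_suboptimal_at[] whenever a variable is
 * free or still plausibly suboptimal), or when it is marked inconsistent.
 * The predicate in isolation, illustrative only (excluded from
 * compilation): */
#if 0
inline bool should_shrink(int32_t iteration, int32_t last_suboptimal_at,
                          int32_t svm_iter_to_shrink, bool inconsistent)
{
    return ((iteration - last_suboptimal_at) > svm_iter_to_shrink)
        || inconsistent;
}
#endif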
02030 
02031 void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
02032 {
02033     S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;
02034 
02035     CKernel* k = params->kernel;
02036     float64_t* lin = params->lin;
02037     float64_t* last_lin = params->last_lin;
02038     int32_t* active = params->active;
02039     int32_t* docs = params->docs;
02040     int32_t start = params->start;
02041     int32_t end = params->end;
02042 
02043     for (int32_t i=start;i<end;i++)
02044     {
02045         if (!active[i])
02046             lin[i] = last_lin[i]+k->compute_optimized(docs[i]);
02047 
02048         last_lin[i]=lin[i];
02049     }
02050 
02051     return NULL;
02052 }
02053 
02054 void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
02055 {
02056     S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
02057     ASSERT(params)
02058     ASSERT(params->kernel)
02059     ASSERT(params->lin)
02060     ASSERT(params->aicache)
02061     ASSERT(params->a)
02062     ASSERT(params->a_old)
02063     ASSERT(params->changed2dnum)
02064     ASSERT(params->inactive2dnum)
02065     ASSERT(params->label)
02066 
02067     CKernel* k = params->kernel;
02068     float64_t* lin = params->lin;
02069     float64_t* aicache = params->aicache;
02070     float64_t* a= params->a;
02071     float64_t* a_old = params->a_old;
02072     int32_t* changed2dnum = params->changed2dnum;
02073     int32_t* inactive2dnum = params->inactive2dnum;
02074     int32_t* label = params->label;
02075     int32_t start = params->start;
02076     int32_t end = params->end;
02077 
02078     for (int32_t ii=start;ii<end;ii++)
02079     {
02080         int32_t i=changed2dnum[ii];
02081         int32_t j=0;
02082         ASSERT(i>=0)
02083 
02084         k->get_kernel_row(i,inactive2dnum,aicache);
02085         for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
02086             lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02087     }
02088     return NULL;
02089 }
02090 
02091 void CSVMLight::reactivate_inactive_examples(
02092     int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
02093     float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
02094     int32_t* docs, float64_t *aicache, float64_t *maxdiff)
02095      /* Make all variables active again which had been removed by
02096         shrinking. */
02097      /* Computes lin for those variables from scratch. */
02098 {
02099   register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
02100   int32_t *changed,*inactive;
02101   register float64_t *a_old,dist;
02102   float64_t ex_c,target;
02103 
02104   if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
02105       a_old=shrink_state->last_a;
02106 
02107       if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
02108       {
02109           SG_DEBUG(" clear normal - linadd\n")
02110           kernel->clear_normal();
02111 
02112           int32_t num_modified=0;
02113           for (i=0;i<totdoc;i++) {
02114               if(a[i] != a_old[i]) {
02115                   kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
02116                   a_old[i]=a[i];
02117                   num_modified++;
02118               }
02119           }
02120 
02121           if (num_modified>0)
02122           {
02123               int32_t num_threads=parallel->get_num_threads();
02124               ASSERT(num_threads>0)
02125               if (num_threads < 2)
02126               {
02127                   S_THREAD_PARAM_REACTIVATE_LINADD params;
02128                   params.kernel=kernel;
02129                   params.lin=lin;
02130                   params.last_lin=shrink_state->last_lin;
02131                   params.docs=docs;
02132                   params.active=shrink_state->active;
02133                   params.start=0;
02134                   params.end=totdoc;
02135                   reactivate_inactive_examples_linadd_helper((void*) &params);
02136               }
02137 #ifdef HAVE_PTHREAD
02138               else
02139               {
02140                   pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
02141                   S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
02142                   int32_t step= totdoc/num_threads;
02143 
02144                   for (t=0; t<num_threads-1; t++)
02145                   {
02146                       params[t].kernel=kernel;
02147                       params[t].lin=lin;
02148                       params[t].last_lin=shrink_state->last_lin;
02149                       params[t].docs=docs;
02150                       params[t].active=shrink_state->active;
02151                       params[t].start = t*step;
02152                       params[t].end = (t+1)*step;
02153                       pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
02154                   }
02155 
02156                   params[t].kernel=kernel;
02157                   params[t].lin=lin;
02158                   params[t].last_lin=shrink_state->last_lin;
02159                   params[t].docs=docs;
02160                   params[t].active=shrink_state->active;
02161                   params[t].start = t*step;
02162                   params[t].end = totdoc;
02163                   reactivate_inactive_examples_linadd_helper((void*) &params[t]);
02164 
02165                   for (t=0; t<num_threads-1; t++)
02166                       pthread_join(threads[t], NULL);
02167 
02168                   SG_FREE(threads);
02169                   SG_FREE(params);
02170               }
02171 #endif
02172 
02173           }
02174       }
02175       else
02176       {
02177           float64_t *alphas = SG_MALLOC(float64_t, totdoc);
02178           int32_t *idx = SG_MALLOC(int32_t, totdoc);
02179           int32_t num_suppvec=0 ;
02180 
02181           for (i=0; i<totdoc; i++)
02182           {
02183               if(a[i] != a_old[i])
02184               {
02185                   alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
02186                   idx[num_suppvec] = i ;
02187                   a_old[i] = a[i] ;
02188                   num_suppvec++ ;
02189               }
02190           }
02191 
02192           if (num_suppvec>0)
02193           {
02194               int32_t num_inactive=0;
02195               int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset
02196 
02197               j=0;
02198               for (i=0;i<totdoc;i++)
02199               {
02200                   if(!shrink_state->active[i])
02201                   {
02202                       inactive_idx[j++] = i;
02203                       num_inactive++;
02204                   }
02205               }
02206 
02207               if (num_inactive>0)
02208               {
02209                   float64_t* dest = SG_MALLOC(float64_t, num_inactive);
02210                   memset(dest, 0, sizeof(float64_t)*num_inactive);
02211 
02212                   kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);
02213 
02214                   j=0;
02215                   for (i=0;i<totdoc;i++) {
02216                       if(!shrink_state->active[i]) {
02217                           lin[i] = shrink_state->last_lin[i] + dest[j++] ;
02218                       }
02219                       shrink_state->last_lin[i]=lin[i];
02220                   }
02221 
02222                   SG_FREE(dest);
02223               }
02224               else
02225               {
02226                   for (i=0;i<totdoc;i++)
02227                       shrink_state->last_lin[i]=lin[i];
02228               }
02229               SG_FREE(inactive_idx);
02230           }
02231           SG_FREE(alphas);
02232           SG_FREE(idx);
02233       }
02234 
02235       kernel->delete_optimization();
02236   }
02237   else
02238   {
02239       changed=SG_MALLOC(int32_t, totdoc);
02240       changed2dnum=SG_MALLOC(int32_t, totdoc+11);
02241       inactive=SG_MALLOC(int32_t, totdoc);
02242       inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
02243       for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
02244       {
02245           if(verbosity>=2) {
02246               SG_INFO("%ld..",t)
02247           }
02248           a_old=shrink_state->a_history[t];
02249           for (i=0;i<totdoc;i++) {
02250               inactive[i]=((!shrink_state->active[i])
02251                            && (shrink_state->inactive_since[i] == t));
02252               changed[i]= (a[i] != a_old[i]);
02253           }
02254           compute_index(inactive,totdoc,inactive2dnum);
02255           compute_index(changed,totdoc,changed2dnum);
02256 
02257 
02258           //TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
02259           int32_t num_threads=1;
02260           ASSERT(num_threads>0)
02261 
02262           if (num_threads < 2)
02263           {
02264               for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
02265                   kernel->get_kernel_row(i,inactive2dnum,aicache);
02266                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02267                       lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
02268               }
02269           }
02270 #ifdef HAVE_PTHREAD
02271           else
02272           {
02273               //find number of the changed ones
02274               int32_t num_changed=0;
02275               for (ii=0;changed2dnum[ii]>=0;ii++)
02276                   num_changed++;
02277 
02278               if (num_changed>0)
02279               {
02280                   pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
02281                   S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
02282                   int32_t step= num_changed/num_threads;
02283 
02284                   // alloc num_threads many tmp buffers
02285                   float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
02286                   memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02287                   float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
02288                   memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
02289 
02290                   int32_t thr;
02291                   for (thr=0; thr<num_threads-1; thr++)
02292                   {
02293                       params[thr].kernel=kernel;
02294                       params[thr].lin=&tmp_lin[thr*totdoc];
02295                       params[thr].aicache=&tmp_aicache[thr*totdoc];
02296                       params[thr].a=a;
02297                       params[thr].a_old=a_old;
02298                       params[thr].changed2dnum=changed2dnum;
02299                       params[thr].inactive2dnum=inactive2dnum;
02300                       params[thr].label=label;
02301                       params[thr].start = thr*step;
02302                       params[thr].end = (thr+1)*step;
02303                       pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
02304                   }
02305 
02306                   params[thr].kernel=kernel;
02307                   params[thr].lin=&tmp_lin[thr*totdoc];
02308                   params[thr].aicache=&tmp_aicache[thr*totdoc];
02309                   params[thr].a=a;
02310                   params[thr].a_old=a_old;
02311                   params[thr].changed2dnum=changed2dnum;
02312                   params[thr].inactive2dnum=inactive2dnum;
02313                   params[thr].label=label;
02314                   params[thr].start = thr*step;
02315                   params[thr].end = num_changed;
02316                   reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);
02317 
02318                   for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02319                       lin[j]+=tmp_lin[totdoc*thr+j];
02320 
02321                   for (thr=0; thr<num_threads-1; thr++)
02322                   {
02323                       pthread_join(threads[thr], NULL);
02324 
02325                       //add up results
02326                       for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
02327                           lin[j]+=tmp_lin[totdoc*thr+j];
02328                   }
02329 
02330                   SG_FREE(tmp_lin);
02331                   SG_FREE(tmp_aicache);
02332                   SG_FREE(threads);
02333                   SG_FREE(params);
02334               }
02335           }
02336 #endif
02337       }
02338       SG_FREE(changed);
02339       SG_FREE(changed2dnum);
02340       SG_FREE(inactive);
02341       SG_FREE(inactive2dnum);
02342   }
02343 
02344   (*maxdiff)=0;
02345   for (i=0;i<totdoc;i++) {
02346     shrink_state->inactive_since[i]=shrink_state->deactnum-1;
02347     if(!inconsistent[i]) {
02348       dist=(lin[i]-model->b)*(float64_t)label[i];
02349       target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
02350       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
02351       if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
02352         if((dist-target)>(*maxdiff))  /* largest violation */
02353           (*maxdiff)=dist-target;
02354       }
02355       else if((a[i]<ex_c) && (dist < target)) {
02356         if((target-dist)>(*maxdiff))  /* largest violation */
02357           (*maxdiff)=target-dist;
02358       }
02359       if((a[i]>(0+learn_parm->epsilon_a))
02360          && (a[i]<ex_c)) {
02361         shrink_state->active[i]=1;                         /* not at bound */
02362       }
02363       else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
02364         shrink_state->active[i]=1;
02365       }
02366       else if((a[i]>=ex_c)
02367               && (dist > (target-learn_parm->epsilon_shrink))) {
02368         shrink_state->active[i]=1;
02369       }
02370       else if(learn_parm->sharedslack) { /* make all active when sharedslack */
02371         shrink_state->active[i]=1;
02372       }
02373     }
02374   }
02375 
02376   if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
02377       for (i=0;i<totdoc;i++) {
02378           (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
02379       }
02380       for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
02381           SG_FREE(shrink_state->a_history[t]);
02382           shrink_state->a_history[t]=0;
02383       }
02384   }
02385 }
02386 
02387 
02388 
02389 /* start the optimizer and return the optimal values */
02390 float64_t* CSVMLight::optimize_qp(
02391         QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
02392         int32_t& svm_maxqpsize)
02393 {
02394     register int32_t i, j, result;
02395     float64_t margin, obj_before, obj_after;
02396     float64_t sigdig, dist, epsilon_loqo;
02397     int32_t iter;
02398 
02399     if(!primal) { /* allocate memory at first call */
02400         primal=SG_MALLOC(float64_t, nx*3);
02401         dual=SG_MALLOC(float64_t, nx*2+1);
02402     }
02403 
02404     obj_before=0; /* calculate objective before optimization */
02405     for(i=0;i<qp->opt_n;i++) {
02406         obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
02407         obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
02408         for(j=0;j<i;j++) {
02409             obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
02410         }
02411     }
02412 
02413     result=STILL_RUNNING;
02414     qp->opt_ce0[0]*=(-1.0);
02415     /* Run pr_loqo. If a run fails, try again with parameters which lead */
02416     /* to a slower, but more robust setting. */
02417     for(margin=init_margin,iter=init_iter;
02418         (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {
02419 
02420         opt_precision=CMath::max(opt_precision, DEF_PRECISION);
02421         sigdig=-log10(opt_precision);
02422 
02423         result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
02424                 (float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
02425                 (float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
02426                 (float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
02427                 (float64_t *)primal,(float64_t *)dual,
02428                 (int32_t)(verbosity-2),
02429                 (float64_t)sigdig,(int32_t)iter,
02430                 (float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);
02431 
02432         if(CMath::is_nan(dual[0])) {     /* check for choldc problem */
02433             if(verbosity>=2) {
02434                 SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n")
02435             }
02436             if(init_margin<0.80) { /* become more conservative in general */
02437                 init_margin=(4.0*margin+1.0)/5.0;
02438             }
02439             margin=(margin+1.0)/2.0;
02440             (opt_precision)*=10.0;   /* reduce precision */
02441             if(verbosity>=2) {
02442                 SG_SDEBUG("Reducing precision of PR_LOQO.\n")
02443             }
02444         }
02445         else if(result!=OPTIMAL_SOLUTION) {
02446             iter+=2000;
02447             init_iter+=10;
02448             (opt_precision)*=10.0;   /* reduce precision */
02449             if(verbosity>=2) {
02450                 SG_SDEBUG("Reducing precision of PR_LOQO due to (%ld).\n",result)
02451             }
02452         }
02453     }
02454 
02455     if(qp->opt_m)         /* Thanks to Alex Smola for this hint */
02456         model_b=dual[0];
02457     else
02458         model_b=0;
02459 
02460     /* Check the precision of the alphas. If results of current optimization */
02461     /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
02462     epsilon_loqo=1E-10;
02463     for(i=0;i<qp->opt_n;i++) {
02464         dist=-model_b*qp->opt_ce[i];
02465         dist+=(qp->opt_g0[i]+1.0);
02466         for(j=0;j<i;j++) {
02467             dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
02468         }
02469         for(j=i;j<qp->opt_n;j++) {
02470             dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
02471         }
02472         /*  SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]) */
02473         if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
02474             epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
02475         }
02476         else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
02477             epsilon_loqo=primal[i]*2.0;
02478         }
02479     }
02480 
02481     for(i=0;i<qp->opt_n;i++) {  /* clip alphas to bounds */
02482         if(primal[i]<=(0+epsilon_loqo)) {
02483             primal[i]=0;
02484         }
02485         else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
02486             primal[i]=qp->opt_up[i];
02487         }
02488     }
02489 
02490     obj_after=0;  /* calculate objective after optimization */
02491     for(i=0;i<qp->opt_n;i++) {
02492         obj_after+=(qp->opt_g0[i]*primal[i]);
02493         obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
02494         for(j=0;j<i;j++) {
02495             obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
02496         }
02497     }
02498 
02499     /* if optimizer returned NAN values, reset and retry with smaller */
02500     /* working set. */
02501     if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
02502         for(i=0;i<qp->opt_n;i++) {
02503             primal[i]=qp->opt_xinit[i];
02504         }
02505         model_b=0;
02506         if(svm_maxqpsize>2) {
02507             svm_maxqpsize--;  /* decrease size of qp-subproblems */
02508         }
02509     }
02510 
02511     if(obj_after >= obj_before) { /* check whether there was progress */
02512         (opt_precision)/=100.0;
02513         precision_violations++;
02514         if(verbosity>=2) {
02515             SG_SDEBUG("Increasing Precision of PR_LOQO.\n")
02516         }
02517     }
02518 
02519     // TODO: add parameter for this (e.g. for MTL experiments)
02520     if(precision_violations > 5000) {
02521         (*epsilon_crit)*=10.0;
02522         precision_violations=0;
02523         SG_SINFO("Relaxing epsilon on KT-Conditions.\n")
02524     }
02525 
02526     (*threshold)=model_b;
02527 
02528     if(result!=OPTIMAL_SOLUTION) {
02529         SG_SERROR("PR_LOQO did not converge.\n")
02530         return(qp->opt_xinit);
02531     }
02532     else {
02533         return(primal);
02534     }
02535 }
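/* optimize_qp() wraps pr_loqo in a relax-and-retry loop: on a NaN dual
 * (choldc failure) or a non-optimal exit it pushes margin toward 1, grants
 * more iterations and multiplies opt_precision by 10, so every retry is
 * slower but numerically safer; lack of progress (obj_after >= obj_before)
 * tightens opt_precision again, and more than 5000 precision violations
 * relax epsilon_crit itself. The bare retry pattern, illustrative only
 * (hypothetical solver hook, excluded from compilation): */
#if 0
struct QPResult { bool optimal; double obj; };
typedef QPResult (*Solver)(double precision, int max_iter);

QPResult solve_robustly(Solver solve, double precision, int max_iter)
{
    QPResult r = solve(precision, max_iter);
    while (!r.optimal && precision < 1e-3)
    {
        precision *= 10.0;   /* coarser tolerance -> more robust     */
        max_iter  += 2000;   /* and allow the solver more iterations */
        r = solve(precision, max_iter);
    }
    return r;
}
#endif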
02536 
02537 
02538 #endif //USE_SVMLIGHT