/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2004-2009 Soeren Sonnenburg, Gunnar Raetsch
 *                       Alexander Zien, Marius Kloft, Chen Guohua
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 * Copyright (C) 2010 Ryota Tomioka (University of Tokyo)
 */

#include <list>
#include <shogun/lib/Signal.h>
#include <shogun/classifier/mkl/MKL.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/CombinedKernel.h>

using namespace shogun;

CMKL::CMKL(CSVM* s) : CSVM(), svm(NULL), C_mkl(0), mkl_norm(1), ent_lambda(0),
    mkl_block_norm(1), beta_local(NULL), mkl_iterations(0), mkl_epsilon(1e-5),
    interleaved_optimization(true), w_gap(1.0), rho(0)
{
    set_constraint_generator(s);
#ifdef USE_CPLEX
    lp_cplex = NULL;
    env = NULL;
#endif

#ifdef USE_GLPK
    lp_glpk = NULL;
    lp_glpk_parm = NULL;
#endif

    SG_DEBUG("creating MKL object %p\n", this)
    lp_initialized = false;
}

CMKL::~CMKL()
{
    // -- Delete beta_local for ElasticnetMKL
    SG_FREE(beta_local);

    SG_DEBUG("deleting MKL object %p\n", this)
    if (svm)
        svm->set_callback_function(NULL, NULL);
    SG_UNREF(svm);
}

void CMKL::init_solver()
{
#ifdef USE_CPLEX
    cleanup_cplex();

    if (get_solver_type()==ST_CPLEX)
        init_cplex();
#endif

#ifdef USE_GLPK
    cleanup_glpk();

    if (get_solver_type()==ST_GLPK)
        init_glpk();
#endif
}

#ifdef USE_CPLEX
bool CMKL::init_cplex()
{
    while (env==NULL)
    {
        SG_INFO("trying to initialize CPLEX\n")

        int status = 0; // calling external lib
        env = CPXopenCPLEX(&status);

        if (env==NULL)
        {
            char errmsg[1024];
            SG_WARNING("Could not open CPLEX environment.\n")
            CPXgeterrorstring(env, status, errmsg);
            SG_WARNING("%s", errmsg)
            SG_WARNING("retrying in 60 seconds\n")
            sleep(60);
        }
        else
        {
            // select dual simplex based optimization
            status = CPXsetintparam(env, CPX_PARAM_LPMETHOD, CPX_ALG_DUAL);
            if (status)
            {
                SG_ERROR("Failure to select dual lp optimization, error %d.\n", status)
            }
            else
            {
                status = CPXsetintparam(env, CPX_PARAM_DATACHECK, CPX_ON);
                if (status)
                {
                    SG_ERROR("Failure to turn on data checking, error %d.\n", status)
                }
                else
                {
                    lp_cplex = CPXcreateprob(env, &status, "light");

                    if (lp_cplex==NULL)
                        SG_ERROR("Failed to create LP.\n")
                    else
                        CPXchgobjsen(env, lp_cplex, CPX_MIN); /* problem is minimization */
                }
            }
        }
    }

    return (lp_cplex != NULL) && (env != NULL);
}

bool CMKL::cleanup_cplex()
{
    bool result=false;

    if (lp_cplex)
    {
        int32_t status = CPXfreeprob(env, &lp_cplex);
        lp_cplex = NULL;
        lp_initialized = false;

        if (status)
            SG_WARNING("CPXfreeprob failed, error code %d.\n", status)
        else
            result = true;
    }

    if (env)
    {
        int32_t status = CPXcloseCPLEX(&env);
        env = NULL;

        if (status)
        {
            char errmsg[1024];
            SG_WARNING("Could not close CPLEX environment.\n")
            CPXgeterrorstring(env, status, errmsg);
            SG_WARNING("%s", errmsg)
        }
        else
            result = true;
    }
    return result;
}
#endif
#ifdef USE_GLPK
bool CMKL::init_glpk()
{
    lp_glpk = glp_create_prob();
    glp_set_obj_dir(lp_glpk, GLP_MIN);

    lp_glpk_parm = SG_MALLOC(glp_smcp, 1);
    glp_init_smcp(lp_glpk_parm);
    lp_glpk_parm->meth = GLP_DUAL;
    lp_glpk_parm->presolve = GLP_ON;

    glp_term_out(GLP_OFF);
    return (lp_glpk != NULL);
}

bool CMKL::cleanup_glpk()
{
    lp_initialized = false;
    if (lp_glpk)
        glp_delete_prob(lp_glpk);
    lp_glpk = NULL;
    SG_FREE(lp_glpk_parm);
    return true;
}

bool CMKL::check_glp_status(glp_prob *lp)
{
    int status = glp_get_status(lp);

    if (status==GLP_INFEAS)
    {
        SG_PRINT("solution is infeasible!\n")
        return false;
    }
    else if (status==GLP_NOFEAS)
    {
        SG_PRINT("problem has no feasible solution!\n")
        return false;
    }
    return true;
}
#endif // USE_GLPK
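/*
 * Training entry point. train_machine() below normalizes the initial
 * subkernel weights, mirrors all hyperparameters into the inner SVM (the
 * "constraint generator") and then either (a) interleaves MKL steps with the
 * SVM solver via a callback (supported for SVMlight only), or (b) runs a
 * wrapper loop that fully trains the SVM and calls perform_mkl_step() until
 * the relative gap w_gap falls below mkl_epsilon.
 */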
bool CMKL::train_machine(CFeatures* data)
{
    ASSERT(kernel)
    ASSERT(m_labels && m_labels->get_num_labels())

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    init_training();
    if (!svm)
        SG_ERROR("No constraint generator (SVM) set\n")

    int32_t num_label=0;
    if (m_labels)
        num_label = m_labels->get_num_labels();

    SG_INFO("%d trainlabels (%ld)\n", num_label, m_labels)
    if (mkl_epsilon<=0)
        mkl_epsilon=1e-2;

    SG_INFO("mkl_epsilon = %1.1e\n", mkl_epsilon)
    SG_INFO("C_mkl = %1.1e\n", C_mkl)
    SG_INFO("mkl_norm = %1.3e\n", mkl_norm)
    SG_INFO("solver = %d\n", get_solver_type())
    SG_INFO("ent_lambda = %f\n", ent_lambda)
    SG_INFO("mkl_block_norm = %f\n", mkl_block_norm)

    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    SG_INFO("num_kernels = %d\n", num_kernels)
    const float64_t* beta_const = kernel->get_subkernel_weights(num_weights);
    float64_t* beta = SGVector<float64_t>::clone_vector(beta_const, num_weights);
    ASSERT(num_weights==num_kernels)

    if (get_solver_type()==ST_BLOCK_NORM &&
            mkl_block_norm>=1 &&
            mkl_block_norm<=2)
    {
        mkl_norm=mkl_block_norm/(2-mkl_block_norm);
        SG_WARNING("Switching to ST_DIRECT method with mkl_norm=%g\n", mkl_norm)
        set_solver_type(ST_DIRECT);
    }

    if (get_solver_type()==ST_ELASTICNET)
    {
        // -- Initialize subkernel weights for Elasticnet MKL
        SGVector<float64_t>::scale_vector(1/SGVector<float64_t>::qnorm(beta, num_kernels, 1.0), beta, num_kernels);

        SG_FREE(beta_local);
        beta_local = SGVector<float64_t>::clone_vector(beta, num_kernels);

        elasticnet_transform(beta, ent_lambda, num_kernels);
    }
    else
    {
        SGVector<float64_t>::scale_vector(1/SGVector<float64_t>::qnorm(beta, num_kernels, mkl_norm),
                beta, num_kernels); // q-norm = 1
    }

    kernel->set_subkernel_weights(SGVector<float64_t>(beta, num_kernels, false));
    SG_FREE(beta);

    svm->set_bias_enabled(get_bias_enabled());
    svm->set_epsilon(get_epsilon());
    svm->set_max_train_time(get_max_train_time());
    svm->set_nu(get_nu());
    svm->set_C(get_C1(), get_C2());
    svm->set_qpsize(get_qpsize());
    svm->set_shrinking_enabled(get_shrinking_enabled());
    svm->set_linadd_enabled(get_linadd_enabled());
    svm->set_batch_computation_enabled(get_batch_computation_enabled());
    svm->set_labels(m_labels);
    svm->set_kernel(kernel);

#ifdef USE_CPLEX
    cleanup_cplex();

    if (get_solver_type()==ST_CPLEX)
        init_cplex();
#endif

#ifdef USE_GLPK
    if (get_solver_type()==ST_GLPK)
        init_glpk();
#endif

    mkl_iterations = 0;
    CSignal::clear_cancel();

    training_time_clock.start();

    if (interleaved_optimization)
    {
        if (svm->get_classifier_type() != CT_LIGHT && svm->get_classifier_type() != CT_SVRLIGHT)
        {
            SG_ERROR("Interleaved MKL optimization is currently "
                    "only supported with SVMlight\n");
        }

        // Note that there is currently no safe way to properly resolve this
        // situation: the MKL object hands itself as a reference to the SVM,
        // which in turn increases the reference count of the MKL object and,
        // when done, decreases the count. Thus we have to protect the MKL
        // object from deletion by ref()'ing it before we set the callback
        // function, and we should also unref() it afterwards. However, if the
        // reference count was zero before, this unref() might actually try to
        // destroy this object (crash ahead), but if we don't unref() we might
        // leak memory... So as a workaround we only unref() when the
        // reference count was >1 before.
#ifdef USE_REFERENCE_COUNTING
        int32_t refs=this->ref();
#endif
        svm->set_callback_function(this, perform_mkl_step_helper);
        svm->train();
        SG_DONE()
        svm->set_callback_function(NULL, NULL);
#ifdef USE_REFERENCE_COUNTING
        if (refs>1)
            this->unref();
#endif
    }
    else
    {
        float64_t* sumw = SG_MALLOC(float64_t, num_kernels);

        while (true)
        {
            svm->train();

            float64_t suma=compute_sum_alpha();
            compute_sum_beta(sumw);

            if ((training_time_clock.cur_time_diff()>get_max_train_time()) && (get_max_train_time()>0))
            {
                SG_SWARNING("MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time()=%f. It may not have converged yet!\n", get_max_train_time())
                break;
            }

            mkl_iterations++;
            if (perform_mkl_step(sumw, suma) || CSignal::cancel_computations())
                break;
        }

        SG_FREE(sumw);
    }
#ifdef USE_CPLEX
    cleanup_cplex();
#endif
#ifdef USE_GLPK
    cleanup_glpk();
#endif

    int32_t nsv=svm->get_num_support_vectors();
    create_new_model(nsv);

    set_bias(svm->get_bias());
    for (int32_t i=0; i<nsv; i++)
    {
        set_alpha(i, svm->get_alpha(i));
        set_support_vector(i, svm->get_support_vector(i));
    }
    return true;
}

void CMKL::set_mkl_norm(float64_t norm)
{
    if (norm<1)
        SG_ERROR("Norm must be >= 1, e.g., 1-norm is the standard MKL; norms>1 nonsparse MKL\n")

    mkl_norm = norm;
}

void CMKL::set_elasticnet_lambda(float64_t lambda)
{
    if (lambda>1 || lambda<0)
        SG_ERROR("0<=lambda<=1\n")

    if (lambda==0)
        lambda = 1e-6;
    else if (lambda==1.0)
        lambda = 1.0-1e-6;

    ent_lambda=lambda;
}

void CMKL::set_mkl_block_norm(float64_t q)
{
    if (q<1)
        SG_ERROR("1<=q<=inf\n")

    mkl_block_norm=q;
}
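/*
 * One MKL step: given the per-kernel quadratic terms sumw[k]
 * (0.5*alpha'*K_k*alpha, cf. compute_sum_beta()) and the linear part suma
 * as provided by compute_sum_alpha(), the current MKL objective is
 * sum_k beta_k*sumw[k] - suma. The relative gap w_gap = |1 - rho/objective|
 * drives convergence; the new beta is computed by whichever solver was
 * selected via set_solver_type().
 */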
bool CMKL::perform_mkl_step(
        const float64_t* sumw, float64_t suma)
{
    if ((training_time_clock.cur_time_diff()>get_max_train_time()) && (get_max_train_time()>0))
    {
        SG_SWARNING("MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time()=%f. It may not have converged yet!\n", get_max_train_time())
        return true;
    }

    int32_t num_kernels = kernel->get_num_subkernels();
    int32_t nweights=0;
    const float64_t* old_beta = kernel->get_subkernel_weights(nweights);
    ASSERT(nweights==num_kernels)
    float64_t* beta = SG_MALLOC(float64_t, num_kernels);

#if defined(USE_CPLEX) || defined(USE_GLPK)
    int32_t inner_iters=0;
#endif
    float64_t mkl_objective=0;

    mkl_objective=-suma;
    for (int32_t i=0; i<num_kernels; i++)
    {
        beta[i]=old_beta[i];
        mkl_objective+=old_beta[i]*sumw[i];
    }

    w_gap = CMath::abs(1-rho/mkl_objective);

    if ((w_gap >= mkl_epsilon) ||
            (get_solver_type()==ST_AUTO || get_solver_type()==ST_NEWTON || get_solver_type()==ST_DIRECT) ||
            get_solver_type()==ST_ELASTICNET || get_solver_type()==ST_BLOCK_NORM)
    {
        if (get_solver_type()==ST_AUTO || get_solver_type()==ST_DIRECT)
        {
            rho=compute_optimal_betas_directly(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_BLOCK_NORM)
        {
            rho=compute_optimal_betas_block_norm(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_ELASTICNET)
        {
            // -- Direct update of subkernel weights for ElasticnetMKL
            // Note that ElasticnetMKL solves a different optimization
            // problem from the rest of the solver types
            rho=compute_optimal_betas_elasticnet(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_NEWTON)
            rho=compute_optimal_betas_newton(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
#ifdef USE_CPLEX
        else if (get_solver_type()==ST_CPLEX)
            rho=compute_optimal_betas_via_cplex(beta, old_beta, num_kernels, sumw, suma, inner_iters);
#endif
#ifdef USE_GLPK
        else if (get_solver_type()==ST_GLPK)
            rho=compute_optimal_betas_via_glpk(beta, old_beta, num_kernels, sumw, suma, inner_iters);
#endif
        else
            SG_ERROR("Solver type not supported (not compiled in?)\n")

        w_gap = CMath::abs(1-rho/mkl_objective);
    }

    kernel->set_subkernel_weights(SGVector<float64_t>(beta, num_kernels, false));
    SG_FREE(beta);

    return converged();
}
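/*
 * Elastic-net MKL update. The unnormalized weights are set to
 * beta_p = old_beta[p]*sqrt(sumw[p]) and normalized to sum to one; a small
 * additive regularizer R (a fraction epsRegul of the RMS deviation from the
 * previous solution) then keeps the weights away from degeneracy before a
 * final renormalization. beta_local stores the untransformed weights between
 * iterations, while elasticnet_transform() maps them to the weights the
 * kernel actually uses.
 */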
float64_t CMKL::compute_optimal_betas_elasticnet(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    const float64_t epsRegul = 0.01; // fraction of root mean squared deviation
    float64_t obj;
    float64_t Z;
    float64_t preR;
    int32_t p;
    int32_t nofKernelsGood;

    // --- optimal beta
    nofKernelsGood = num_kernels;

    Z = 0;
    for (p=0; p<num_kernels; ++p)
    {
        if (sumw[p] >= 0.0 && old_beta[p] >= 0.0)
        {
            beta[p] = CMath::sqrt(sumw[p]*old_beta[p]*old_beta[p]);
            Z += beta[p];
        }
        else
        {
            beta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(beta[p] >= 0)
    }

    // --- normalize
    SGVector<float64_t>::scale_vector(1.0/Z, beta, num_kernels);

    // --- regularize & renormalize

    preR = 0.0;
    for (p=0; p<num_kernels; ++p)
        preR += CMath::pow(beta_local[p] - beta[p], 2.0);
    const float64_t R = CMath::sqrt(preR) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", 1.0)
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood)
        SG_PRINT("MKL-direct: Z = %e\n", Z)
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul)
        for (p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(beta_local[p] - beta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, beta_local[p]-beta[p], beta_local[p], beta[p])
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR)
        SG_PRINT("MKL-direct: preR/p = %e\n", preR)
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR))
        SG_PRINT("MKL-direct: R = %e\n", R)
        SG_ERROR("Assertion R >= 0 failed!\n")
    }

    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] += R;
        Z += beta[p];
        ASSERT(beta[p] >= 0)
    }
    Z = CMath::pow(Z, -1.0);
    ASSERT(Z >= 0)
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] *= Z;
        ASSERT(beta[p] >= 0.0)
        if (beta[p] > 1.0)
            beta[p] = 1.0;
    }

    // --- copy beta into beta_local
    for (p=0; p<num_kernels; ++p)
        beta_local[p] = beta[p];

    // --- elastic-net transform
    elasticnet_transform(beta, ent_lambda, num_kernels);

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
    {
        //obj += sumw[p] * old_beta[p]*old_beta[p] / beta[p];
        obj += sumw[p] * beta[p];
    }
    return obj;
}
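/*
 * elasticnet_dual() evaluates the one-dimensional elastic-net dual in the
 * variable del ("delta"): *ff receives the objective value, *gg its first
 * derivative and *hh its second derivative. Only kernels whose norm nm[i]
 * reaches sqrt(2*del*(1-lambda)) contribute (the active set I).
 * compute_elasticnet_dual_objective() below uses these quantities in a
 * damped Newton iteration on del.
 */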
void CMKL::elasticnet_dual(float64_t *ff, float64_t *gg, float64_t *hh,
        const float64_t &del, const float64_t* nm, int32_t len,
        const float64_t &lambda)
{
    std::list<int32_t> I;
    float64_t gam = 1.0-lambda;
    for (int32_t i=0; i<len; i++)
    {
        if (nm[i]>=CMath::sqrt(2*del*gam))
            I.push_back(i);
    }
    int32_t M=I.size();

    *ff=del;
    *gg=-(M*gam+lambda);
    *hh=0;
    for (std::list<int32_t>::iterator it=I.begin(); it!=I.end(); it++)
    {
        float64_t nmit = nm[*it];

        *ff += del*gam*CMath::pow(nmit/CMath::sqrt(2*del*gam)-1, 2)/lambda;
        *gg += CMath::sqrt(gam/(2*del))*nmit;
        *hh += -0.5*CMath::sqrt(gam/(2*CMath::pow(del, 3)))*nmit;
    }
}

// assumes that all constraints are satisfied
float64_t CMKL::compute_elasticnet_dual_objective()
{
    int32_t n=get_num_support_vectors();
    int32_t num_kernels = kernel->get_num_subkernels();
    float64_t mkl_obj=0;

    if (m_labels && kernel && kernel->get_kernel_type() == K_COMBINED)
    {
        // Compute Elastic-net dual
        float64_t* nm = SG_MALLOC(float64_t, num_kernels);
        float64_t del=0;

        int32_t k=0;
        for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
        {
            CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
            float64_t sum=0;
            for (int32_t i=0; i<n; i++)
            {
                int32_t ii=get_support_vector(i);

                for (int32_t j=0; j<n; j++)
                {
                    int32_t jj=get_support_vector(j);
                    sum+=get_alpha(i)*get_alpha(j)*kn->kernel(ii, jj);
                }
            }
            nm[k]=CMath::pow(sum, 0.5);
            del=CMath::max(del, nm[k]);

            // SG_PRINT("nm[%d]=%f\n", k, nm[k])
            k++;

            SG_UNREF(kn);
        }
        // initial delta
        del = del/CMath::sqrt(2*(1-ent_lambda));

        // Newton's method to optimize delta
        k=0;
        float64_t ff, gg, hh;
        elasticnet_dual(&ff, &gg, &hh, del, nm, num_kernels, ent_lambda);
        while (CMath::abs(gg)>+1e-8 && k<100)
        {
            float64_t ff_old = ff;
            float64_t gg_old = gg;
            float64_t del_old = del;

            // SG_PRINT("[%d] fval=%f gg=%f del=%f\n", k, ff, gg, del)

            float64_t step=1.0;
            do
            {
                del=del_old*CMath::exp(-step*gg/(hh*del+gg));
                elasticnet_dual(&ff, &gg, &hh, del, nm, num_kernels, ent_lambda);
                step/=2;
            } while (ff>ff_old+1e-4*gg_old*(del-del_old));

            k++;
        }
        mkl_obj=-ff;

        SG_FREE(nm);

        mkl_obj+=compute_sum_alpha();
    }
    else
        SG_ERROR("cannot compute objective, labels or kernel not set\n")

    return -mkl_obj;
}

float64_t CMKL::compute_optimal_betas_block_norm(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    float64_t obj;
    float64_t Z=0;
    int32_t p;

    // --- optimal beta
    for (p=0; p<num_kernels; ++p)
    {
        ASSERT(sumw[p]>=0)

        beta[p] = CMath::pow(sumw[p], -(2.0-mkl_block_norm)/(2.0-2.0*mkl_block_norm));
        Z += CMath::pow(sumw[p], -(mkl_block_norm)/(2.0-2.0*mkl_block_norm));

        ASSERT(beta[p] >= 0)
    }

    ASSERT(Z>=0)

    Z=1.0/CMath::pow(Z, (2.0-mkl_block_norm)/mkl_block_norm);

    for (p=0; p<num_kernels; ++p)
        beta[p] *= Z;

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += sumw[p] * beta[p];

    return obj;
}
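/*
 * Closed-form update for q-norm MKL (q = mkl_norm): each weight is set to
 * beta_p = (sumw[p]*old_beta[p]^2 / q)^(1/(q+1)) and the vector is then
 * q-norm normalized. As in the elastic-net variant, a small additive
 * regularizer R (a fraction epsRegul of the RMS change) keeps the weights
 * away from zero before a final renormalization.
 */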
float64_t CMKL::compute_optimal_betas_directly(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    const float64_t epsRegul = 0.01; // fraction of root mean squared deviation
    float64_t obj;
    float64_t Z;
    float64_t preR;
    int32_t p;
    int32_t nofKernelsGood;

    // --- optimal beta
    nofKernelsGood = num_kernels;
    for (p=0; p<num_kernels; ++p)
    {
        //SG_PRINT("MKL-direct: sumw[%3d] = %e ( oldbeta = %e )\n", p, sumw[p], old_beta[p])
        if (sumw[p] >= 0.0 && old_beta[p] >= 0.0)
        {
            beta[p] = sumw[p] * old_beta[p]*old_beta[p] / mkl_norm;
            beta[p] = CMath::pow(beta[p], 1.0 / (mkl_norm+1.0));
        }
        else
        {
            beta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(beta[p] >= 0)
    }

    // --- normalize
    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
        Z += CMath::pow(beta[p], mkl_norm);

    Z = CMath::pow(Z, -1.0/mkl_norm);
    ASSERT(Z >= 0)
    for (p=0; p<num_kernels; ++p)
        beta[p] *= Z;

    // --- regularize & renormalize
    preR = 0.0;
    for (p=0; p<num_kernels; ++p)
        preR += CMath::sq(old_beta[p] - beta[p]);

    const float64_t R = CMath::sqrt(preR / mkl_norm) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", mkl_norm)
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood)
        SG_PRINT("MKL-direct: Z = %e\n", Z)
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul)
        for (p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(old_beta[p] - beta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, old_beta[p]-beta[p], old_beta[p], beta[p])
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR)
        SG_PRINT("MKL-direct: preR/p = %e\n", preR/mkl_norm)
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/mkl_norm))
        SG_PRINT("MKL-direct: R = %e\n", R)
        SG_ERROR("Assertion R >= 0 failed!\n")
    }

    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] += R;
        Z += CMath::pow(beta[p], mkl_norm);
        ASSERT(beta[p] >= 0)
    }
    Z = CMath::pow(Z, -1.0/mkl_norm);
    ASSERT(Z >= 0)
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] *= Z;
        ASSERT(beta[p] >= 0.0)
        if (beta[p] > 1.0)
            beta[p] = 1.0;
    }

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += sumw[p] * beta[p];

    return obj;
}
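/*
 * Newton solver for q-norm MKL (q > 1). The Hessian of the objective in beta
 * is diagonal, so the Newton direction is computed component-wise; a halving
 * line search then looks for a step that decreases the surrogate objective
 * sum_p sumw[p]*old_beta[p]^2/beta[p], re-projecting onto the q-norm unit
 * sphere after every trial step.
 */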
float64_t CMKL::compute_optimal_betas_newton(float64_t* beta,
        const float64_t* old_beta, int32_t num_kernels,
        const float64_t* sumw, float64_t suma,
        float64_t mkl_objective)
{
    SG_DEBUG("MKL via NEWTON\n")

    if (mkl_norm==1)
        SG_ERROR("MKL via NEWTON works only for norms>1\n")

    const double epsBeta = 1e-32;
    const double epsGamma = 1e-12;
    const double epsWsq = 1e-12;
    const double epsNewt = 0.0001;
    const double epsStep = 1e-9;
    const int nofNewtonSteps = 3;
    const double hessRidge = 1e-6;
    const int inLogSpace = 0;

    const float64_t r = mkl_norm / (mkl_norm - 1.0);
    float64_t* newtDir = SG_MALLOC(float64_t, num_kernels);
    float64_t* newtBeta = SG_MALLOC(float64_t, num_kernels);
    //float64_t newtStep;
    float64_t stepSize;
    float64_t Z;
    float64_t obj;
    float64_t gamma;
    int32_t p;
    int i;

    // --- check beta
    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] = old_beta[p];
        if (!(beta[p] >= epsBeta))
            beta[p] = epsBeta;

        ASSERT(0.0 <= beta[p] && beta[p] <= 1.0)
        Z += CMath::pow(beta[p], mkl_norm);
    }

    Z = CMath::pow(Z, -1.0/mkl_norm);
    if (!(fabs(Z-1.0) <= epsGamma))
    {
        SG_WARNING("old_beta not normalized (diff=%e); forcing normalization. ", Z-1.0)
        for (p=0; p<num_kernels; ++p)
        {
            beta[p] *= Z;
            if (beta[p] > 1.0)
                beta[p] = 1.0;
            ASSERT(0.0 <= beta[p] && beta[p] <= 1.0)
        }
    }

    // --- compute gamma
    gamma = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        if (!(sumw[p] >= 0))
        {
            if (!(sumw[p] >= -epsWsq))
                SG_WARNING("sumw[%d] = %e; treated as 0. ", p, sumw[p])
            // should better recompute sumw[] !!!
        }
        else
        {
            ASSERT(sumw[p] >= 0)
            //gamma += CMath::pow( sumw[p] * beta[p]*beta[p], r );
            gamma += CMath::pow(sumw[p] * beta[p]*beta[p] / mkl_norm, r);
        }
    }
    gamma = CMath::pow(gamma, 1.0/r) / mkl_norm;
    ASSERT(gamma > -1e-9)
    if (!(gamma > epsGamma))
    {
        SG_WARNING("bad gamma: %e; set to %e. ", gamma, epsGamma)
        // should better recompute sumw[] !!!
        gamma = epsGamma;
    }
    ASSERT(gamma >= epsGamma)
    //gamma = -gamma;

    // --- compute objective
    obj = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        obj += beta[p] * sumw[p];
        //obj += gamma/mkl_norm * CMath::pow( beta[p], mkl_norm );
    }
    if (!(obj >= 0.0))
        SG_WARNING("negative objective: %e. ", obj)
    //SG_PRINT("OBJ = %e. \n", obj )
    // === perform Newton steps
    for (i = 0; i < nofNewtonSteps; ++i)
    {
        // --- compute Newton direction (Hessian is diagonal)
        const float64_t gqq1 = mkl_norm * (mkl_norm-1.0) * gamma;
        // newtStep = 0.0;
        for (p=0; p<num_kernels; ++p)
        {
            ASSERT(0.0 <= beta[p] && beta[p] <= 1.0)
            //const float halfw2p = ( sumw[p] >= 0.0 ) ? sumw[p] : 0.0;
            //const float64_t t1 = halfw2p*beta[p] - mkl_norm*gamma*CMath::pow(beta[p],mkl_norm);
            //const float64_t t2 = 2.0*halfw2p + gqq1*CMath::pow(beta[p],mkl_norm-1.0);
            const float halfw2p = (sumw[p] >= 0.0) ? (sumw[p]*old_beta[p]*old_beta[p]) : 0.0;
            const float64_t t0 = halfw2p*beta[p] - mkl_norm*gamma*CMath::pow(beta[p], mkl_norm+2.0);
            const float64_t t1 = (t0 < 0) ? 0.0 : t0;
            const float64_t t2 = 2.0*halfw2p + gqq1*CMath::pow(beta[p], mkl_norm+1.0);
            if (inLogSpace)
                newtDir[p] = t1 / (t1 + t2*beta[p] + hessRidge);
            else
                newtDir[p] = (t1 == 0.0) ? 0.0 : (t1 / t2);
            // newtStep += newtDir[p] * grad[p];
            ASSERT(newtDir[p] == newtDir[p])
            //SG_PRINT("newtDir[%d] = %6.3f = %e / %e \n", p, newtDir[p], t1, t2 )
        }
        //CMath::display_vector(newtDir, num_kernels, "newton direction ");
        //SG_PRINT("Newton step size = %e\n", Z )

        // --- line search
        stepSize = 1.0;
        while (stepSize >= epsStep)
        {
            // --- perform Newton step
            Z = 0.0;
            while (Z == 0.0)
            {
                for (p=0; p<num_kernels; ++p)
                {
                    if (inLogSpace)
                        newtBeta[p] = beta[p] * CMath::exp(+ stepSize * newtDir[p]);
                    else
                        newtBeta[p] = beta[p] + stepSize * newtDir[p];
                    if (!(newtBeta[p] >= epsBeta))
                        newtBeta[p] = epsBeta;
                    Z += CMath::pow(newtBeta[p], mkl_norm);
                }
                ASSERT(0.0 <= Z)
                Z = CMath::pow(Z, -1.0/mkl_norm);
                if (Z == 0.0)
                    stepSize /= 2.0;
            }

            // --- normalize new beta (wrt p-norm)
            ASSERT(0.0 < Z)
            ASSERT(Z < CMath::INFTY)
            for (p=0; p<num_kernels; ++p)
            {
                newtBeta[p] *= Z;
                if (newtBeta[p] > 1.0)
                {
                    //SG_WARNING("beta[%d] = %e; set to 1. ", p, beta[p])
                    newtBeta[p] = 1.0;
                }
                ASSERT(0.0 <= newtBeta[p] && newtBeta[p] <= 1.0)
            }

            // --- objective increased?
            float64_t newtObj;
            newtObj = 0.0;
            for (p=0; p<num_kernels; ++p)
                newtObj += sumw[p] * old_beta[p]*old_beta[p] / newtBeta[p];
            //SG_PRINT("step = %.8f: obj = %e -> %e. \n", stepSize, obj, newtObj )
            if (newtObj < obj - epsNewt*stepSize*obj)
            {
                for (p=0; p<num_kernels; ++p)
                    beta[p] = newtBeta[p];
                obj = newtObj;
                break;
            }
            stepSize /= 2.0;
        }

        if (stepSize < epsStep)
            break;
    }
    SG_FREE(newtDir);
    SG_FREE(newtBeta);

    // === return new objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += beta[p] * sumw[p];
    return obj;
}
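/*
 * CPLEX-based solver. The LP has 2*num_kernels+1 columns: the kernel weights
 * beta, slack variables used by the optional smoothness regularizer C_mkl,
 * and one free epigraph variable theta (column 2*num_kernels). For 1-norm
 * MKL the constraint sum_k beta_k = 1 is added once; for q-norm MKL a
 * quadratic constraint approximates the norm ball (see
 * set_qnorm_constraints()). Each MKL step appends one cutting-plane row
 * generated from the current SVM solution, so the LP grows over time;
 * inactive rows are pruned below.
 */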
float64_t CMKL::compute_optimal_betas_via_cplex(float64_t* new_beta, const float64_t* old_beta, int32_t num_kernels,
        const float64_t* sumw, float64_t suma, int32_t& inner_iters)
{
    SG_DEBUG("MKL via CPLEX\n")

#ifdef USE_CPLEX
    ASSERT(new_beta)
    ASSERT(old_beta)

    int32_t NUMCOLS = 2*num_kernels + 1;
    double* x=SG_MALLOC(double, NUMCOLS);

    if (!lp_initialized)
    {
        SG_INFO("creating LP\n")

        double obj[NUMCOLS]; /* calling external lib */
        double lb[NUMCOLS];  /* calling external lib */
        double ub[NUMCOLS];  /* calling external lib */

        for (int32_t i=0; i<2*num_kernels; i++)
        {
            obj[i]=0;
            lb[i]=0;
            ub[i]=1;
        }

        for (int32_t i=num_kernels; i<2*num_kernels; i++)
            obj[i]= C_mkl;

        obj[2*num_kernels]=1;
        lb[2*num_kernels]=-CPX_INFBOUND;
        ub[2*num_kernels]=CPX_INFBOUND;

        int status = CPXnewcols(env, lp_cplex, NUMCOLS, obj, lb, ub, NULL, NULL);
        if (status)
        {
            char errmsg[1024];
            CPXgeterrorstring(env, status, errmsg);
            SG_ERROR("%s", errmsg)
        }

        // add constraint sum(w)=1;
        SG_INFO("adding the first row\n")
        int initial_rmatbeg[1];                 /* calling external lib */
        int initial_rmatind[num_kernels+1];     /* calling external lib */
        double initial_rmatval[num_kernels+1];  /* calling ext lib */
        double initial_rhs[1];                  /* calling external lib */
        char initial_sense[1];

        // 1-norm MKL
        if (mkl_norm==1)
        {
            initial_rmatbeg[0] = 0;
            initial_rhs[0]=1;     // rhs=1 ;
            initial_sense[0]='E'; // equality

            // sparse matrix
            for (int32_t i=0; i<num_kernels; i++)
            {
                initial_rmatind[i]=i; // index of non-zero element
                initial_rmatval[i]=1; // value of ...
            }
            initial_rmatind[num_kernels]=2*num_kernels; // number of non-zero elements
            initial_rmatval[num_kernels]=0;

            status = CPXaddrows(env, lp_cplex, 0, 1, num_kernels+1,
                    initial_rhs, initial_sense, initial_rmatbeg,
                    initial_rmatind, initial_rmatval, NULL, NULL);
        }
        else // 2 and q-norm MKL
        {
            initial_rmatbeg[0] = 0;
            initial_rhs[0]=0;     // rhs=1 ;
            initial_sense[0]='L'; // <= (inequality)

            initial_rmatind[0]=2*num_kernels;
            initial_rmatval[0]=0;

            status = CPXaddrows(env, lp_cplex, 0, 1, 1,
                    initial_rhs, initial_sense, initial_rmatbeg,
                    initial_rmatind, initial_rmatval, NULL, NULL);

            if (mkl_norm==2)
            {
                for (int32_t i=0; i<num_kernels; i++)
                {
                    initial_rmatind[i]=i;
                    initial_rmatval[i]=1;
                }
                initial_rmatind[num_kernels]=2*num_kernels;
                initial_rmatval[num_kernels]=0;

                status = CPXaddqconstr(env, lp_cplex, 0, num_kernels+1, 1.0, 'L', NULL, NULL,
                        initial_rmatind, initial_rmatind, initial_rmatval, NULL);
            }
        }

        if (status)
            SG_ERROR("Failed to add the first row.\n")

        lp_initialized = true;

        if (C_mkl!=0.0)
        {
            for (int32_t q=0; q<num_kernels-1; q++)
            {
                // add constraint w[i]-w[i+1]<s[i];
                // add constraint w[i+1]-w[i]<s[i];
                int rmatbeg[1];    /* calling external lib */
                int rmatind[3];    /* calling external lib */
                double rmatval[3]; /* calling external lib */
                double rhs[1];     /* calling external lib */
                char sense[1];

                rmatbeg[0] = 0;
                rhs[0]=0;     // rhs=0 ;
                sense[0]='L'; // <=
                rmatind[0]=q;
                rmatval[0]=1;
                rmatind[1]=q+1;
                rmatval[1]=-1;
                rmatind[2]=num_kernels+q;
                rmatval[2]=-1;
                status = CPXaddrows(env, lp_cplex, 0, 1, 3,
                        rhs, sense, rmatbeg,
                        rmatind, rmatval, NULL, NULL);
                if (status)
                    SG_ERROR("Failed to add a smoothness row (1).\n")

                rmatbeg[0] = 0;
                rhs[0]=0;     // rhs=0 ;
                sense[0]='L'; // <=
                rmatind[0]=q;
                rmatval[0]=-1;
                rmatind[1]=q+1;
                rmatval[1]=1;
                rmatind[2]=num_kernels+q;
                rmatval[2]=-1;
                status = CPXaddrows(env, lp_cplex, 0, 1, 3,
                        rhs, sense, rmatbeg,
                        rmatind, rmatval, NULL, NULL);
                if (status)
                    SG_ERROR("Failed to add a smoothness row (2).\n")
            }
        }
    }
    { // add the new row
        //SG_INFO("add the new row\n")

        int rmatbeg[1];
        int rmatind[num_kernels+1];
        double rmatval[num_kernels+1];
        double rhs[1];
        char sense[1];

        rmatbeg[0] = 0;
        if (mkl_norm==1)
            rhs[0]=0;
        else
            rhs[0]=-suma;

        sense[0]='L';

        for (int32_t i=0; i<num_kernels; i++)
        {
            rmatind[i]=i;
            if (mkl_norm==1)
                rmatval[i]=-(sumw[i]-suma);
            else
                rmatval[i]=-sumw[i];
        }
        rmatind[num_kernels]=2*num_kernels;
        rmatval[num_kernels]=-1;

        int32_t status = CPXaddrows(env, lp_cplex, 0, 1, num_kernels+1,
                rhs, sense, rmatbeg,
                rmatind, rmatval, NULL, NULL);
        if (status)
            SG_ERROR("Failed to add the new row.\n")
    }

    inner_iters=0;
    int status;
    {
        if (mkl_norm==1) // optimize 1 norm MKL
            status = CPXlpopt(env, lp_cplex);
        else if (mkl_norm==2) // optimize 2-norm MKL
            status = CPXbaropt(env, lp_cplex);
        else // q-norm MKL
        {
            float64_t* beta=SG_MALLOC(float64_t, 2*num_kernels+1);
            float64_t objval_old=1e-8; // some value to cause the loop to not terminate yet
            for (int32_t i=0; i<num_kernels; i++)
                beta[i]=old_beta[i];
            for (int32_t i=num_kernels; i<2*num_kernels+1; i++)
                beta[i]=0;

            while (true)
            {
                //int rows=CPXgetnumrows(env, lp_cplex);
                //int cols=CPXgetnumcols(env, lp_cplex);
                //SG_PRINT("rows:%d, cols:%d (kernel:%d)\n", rows, cols, num_kernels)
                CMath::scale_vector(1/CMath::qnorm(beta, num_kernels, mkl_norm), beta, num_kernels);

                set_qnorm_constraints(beta, num_kernels);

                status = CPXbaropt(env, lp_cplex);
                if (status)
                    SG_ERROR("Failed to optimize Problem.\n")

                int solstat=0;
                double objval=0;
                status=CPXsolution(env, lp_cplex, &solstat, &objval,
                        (double*) beta, NULL, NULL, NULL);

                if (status)
                {
                    CMath::display_vector(beta, num_kernels, "beta");
                    SG_ERROR("Failed to obtain solution.\n")
                }

                CMath::scale_vector(1/CMath::qnorm(beta, num_kernels, mkl_norm), beta, num_kernels);

                //SG_PRINT("[%d] %f (%f)\n", inner_iters, objval, objval_old)
                if ((1-CMath::abs(objval/objval_old) < 0.1*mkl_epsilon)) // && (inner_iters>2))
                    break;

                objval_old=objval;

                inner_iters++;
            }
            SG_FREE(beta);
        }

        if (status)
            SG_ERROR("Failed to optimize Problem.\n")

        // obtain solution
        int32_t cur_numrows=(int32_t) CPXgetnumrows(env, lp_cplex);
        int32_t cur_numcols=(int32_t) CPXgetnumcols(env, lp_cplex);
        int32_t num_rows=cur_numrows;
        ASSERT(cur_numcols<=2*num_kernels+1)

        float64_t* slack=SG_MALLOC(float64_t, cur_numrows);
        float64_t* pi=SG_MALLOC(float64_t, cur_numrows);

        /* calling external lib */
        int solstat=0;
        double objval=0;

        if (mkl_norm==1)
        {
            status=CPXsolution(env, lp_cplex, &solstat, &objval,
                    (double*) x, (double*) pi, (double*) slack, NULL);
        }
        else
        {
            status=CPXsolution(env, lp_cplex, &solstat, &objval,
                    (double*) x, NULL, (double*) slack, NULL);
        }

        int32_t solution_ok = (!status);
        if (status)
            SG_ERROR("Failed to obtain solution.\n")

        int32_t num_active_rows=0;
        if (solution_ok)
        {
            /* 1 norm mkl */
            float64_t max_slack = -CMath::INFTY;
            int32_t max_idx = -1;
            int32_t start_row = 1;
            if (C_mkl!=0.0)
                start_row+=2*(num_kernels-1);

            for (int32_t i = start_row; i < cur_numrows; i++) // skip first
            {
                if (mkl_norm==1)
                {
                    if ((pi[i]!=0))
                        num_active_rows++;
                    else
                    {
                        if (slack[i]>max_slack)
                        {
                            max_slack=slack[i];
                            max_idx=i;
                        }
                    }
                }
                else // 2-norm or general q-norm
                {
                    if ((CMath::abs(slack[i])<1e-6))
                        num_active_rows++;
                    else
                    {
                        if (slack[i]>max_slack)
                        {
                            max_slack=slack[i];
                            max_idx=i;
                        }
                    }
                }
            }

            // have at most max(100,num_active_rows*2) rows, if not, remove one
            if ((num_rows-start_row>CMath::max(100, 2*num_active_rows)) && (max_idx!=-1))
            {
                //SG_INFO("-%i(%i,%i)", max_idx, start_row, num_rows)
                status = CPXdelrows(env, lp_cplex, max_idx, max_idx);
                if (status)
                    SG_ERROR("Failed to remove an old row.\n")
            }

            //CMath::display_vector(x, num_kernels, "beta");

            rho = -x[2*num_kernels];
            SG_FREE(pi);
            SG_FREE(slack);
        }
        else
        {
            /* then something is wrong and we rather
               stop sooner than later */
            rho = 1;
        }
    }
    for (int32_t i=0; i<num_kernels; i++)
        new_beta[i]=x[i];

    SG_FREE(x);
#else
    SG_ERROR("Cplex not enabled at compile time\n")
#endif
    return rho;
}
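/*
 * GLPK-based solver (1-norm MKL only). This mirrors the CPLEX path: the LP
 * over (beta, slacks, theta) is built once (GLPK columns and rows are
 * 1-based), one cutting-plane row is appended per MKL step, and rows with
 * zero dual value are pruned once the LP grows beyond
 * max(100, 2*num_active_rows) rows.
 */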
float64_t CMKL::compute_optimal_betas_via_glpk(float64_t* beta, const float64_t* old_beta,
        int num_kernels, const float64_t* sumw, float64_t suma, int32_t& inner_iters)
{
    SG_DEBUG("MKL via GLPK\n")

    if (mkl_norm!=1)
        SG_ERROR("MKL via GLPK works only for norm=1\n")

    float64_t obj=1.0;
#ifdef USE_GLPK
    int32_t NUMCOLS = 2*num_kernels + 1;
    if (!lp_initialized)
    {
        SG_INFO("creating LP\n")

        // set obj function, note glpk indexing is 1-based
        glp_add_cols(lp_glpk, NUMCOLS);
        for (int i=1; i<=2*num_kernels; i++)
        {
            glp_set_obj_coef(lp_glpk, i, 0);
            glp_set_col_bnds(lp_glpk, i, GLP_DB, 0, 1);
        }
        for (int i=num_kernels+1; i<=2*num_kernels; i++)
        {
            glp_set_obj_coef(lp_glpk, i, C_mkl);
        }
        glp_set_obj_coef(lp_glpk, NUMCOLS, 1);
        glp_set_col_bnds(lp_glpk, NUMCOLS, GLP_FR, 0, 0); // unbound
        // add first row: sum(w)=1
        int row_index = glp_add_rows(lp_glpk, 1);
        int* ind = SG_MALLOC(int, num_kernels+2);
        float64_t* val = SG_MALLOC(float64_t, num_kernels+2);
        for (int i=1; i<=num_kernels; i++)
        {
            ind[i] = i;
            val[i] = 1;
        }
        ind[num_kernels+1] = NUMCOLS;
        val[num_kernels+1] = 0;
        glp_set_mat_row(lp_glpk, row_index, num_kernels, ind, val);
        glp_set_row_bnds(lp_glpk, row_index, GLP_FX, 1, 1);
        SG_FREE(val);
        SG_FREE(ind);

        lp_initialized = true;

        if (C_mkl!=0.0)
        {
            for (int32_t q=1; q<num_kernels; q++)
            {
                int mat_ind[4];
                float64_t mat_val[4];
                int mat_row_index = glp_add_rows(lp_glpk, 2);
                mat_ind[1] = q;
                mat_val[1] = 1;
                mat_ind[2] = q+1;
                mat_val[2] = -1;
                mat_ind[3] = num_kernels+q;
                mat_val[3] = -1;
                glp_set_mat_row(lp_glpk, mat_row_index, 3, mat_ind, mat_val);
                glp_set_row_bnds(lp_glpk, mat_row_index, GLP_UP, 0, 0);
                mat_val[1] = -1;
                mat_val[2] = 1;
                glp_set_mat_row(lp_glpk, mat_row_index+1, 3, mat_ind, mat_val);
                glp_set_row_bnds(lp_glpk, mat_row_index+1, GLP_UP, 0, 0);
            }
        }
    }

    int* ind=SG_MALLOC(int, num_kernels+2);
    float64_t* val=SG_MALLOC(float64_t, num_kernels+2);
    int row_index = glp_add_rows(lp_glpk, 1);
    for (int32_t i=1; i<=num_kernels; i++)
    {
        ind[i] = i;
        val[i] = -(sumw[i-1]-suma);
    }
    ind[num_kernels+1] = 2*num_kernels+1;
    val[num_kernels+1] = -1;
    glp_set_mat_row(lp_glpk, row_index, num_kernels+1, ind, val);
    glp_set_row_bnds(lp_glpk, row_index, GLP_UP, 0, 0);
    SG_FREE(ind);
    SG_FREE(val);

    // optimize
    glp_simplex(lp_glpk, lp_glpk_parm);
    bool res = check_glp_status(lp_glpk);
    if (!res)
        SG_ERROR("Failed to optimize Problem.\n")

    int32_t cur_numrows = glp_get_num_rows(lp_glpk);
    int32_t cur_numcols = glp_get_num_cols(lp_glpk);
    int32_t num_rows=cur_numrows;
    ASSERT(cur_numcols<=2*num_kernels+1)

    float64_t* col_primal = SG_MALLOC(float64_t, cur_numcols);
    float64_t* row_primal = SG_MALLOC(float64_t, cur_numrows);
    float64_t* row_dual = SG_MALLOC(float64_t, cur_numrows);

    for (int i=0; i<cur_numrows; i++)
    {
        row_primal[i] = glp_get_row_prim(lp_glpk, i+1);
        row_dual[i] = glp_get_row_dual(lp_glpk, i+1);
    }
    for (int i=0; i<cur_numcols; i++)
        col_primal[i] = glp_get_col_prim(lp_glpk, i+1);

    obj = -col_primal[2*num_kernels];

    for (int i=0; i<num_kernels; i++)
        beta[i] = col_primal[i];

    int32_t num_active_rows=0;
    if (res)
    {
        float64_t max_slack = CMath::INFTY;
        int32_t max_idx = -1;
        int32_t start_row = 1;
        if (C_mkl!=0.0)
            start_row += 2*(num_kernels-1);

        for (int32_t i=start_row; i<cur_numrows; i++)
        {
            if (row_dual[i]!=0)
                num_active_rows++;
            else
            {
                if (row_primal[i]<max_slack)
                {
                    max_slack = row_primal[i];
                    max_idx = i;
                }
            }
        }

        if ((num_rows-start_row>CMath::max(100, 2*num_active_rows)) && max_idx!=-1)
        {
            int del_rows[2];
            del_rows[1] = max_idx+1;
            glp_del_rows(lp_glpk, 1, del_rows);
        }
    }

    SG_FREE(row_dual);
    SG_FREE(row_primal);
    SG_FREE(col_primal);
#else
    SG_ERROR("Glpk not enabled at compile time\n")
#endif

    return obj;
}
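/*
 * compute_sum_beta() fills sumw[k] = 0.5 * alpha' * K_k * alpha for every
 * subkernel: it temporarily sets the weight of kernel k to 1 (all others 0),
 * accumulates over all support-vector pairs, and restores the original
 * weights afterwards. Together with suma = compute_sum_alpha() these are the
 * sufficient statistics consumed by perform_mkl_step().
 */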
void CMKL::compute_sum_beta(float64_t* sumw)
{
    ASSERT(sumw)
    ASSERT(svm)

    int32_t nsv=svm->get_num_support_vectors();
    int32_t num_kernels = kernel->get_num_subkernels();
    SGVector<float64_t> beta=SGVector<float64_t>(num_kernels);
    int32_t nweights=0;
    const float64_t* old_beta = kernel->get_subkernel_weights(nweights);
    ASSERT(nweights==num_kernels)
    ASSERT(old_beta)

    for (int32_t i=0; i<num_kernels; i++)
    {
        beta.vector[i]=0;
        sumw[i]=0;
    }

    for (int32_t n=0; n<num_kernels; n++)
    {
        beta.vector[n]=1.0;
        /* this only copies the value of the first entry of this array
         * so it may be freed safely afterwards. */
        kernel->set_subkernel_weights(beta);

        for (int32_t i=0; i<nsv; i++)
        {
            int32_t ii=svm->get_support_vector(i);

            for (int32_t j=0; j<nsv; j++)
            {
                int32_t jj=svm->get_support_vector(j);
                sumw[n]+=0.5*svm->get_alpha(i)*svm->get_alpha(j)*kernel->kernel(ii, jj);
            }
        }
        beta[n]=0.0;
    }

    mkl_iterations++;
    kernel->set_subkernel_weights(SGVector<float64_t>((float64_t*) old_beta, num_kernels, false));
}

// assumes that all constraints are satisfied
float64_t CMKL::compute_mkl_dual_objective()
{
    if (get_solver_type()==ST_ELASTICNET)
    {
        // -- Compute ElasticnetMKL dual objective
        return compute_elasticnet_dual_objective();
    }

    int32_t n=get_num_support_vectors();
    float64_t mkl_obj=0;

    if (m_labels && kernel && kernel->get_kernel_type() == K_COMBINED)
    {
        for (index_t k_idx=0; k_idx<((CCombinedKernel*) kernel)->get_num_kernels(); k_idx++)
        {
            CKernel* kn = ((CCombinedKernel*) kernel)->get_kernel(k_idx);
            float64_t sum=0;
            for (int32_t i=0; i<n; i++)
            {
                int32_t ii=get_support_vector(i);

                for (int32_t j=0; j<n; j++)
                {
                    int32_t jj=get_support_vector(j);
                    sum+=get_alpha(i)*get_alpha(j)*kn->kernel(ii, jj);
                }
            }

            if (mkl_norm==1.0)
                mkl_obj = CMath::max(mkl_obj, sum);
            else
                mkl_obj += CMath::pow(sum, mkl_norm/(mkl_norm-1));

            SG_UNREF(kn);
        }

        if (mkl_norm==1.0)
            mkl_obj=-0.5*mkl_obj;
        else
            mkl_obj= -0.5*CMath::pow(mkl_obj, (mkl_norm-1)/mkl_norm);

        mkl_obj+=compute_sum_alpha();
    }
    else
        SG_ERROR("cannot compute objective, labels or kernel not set\n")

    return -mkl_obj;
}
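/*
 * set_qnorm_constraints() replaces the non-linear q-norm ball constraint
 * sum_p beta_p^q <= 1 by its second-order Taylor expansion around the
 * current beta, expressed as a single CPLEX quadratic constraint: any
 * previously installed expansion is deleted first, then the linear part
 * (lin_term) and the diagonal quadratic part (hess_beta) are added via
 * CPXaddqconstr().
 */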
#ifdef USE_CPLEX
void CMKL::set_qnorm_constraints(float64_t* beta, int32_t num_kernels)
{
    ASSERT(num_kernels>0)

    float64_t* grad_beta=SG_MALLOC(float64_t, num_kernels);
    float64_t* hess_beta=SG_MALLOC(float64_t, num_kernels+1);
    float64_t* lin_term=SG_MALLOC(float64_t, num_kernels+1);
    int* ind=SG_MALLOC(int, num_kernels+1);

    //CMath::display_vector(beta, num_kernels, "beta");
    double const_term = 1-CMath::qsq(beta, num_kernels, mkl_norm);

    //SG_PRINT("const=%f\n", const_term)
    ASSERT(CMath::fequal(const_term, 0.0))

    for (int32_t i=0; i<num_kernels; i++)
    {
        grad_beta[i]=mkl_norm * pow(beta[i], mkl_norm-1);
        hess_beta[i]=0.5*mkl_norm*(mkl_norm-1) * pow(beta[i], mkl_norm-2);
        lin_term[i]=grad_beta[i] - 2*beta[i]*hess_beta[i];
        const_term+=grad_beta[i]*beta[i] - CMath::sq(beta[i])*hess_beta[i];
        ind[i]=i;
    }
    ind[num_kernels]=2*num_kernels;
    hess_beta[num_kernels]=0;
    lin_term[num_kernels]=0;

    int status=0;
    int num=CPXgetnumqconstrs(env, lp_cplex);

    if (num>0)
    {
        status = CPXdelqconstrs(env, lp_cplex, 0, 0);
        ASSERT(!status)
    }

    status = CPXaddqconstr(env, lp_cplex, num_kernels+1, num_kernels+1, const_term, 'L', ind, lin_term,
            ind, ind, hess_beta, NULL);
    ASSERT(!status)

    //CPXwriteprob (env, lp_cplex, "prob.lp", NULL);
    //CPXqpwrite (env, lp_cplex, "prob.qp");

    SG_FREE(grad_beta);
    SG_FREE(hess_beta);
    SG_FREE(lin_term);
    SG_FREE(ind);
}
#endif // USE_CPLEX
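/*
 * Usage sketch (illustrative only; assumes the CMKLClassification subclass,
 * CCombinedKernel/CCombinedFeatures, CGaussianKernel and CBinaryLabels from
 * the surrounding library, with `feats_a`, `feats_b` and `labels` prepared
 * by the caller):
 *
 *   CCombinedFeatures* feats = new CCombinedFeatures();
 *   feats->append_feature_obj(feats_a);
 *   feats->append_feature_obj(feats_b);
 *
 *   CCombinedKernel* kernel = new CCombinedKernel();
 *   kernel->append_kernel(new CGaussianKernel(10, 0.5));
 *   kernel->append_kernel(new CGaussianKernel(10, 2.0));
 *   kernel->init(feats, feats);
 *
 *   CMKLClassification* mkl = new CMKLClassification(new CLibSVM());
 *   mkl->set_kernel(kernel);
 *   mkl->set_labels(labels);
 *   mkl->set_mkl_norm(2.0); // q-norm MKL; q=1 gives sparse kernel weights
 *   mkl->train();
 *
 *   int32_t nw=0;
 *   const float64_t* w = kernel->get_subkernel_weights(nw); // learned betas
 */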