SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Copyright (C) 2012 Viktor Gal 00008 * 00009 */ 00010 00011 #include <shogun/structure/libncbm.h> 00012 #include <shogun/lib/Time.h> 00013 00014 #include <shogun/lib/external/libqp.h> 00015 #include <shogun/multiclass/GMNPLib.h> 00016 00017 #include <vector> 00018 00019 namespace shogun 00020 { 00021 00022 static float64_t* HMatrix; 00023 static uint32_t maxCPs; 00024 static const float64_t epsilon=0.0; 00025 00026 static const float64_t *get_col(uint32_t i) 00027 { 00028 return (&HMatrix[maxCPs*i]); 00029 } 00030 00031 IGNORE_IN_CLASSLIST struct line_search_res 00032 { 00033 /* */ 00034 float64_t a; 00035 float64_t fval; 00036 float64_t reg; 00037 SGVector<float64_t> solution; 00038 SGVector<float64_t> gradient; 00039 }; 00040 00041 inline static line_search_res zoom 00042 ( 00043 CDualLibQPBMSOSVM *machine, 00044 float64_t lambda, 00045 float64_t a_lo, 00046 float64_t a_hi, 00047 float64_t initial_fval, 00048 SGVector<float64_t>& initial_solution, 00049 SGVector<float64_t>& search_dir, 00050 float64_t wolfe_c1, 00051 float64_t wolfe_c2, 00052 float64_t init_lgrad, 00053 float64_t f_lo, 00054 float64_t g_lo, 00055 float64_t f_hi, 00056 float64_t g_hi 00057 ) 00058 { 00059 line_search_res ls_res; 00060 ls_res.solution.resize_vector(initial_solution.vlen); 00061 ls_res.gradient.resize_vector(initial_solution.vlen); 00062 00063 SGVector<float64_t> cur_solution(initial_solution.vlen); 00064 cur_solution.zero(); 00065 SGVector<float64_t> cur_grad(initial_solution.vlen); 00066 00067 uint32_t iter = 0; 00068 while (1) 00069 { 00070 float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi); 00071 float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi); 00072 float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2); 00073 00074 if (a_lo < a_hi) 00075 { 00076 if ((a_j < a_lo) || (a_j > a_hi)) 00077 { 00078 a_j = 0.5*(a_lo+a_hi); 00079 } 00080 } 00081 else 00082 { 00083 if ((a_j > a_lo) || (a_j < a_hi)) 00084 { 00085 a_j = 0.5*(a_lo+a_hi); 00086 } 00087 } 00088 00089 cur_solution.add(cur_solution.vector, 1.0, 00090 initial_solution.vector, a_j, 00091 search_dir.vector, cur_solution.vlen); 00092 00093 float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector); 00094 float64_t cur_reg 00095 = 0.5*lambda*cur_solution.dot(cur_solution.vector, 00096 cur_solution.vector, cur_solution.vlen); 00097 cur_fval += cur_reg; 00098 00099 cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen); 00100 00101 if 00102 ( 00103 (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad)) 00104 || 00105 (cur_fval > f_lo) 00106 ) 00107 { 00108 a_hi = a_j; 00109 f_hi = cur_fval; 00110 g_hi = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen); 00111 } 00112 else 00113 { 00114 float64_t cur_lgrad 00115 = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen); 00116 00117 if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad) 00118 { 00119 ls_res.a = a_j; 00120 ls_res.fval = cur_fval; 00121 ls_res.reg = cur_reg; 00122 ls_res.gradient = cur_grad; 00123 ls_res.solution = cur_solution; 00124 // SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval) 00125 return ls_res; 00126 } 00127 00128 if (cur_lgrad*(a_hi - a_lo) >= 0) 00129 { 00130 a_hi = a_lo; 00131 f_hi = f_lo; 00132 g_hi = g_lo; 00133 } 00134 a_lo = a_j; 00135 f_lo = cur_fval; 00136 g_lo = cur_lgrad; 00137 } 00138 00139 if 00140 ( 00141 (CMath::abs(a_lo - a_hi) <= 0.01*a_lo) 00142 || 00143 (iter >= 5) 00144 ) 00145 { 00146 ls_res.a = a_j; 00147 ls_res.fval = cur_fval; 00148 ls_res.reg = cur_reg; 00149 ls_res.gradient = cur_grad; 00150 ls_res.solution = cur_solution; 00151 // SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval) 00152 return ls_res; 00153 } 00154 iter++; 00155 } 00156 } 00157 00158 inline std::vector<line_search_res> line_search_with_strong_wolfe 00159 ( 00160 CDualLibQPBMSOSVM *machine, 00161 float64_t lambda, 00162 float64_t initial_val, 00163 SGVector<float64_t>& initial_solution, 00164 SGVector<float64_t>& initial_grad, 00165 SGVector<float64_t>& search_dir, 00166 float64_t astart, 00167 float64_t amax = 1.1, 00168 float64_t wolfe_c1 = 1E-4, 00169 float64_t wolfe_c2 = 0.9, 00170 float64_t max_iter = 5 00171 ) 00172 { 00173 /* NOTE: 00174 * model->risk returns only the risk as it's name says 00175 * have to cur_fval = model.risk + (lambda*0.5*w*w') 00176 * 00177 * subgrad should be: subgrad + (lambda*w) 00178 * 00179 */ 00180 00181 uint32_t iter = 0; 00182 00183 initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen); 00184 00185 float64_t initial_lgrad = initial_grad.dot(initial_grad.vector, search_dir.vector, initial_grad.vlen); 00186 float64_t prev_lgrad = initial_lgrad; 00187 float64_t prev_fval = initial_val; 00188 00189 float64_t prev_a = 0; 00190 float64_t cur_a = astart; 00191 00192 std::vector<line_search_res> ret; 00193 while (1) 00194 { 00195 SGVector<float64_t> x(initial_solution.vlen); 00196 SGVector<float64_t> cur_subgrad(initial_solution.vlen); 00197 00198 x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen); 00199 float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector); 00200 float64_t cur_reg = 0.5*lambda*x.dot(x.vector, x.vector, x.vlen); 00201 cur_fval += cur_reg; 00202 00203 cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen); 00204 if (iter == 0) 00205 { 00206 line_search_res initial_step; 00207 initial_step.fval = cur_fval; 00208 initial_step.reg = cur_reg; 00209 initial_step.gradient = cur_subgrad; 00210 initial_step.solution = x; 00211 ret.push_back(initial_step); 00212 } 00213 00214 float64_t cur_lgrad 00215 = cur_subgrad.dot(cur_subgrad.vector, search_dir.vector, 00216 cur_subgrad.vlen); 00217 if 00218 ( 00219 (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad) 00220 || 00221 (cur_fval >= prev_fval && iter > 0) 00222 ) 00223 { 00224 ret.push_back( 00225 zoom(machine, lambda, prev_a, cur_a, initial_val, 00226 initial_solution, search_dir, wolfe_c1, wolfe_c2, 00227 initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad) 00228 ); 00229 return ret; 00230 } 00231 00232 if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad) 00233 { 00234 line_search_res ls_res; 00235 ls_res.a = cur_a; 00236 ls_res.fval = cur_fval; 00237 ls_res.reg = cur_reg; 00238 ls_res.solution = x; 00239 ls_res.gradient = cur_subgrad; 00240 ret.push_back(ls_res); 00241 return ret; 00242 } 00243 00244 if (cur_lgrad >= 0) 00245 { 00246 ret.push_back( 00247 zoom(machine, lambda, cur_a, prev_a, initial_val, 00248 initial_solution, search_dir, wolfe_c1, wolfe_c2, 00249 initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad) 00250 ); 00251 return ret; 00252 } 00253 iter++; 00254 if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter)) 00255 { 00256 line_search_res ls_res; 00257 ls_res.a = cur_a; 00258 ls_res.fval = cur_fval; 00259 ls_res.reg = cur_reg; 00260 ls_res.solution = x; 00261 ls_res.gradient = cur_subgrad; 00262 ret.push_back(ls_res); 00263 return ret; 00264 } 00265 00266 prev_a = cur_a; 00267 prev_fval = cur_fval; 00268 prev_lgrad = cur_lgrad; 00269 00270 cur_a = (cur_a + amax)*0.5; 00271 } 00272 } 00273 00274 inline void update_H(BmrmStatistics& ncbm, 00275 bmrm_ll* head, 00276 bmrm_ll* tail, 00277 SGMatrix<float64_t>& H, 00278 SGVector<float64_t>& diag_H, 00279 float64_t lambda, 00280 uint32_t maxCP, 00281 int32_t w_dim) 00282 { 00283 float64_t* a_2 = get_cutting_plane(tail); 00284 bmrm_ll* cp_ptr=head; 00285 00286 for (uint32_t i=0; i < ncbm.nCP; ++i) 00287 { 00288 float64_t* a_1 = get_cutting_plane(cp_ptr); 00289 cp_ptr=cp_ptr->next; 00290 00291 float64_t dot_val = SGVector<float64_t>::dot(a_2, a_1, w_dim); 00292 00293 H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)] 00294 = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)] 00295 = dot_val/lambda; 00296 } 00297 00298 /* set the diagonal element, i.e. subgrad_i*subgrad_i' */ 00299 float64_t dot_val = SGVector<float64_t>::dot(a_2, a_2, w_dim); 00300 H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda; 00301 00302 diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]; 00303 00304 ncbm.nCP++; 00305 } 00306 00307 00308 BmrmStatistics svm_ncbm_solver( 00309 CDualLibQPBMSOSVM *machine, 00310 float64_t *w, 00311 float64_t TolRel, 00312 float64_t TolAbs, 00313 float64_t _lambda, 00314 uint32_t _BufSize, 00315 bool cleanICP, 00316 uint32_t cleanAfter, 00317 bool is_convex, 00318 bool line_search, 00319 bool verbose 00320 ) 00321 { 00322 BmrmStatistics ncbm; 00323 libqp_state_T qp_exitflag={0, 0, 0, 0}; 00324 int32_t w_dim = machine->get_model()->get_dim(); 00325 00326 maxCPs = _BufSize; 00327 00328 ncbm.nCP=0; 00329 ncbm.nIter=0; 00330 ncbm.exitflag=0; 00331 00332 /* variables for timing the algorithm*/ 00333 CTime ttime; 00334 float64_t tstart, tstop; 00335 tstart=ttime.cur_time_diff(false); 00336 00337 /* matrix of subgradiends */ 00338 SGMatrix<float64_t> A(w_dim, maxCPs); 00339 00340 /* bias vector */ 00341 SGVector<float64_t> bias(maxCPs); 00342 bias.zero(); 00343 00344 /* Matrix for storing H = A*A' */ 00345 SGMatrix<float64_t> H(maxCPs,maxCPs); 00346 HMatrix = H.matrix; 00347 00348 /* diag_H */ 00349 SGVector<float64_t> diag_H(maxCPs); 00350 diag_H.zero(); 00351 00352 /* x the solution vector of the dual problem: 00353 * 1/lambda x*H*x' - x*bias 00354 */ 00355 SGVector<float64_t> x(maxCPs); 00356 x.zero(); 00357 00358 /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */ 00359 float64_t b = 1.0; 00360 uint8_t S = 1; 00361 SGVector<uint32_t> I(maxCPs); 00362 I.set_const(1); 00363 00364 /* libqp paramteres */ 00365 uint32_t QPSolverMaxIter = 0xFFFFFFFF; 00366 float64_t QPSolverTolRel = 1E-9; 00367 00368 /* variables for maintaining inactive planes */ 00369 SGVector<bool> map(maxCPs); 00370 map.set_const(true); 00371 ICP_stats icp_stats; 00372 icp_stats.maxCPs = maxCPs; 00373 icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t); 00374 icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*); 00375 icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t); 00376 icp_stats.H_buff = (float64_t*) LIBBMRM_CALLOC(maxCPs*maxCPs,float64_t); 00377 if 00378 ( 00379 icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL 00380 || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL 00381 ) 00382 { 00383 ncbm.exitflag=-2; 00384 LIBBMRM_FREE(icp_stats.ICPcounter); 00385 LIBBMRM_FREE(icp_stats.ICPs); 00386 LIBBMRM_FREE(icp_stats.ACPs); 00387 LIBBMRM_FREE(icp_stats.H_buff); 00388 return ncbm; 00389 } 00390 00391 /* best */ 00392 float64_t best_Fp = CMath::INFTY; 00393 float64_t best_risk = CMath::INFTY; 00394 SGVector<float64_t> best_w(w_dim); 00395 best_w.zero(); 00396 SGVector<float64_t> best_subgrad(w_dim); 00397 best_subgrad.zero(); 00398 00399 /* initial solution */ 00400 SGVector<float64_t> cur_subgrad(w_dim); 00401 SGVector<float64_t> cur_w(w_dim); 00402 memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim); 00403 00404 float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector); 00405 bias[0] = -cur_risk; 00406 best_Fp = 0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk; 00407 best_risk = cur_risk; 00408 memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim); 00409 memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim); 00410 00411 /* create a double-linked list over the A the subgrad matrix */ 00412 bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL; 00413 cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1); 00414 if (cp_list==NULL) 00415 { 00416 ncbm.exitflag=-2; 00417 return ncbm; 00418 } 00419 /* save the subgradient */ 00420 memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim); 00421 map[0] = false; 00422 cp_list->address=&A[0]; 00423 cp_list->idx=0; 00424 cp_list->prev=NULL; 00425 cp_list->next=NULL; 00426 CPList_head=cp_list; 00427 CPList_tail=cp_list; 00428 00429 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim); 00430 tstop=ttime.cur_time_diff(false); 00431 if (verbose) 00432 SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n", 00433 ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk); 00434 00435 float64_t astar = 0.01; 00436 00437 SG_SPRINT("clean icps: %d\n", cleanICP) 00438 while (ncbm.exitflag==0) 00439 { 00440 tstart=ttime.cur_time_diff(false); 00441 ncbm.nIter++; 00442 00443 //diag_H.display_vector(); 00444 //bias.display_vector(); 00445 00446 /* solve the dual of the problem, namely: 00447 * 00448 */ 00449 qp_exitflag = 00450 libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector, 00451 ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0); 00452 00453 ncbm.Fd = -qp_exitflag.QP; 00454 00455 ncbm.qp_exitflag=qp_exitflag.exitflag; 00456 00457 /* Update ICPcounter (add one to unused and reset used) 00458 * + compute number of active CPs */ 00459 ncbm.nzA=0; 00460 00461 for (uint32_t i=0; i < ncbm.nCP; ++i) 00462 { 00463 if (x[i] > epsilon) 00464 { 00465 /* cp was active => reset counter */ 00466 ++ncbm.nzA; 00467 icp_stats.ICPcounter[i]=0; 00468 } 00469 else 00470 { 00471 icp_stats.ICPcounter[i]++; 00472 } 00473 } 00474 00475 /* Inactive Cutting Planes (ICP) removal */ 00476 if (cleanICP) 00477 { 00478 clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail, 00479 H.matrix, diag_H.vector, x.vector, 00480 map.vector, cleanAfter, bias.vector, I.vector); 00481 } 00482 00483 /* calculate the new w 00484 * w[i] = -1/lambda*A[i]*x[i] 00485 */ 00486 cur_w.zero(); 00487 cp_ptr=CPList_head; 00488 for (uint32_t i=0; i < ncbm.nCP; ++i) 00489 { 00490 float64_t* A_1 = get_cutting_plane(cp_ptr); 00491 cp_ptr=cp_ptr->next; 00492 SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim); 00493 } 00494 00495 bool calc_gap = false; 00496 if (calc_gap) 00497 { 00498 SGVector<float64_t> scores(ncbm.nCP); 00499 cp_ptr=CPList_head; 00500 00501 for (uint32_t i=0; i < ncbm.nCP; ++i) 00502 { 00503 float64_t* a_1 = get_cutting_plane(cp_ptr); 00504 cp_ptr = cp_ptr->next; 00505 scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim); 00506 } 00507 scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen); 00508 00509 float64_t w_norm = cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen); 00510 float64_t PO = 0.5*_lambda*w_norm + scores.max(scores.vector, scores.vlen); 00511 float64_t QP_gap = PO - ncbm.Fd; 00512 00513 SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap) 00514 } 00515 00516 /* Stopping conditions */ 00517 if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp)) 00518 ncbm.exitflag = 1; 00519 00520 if ((best_Fp - ncbm.Fd) <= TolAbs) 00521 ncbm.exitflag = 2; 00522 00523 if (ncbm.nCP >= maxCPs) 00524 ncbm.exitflag = -1; 00525 00526 tstop=ttime.cur_time_diff(false); 00527 00528 /* Verbose output */ 00529 if (verbose) 00530 SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n", 00531 ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd, 00532 (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp); 00533 00534 std::vector<line_search_res> wbest_candidates; 00535 if (!line_search) 00536 { 00537 cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector); 00538 00539 add_cutting_plane(&CPList_tail, map, A.matrix, 00540 find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim); 00541 00542 bias[ncbm.nCP] = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk; 00543 00544 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim); 00545 00546 // add as a new wbest candidate 00547 line_search_res ls; 00548 ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen); 00549 ls.solution = cur_w; 00550 ls.gradient = cur_subgrad; 00551 00552 wbest_candidates.push_back(ls); 00553 } 00554 else 00555 { 00556 tstart=ttime.cur_time_diff(false); 00557 /* do line searching */ 00558 SGVector<float64_t> search_dir(w_dim); 00559 search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim); 00560 00561 float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen); 00562 float64_t astart; 00563 uint32_t cp_min_approx = 0; 00564 if (cp_min_approx || (ncbm.nIter == 1)) 00565 { 00566 astart = 1.0; 00567 } 00568 else 00569 { 00570 astart = CMath::min(astar/norm_dir,1.0); 00571 if (astart == 0) 00572 astart = 1.0; 00573 } 00574 00575 /* line search */ 00576 std::vector<line_search_res> ls_res 00577 = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart); 00578 00579 if (ls_res[0].fval != ls_res[1].fval) 00580 { 00581 ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim); 00582 00583 add_cutting_plane(&CPList_tail, map, A.matrix, 00584 find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim); 00585 00586 bias[ncbm.nCP] 00587 = SGVector<float64_t>::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim) 00588 - (ls_res[0].fval - ls_res[0].reg); 00589 00590 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim); 00591 00592 wbest_candidates.push_back(ls_res[0]); 00593 } 00594 00595 ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim); 00596 00597 add_cutting_plane(&CPList_tail, map, A.matrix, 00598 find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim); 00599 00600 bias[ncbm.nCP] 00601 = ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim) 00602 - (ls_res[1].fval - ls_res[1].reg); 00603 00604 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim); 00605 00606 wbest_candidates.push_back(ls_res[1]); 00607 00608 if ((best_Fp <= ls_res[1].fval) && (astart != 1)) 00609 { 00610 cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector); 00611 00612 add_cutting_plane(&CPList_tail, map, A.matrix, 00613 find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim); 00614 00615 bias[ncbm.nCP] 00616 = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk; 00617 00618 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim); 00619 00620 /* add as a new wbest candidate */ 00621 line_search_res ls; 00622 ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen); 00623 ls.solution = cur_w; 00624 ls.gradient = cur_subgrad; 00625 SG_SPRINT("%lf\n", ls.fval) 00626 00627 wbest_candidates.push_back(ls); 00628 } 00629 00630 astar = ls_res[1].a * norm_dir; 00631 00632 tstop=ttime.cur_time_diff(false); 00633 SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart) 00634 } 00635 00636 /* search for the best w among the new candidates */ 00637 if (verbose) 00638 SG_SPRINT("\t searching for the best Fp:\n") 00639 for (size_t i = 0; i < wbest_candidates.size(); i++) 00640 { 00641 if (verbose) 00642 SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval) 00643 00644 if (wbest_candidates[i].fval < best_Fp) 00645 { 00646 best_Fp = wbest_candidates[i].fval; 00647 best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg; 00648 memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim); 00649 memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim); 00650 00651 ncbm.Fp = best_Fp; 00652 00653 if (verbose) 00654 SG_SPRINT("\t\t new best norm: %f\n", 00655 best_w.twonorm(best_w.vector, w_dim)); 00656 } 00657 00658 if (!is_convex) 00659 { 00660 index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i); 00661 00662 /* conflict */ 00663 float64_t score 00664 = SGVector<float64_t>::dot(best_w.vector, 00665 wbest_candidates[i].gradient.vector, w_dim) 00666 + (-1.0*bias[cp_idx]); 00667 if (score > best_risk) 00668 { 00669 float64_t U 00670 = best_risk 00671 - SGVector<float64_t>::dot(best_w.vector, 00672 wbest_candidates[i].gradient.vector, w_dim); 00673 00674 float64_t L 00675 = best_Fp - wbest_candidates[i].reg 00676 - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector, 00677 wbest_candidates[i].gradient.vector, w_dim); 00678 00679 if (verbose) 00680 SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U) 00681 if (L <= U) 00682 { 00683 if (verbose) 00684 SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L) 00685 bias[cp_idx]= -L; 00686 } 00687 else 00688 { 00689 wbest_candidates[i].gradient.zero(); 00690 SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim); 00691 00692 cp_ptr = CPList_tail; 00693 for (size_t j = wbest_candidates.size()-1; i < j; --j) 00694 { 00695 cp_ptr = cp_ptr->prev; 00696 SG_SPRINT("tail - %d\n (%d)", j, i) 00697 } 00698 00699 float64_t* cp = get_cutting_plane(cp_ptr); 00700 LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t)); 00701 00702 /* update the corresponding column and row in H */ 00703 cp_ptr = CPList_head; 00704 for (uint32_t j = 0; j < ncbm.nCP-1; ++j) 00705 { 00706 float64_t* a = get_cutting_plane(cp_ptr); 00707 cp_ptr = cp_ptr->next; 00708 float64_t dot_val 00709 = SGVector<float64_t>::dot(a, wbest_candidates[i].gradient.vector, w_dim); 00710 00711 H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)] 00712 = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)] 00713 = dot_val/_lambda; 00714 } 00715 00716 diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)] 00717 = SGVector<float64_t>::dot(wbest_candidates[i].gradient.vector, 00718 wbest_candidates[i].gradient.vector, w_dim); 00719 00720 00721 bias[cp_idx] 00722 = best_Fp - wbest_candidates[i].reg 00723 - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector, 00724 wbest_candidates[i].gradient.vector, w_dim); 00725 00726 if (verbose) 00727 SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L) 00728 } 00729 } 00730 } 00731 } 00732 00733 /* Inactive Cutting Planes (ICP) removal 00734 if (cleanICP) 00735 { 00736 clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail, 00737 H.matrix, diag_H.vector, x.vector, 00738 map.vector, cleanAfter, bias.vector, I.vector); 00739 } 00740 */ 00741 } 00742 00743 memcpy(w, best_w.vector, sizeof(float64_t)*w_dim); 00744 00745 /* free ICP_stats variables */ 00746 LIBBMRM_FREE(icp_stats.ICPcounter); 00747 LIBBMRM_FREE(icp_stats.ICPs); 00748 LIBBMRM_FREE(icp_stats.ACPs); 00749 LIBBMRM_FREE(icp_stats.H_buff); 00750 00751 return ncbm; 00752 } 00753 00754 } /* namespace shogun */