SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
libncbm.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Copyright (C) 2012 Viktor Gal
00008  *
00009  */
00010 
00011 #include <shogun/structure/libncbm.h>
00012 #include <shogun/lib/Time.h>
00013 
00014 #include <shogun/lib/external/libqp.h>
00015 #include <shogun/multiclass/GMNPLib.h>
00016 
00017 #include <vector>
00018 
00019 namespace shogun
00020 {
00021 
00022 static float64_t* HMatrix;
00023 static uint32_t maxCPs;
00024 static const float64_t epsilon=0.0;
00025 
00026 static const float64_t *get_col(uint32_t i)
00027 {
00028     return (&HMatrix[maxCPs*i]);
00029 }
00030 
00031 IGNORE_IN_CLASSLIST struct line_search_res
00032 {
00033     /* */
00034     float64_t a;
00035     float64_t fval;
00036     float64_t reg;
00037     SGVector<float64_t> solution;
00038     SGVector<float64_t> gradient;
00039 };
00040 
00041 inline static line_search_res zoom
00042 (
00043  CDualLibQPBMSOSVM *machine,
00044  float64_t lambda,
00045  float64_t a_lo,
00046  float64_t a_hi,
00047  float64_t initial_fval,
00048  SGVector<float64_t>& initial_solution,
00049  SGVector<float64_t>& search_dir,
00050  float64_t wolfe_c1,
00051  float64_t wolfe_c2,
00052  float64_t init_lgrad,
00053  float64_t f_lo,
00054  float64_t g_lo,
00055  float64_t f_hi,
00056  float64_t g_hi
00057 )
00058 {
00059     line_search_res ls_res;
00060     ls_res.solution.resize_vector(initial_solution.vlen);
00061     ls_res.gradient.resize_vector(initial_solution.vlen);
00062 
00063     SGVector<float64_t> cur_solution(initial_solution.vlen);
00064     cur_solution.zero();
00065     SGVector<float64_t> cur_grad(initial_solution.vlen);
00066 
00067     uint32_t iter = 0;
00068     while (1)
00069     {
00070         float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
00071         float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
00072         float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);
00073 
00074         if (a_lo < a_hi)
00075         {
00076             if ((a_j < a_lo) || (a_j > a_hi))
00077             {
00078                 a_j = 0.5*(a_lo+a_hi);
00079             }
00080         }
00081         else
00082         {
00083             if ((a_j > a_lo) || (a_j < a_hi))
00084             {
00085                 a_j = 0.5*(a_lo+a_hi);
00086             }
00087         }
00088 
00089         cur_solution.add(cur_solution.vector, 1.0,
00090                 initial_solution.vector, a_j,
00091                 search_dir.vector, cur_solution.vlen);
00092 
00093         float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
00094         float64_t cur_reg
00095             = 0.5*lambda*cur_solution.dot(cur_solution.vector,
00096                     cur_solution.vector, cur_solution.vlen);
00097         cur_fval += cur_reg;
00098 
00099         cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);
00100 
00101         if
00102             (
00103              (cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
00104              ||
00105              (cur_fval > f_lo)
00106             )
00107         {
00108             a_hi = a_j;
00109             f_hi = cur_fval;
00110             g_hi = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
00111         }
00112         else
00113         {
00114             float64_t cur_lgrad
00115                 = cur_grad.dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
00116 
00117             if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
00118             {
00119                 ls_res.a = a_j;
00120                 ls_res.fval = cur_fval;
00121                 ls_res.reg = cur_reg;
00122                 ls_res.gradient = cur_grad;
00123                 ls_res.solution = cur_solution;
00124 //              SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
00125                 return ls_res;
00126             }
00127 
00128             if (cur_lgrad*(a_hi - a_lo) >= 0)
00129             {
00130                 a_hi = a_lo;
00131                 f_hi = f_lo;
00132                 g_hi = g_lo;
00133             }
00134             a_lo = a_j;
00135             f_lo = cur_fval;
00136             g_lo = cur_lgrad;
00137         }
00138 
00139         if
00140             (
00141              (CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
00142              ||
00143              (iter >= 5)
00144             )
00145         {
00146             ls_res.a = a_j;
00147             ls_res.fval = cur_fval;
00148             ls_res.reg = cur_reg;
00149             ls_res.gradient = cur_grad;
00150             ls_res.solution = cur_solution;
00151 //          SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
00152             return ls_res;
00153         }
00154         iter++;
00155     }
00156 }
00157 
00158 inline std::vector<line_search_res> line_search_with_strong_wolfe
00159 (
00160         CDualLibQPBMSOSVM *machine,
00161         float64_t lambda,
00162         float64_t initial_val,
00163         SGVector<float64_t>& initial_solution,
00164         SGVector<float64_t>& initial_grad,
00165         SGVector<float64_t>& search_dir,
00166         float64_t astart,
00167         float64_t amax = 1.1,
00168         float64_t wolfe_c1 = 1E-4,
00169         float64_t wolfe_c2 = 0.9,
00170         float64_t max_iter = 5
00171 )
00172 {
00173     /* NOTE:
00174      * model->risk returns only the risk as it's name says
00175      * have to cur_fval = model.risk + (lambda*0.5*w*w')
00176      *
00177      * subgrad should be: subgrad + (lambda*w)
00178      *
00179      */
00180 
00181     uint32_t iter = 0;
00182 
00183     initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);
00184 
00185     float64_t initial_lgrad = initial_grad.dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
00186     float64_t prev_lgrad = initial_lgrad;
00187     float64_t prev_fval = initial_val;
00188 
00189     float64_t prev_a = 0;
00190     float64_t cur_a = astart;
00191 
00192     std::vector<line_search_res> ret;
00193     while (1)
00194     {
00195         SGVector<float64_t> x(initial_solution.vlen);
00196         SGVector<float64_t> cur_subgrad(initial_solution.vlen);
00197 
00198         x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
00199         float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
00200         float64_t cur_reg = 0.5*lambda*x.dot(x.vector, x.vector, x.vlen);
00201         cur_fval += cur_reg;
00202 
00203         cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
00204         if (iter == 0)
00205         {
00206             line_search_res initial_step;
00207             initial_step.fval = cur_fval;
00208             initial_step.reg = cur_reg;
00209             initial_step.gradient = cur_subgrad;
00210             initial_step.solution = x;
00211             ret.push_back(initial_step);
00212         }
00213 
00214         float64_t cur_lgrad
00215             = cur_subgrad.dot(cur_subgrad.vector, search_dir.vector,
00216                     cur_subgrad.vlen);
00217         if
00218             (
00219              (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
00220              ||
00221              (cur_fval >= prev_fval && iter > 0)
00222             )
00223         {
00224             ret.push_back(
00225                     zoom(machine, lambda, prev_a, cur_a, initial_val,
00226                         initial_solution, search_dir, wolfe_c1, wolfe_c2,
00227                         initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
00228                     );
00229             return ret;
00230         }
00231 
00232         if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
00233         {
00234             line_search_res ls_res;
00235             ls_res.a = cur_a;
00236             ls_res.fval = cur_fval;
00237             ls_res.reg = cur_reg;
00238             ls_res.solution = x;
00239             ls_res.gradient = cur_subgrad;
00240             ret.push_back(ls_res);
00241             return ret;
00242         }
00243 
00244         if (cur_lgrad >= 0)
00245         {
00246             ret.push_back(
00247                     zoom(machine, lambda, cur_a, prev_a, initial_val,
00248                         initial_solution, search_dir, wolfe_c1, wolfe_c2,
00249                         initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
00250                     );
00251             return ret;
00252         }
00253         iter++;
00254         if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
00255         {
00256             line_search_res ls_res;
00257             ls_res.a = cur_a;
00258             ls_res.fval = cur_fval;
00259             ls_res.reg = cur_reg;
00260             ls_res.solution = x;
00261             ls_res.gradient = cur_subgrad;
00262             ret.push_back(ls_res);
00263             return ret;
00264         }
00265 
00266         prev_a = cur_a;
00267         prev_fval = cur_fval;
00268         prev_lgrad = cur_lgrad;
00269 
00270         cur_a = (cur_a + amax)*0.5;
00271     }
00272 }
00273 
00274 inline void update_H(BmrmStatistics& ncbm,
00275         bmrm_ll* head,
00276         bmrm_ll* tail,
00277         SGMatrix<float64_t>& H,
00278         SGVector<float64_t>& diag_H,
00279         float64_t lambda,
00280         uint32_t maxCP,
00281         int32_t w_dim)
00282 {
00283     float64_t* a_2 = get_cutting_plane(tail);
00284     bmrm_ll* cp_ptr=head;
00285 
00286     for (uint32_t i=0; i < ncbm.nCP; ++i)
00287     {
00288         float64_t* a_1 = get_cutting_plane(cp_ptr);
00289         cp_ptr=cp_ptr->next;
00290 
00291         float64_t dot_val = SGVector<float64_t>::dot(a_2, a_1, w_dim);
00292 
00293         H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
00294             = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
00295             = dot_val/lambda;
00296     }
00297 
00298     /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
00299     float64_t dot_val = SGVector<float64_t>::dot(a_2, a_2, w_dim);
00300     H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
00301 
00302     diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
00303 
00304     ncbm.nCP++;
00305 }
00306 
00307 
00308 BmrmStatistics svm_ncbm_solver(
00309         CDualLibQPBMSOSVM *machine,
00310         float64_t         *w,
00311         float64_t         TolRel,
00312         float64_t         TolAbs,
00313         float64_t         _lambda,
00314         uint32_t          _BufSize,
00315         bool              cleanICP,
00316         uint32_t          cleanAfter,
00317         bool              is_convex,
00318         bool              line_search,
00319         bool              verbose
00320         )
00321 {
00322     BmrmStatistics ncbm;
00323     libqp_state_T qp_exitflag={0, 0, 0, 0};
00324     int32_t w_dim = machine->get_model()->get_dim();
00325 
00326     maxCPs = _BufSize;
00327 
00328     ncbm.nCP=0;
00329     ncbm.nIter=0;
00330     ncbm.exitflag=0;
00331 
00332     /* variables for timing the algorithm*/
00333     CTime ttime;
00334     float64_t tstart, tstop;
00335     tstart=ttime.cur_time_diff(false);
00336 
00337     /* matrix of subgradiends */
00338     SGMatrix<float64_t> A(w_dim, maxCPs);
00339 
00340     /* bias vector */
00341     SGVector<float64_t> bias(maxCPs);
00342     bias.zero();
00343 
00344     /* Matrix for storing H = A*A' */
00345     SGMatrix<float64_t> H(maxCPs,maxCPs);
00346     HMatrix = H.matrix;
00347 
00348     /* diag_H */
00349     SGVector<float64_t> diag_H(maxCPs);
00350     diag_H.zero();
00351 
00352     /* x the solution vector of the dual problem:
00353      * 1/lambda x*H*x' - x*bias
00354      */
00355     SGVector<float64_t> x(maxCPs);
00356     x.zero();
00357 
00358     /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
00359     float64_t b = 1.0;
00360     uint8_t S = 1;
00361     SGVector<uint32_t> I(maxCPs);
00362     I.set_const(1);
00363 
00364     /* libqp paramteres */
00365     uint32_t QPSolverMaxIter = 0xFFFFFFFF;
00366     float64_t QPSolverTolRel = 1E-9;
00367 
00368     /* variables for maintaining inactive planes */
00369     SGVector<bool> map(maxCPs);
00370     map.set_const(true);
00371     ICP_stats icp_stats;
00372     icp_stats.maxCPs = maxCPs;
00373     icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
00374     icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
00375     icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
00376     icp_stats.H_buff = (float64_t*) LIBBMRM_CALLOC(maxCPs*maxCPs,float64_t);
00377     if
00378     (
00379         icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
00380         || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
00381     )
00382     {
00383         ncbm.exitflag=-2;
00384         LIBBMRM_FREE(icp_stats.ICPcounter);
00385         LIBBMRM_FREE(icp_stats.ICPs);
00386         LIBBMRM_FREE(icp_stats.ACPs);
00387         LIBBMRM_FREE(icp_stats.H_buff);
00388         return ncbm;
00389     }
00390 
00391     /* best */
00392     float64_t best_Fp = CMath::INFTY;
00393     float64_t best_risk = CMath::INFTY;
00394     SGVector<float64_t> best_w(w_dim);
00395     best_w.zero();
00396     SGVector<float64_t> best_subgrad(w_dim);
00397     best_subgrad.zero();
00398 
00399     /* initial solution */
00400     SGVector<float64_t> cur_subgrad(w_dim);
00401     SGVector<float64_t> cur_w(w_dim);
00402     memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);
00403 
00404     float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
00405     bias[0] = -cur_risk;
00406     best_Fp = 0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
00407     best_risk = cur_risk;
00408     memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
00409     memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);
00410 
00411     /* create a double-linked list over the A the subgrad matrix */
00412     bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
00413     cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
00414     if (cp_list==NULL)
00415     {
00416         ncbm.exitflag=-2;
00417         return ncbm;
00418     }
00419     /* save the subgradient */
00420     memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
00421     map[0] = false;
00422     cp_list->address=&A[0];
00423     cp_list->idx=0;
00424     cp_list->prev=NULL;
00425     cp_list->next=NULL;
00426     CPList_head=cp_list;
00427     CPList_tail=cp_list;
00428 
00429     update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
00430     tstop=ttime.cur_time_diff(false);
00431     if (verbose)
00432         SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
00433                 ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
00434 
00435     float64_t astar = 0.01;
00436 
00437     SG_SPRINT("clean icps: %d\n", cleanICP)
00438     while (ncbm.exitflag==0)
00439     {
00440         tstart=ttime.cur_time_diff(false);
00441         ncbm.nIter++;
00442 
00443         //diag_H.display_vector();
00444         //bias.display_vector();
00445 
00446         /* solve the dual of the problem, namely:
00447          *
00448          */
00449         qp_exitflag =
00450             libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
00451                     ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
00452 
00453         ncbm.Fd = -qp_exitflag.QP;
00454 
00455         ncbm.qp_exitflag=qp_exitflag.exitflag;
00456 
00457         /* Update ICPcounter (add one to unused and reset used)
00458          * + compute number of active CPs */
00459         ncbm.nzA=0;
00460 
00461         for (uint32_t i=0; i < ncbm.nCP; ++i)
00462         {
00463             if (x[i] > epsilon)
00464             {
00465                 /* cp was active => reset counter */
00466                 ++ncbm.nzA;
00467                 icp_stats.ICPcounter[i]=0;
00468             }
00469             else
00470             {
00471                 icp_stats.ICPcounter[i]++;
00472             }
00473         }
00474 
00475         /* Inactive Cutting Planes (ICP) removal */
00476         if (cleanICP)
00477         {
00478             clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
00479                     H.matrix, diag_H.vector, x.vector,
00480                     map.vector, cleanAfter, bias.vector, I.vector);
00481         }
00482 
00483         /* calculate the new w
00484          * w[i] = -1/lambda*A[i]*x[i]
00485          */
00486         cur_w.zero();
00487         cp_ptr=CPList_head;
00488         for (uint32_t i=0; i < ncbm.nCP; ++i)
00489         {
00490             float64_t* A_1 = get_cutting_plane(cp_ptr);
00491             cp_ptr=cp_ptr->next;
00492             SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
00493         }
00494 
00495         bool calc_gap = false;
00496         if (calc_gap)
00497         {
00498             SGVector<float64_t> scores(ncbm.nCP);
00499             cp_ptr=CPList_head;
00500 
00501             for (uint32_t i=0; i < ncbm.nCP; ++i)
00502             {
00503                 float64_t* a_1 = get_cutting_plane(cp_ptr);
00504                 cp_ptr = cp_ptr->next;
00505                 scores[i] = cur_w.dot(cur_w.vector, a_1, w_dim);
00506             }
00507             scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
00508 
00509             float64_t w_norm = cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
00510             float64_t PO = 0.5*_lambda*w_norm + scores.max(scores.vector, scores.vlen);
00511             float64_t QP_gap = PO - ncbm.Fd;
00512 
00513             SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
00514         }
00515 
00516         /* Stopping conditions */
00517         if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
00518             ncbm.exitflag = 1;
00519 
00520         if ((best_Fp - ncbm.Fd) <= TolAbs)
00521             ncbm.exitflag = 2;
00522 
00523         if (ncbm.nCP >= maxCPs)
00524             ncbm.exitflag = -1;
00525 
00526         tstop=ttime.cur_time_diff(false);
00527 
00528         /* Verbose output */
00529         if (verbose)
00530             SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
00531                     ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
00532                     (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
00533 
00534         std::vector<line_search_res> wbest_candidates;
00535         if (!line_search)
00536         {
00537             cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
00538 
00539             add_cutting_plane(&CPList_tail, map, A.matrix,
00540                     find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
00541 
00542             bias[ncbm.nCP] = cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
00543 
00544             update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
00545 
00546             // add as a new wbest candidate
00547             line_search_res ls;
00548             ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
00549             ls.solution = cur_w;
00550             ls.gradient = cur_subgrad;
00551 
00552             wbest_candidates.push_back(ls);
00553         }
00554         else
00555         {
00556             tstart=ttime.cur_time_diff(false);
00557             /* do line searching */
00558             SGVector<float64_t> search_dir(w_dim);
00559             search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
00560 
00561             float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
00562             float64_t astart;
00563             uint32_t cp_min_approx = 0;
00564             if (cp_min_approx || (ncbm.nIter == 1))
00565             {
00566                 astart = 1.0;
00567             }
00568             else
00569             {
00570                 astart = CMath::min(astar/norm_dir,1.0);
00571                 if (astart == 0)
00572                     astart = 1.0;
00573             }
00574 
00575             /* line search */
00576             std::vector<line_search_res> ls_res
00577                 = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
00578 
00579             if (ls_res[0].fval != ls_res[1].fval)
00580             {
00581                 ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
00582 
00583                 add_cutting_plane(&CPList_tail, map, A.matrix,
00584                         find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
00585 
00586                 bias[ncbm.nCP]
00587                     = SGVector<float64_t>::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
00588                     - (ls_res[0].fval - ls_res[0].reg);
00589 
00590                 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
00591 
00592                 wbest_candidates.push_back(ls_res[0]);
00593             }
00594 
00595             ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
00596 
00597             add_cutting_plane(&CPList_tail, map, A.matrix,
00598                     find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
00599 
00600             bias[ncbm.nCP]
00601                 = ls_res[1].solution.dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
00602                 - (ls_res[1].fval - ls_res[1].reg);
00603 
00604             update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
00605 
00606             wbest_candidates.push_back(ls_res[1]);
00607 
00608             if ((best_Fp <= ls_res[1].fval) && (astart != 1))
00609             {
00610                 cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
00611 
00612                 add_cutting_plane(&CPList_tail, map, A.matrix,
00613                             find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
00614 
00615                 bias[ncbm.nCP]
00616                     =  cur_w.dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
00617 
00618                 update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
00619 
00620                 /* add as a new wbest candidate */
00621                 line_search_res ls;
00622                 ls.fval = cur_risk+0.5*_lambda*cur_w.dot(cur_w.vector, cur_w.vector, cur_w.vlen);
00623                 ls.solution = cur_w;
00624                 ls.gradient = cur_subgrad;
00625                 SG_SPRINT("%lf\n", ls.fval)
00626 
00627                 wbest_candidates.push_back(ls);
00628             }
00629 
00630             astar = ls_res[1].a * norm_dir;
00631 
00632             tstop=ttime.cur_time_diff(false);
00633             SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
00634         }
00635 
00636         /* search for the best w among the new candidates */
00637         if (verbose)
00638             SG_SPRINT("\t searching for the best Fp:\n")
00639         for (size_t i = 0; i < wbest_candidates.size(); i++)
00640         {
00641             if (verbose)
00642                 SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
00643 
00644             if (wbest_candidates[i].fval < best_Fp)
00645             {
00646                 best_Fp = wbest_candidates[i].fval;
00647                 best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
00648                 memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
00649                 memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
00650 
00651                 ncbm.Fp = best_Fp;
00652 
00653                 if (verbose)
00654                     SG_SPRINT("\t\t new best norm: %f\n",
00655                             best_w.twonorm(best_w.vector, w_dim));
00656             }
00657 
00658             if (!is_convex)
00659             {
00660                 index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
00661 
00662                 /* conflict */
00663                 float64_t score
00664                     = SGVector<float64_t>::dot(best_w.vector,
00665                             wbest_candidates[i].gradient.vector, w_dim)
00666                     + (-1.0*bias[cp_idx]);
00667                 if (score > best_risk)
00668                 {
00669                     float64_t U
00670                         = best_risk
00671                         - SGVector<float64_t>::dot(best_w.vector,
00672                                 wbest_candidates[i].gradient.vector, w_dim);
00673 
00674                     float64_t L
00675                         = best_Fp - wbest_candidates[i].reg
00676                         - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
00677                                 wbest_candidates[i].gradient.vector, w_dim);
00678 
00679                     if (verbose)
00680                         SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
00681                     if (L <= U)
00682                     {
00683                         if (verbose)
00684                             SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
00685                         bias[cp_idx]= -L;
00686                     }
00687                     else
00688                     {
00689                         wbest_candidates[i].gradient.zero();
00690                         SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);
00691 
00692                         cp_ptr = CPList_tail;
00693                         for (size_t j = wbest_candidates.size()-1; i < j; --j)
00694                         {
00695                             cp_ptr = cp_ptr->prev;
00696                             SG_SPRINT("tail - %d\n (%d)", j, i)
00697                         }
00698 
00699                         float64_t* cp = get_cutting_plane(cp_ptr);
00700                         LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));
00701 
00702                         /* update the corresponding column and row in H */
00703                         cp_ptr = CPList_head;
00704                         for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
00705                         {
00706                             float64_t* a = get_cutting_plane(cp_ptr);
00707                             cp_ptr = cp_ptr->next;
00708                             float64_t dot_val
00709                                 = SGVector<float64_t>::dot(a, wbest_candidates[i].gradient.vector, w_dim);
00710 
00711                             H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
00712                                 = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
00713                                 = dot_val/_lambda;
00714                         }
00715 
00716                         diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
00717                             = SGVector<float64_t>::dot(wbest_candidates[i].gradient.vector,
00718                                     wbest_candidates[i].gradient.vector, w_dim);
00719 
00720 
00721                         bias[cp_idx]
00722                             = best_Fp - wbest_candidates[i].reg
00723                             - SGVector<float64_t>::dot(wbest_candidates[i].solution.vector,
00724                                     wbest_candidates[i].gradient.vector, w_dim);
00725 
00726                         if (verbose)
00727                             SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
00728                     }
00729                 }
00730             }
00731         }
00732 
00733         /* Inactive Cutting Planes (ICP) removal
00734         if (cleanICP)
00735         {
00736             clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
00737                     H.matrix, diag_H.vector, x.vector,
00738                     map.vector, cleanAfter, bias.vector, I.vector);
00739         }
00740         */
00741     }
00742 
00743     memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);
00744 
00745     /* free ICP_stats variables */
00746     LIBBMRM_FREE(icp_stats.ICPcounter);
00747     LIBBMRM_FREE(icp_stats.ICPs);
00748     LIBBMRM_FREE(icp_stats.ACPs);
00749     LIBBMRM_FREE(icp_stats.H_buff);
00750 
00751     return ncbm;
00752 }
00753 
00754 } /* namespace shogun */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation