SHOGUN
v3.2.0
|
00001 /* 00002 * Copyright (c) 2007-2009 The LIBLINEAR Project. 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * 00016 * 3. Neither name of copyright holders nor the names of its contributors 00017 * may be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 */ 00033 #include <shogun/lib/config.h> 00034 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00035 #include <math.h> 00036 #include <stdio.h> 00037 #include <stdlib.h> 00038 #include <string.h> 00039 #include <stdarg.h> 00040 00041 #include <shogun/mathematics/Math.h> 00042 #include <shogun/optimization/liblinear/shogun_liblinear.h> 00043 #include <shogun/optimization/liblinear/tron.h> 00044 #include <shogun/lib/Time.h> 00045 #include <shogun/lib/Signal.h> 00046 00047 using namespace shogun; 00048 00049 l2r_lr_fun::l2r_lr_fun(const liblinear_problem *p, float64_t* Cs) 00050 { 00051 int l=p->l; 00052 00053 this->m_prob = p; 00054 00055 z = SG_MALLOC(double, l); 00056 D = SG_MALLOC(double, l); 00057 C = Cs; 00058 } 00059 00060 l2r_lr_fun::~l2r_lr_fun() 00061 { 00062 SG_FREE(z); 00063 SG_FREE(D); 00064 } 00065 00066 00067 double l2r_lr_fun::fun(double *w) 00068 { 00069 int i; 00070 double f=0; 00071 float64_t *y=m_prob->y; 00072 int l=m_prob->l; 00073 int32_t n=m_prob->n; 00074 00075 Xv(w, z); 00076 for(i=0;i<l;i++) 00077 { 00078 double yz = y[i]*z[i]; 00079 if (yz >= 0) 00080 f += C[i]*log(1 + exp(-yz)); 00081 else 00082 f += C[i]*(-yz+log(1 + exp(yz))); 00083 } 00084 f += 0.5 *SGVector<float64_t>::dot(w,w,n); 00085 00086 return(f); 00087 } 00088 00089 void l2r_lr_fun::grad(double *w, double *g) 00090 { 00091 int i; 00092 float64_t *y=m_prob->y; 00093 int l=m_prob->l; 00094 int w_size=get_nr_variable(); 00095 00096 for(i=0;i<l;i++) 00097 { 00098 z[i] = 1/(1 + exp(-y[i]*z[i])); 00099 D[i] = z[i]*(1-z[i]); 00100 z[i] = C[i]*(z[i]-1)*y[i]; 00101 } 00102 XTv(z, g); 00103 00104 for(i=0;i<w_size;i++) 00105 g[i] = w[i] + g[i]; 00106 } 00107 00108 int l2r_lr_fun::get_nr_variable() 00109 { 00110 return m_prob->n; 00111 } 00112 00113 void l2r_lr_fun::Hv(double *s, double *Hs) 00114 { 00115 int i; 00116 int l=m_prob->l; 00117 int w_size=get_nr_variable(); 00118 double *wa = SG_MALLOC(double, l); 00119 00120 Xv(s, wa); 00121 for(i=0;i<l;i++) 00122 wa[i] = C[i]*D[i]*wa[i]; 00123 00124 XTv(wa, Hs); 00125 for(i=0;i<w_size;i++) 00126 Hs[i] = s[i] + Hs[i]; 00127 SG_FREE(wa); 00128 } 00129 00130 void l2r_lr_fun::Xv(double *v, double *res_Xv) 00131 { 00132 int32_t l=m_prob->l; 00133 int32_t n=m_prob->n; 00134 float64_t bias=0; 00135 00136 if (m_prob->use_bias) 00137 { 00138 n--; 00139 bias=v[n]; 00140 } 00141 00142 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias); 00143 } 00144 00145 void l2r_lr_fun::XTv(double *v, double *res_XTv) 00146 { 00147 int l=m_prob->l; 00148 int32_t n=m_prob->n; 00149 00150 memset(res_XTv, 0, sizeof(double)*m_prob->n); 00151 00152 if (m_prob->use_bias) 00153 n--; 00154 00155 for (int32_t i=0;i<l;i++) 00156 { 00157 m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n); 00158 00159 if (m_prob->use_bias) 00160 res_XTv[n]+=v[i]; 00161 } 00162 } 00163 00164 l2r_l2_svc_fun::l2r_l2_svc_fun(const liblinear_problem *p, double* Cs) 00165 { 00166 int l=p->l; 00167 00168 this->m_prob = p; 00169 00170 z = SG_MALLOC(double, l); 00171 D = SG_MALLOC(double, l); 00172 I = SG_MALLOC(int, l); 00173 C=Cs; 00174 00175 } 00176 00177 l2r_l2_svc_fun::~l2r_l2_svc_fun() 00178 { 00179 SG_FREE(z); 00180 SG_FREE(D); 00181 SG_FREE(I); 00182 } 00183 00184 double l2r_l2_svc_fun::fun(double *w) 00185 { 00186 int i; 00187 double f=0; 00188 float64_t *y=m_prob->y; 00189 int l=m_prob->l; 00190 int w_size=get_nr_variable(); 00191 00192 Xv(w, z); 00193 for(i=0;i<l;i++) 00194 { 00195 z[i] = y[i]*z[i]; 00196 double d = 1-z[i]; 00197 if (d > 0) 00198 f += C[i]*d*d; 00199 } 00200 f += 0.5*SGVector<float64_t>::dot(w, w, w_size); 00201 00202 return(f); 00203 } 00204 00205 void l2r_l2_svc_fun::grad(double *w, double *g) 00206 { 00207 int i; 00208 float64_t *y=m_prob->y; 00209 int l=m_prob->l; 00210 int w_size=get_nr_variable(); 00211 00212 sizeI = 0; 00213 for (i=0;i<l;i++) 00214 if (z[i] < 1) 00215 { 00216 z[sizeI] = C[i]*y[i]*(z[i]-1); 00217 I[sizeI] = i; 00218 sizeI++; 00219 } 00220 subXTv(z, g); 00221 00222 for(i=0;i<w_size;i++) 00223 g[i] = w[i] + 2*g[i]; 00224 } 00225 00226 int l2r_l2_svc_fun::get_nr_variable() 00227 { 00228 return m_prob->n; 00229 } 00230 00231 void l2r_l2_svc_fun::Hv(double *s, double *Hs) 00232 { 00233 int i; 00234 int l=m_prob->l; 00235 int w_size=get_nr_variable(); 00236 double *wa = SG_MALLOC(double, l); 00237 00238 subXv(s, wa); 00239 for(i=0;i<sizeI;i++) 00240 wa[i] = C[I[i]]*wa[i]; 00241 00242 subXTv(wa, Hs); 00243 for(i=0;i<w_size;i++) 00244 Hs[i] = s[i] + 2*Hs[i]; 00245 SG_FREE(wa); 00246 } 00247 00248 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv) 00249 { 00250 int32_t l=m_prob->l; 00251 int32_t n=m_prob->n; 00252 float64_t bias=0; 00253 00254 if (m_prob->use_bias) 00255 { 00256 n--; 00257 bias=v[n]; 00258 } 00259 00260 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias); 00261 } 00262 00263 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv) 00264 { 00265 int32_t n=m_prob->n; 00266 float64_t bias=0; 00267 00268 if (m_prob->use_bias) 00269 { 00270 n--; 00271 bias=v[n]; 00272 } 00273 00274 m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias); 00275 00276 /*for (int32_t i=0;i<sizeI;i++) 00277 { 00278 res_Xv[i]=m_prob->x->dense_dot(I[i], v, n); 00279 00280 if (m_prob->use_bias) 00281 res_Xv[i]+=bias; 00282 }*/ 00283 } 00284 00285 void l2r_l2_svc_fun::subXTv(double *v, double *XTv) 00286 { 00287 int32_t n=m_prob->n; 00288 00289 if (m_prob->use_bias) 00290 n--; 00291 00292 memset(XTv, 0, sizeof(float64_t)*m_prob->n); 00293 for (int32_t i=0;i<sizeI;i++) 00294 { 00295 m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n); 00296 00297 if (m_prob->use_bias) 00298 XTv[n]+=v[i]; 00299 } 00300 } 00301 00302 l2r_l2_svr_fun::l2r_l2_svr_fun(const liblinear_problem *prob, double *Cs, double p): 00303 l2r_l2_svc_fun(prob, Cs) 00304 { 00305 m_p = p; 00306 } 00307 00308 double l2r_l2_svr_fun::fun(double *w) 00309 { 00310 int i; 00311 double f=0; 00312 double *y=m_prob->y; 00313 int l=m_prob->l; 00314 int w_size=get_nr_variable(); 00315 double d; 00316 00317 Xv(w, z); 00318 00319 for(i=0;i<w_size;i++) 00320 f += w[i]*w[i]; 00321 f /= 2; 00322 for(i=0;i<l;i++) 00323 { 00324 d = z[i] - y[i]; 00325 if(d < -m_p) 00326 f += C[i]*(d+m_p)*(d+m_p); 00327 else if(d > m_p) 00328 f += C[i]*(d-m_p)*(d-m_p); 00329 } 00330 00331 return(f); 00332 } 00333 00334 void l2r_l2_svr_fun::grad(double *w, double *g) 00335 { 00336 int i; 00337 double *y=m_prob->y; 00338 int l=m_prob->l; 00339 int w_size=get_nr_variable(); 00340 double d; 00341 00342 sizeI = 0; 00343 for(i=0;i<l;i++) 00344 { 00345 d = z[i] - y[i]; 00346 00347 // generate index set I 00348 if(d < -m_p) 00349 { 00350 z[sizeI] = C[i]*(d+m_p); 00351 I[sizeI] = i; 00352 sizeI++; 00353 } 00354 else if(d > m_p) 00355 { 00356 z[sizeI] = C[i]*(d-m_p); 00357 I[sizeI] = i; 00358 sizeI++; 00359 } 00360 00361 } 00362 subXTv(z, g); 00363 00364 for(i=0;i<w_size;i++) 00365 g[i] = w[i] + 2*g[i]; 00366 } 00367 00368 00369 // A coordinate descent algorithm for 00370 // multi-class support vector machines by Crammer and Singer 00371 // 00372 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i 00373 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i 00374 // 00375 // where e^m_i = 0 if y_i = m, 00376 // e^m_i = 1 if y_i != m, 00377 // C^m_i = C if m = y_i, 00378 // C^m_i = 0 if m != y_i, 00379 // and w_m(\alpha) = \sum_i \alpha^m_i x_i 00380 // 00381 // Given: 00382 // x, y, C 00383 // eps is the stopping tolerance 00384 // 00385 // solution will be put in w 00386 00387 #define GETI(i) (prob->y[i]) 00388 // To support weights for instances, use GETI(i) (i) 00389 00390 Solver_MCSVM_CS::Solver_MCSVM_CS(const liblinear_problem *p, int n_class, 00391 double *weighted_C, double *w0_reg, 00392 double epsilon, int max_it, double max_time, 00393 mcsvm_state* given_state) 00394 { 00395 this->w_size = p->n; 00396 this->l = p->l; 00397 this->nr_class = n_class; 00398 this->eps = epsilon; 00399 this->max_iter = max_it; 00400 this->prob = p; 00401 this->C = weighted_C; 00402 this->w0 = w0_reg; 00403 this->max_train_time = max_time; 00404 this->state = given_state; 00405 } 00406 00407 Solver_MCSVM_CS::~Solver_MCSVM_CS() 00408 { 00409 } 00410 00411 int compare_double(const void *a, const void *b) 00412 { 00413 if(*(double *)a > *(double *)b) 00414 return -1; 00415 if(*(double *)a < *(double *)b) 00416 return 1; 00417 return 0; 00418 } 00419 00420 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) 00421 { 00422 int r; 00423 double *D=SGVector<float64_t>::clone_vector(state->B, active_i); 00424 00425 if(yi < active_i) 00426 D[yi] += A_i*C_yi; 00427 qsort(D, active_i, sizeof(double), compare_double); 00428 00429 double beta = D[0] - A_i*C_yi; 00430 for(r=1;r<active_i && beta<r*D[r];r++) 00431 beta += D[r]; 00432 00433 beta /= r; 00434 for(r=0;r<active_i;r++) 00435 { 00436 if(r == yi) 00437 alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i); 00438 else 00439 alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i); 00440 } 00441 SG_FREE(D); 00442 } 00443 00444 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG) 00445 { 00446 double bound = 0; 00447 if(m == yi) 00448 bound = C[int32_t(GETI(i))]; 00449 if(alpha_i == bound && state->G[m] < minG) 00450 return true; 00451 return false; 00452 } 00453 00454 void Solver_MCSVM_CS::solve() 00455 { 00456 int i, m, s, k; 00457 int iter = 0; 00458 double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val; 00459 int *index,*d_ind,*alpha_index,*y_index,*active_size_i; 00460 00461 if (!state->allocated) 00462 { 00463 state->w = SG_CALLOC(double, nr_class*w_size); 00464 state->B = SG_CALLOC(double, nr_class); 00465 state->G = SG_CALLOC(double, nr_class); 00466 state->alpha = SG_CALLOC(double, l*nr_class); 00467 state->alpha_new = SG_CALLOC(double, nr_class); 00468 state->index = SG_CALLOC(int, l); 00469 state->QD = SG_CALLOC(double, l); 00470 state->d_ind = SG_CALLOC(int, nr_class); 00471 state->d_val = SG_CALLOC(double, nr_class); 00472 state->alpha_index = SG_CALLOC(int, nr_class*l); 00473 state->y_index = SG_CALLOC(int, l); 00474 state->active_size_i = SG_CALLOC(int, l); 00475 state->allocated = true; 00476 } 00477 w = state->w; 00478 B = state->B; 00479 G = state->G; 00480 alpha = state->alpha; 00481 alpha_new = state->alpha_new; 00482 index = state->index; 00483 QD = state->QD; 00484 d_ind = state->d_ind; 00485 d_val = state->d_val; 00486 alpha_index = state->alpha_index; 00487 y_index = state->y_index; 00488 active_size_i = state->active_size_i; 00489 00490 double* tx = SG_MALLOC(double, w_size); 00491 int dim = prob->x->get_dim_feature_space(); 00492 00493 int active_size = l; 00494 double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking 00495 bool start_from_all = true; 00496 CTime start_time; 00497 // initial 00498 if (!state->inited) 00499 { 00500 for(i=0;i<l;i++) 00501 { 00502 for(m=0;m<nr_class;m++) 00503 alpha_index[i*nr_class+m] = m; 00504 00505 QD[i] = prob->x->dot(i, prob->x,i); 00506 if (prob->use_bias) 00507 QD[i] += 1.0; 00508 00509 active_size_i[i] = nr_class; 00510 y_index[i] = prob->y[i]; 00511 index[i] = i; 00512 } 00513 state->inited = true; 00514 } 00515 00516 while(iter < max_iter && !CSignal::cancel_computations()) 00517 { 00518 double stopping = -CMath::INFTY; 00519 for(i=0;i<active_size;i++) 00520 { 00521 int j = CMath::random(i, active_size-1); 00522 CMath::swap(index[i], index[j]); 00523 } 00524 for(s=0;s<active_size;s++) 00525 { 00526 i = index[s]; 00527 double Ai = QD[i]; 00528 double *alpha_i = &alpha[i*nr_class]; 00529 int *alpha_index_i = &alpha_index[i*nr_class]; 00530 00531 if(Ai > 0) 00532 { 00533 for(m=0;m<active_size_i[i];m++) 00534 G[m] = 1; 00535 if(y_index[i] < active_size_i[i]) 00536 G[y_index[i]] = 0; 00537 00538 memset(tx,0,dim*sizeof(double)); 00539 prob->x->add_to_dense_vec(1.0,i,tx,dim); 00540 for (k=0; k<dim; k++) 00541 { 00542 if (tx[k]==0.0) 00543 continue; 00544 00545 double* w_i = &w[k*nr_class]; 00546 for (m=0; m<active_size_i[i]; m++) 00547 G[m] += w_i[alpha_index_i[m]]*tx[k]; 00548 } 00549 00550 // experimental 00551 // *** 00552 if (prob->use_bias) 00553 { 00554 double *w_i = &w[(w_size-1)*nr_class]; 00555 for(m=0; m<active_size_i[i]; m++) 00556 G[m] += w_i[alpha_index_i[m]]; 00557 } 00558 if (w0) 00559 { 00560 for (k=0; k<dim; k++) 00561 { 00562 double *w0_i = &w0[k*nr_class]; 00563 for(m=0; m<active_size_i[i]; m++) 00564 G[m] += w0_i[alpha_index_i[m]]; 00565 } 00566 } 00567 // *** 00568 00569 double minG = CMath::INFTY; 00570 double maxG = -CMath::INFTY; 00571 for(m=0;m<active_size_i[i];m++) 00572 { 00573 if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG) 00574 minG = G[m]; 00575 if(G[m] > maxG) 00576 maxG = G[m]; 00577 } 00578 if(y_index[i] < active_size_i[i]) 00579 if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG) 00580 minG = G[y_index[i]]; 00581 00582 for(m=0;m<active_size_i[i];m++) 00583 { 00584 if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG)) 00585 { 00586 active_size_i[i]--; 00587 while(active_size_i[i]>m) 00588 { 00589 if(!be_shrunk(i, active_size_i[i], y_index[i], 00590 alpha_i[alpha_index_i[active_size_i[i]]], minG)) 00591 { 00592 CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); 00593 CMath::swap(G[m], G[active_size_i[i]]); 00594 if(y_index[i] == active_size_i[i]) 00595 y_index[i] = m; 00596 else if(y_index[i] == m) 00597 y_index[i] = active_size_i[i]; 00598 break; 00599 } 00600 active_size_i[i]--; 00601 } 00602 } 00603 } 00604 00605 if(active_size_i[i] <= 1) 00606 { 00607 active_size--; 00608 CMath::swap(index[s], index[active_size]); 00609 s--; 00610 continue; 00611 } 00612 00613 if(maxG-minG <= 1e-12) 00614 continue; 00615 else 00616 stopping = CMath::CMath::max(maxG - minG, stopping); 00617 00618 for(m=0;m<active_size_i[i];m++) 00619 B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ; 00620 00621 solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new); 00622 int nz_d = 0; 00623 for(m=0;m<active_size_i[i];m++) 00624 { 00625 double d = alpha_new[m] - alpha_i[alpha_index_i[m]]; 00626 alpha_i[alpha_index_i[m]] = alpha_new[m]; 00627 if(fabs(d) >= 1e-12) 00628 { 00629 d_ind[nz_d] = alpha_index_i[m]; 00630 d_val[nz_d] = d; 00631 nz_d++; 00632 } 00633 } 00634 00635 memset(tx,0,dim*sizeof(double)); 00636 prob->x->add_to_dense_vec(1.0,i,tx,dim); 00637 for (k=0; k<dim; k++) 00638 { 00639 if (tx[k]==0.0) 00640 continue; 00641 00642 double* w_i = &w[k*nr_class]; 00643 for (m=0; m<nz_d; m++) 00644 w_i[d_ind[m]] += d_val[m]*tx[k]; 00645 } 00646 // experimental 00647 // *** 00648 if (prob->use_bias) 00649 { 00650 double *w_i = &w[(w_size-1)*nr_class]; 00651 for(m=0;m<nz_d;m++) 00652 w_i[d_ind[m]] += d_val[m]; 00653 } 00654 // *** 00655 } 00656 } 00657 00658 iter++; 00659 /* 00660 if(iter % 10 == 0) 00661 { 00662 SG_SINFO(".") 00663 } 00664 */ 00665 00666 if(stopping < eps_shrink) 00667 { 00668 if(stopping < eps && start_from_all == true) 00669 break; 00670 else 00671 { 00672 active_size = l; 00673 for(i=0;i<l;i++) 00674 active_size_i[i] = nr_class; 00675 //SG_SINFO("*") 00676 eps_shrink = CMath::max(eps_shrink/2, eps); 00677 start_from_all = true; 00678 } 00679 } 00680 else 00681 start_from_all = false; 00682 00683 if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff()) 00684 break; 00685 } 00686 00687 SG_SINFO("\noptimization finished, #iter = %d\n",iter) 00688 if (iter >= max_iter) 00689 SG_SINFO("Warning: reaching max number of iterations\n") 00690 00691 SG_FREE(tx); 00692 } 00693 00694 // 00695 // Interface functions 00696 // 00697 00698 void destroy_model(struct liblinear_model *model_) 00699 { 00700 SG_FREE(model_->w); 00701 SG_FREE(model_->label); 00702 SG_FREE(model_); 00703 } 00704 00705 void destroy_param(liblinear_parameter* param) 00706 { 00707 SG_FREE(param->weight_label); 00708 SG_FREE(param->weight); 00709 } 00710 #endif // DOXYGEN_SHOULD_SKIP_THIS