SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Sergey Lisitsyn 00008 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye 00009 */ 00010 00011 #include <shogun/lib/malsar/malsar_clustered.h> 00012 #ifdef HAVE_EIGEN3 00013 #ifndef HAVE_CXX11 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/mathematics/eigen3.h> 00016 #include <iostream> 00017 #include <shogun/lib/external/libqp.h> 00018 00019 using namespace Eigen; 00020 using namespace std; 00021 00022 namespace shogun 00023 { 00024 00025 static double* H_diag_matrix; 00026 static int H_diag_matrix_ld; 00027 00028 static const double* get_col(uint32_t j) 00029 { 00030 return H_diag_matrix + j*H_diag_matrix_ld; 00031 } 00032 00033 malsar_result_t malsar_clustered( 00034 CDotFeatures* features, 00035 double* y, 00036 double rho1, 00037 double rho2, 00038 const malsar_options& options) 00039 { 00040 int task; 00041 int n_feats = features->get_dim_feature_space(); 00042 SG_SDEBUG("n feats = %d\n", n_feats) 00043 int n_vecs = features->get_num_vectors(); 00044 SG_SDEBUG("n vecs = %d\n", n_vecs) 00045 int n_tasks = options.n_tasks; 00046 SG_SDEBUG("n tasks = %d\n", n_tasks) 00047 00048 H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks); 00049 for (int i=0; i<n_tasks; i++) 00050 H_diag_matrix[i*n_tasks+i] = 2.0; 00051 H_diag_matrix_ld = n_tasks; 00052 00053 int iter = 0; 00054 00055 // initialize weight vector and bias for each task 00056 MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); 00057 VectorXd Cs = VectorXd::Zero(n_tasks); 00058 MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks; 00059 MatrixXd IM = Ms; 00060 MatrixXd IMsqinv = Ms; 00061 MatrixXd invEtaMWt = Ms; 00062 00063 MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; 00064 VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; 00065 MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms; 00066 MatrixXd Mzp_Pz; 00067 00068 double eta = rho2/rho1; 00069 double c = rho1*eta*(1+eta); 00070 00071 double t=1, t_old=0; 00072 double gamma=1, gamma_inc=2; 00073 double obj=0.0, obj_old=0.0; 00074 00075 double* diag_H = SG_MALLOC(double, n_tasks); 00076 double* f = SG_MALLOC(double, n_tasks); 00077 double* a = SG_MALLOC(double, n_tasks); 00078 double* lb = SG_MALLOC(double, n_tasks); 00079 double* ub = SG_MALLOC(double, n_tasks); 00080 double* x = SG_CALLOC(double, n_tasks); 00081 00082 //internal::set_is_malloc_allowed(false); 00083 bool done = false; 00084 while (!done && iter <= options.max_iter) 00085 { 00086 double alpha = double(t_old - 1)/t; 00087 SG_SDEBUG("alpha=%f\n",alpha) 00088 00089 // compute search point 00090 Ws = (1+alpha)*Wz - alpha*Wz_old; 00091 Cs = (1+alpha)*Cz - alpha*Cz_old; 00092 Ms = (1+alpha)*Mz - alpha*Mz_old; 00093 00094 // zero gradient 00095 gWs.setZero(); 00096 gCs.setZero(); 00097 //internal::set_is_malloc_allowed(true); 00098 SG_SDEBUG("Computing gradient\n") 00099 IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms); 00100 IMsqinv = (IM*IM).inverse(); 00101 invEtaMWt = IM.inverse()*Ws.transpose(); 00102 gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv; 00103 gWs.noalias() += 2*c*invEtaMWt.transpose(); 00104 //internal::set_is_malloc_allowed(false); 00105 00106 // compute gradient and objective at search point 00107 double Fs = 0; 00108 for (task=0; task<n_tasks; task++) 00109 { 00110 SGVector<index_t> task_idx = options.tasks_indices[task]; 00111 int n_vecs_task = task_idx.vlen; 00112 for (int i=0; i<n_vecs_task; i++) 00113 { 00114 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); 00115 double bb = CMath::max(aa,0.0); 00116 00117 // avoid underflow when computing exponential loss 00118 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; 00119 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task; 00120 gCs[task] += b; 00121 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); 00122 } 00123 } 00124 SG_SDEBUG("Computed gradient\n") 00125 00126 // add regularizer 00127 Fs += c*(Ws*invEtaMWt).trace(); 00128 SG_SDEBUG("Fs = %f \n", Fs) 00129 00130 double Fzp = 0.0; 00131 00132 int inner_iter = 0; 00133 // line search, Armijo-Goldstein scheme 00134 while (inner_iter <= 1) 00135 { 00136 Wzp = Ws - gWs/gamma; 00137 Czp = Cs - gCs/gamma; 00138 // compute singular projection of Ms - gMs/gamma with k 00139 //internal::set_is_malloc_allowed(true); 00140 EigenSolver<MatrixXd> eigensolver; 00141 eigensolver.compute(Ms-gMs/gamma, true); 00142 if (eigensolver.info()!=Eigen::Success) 00143 SG_SERROR("Eigendecomposition failed") 00144 00145 // solve problem 00146 // min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1 00147 for (int i=0; i<n_tasks; i++) 00148 { 00149 diag_H[i] = 2.0; 00150 // TODO fails with C++11 00151 //std::complex<MatrixXd::Scalar> eigenvalue = eigensolver.eigenvalues()[i]; 00152 //cout << "eigenvalue " << eigenvalue << "=" << std::real(eigenvalue) << "+i" << std::imag(eigenvalue) << endl; 00153 f[i] = -2*eigensolver.eigenvalues()[i].real(); 00154 if (f[i]!=f[i]) 00155 SG_SERROR("NaN %d eigenvalue", i) 00156 00157 SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real()) 00158 a[i] = 1.0; 00159 lb[i] = 0.0; 00160 ub[i] = 1.0; 00161 x[i] = double(options.n_clusters)/n_tasks; 00162 } 00163 double b = options.n_clusters;//eigensolver.eigenvalues().sum().real(); 00164 SG_SDEBUG("b = %f\n", b) 00165 SG_SDEBUG("Calling libqp\n") 00166 libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL); 00167 SG_SDEBUG("Libqp objective = %f\n",problem_state.QP) 00168 SG_SDEBUG("Exit code = %d\n",problem_state.exitflag) 00169 SG_SDEBUG("%d iteration passed\n",problem_state.nIter) 00170 SG_SDEBUG("Solution is \n [ ") 00171 for (int i=0; i<n_tasks; i++) 00172 SG_SDEBUG("%f ", x[i]) 00173 SG_SDEBUG("]\n") 00174 Map<VectorXd> Mzp_DiagSigz(x,n_tasks); 00175 Mzp_Pz = eigensolver.eigenvectors().real(); 00176 Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose(); 00177 //internal::set_is_malloc_allowed(false); 00178 // walk in direction of antigradient 00179 for (int i=0; i<n_tasks; i++) 00180 Mzp_DiagSigz[i] += eta; 00181 //internal::set_is_malloc_allowed(true); 00182 invEtaMWt = (Mzp_Pz* 00183 (Mzp_DiagSigz.cwiseInverse().asDiagonal())* 00184 Mzp_Pz.transpose())* 00185 Wzp.transpose(); 00186 //internal::set_is_malloc_allowed(false); 00187 // compute objective at line search point 00188 Fzp = 0.0; 00189 for (task=0; task<n_tasks; task++) 00190 { 00191 SGVector<index_t> task_idx = options.tasks_indices[task]; 00192 int n_vecs_task = task_idx.vlen; 00193 for (int i=0; i<n_vecs_task; i++) 00194 { 00195 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]); 00196 double bb = CMath::max(aa,0.0); 00197 00198 Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; 00199 } 00200 } 00201 Fzp += c*(Wzp*invEtaMWt).trace(); 00202 00203 // compute delta between line search point and search point 00204 delta_Wzp = Wzp - Ws; 00205 delta_Czp = Czp - Cs; 00206 delta_Mzp = Mzp - Ms; 00207 00208 // norms of delta 00209 double nrm_delta_Wzp = delta_Wzp.squaredNorm(); 00210 double nrm_delta_Czp = delta_Czp.squaredNorm(); 00211 double nrm_delta_Mzp = delta_Mzp.squaredNorm(); 00212 00213 double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3; 00214 00215 double Fzp_gamma = 0.0; 00216 if (n_feats > n_tasks) 00217 { 00218 Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + 00219 (delta_Czp.transpose()*gCs).trace() + 00220 (delta_Mzp.transpose()*gMs).trace() + 00221 (gamma/2)*nrm_delta_Wzp + 00222 (gamma/2)*nrm_delta_Czp + 00223 (gamma/2)*nrm_delta_Mzp; 00224 } 00225 else 00226 { 00227 Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() + 00228 (gCs.transpose()*delta_Czp).trace() + 00229 (gMs.transpose()*delta_Mzp).trace() + 00230 (gamma/2)*nrm_delta_Wzp + 00231 (gamma/2)*nrm_delta_Czp + 00232 (gamma/2)*nrm_delta_Mzp; 00233 } 00234 00235 // break if delta is getting too small 00236 if (r_sum <= 1e-20) 00237 { 00238 done = true; 00239 break; 00240 } 00241 00242 // break if objective at line searc point is smaller than Fzp_gamma 00243 if (Fzp <= Fzp_gamma) 00244 break; 00245 else 00246 gamma *= gamma_inc; 00247 00248 inner_iter++; 00249 } 00250 00251 Wz_old = Wz; 00252 Cz_old = Cz; 00253 Mz_old = Mz; 00254 Wz = Wzp; 00255 Cz = Czp; 00256 Mz = Mzp; 00257 00258 // compute objective value 00259 obj_old = obj; 00260 obj = Fzp; 00261 00262 // check if process should be terminated 00263 switch (options.termination) 00264 { 00265 case 0: 00266 if (iter>=2) 00267 { 00268 if ( CMath::abs(obj-obj_old) <= options.tolerance ) 00269 done = true; 00270 } 00271 break; 00272 case 1: 00273 if (iter>=2) 00274 { 00275 if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) 00276 done = true; 00277 } 00278 break; 00279 case 2: 00280 if (CMath::abs(obj) <= options.tolerance) 00281 done = true; 00282 break; 00283 case 3: 00284 if (iter>=options.max_iter) 00285 done = true; 00286 break; 00287 } 00288 00289 iter++; 00290 t_old = t; 00291 t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); 00292 } 00293 //internal::set_is_malloc_allowed(true); 00294 SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj) 00295 00296 SG_FREE(H_diag_matrix); 00297 SG_FREE(diag_H); 00298 SG_FREE(f); 00299 SG_FREE(a); 00300 SG_FREE(lb); 00301 SG_FREE(ub); 00302 SG_FREE(x); 00303 00304 SGMatrix<float64_t> tasks_w(n_feats, n_tasks); 00305 for (int i=0; i<n_feats; i++) 00306 { 00307 for (task=0; task<n_tasks; task++) 00308 tasks_w(i,task) = Wzp(i,task); 00309 } 00310 //tasks_w.display_matrix(); 00311 SGVector<float64_t> tasks_c(n_tasks); 00312 for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; 00313 return malsar_result_t(tasks_w, tasks_c); 00314 }; 00315 }; 00316 #endif 00317 #endif