SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Fernando José Iglesias García 00008 * Copyright (C) 2012 Fernando José Iglesias García 00009 */ 00010 00011 #include <shogun/structure/HMSVMModel.h> 00012 #include <shogun/features/MatrixFeatures.h> 00013 #include <shogun/structure/TwoStateModel.h> 00014 #include <shogun/structure/Plif.h> 00015 00016 using namespace shogun; 00017 00018 CHMSVMModel::CHMSVMModel() 00019 : CStructuredModel() 00020 { 00021 init(); 00022 } 00023 00024 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs) 00025 : CStructuredModel(features, labels) 00026 { 00027 init(); 00028 m_num_obs = num_obs; 00029 m_num_plif_nodes = 20; 00030 m_use_plifs = use_plifs; 00031 00032 switch (smt) 00033 { 00034 case SMT_TWO_STATE: 00035 m_state_model = new CTwoStateModel(); 00036 break; 00037 case SMT_UNKNOWN: 00038 default: 00039 SG_ERROR("The EStateModelType given is not valid\n") 00040 } 00041 } 00042 00043 CHMSVMModel::~CHMSVMModel() 00044 { 00045 SG_UNREF(m_state_model); 00046 SG_UNREF(m_plif_matrix); 00047 } 00048 00049 int32_t CHMSVMModel::get_dim() const 00050 { 00051 // Shorthand for the number of free states 00052 int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states(); 00053 CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features; 00054 int32_t D = mf->get_num_features(); 00055 00056 if ( m_use_plifs ) 00057 return free_states*(free_states + D*m_num_plif_nodes); 00058 else 00059 return free_states*(free_states + D*m_num_obs); 00060 } 00061 00062 SGVector< float64_t > CHMSVMModel::get_joint_feature_vector( 00063 int32_t feat_idx, 00064 CStructuredData* y) 00065 { 00066 // Shorthand for the number of features of the feature vector 00067 CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features; 00068 int32_t D = mf->get_num_features(); 00069 00070 // Get the sequence of labels 00071 CSequence* label_seq = CSequence::obtain_from_generic(y); 00072 00073 // Initialize psi 00074 SGVector< float64_t > psi(get_dim()); 00075 psi.zero(); 00076 00077 // Translate from labels sequence to state sequence 00078 SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq); 00079 m_transmission_weights.zero(); 00080 00081 for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i ) 00082 m_transmission_weights(state_seq[i],state_seq[i+1]) += 1; 00083 00084 SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx); 00085 REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen, 00086 "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n", 00087 obs.num_rows, D, obs.num_cols, state_seq.vlen) 00088 m_emission_weights.zero(); 00089 index_t aux_idx, weight_idx; 00090 00091 if ( !m_use_plifs ) // Do not use PLiFs 00092 { 00093 for ( int32_t f = 0 ; f < D ; ++f ) 00094 { 00095 aux_idx = f*m_num_obs; 00096 00097 for ( int32_t j = 0 ; j < state_seq.vlen ; ++j ) 00098 { 00099 weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j); 00100 m_emission_weights[weight_idx] += 1; 00101 } 00102 } 00103 00104 m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights, 00105 D, m_num_obs); 00106 } 00107 else // Use PLiFs 00108 { 00109 int32_t S = m_state_model->get_num_states(); 00110 00111 for ( int32_t f = 0 ; f < D ; ++f ) 00112 { 00113 aux_idx = f*m_num_plif_nodes; 00114 00115 for ( int32_t j = 0 ; j < state_seq.vlen ; ++j ) 00116 { 00117 CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]); 00118 SGVector<float64_t> limits = plif->get_plif_limits(); 00119 // The number of supporting points smaller or equal than value 00120 int32_t count = 0; 00121 // The observation value 00122 float64_t value = obs(f,j); 00123 00124 for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i ) 00125 { 00126 if ( limits[i] <= value ) 00127 ++count; 00128 else 00129 break; 00130 } 00131 00132 weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes; 00133 00134 if ( count == 0 ) 00135 m_emission_weights[weight_idx] += 1; 00136 else if ( count == m_num_plif_nodes ) 00137 m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1; 00138 else 00139 { 00140 m_emission_weights[weight_idx + count] += 00141 (value-limits[count-1]) / (limits[count]-limits[count-1]); 00142 00143 m_emission_weights[weight_idx + count-1] += 00144 (limits[count]-value) / (limits[count]-limits[count-1]); 00145 } 00146 00147 SG_UNREF(plif); 00148 } 00149 } 00150 00151 m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights, 00152 D, m_num_plif_nodes); 00153 } 00154 00155 return psi; 00156 } 00157 00158 CResultSet* CHMSVMModel::argmax( 00159 SGVector< float64_t > w, 00160 int32_t feat_idx, 00161 bool const training) 00162 { 00163 int32_t dim = get_dim(); 00164 ASSERT( w.vlen == get_dim() ) 00165 00166 // Shorthand for the number of features of the feature vector 00167 CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features; 00168 int32_t D = mf->get_num_features(); 00169 // Shorthand for the number of states 00170 int32_t S = m_state_model->get_num_states(); 00171 00172 if ( m_use_plifs ) 00173 { 00174 REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with " 00175 "the use_plifs option?\n"); 00176 REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the " 00177 "feature dimension and/or number of states changed from training to prediction?\n"); 00178 } 00179 00180 // Distribution of start states 00181 SGVector< float64_t > p = m_state_model->get_start_states(); 00182 // Distribution of stop states 00183 SGVector< float64_t > q = m_state_model->get_stop_states(); 00184 00185 // Compute the loss-augmented emission matrix: 00186 // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s) 00187 00188 SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx); 00189 00190 int32_t T = x.num_cols; 00191 SGMatrix< float64_t > E(S, T); 00192 E.zero(); 00193 00194 if ( !m_use_plifs ) // Do not use PLiFs 00195 { 00196 index_t em_idx; 00197 m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs); 00198 00199 for ( int32_t i = 0 ; i < T ; ++i ) 00200 { 00201 for ( int32_t j = 0 ; j < D ; ++j ) 00202 { 00203 //FIXME make independent of observation values 00204 em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i)); 00205 00206 for ( int32_t s = 0 ; s < S ; ++s ) 00207 E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx]; 00208 } 00209 } 00210 } 00211 else // Use PLiFs 00212 { 00213 m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes); 00214 00215 for ( int32_t i = 0 ; i < T ; ++i ) 00216 { 00217 for ( int32_t f = 0 ; f < D ; ++f ) 00218 { 00219 for ( int32_t s = 0 ; s < S ; ++s ) 00220 { 00221 CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s); 00222 E(s,i) += plif->lookup( x(f,i) ); 00223 SG_UNREF(plif); 00224 } 00225 } 00226 } 00227 } 00228 00229 // If argmax used while training, add to E the loss matrix (loss-augmented inference) 00230 if ( training ) 00231 { 00232 CSequence* ytrue = 00233 CSequence::obtain_from_generic(m_labels->get_label(feat_idx)); 00234 00235 REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature " 00236 "x^i (%d) and the length of its corresponding label y^i " 00237 "(%d) must be the same.\n", T, ytrue->get_data().size()); 00238 00239 SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue); 00240 00241 ASSERT(loss_matrix.num_rows == E.num_rows && 00242 loss_matrix.num_cols == E.num_cols); 00243 00244 SGVector< float64_t >::add(E.matrix, 1.0, E.matrix, 00245 1.0, loss_matrix.matrix, E.num_rows*E.num_cols); 00246 00247 // Decrement the reference count corresponding to get_label above 00248 SG_UNREF(ytrue); 00249 } 00250 00251 // Initialize the dynamic programming table and the traceback matrix 00252 SGMatrix< float64_t > dp(T, S); 00253 SGMatrix< float64_t > trb(T, S); 00254 m_state_model->reshape_transmission_params(m_transmission_weights, w); 00255 00256 for ( int32_t s = 0 ; s < S ; ++s ) 00257 { 00258 if ( p[s] > -CMath::INFTY ) 00259 { 00260 // dp(0,s) = E(s,0) 00261 dp(0,s) = E[s]; 00262 } 00263 else 00264 { 00265 dp(0,s) = -CMath::INFTY; 00266 } 00267 } 00268 00269 // Viterbi algorithm 00270 int32_t idx; 00271 float64_t tmp_score, e, a; 00272 00273 for ( int32_t i = 1 ; i < T ; ++i ) 00274 { 00275 for ( int32_t cur = 0 ; cur < S ; ++cur ) 00276 { 00277 idx = cur*T + i; 00278 00279 dp[idx] = -CMath::INFTY; 00280 trb[idx] = -1; 00281 00282 // e = E(cur,i) 00283 e = E[i*S + cur]; 00284 00285 for ( int32_t prev = 0 ; prev < S ; ++prev ) 00286 { 00287 // aij = m_transmission_weights(prev, cur) 00288 a = m_transmission_weights[cur*S + prev]; 00289 00290 if ( a > -CMath::INFTY ) 00291 { 00292 // tmp_score = e + a + dp(i-1, prev) 00293 tmp_score = e + a + dp[prev*T + i-1]; 00294 00295 if ( tmp_score > dp[idx] ) 00296 { 00297 dp[idx] = tmp_score; 00298 trb[idx] = prev; 00299 } 00300 } 00301 } 00302 } 00303 } 00304 00305 // Trace back the most likely sequence of states 00306 SGVector< int32_t > opt_path(T); 00307 CResultSet* ret = new CResultSet(); 00308 SG_REF(ret); 00309 ret->score = -CMath::INFTY; 00310 opt_path[T-1] = -1; 00311 00312 for ( int32_t s = 0 ; s < S ; ++s ) 00313 { 00314 idx = s*T + T-1; 00315 00316 if ( q[s] > -CMath::INFTY && dp[idx] > ret->score ) 00317 { 00318 ret->score = dp[idx]; 00319 opt_path[T-1] = s; 00320 } 00321 } 00322 00323 REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n" 00324 "Maybe the state model used cannot produce such sequence.\n" 00325 "If using the TwoStateModel, please use sequences of length greater than two.\n"); 00326 00327 for ( int32_t i = T-1 ; i > 0 ; --i ) 00328 opt_path[i-1] = trb[opt_path[i]*T + i]; 00329 00330 // Populate the CResultSet object to return 00331 CSequence* ypred = m_state_model->states_to_labels(opt_path); 00332 00333 ret->psi_pred = get_joint_feature_vector(feat_idx, ypred); 00334 ret->argmax = ypred; 00335 if ( training ) 00336 { 00337 ret->delta = CStructuredModel::delta_loss(feat_idx, ypred); 00338 ret->psi_truth = CStructuredModel::get_joint_feature_vector(feat_idx, feat_idx); 00339 ret->score -= SGVector< float64_t >::dot(w.vector, ret->psi_truth.vector, dim); 00340 } 00341 00342 return ret; 00343 } 00344 00345 float64_t CHMSVMModel::delta_loss(CStructuredData* y1, CStructuredData* y2) 00346 { 00347 CSequence* seq1 = CSequence::obtain_from_generic(y1); 00348 CSequence* seq2 = CSequence::obtain_from_generic(y2); 00349 00350 // Compute the Hamming loss, number of distinct elements in the sequences 00351 return m_state_model->loss(seq1, seq2); 00352 } 00353 00354 void CHMSVMModel::init_primal_opt( 00355 float64_t regularization, 00356 SGMatrix< float64_t > & A, 00357 SGVector< float64_t > a, 00358 SGMatrix< float64_t > B, 00359 SGVector< float64_t > & b, 00360 SGVector< float64_t > lb, 00361 SGVector< float64_t > ub, 00362 SGMatrix< float64_t > & C) 00363 { 00364 // Shorthand for the number of free states (i.e. states for which parameters are learnt) 00365 int32_t S = ((CSequenceLabels*) m_labels)->get_num_states(); 00366 // Shorthand for the number of features of the feature vector 00367 int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features(); 00368 00369 // Monotonicity constraints for feature scoring functions 00370 SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D); 00371 00372 // Quadratic regularization 00373 00374 float64_t C_small = regularization; 00375 float64_t C_smooth = 0.02*regularization; 00376 // TODO change the representation of C to sparse matrix 00377 C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux); 00378 C.zero(); 00379 for ( int32_t i = 0 ; i < get_dim() ; ++i ) 00380 C(i,i) = C_small; 00381 for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i ) 00382 C(i,i) = C_smooth; 00383 00384 // Smoothness constraints 00385 00386 // For each auxiliary variable, there are two different constraints 00387 // TODO change the representation of A to sparse matrix 00388 A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux); 00389 A.zero(); 00390 00391 // Indices to the beginning of the blocks of scores. Each block is 00392 // formed by the scores of a pair (state, feature) 00393 SGVector< int32_t > score_starts(S*D); 00394 int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs; 00395 for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k ) 00396 score_starts[k] = idx; 00397 00398 // Indices to the beginning of the blocks of variables for smoothness 00399 SGVector< int32_t > aux_starts_smooth(S*D); 00400 for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k ) 00401 aux_starts_smooth[k] = idx; 00402 00403 // Bound the difference between adjacent score values from above and 00404 // below by an auxiliary variable (which then is regularized 00405 // quadratically) 00406 00407 int32_t con_idx = 0, scr_idx, aux_idx; 00408 00409 for ( int32_t i = 0 ; i < score_starts.vlen ; ++i ) 00410 { 00411 scr_idx = score_starts[i]; 00412 aux_idx = aux_starts_smooth[i]; 00413 00414 for ( int32_t j = 0 ; j < delta-1 ; ++j ) 00415 { 00416 A(con_idx, scr_idx) = 1; 00417 A(con_idx, scr_idx+1) = -1; 00418 00419 if ( monotonicity[i] != 1 ) 00420 A(con_idx, aux_idx) = -1; 00421 ++con_idx; 00422 00423 A(con_idx, scr_idx) = -1; 00424 A(con_idx, scr_idx+1) = 1; 00425 00426 if ( monotonicity[i] != -1 ) 00427 A(con_idx, aux_idx) = -1; 00428 ++con_idx; 00429 00430 ++scr_idx, ++aux_idx; 00431 } 00432 } 00433 00434 // Bounds for the smoothness constraints 00435 b = SGVector< float64_t >(2*m_num_aux); 00436 b.zero(); 00437 } 00438 00439 bool CHMSVMModel::check_training_setup() const 00440 { 00441 // Shorthand for the labels in the correct type 00442 CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels; 00443 // Frequency of each state 00444 SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() ); 00445 state_freq.zero(); 00446 00447 CSequence* seq; 00448 int32_t state; 00449 for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i ) 00450 { 00451 seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i)); 00452 00453 SGVector<int32_t> seq_data = seq->get_data(); 00454 for ( int32_t j = 0 ; j < seq_data.size() ; ++j ) 00455 { 00456 state = seq_data[j]; 00457 00458 if ( state < 0 || state >= hmsvm_labels->get_num_states() ) 00459 { 00460 SG_ERROR("Found state out of {0, 1, ..., " 00461 "num_states-1}\n"); 00462 return false; 00463 } 00464 else 00465 { 00466 ++state_freq[state]; 00467 } 00468 } 00469 00470 // Decrement the reference count increased by get_label 00471 SG_UNREF(seq); 00472 } 00473 00474 for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i ) 00475 { 00476 if ( state_freq[i] <= 0 ) 00477 { 00478 SG_ERROR("What? State %d has never appeared\n", i) 00479 return false; 00480 } 00481 } 00482 00483 return true; 00484 } 00485 00486 void CHMSVMModel::init() 00487 { 00488 SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE); 00489 SG_ADD(&m_transmission_weights, "m_transmission_weights", 00490 "Transmission weights used in Viterbi", MS_NOT_AVAILABLE); 00491 SG_ADD(&m_emission_weights, "m_emission_weights", 00492 "Emission weights used in Viterbi", MS_NOT_AVAILABLE); 00493 SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF", 00494 MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter 00495 SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE); 00496 00497 m_num_obs = 0; 00498 m_num_aux = 0; 00499 m_use_plifs = false; 00500 m_state_model = NULL; 00501 m_plif_matrix = NULL; 00502 m_num_plif_nodes = 0; 00503 } 00504 00505 int32_t CHMSVMModel::get_num_aux() const 00506 { 00507 return m_num_aux; 00508 } 00509 00510 int32_t CHMSVMModel::get_num_aux_con() const 00511 { 00512 return 2*m_num_aux; 00513 } 00514 00515 void CHMSVMModel::set_use_plifs(bool use_plifs) 00516 { 00517 m_use_plifs = use_plifs; 00518 } 00519 00520 void CHMSVMModel::init_training() 00521 { 00522 // Shorthands for the number of states, the matrix features and their dimension 00523 int32_t S = m_state_model->get_num_states(); 00524 CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features; 00525 int32_t D = mf->get_num_features(); 00526 00527 // Transmission and emission weights allocation 00528 m_transmission_weights = SGMatrix< float64_t >(S,S); 00529 if ( m_use_plifs ) 00530 m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes); 00531 else 00532 m_emission_weights = SGVector< float64_t >(S*D*m_num_obs); 00533 00534 // Auxiliary variables 00535 00536 // Shorthand for the number of free states 00537 int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states(); 00538 if ( m_use_plifs ) 00539 m_num_aux = free_states*D*(m_num_plif_nodes-1); 00540 else 00541 m_num_aux = free_states*D*(m_num_obs-1); 00542 00543 if ( m_use_plifs ) 00544 { 00545 // Initialize PLiF matrix 00546 m_plif_matrix = new CDynamicObjectArray(S*D); 00547 SG_REF(m_plif_matrix); 00548 00549 // Determine the x values for the supporting points of the PLiFs 00550 00551 // Count the number of points per feature, using all the feature vectors 00552 int32_t N = 0; 00553 for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i ) 00554 { 00555 SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i); 00556 N += feat_vec.num_cols; 00557 } 00558 00559 // Choose the supporting points so that roughly the same number of points fall in each bin 00560 SGVector< float64_t > a = SGVector< float64_t >::linspace_vec(1, N, m_num_plif_nodes+1); 00561 SGVector< index_t > signal_idxs(m_num_plif_nodes); 00562 for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i ) 00563 signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1; 00564 00565 SGVector< float64_t > signal(N); 00566 index_t idx; // used to populate signal 00567 for ( int32_t f = 0 ; f < D ; ++f ) 00568 { 00569 // Get the points of feature f of all the feature vectors 00570 idx = 0; 00571 for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i ) 00572 { 00573 SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i); 00574 for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j ) 00575 signal[idx++] = feat_vec(f,j); 00576 } 00577 00578 signal.qsort(); 00579 SGVector< float64_t > limits(m_num_plif_nodes); 00580 for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i ) 00581 limits[i] = signal[ signal_idxs[i] ]; 00582 00583 // Set the PLiFs' supporting points 00584 for ( int32_t s = 0 ; s < S ; ++s ) 00585 { 00586 CPlif* plif = new CPlif(m_num_plif_nodes); 00587 plif->set_plif_limits(limits); 00588 plif->set_min_value(-CMath::INFTY); 00589 plif->set_max_value(CMath::INFTY); 00590 m_plif_matrix->push_back(plif); 00591 } 00592 } 00593 } 00594 } 00595 00596 SGMatrix< float64_t > CHMSVMModel::get_transmission_weights() const 00597 { 00598 return m_transmission_weights; 00599 } 00600 00601 SGVector< float64_t > CHMSVMModel::get_emission_weights() const 00602 { 00603 return m_emission_weights; 00604 } 00605 00606 CStateModel* CHMSVMModel::get_state_model() const 00607 { 00608 SG_REF(m_state_model); 00609 return m_state_model; 00610 }