SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
HMSVMModel.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Fernando José Iglesias García
00008  * Copyright (C) 2012 Fernando José Iglesias García
00009  */
00010 
00011 #include <shogun/structure/HMSVMModel.h>
00012 #include <shogun/features/MatrixFeatures.h>
00013 #include <shogun/structure/TwoStateModel.h>
00014 #include <shogun/structure/Plif.h>
00015 
00016 using namespace shogun;
00017 
00018 CHMSVMModel::CHMSVMModel()
00019 : CStructuredModel()
00020 {
00021     init();
00022 }
00023 
00024 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs)
00025 : CStructuredModel(features, labels)
00026 {
00027     init();
00028     m_num_obs = num_obs;
00029     m_num_plif_nodes = 20;
00030     m_use_plifs = use_plifs;
00031 
00032     switch (smt)
00033     {
00034         case SMT_TWO_STATE:
00035             m_state_model = new CTwoStateModel();
00036             break;
00037         case SMT_UNKNOWN:
00038         default:
00039             SG_ERROR("The EStateModelType given is not valid\n")
00040     }
00041 }
00042 
00043 CHMSVMModel::~CHMSVMModel()
00044 {
00045     SG_UNREF(m_state_model);
00046     SG_UNREF(m_plif_matrix);
00047 }
00048 
00049 int32_t CHMSVMModel::get_dim() const
00050 {
00051     // Shorthand for the number of free states
00052     int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
00053     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00054     int32_t D = mf->get_num_features();
00055 
00056     if ( m_use_plifs )
00057         return free_states*(free_states + D*m_num_plif_nodes);
00058     else
00059         return free_states*(free_states + D*m_num_obs);
00060 }
00061 
00062 SGVector< float64_t > CHMSVMModel::get_joint_feature_vector(
00063         int32_t feat_idx,
00064         CStructuredData* y)
00065 {
00066     // Shorthand for the number of features of the feature vector
00067     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00068     int32_t D = mf->get_num_features();
00069 
00070     // Get the sequence of labels
00071     CSequence* label_seq = CSequence::obtain_from_generic(y);
00072 
00073     // Initialize psi
00074     SGVector< float64_t > psi(get_dim());
00075     psi.zero();
00076 
00077     // Translate from labels sequence to state sequence
00078     SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
00079     m_transmission_weights.zero();
00080 
00081     for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
00082         m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
00083 
00084     SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
00085     REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen,
00086         "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
00087         obs.num_rows, D, obs.num_cols, state_seq.vlen)
00088     m_emission_weights.zero();
00089     index_t aux_idx, weight_idx;
00090 
00091     if ( !m_use_plifs ) // Do not use PLiFs
00092     {
00093         for ( int32_t f = 0 ; f < D ; ++f )
00094         {
00095             aux_idx = f*m_num_obs;
00096 
00097             for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
00098             {
00099                 weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
00100                 m_emission_weights[weight_idx] += 1;
00101             }
00102         }
00103 
00104         m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
00105                 D, m_num_obs);
00106     }
00107     else    // Use PLiFs
00108     {
00109         int32_t S = m_state_model->get_num_states();
00110 
00111         for ( int32_t f = 0 ; f < D ; ++f )
00112         {
00113             aux_idx = f*m_num_plif_nodes;
00114 
00115             for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
00116             {
00117                 CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]);
00118                 SGVector<float64_t> limits = plif->get_plif_limits();
00119                 // The number of supporting points smaller or equal than value
00120                 int32_t count = 0;
00121                 // The observation value
00122                 float64_t value = obs(f,j);
00123 
00124                 for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
00125                 {
00126                     if ( limits[i] <= value )
00127                         ++count;
00128                     else
00129                         break;
00130                 }
00131 
00132                 weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;
00133 
00134                 if ( count == 0 )
00135                     m_emission_weights[weight_idx] += 1;
00136                 else if ( count == m_num_plif_nodes )
00137                     m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
00138                 else
00139                 {
00140                     m_emission_weights[weight_idx + count] +=
00141                         (value-limits[count-1]) / (limits[count]-limits[count-1]);
00142 
00143                     m_emission_weights[weight_idx + count-1] +=
00144                         (limits[count]-value) / (limits[count]-limits[count-1]);
00145                 }
00146 
00147                 SG_UNREF(plif);
00148             }
00149         }
00150 
00151         m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
00152                 D, m_num_plif_nodes);
00153     }
00154 
00155     return psi;
00156 }
00157 
00158 CResultSet* CHMSVMModel::argmax(
00159         SGVector< float64_t > w,
00160         int32_t feat_idx,
00161         bool const training)
00162 {
00163     int32_t dim = get_dim();
00164     ASSERT( w.vlen == get_dim() )
00165 
00166     // Shorthand for the number of features of the feature vector
00167     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00168     int32_t D = mf->get_num_features();
00169     // Shorthand for the number of states
00170     int32_t S = m_state_model->get_num_states();
00171 
00172     if ( m_use_plifs )
00173     {
00174         REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
00175                 "the use_plifs option?\n");
00176         REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the "
00177                 "feature dimension and/or number of states changed from training to prediction?\n");
00178     }
00179 
00180     // Distribution of start states
00181     SGVector< float64_t > p = m_state_model->get_start_states();
00182     // Distribution of stop states
00183     SGVector< float64_t > q = m_state_model->get_stop_states();
00184 
00185     // Compute the loss-augmented emission matrix:
00186     // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
00187 
00188     SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
00189 
00190     int32_t T = x.num_cols;
00191     SGMatrix< float64_t > E(S, T);
00192     E.zero();
00193 
00194     if ( !m_use_plifs ) // Do not use PLiFs
00195     {
00196         index_t em_idx;
00197         m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
00198 
00199         for ( int32_t i = 0 ; i < T ; ++i )
00200         {
00201             for ( int32_t j = 0 ; j < D ; ++j )
00202             {
00203                 //FIXME make independent of observation values
00204                 em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
00205 
00206                 for ( int32_t s = 0 ; s < S ; ++s )
00207                     E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
00208             }
00209         }
00210     }
00211     else    // Use PLiFs
00212     {
00213         m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes);
00214 
00215         for ( int32_t i = 0 ; i < T ; ++i )
00216         {
00217             for ( int32_t f = 0 ; f < D ; ++f )
00218             {
00219                 for ( int32_t s = 0 ; s < S ; ++s )
00220                 {
00221                     CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s);
00222                     E(s,i) += plif->lookup( x(f,i) );
00223                     SG_UNREF(plif);
00224                 }
00225             }
00226         }
00227     }
00228 
00229     // If argmax used while training, add to E the loss matrix (loss-augmented inference)
00230     if ( training )
00231     {
00232         CSequence* ytrue =
00233             CSequence::obtain_from_generic(m_labels->get_label(feat_idx));
00234 
00235         REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
00236             "x^i (%d) and the length of its corresponding label y^i "
00237             "(%d) must be the same.\n", T, ytrue->get_data().size());
00238 
00239         SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
00240 
00241         ASSERT(loss_matrix.num_rows == E.num_rows &&
00242                loss_matrix.num_cols == E.num_cols);
00243 
00244         SGVector< float64_t >::add(E.matrix, 1.0, E.matrix,
00245                 1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
00246 
00247         // Decrement the reference count corresponding to get_label above
00248         SG_UNREF(ytrue);
00249     }
00250 
00251     // Initialize the dynamic programming table and the traceback matrix
00252     SGMatrix< float64_t >  dp(T, S);
00253     SGMatrix< float64_t > trb(T, S);
00254     m_state_model->reshape_transmission_params(m_transmission_weights, w);
00255 
00256     for ( int32_t s = 0 ; s < S ; ++s )
00257     {
00258         if ( p[s] > -CMath::INFTY )
00259         {
00260             // dp(0,s) = E(s,0)
00261             dp(0,s) = E[s];
00262         }
00263         else
00264         {
00265             dp(0,s) = -CMath::INFTY;
00266         }
00267     }
00268 
00269     // Viterbi algorithm
00270     int32_t idx;
00271     float64_t tmp_score, e, a;
00272 
00273     for ( int32_t i = 1 ; i < T ; ++i )
00274     {
00275         for ( int32_t cur = 0 ; cur < S ; ++cur )
00276         {
00277             idx = cur*T + i;
00278 
00279              dp[idx] = -CMath::INFTY;
00280             trb[idx] = -1;
00281 
00282             // e = E(cur,i)
00283             e = E[i*S + cur];
00284 
00285             for ( int32_t prev = 0 ; prev < S ; ++prev )
00286             {
00287                 // aij = m_transmission_weights(prev, cur)
00288                 a = m_transmission_weights[cur*S + prev];
00289 
00290                 if ( a > -CMath::INFTY )
00291                 {
00292                     // tmp_score = e + a + dp(i-1, prev)
00293                     tmp_score = e + a + dp[prev*T + i-1];
00294 
00295                     if ( tmp_score > dp[idx] )
00296                     {
00297                          dp[idx] = tmp_score;
00298                         trb[idx] = prev;
00299                     }
00300                 }
00301             }
00302         }
00303     }
00304 
00305     // Trace back the most likely sequence of states
00306     SGVector< int32_t > opt_path(T);
00307     CResultSet* ret = new CResultSet();
00308     SG_REF(ret);
00309     ret->score = -CMath::INFTY;
00310     opt_path[T-1] = -1;
00311 
00312     for ( int32_t s = 0 ; s < S ; ++s )
00313     {
00314         idx = s*T + T-1;
00315 
00316         if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
00317         {
00318             ret->score = dp[idx];
00319             opt_path[T-1] = s;
00320         }
00321     }
00322 
00323     REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
00324             "Maybe the state model used cannot produce such sequence.\n"
00325             "If using the TwoStateModel, please use sequences of length greater than two.\n");
00326 
00327     for ( int32_t i = T-1 ; i > 0 ; --i )
00328         opt_path[i-1] = trb[opt_path[i]*T + i];
00329 
00330     // Populate the CResultSet object to return
00331     CSequence* ypred = m_state_model->states_to_labels(opt_path);
00332 
00333     ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
00334     ret->argmax   = ypred;
00335     if ( training )
00336     {
00337         ret->delta     = CStructuredModel::delta_loss(feat_idx, ypred);
00338         ret->psi_truth = CStructuredModel::get_joint_feature_vector(feat_idx, feat_idx);
00339         ret->score    -= SGVector< float64_t >::dot(w.vector, ret->psi_truth.vector, dim);
00340     }
00341 
00342     return ret;
00343 }
00344 
00345 float64_t CHMSVMModel::delta_loss(CStructuredData* y1, CStructuredData* y2)
00346 {
00347     CSequence* seq1 = CSequence::obtain_from_generic(y1);
00348     CSequence* seq2 = CSequence::obtain_from_generic(y2);
00349 
00350     // Compute the Hamming loss, number of distinct elements in the sequences
00351     return m_state_model->loss(seq1, seq2);
00352 }
00353 
00354 void CHMSVMModel::init_primal_opt(
00355         float64_t regularization,
00356         SGMatrix< float64_t > & A,
00357         SGVector< float64_t > a,
00358         SGMatrix< float64_t > B,
00359         SGVector< float64_t > & b,
00360         SGVector< float64_t > lb,
00361         SGVector< float64_t > ub,
00362         SGMatrix< float64_t > & C)
00363 {
00364     // Shorthand for the number of free states (i.e. states for which parameters are learnt)
00365     int32_t S = ((CSequenceLabels*) m_labels)->get_num_states();
00366     // Shorthand for the number of features of the feature vector
00367     int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
00368 
00369     // Monotonicity constraints for feature scoring functions
00370     SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
00371 
00372     // Quadratic regularization
00373 
00374     float64_t C_small = regularization;
00375     float64_t C_smooth = 0.02*regularization;
00376     // TODO change the representation of C to sparse matrix
00377     C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
00378     C.zero();
00379     for ( int32_t i = 0 ; i < get_dim() ; ++i )
00380         C(i,i) = C_small;
00381     for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
00382         C(i,i) = C_smooth;
00383 
00384     // Smoothness constraints
00385 
00386     // For each auxiliary variable, there are two different constraints
00387     // TODO change the representation of A to sparse matrix
00388     A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
00389     A.zero();
00390 
00391     // Indices to the beginning of the blocks of scores. Each block is
00392     // formed by the scores of a pair (state, feature)
00393     SGVector< int32_t > score_starts(S*D);
00394     int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;
00395     for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
00396         score_starts[k] = idx;
00397 
00398     // Indices to the beginning of the blocks of variables for smoothness
00399     SGVector< int32_t > aux_starts_smooth(S*D);
00400     for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
00401         aux_starts_smooth[k] = idx;
00402 
00403     // Bound the difference between adjacent score values from above and
00404     // below by an auxiliary variable (which then is regularized
00405     // quadratically)
00406 
00407     int32_t con_idx = 0, scr_idx, aux_idx;
00408 
00409     for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
00410     {
00411         scr_idx = score_starts[i];
00412         aux_idx = aux_starts_smooth[i];
00413 
00414         for ( int32_t j = 0 ; j < delta-1 ; ++j )
00415         {
00416             A(con_idx, scr_idx)   =  1;
00417             A(con_idx, scr_idx+1) = -1;
00418 
00419             if ( monotonicity[i] != 1 )
00420                 A(con_idx, aux_idx) = -1;
00421             ++con_idx;
00422 
00423             A(con_idx, scr_idx)   = -1;
00424             A(con_idx, scr_idx+1) =  1;
00425 
00426             if ( monotonicity[i] != -1 )
00427                 A(con_idx, aux_idx) = -1;
00428             ++con_idx;
00429 
00430             ++scr_idx, ++aux_idx;
00431         }
00432     }
00433 
00434     // Bounds for the smoothness constraints
00435     b = SGVector< float64_t >(2*m_num_aux);
00436     b.zero();
00437 }
00438 
00439 bool CHMSVMModel::check_training_setup() const
00440 {
00441     // Shorthand for the labels in the correct type
00442     CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels;
00443     // Frequency of each state
00444     SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
00445     state_freq.zero();
00446 
00447     CSequence* seq;
00448     int32_t state;
00449     for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
00450     {
00451         seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
00452 
00453         SGVector<int32_t> seq_data = seq->get_data();
00454         for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
00455         {
00456             state = seq_data[j];
00457 
00458             if ( state < 0 || state >= hmsvm_labels->get_num_states() )
00459             {
00460                 SG_ERROR("Found state out of {0, 1, ..., "
00461                      "num_states-1}\n");
00462                 return false;
00463             }
00464             else
00465             {
00466                 ++state_freq[state];
00467             }
00468         }
00469 
00470         // Decrement the reference count increased by get_label
00471         SG_UNREF(seq);
00472     }
00473 
00474     for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
00475     {
00476         if ( state_freq[i] <= 0 )
00477         {
00478             SG_ERROR("What? State %d has never appeared\n", i)
00479             return false;
00480         }
00481     }
00482 
00483     return true;
00484 }
00485 
00486 void CHMSVMModel::init()
00487 {
00488     SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
00489     SG_ADD(&m_transmission_weights, "m_transmission_weights",
00490             "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
00491     SG_ADD(&m_emission_weights, "m_emission_weights",
00492             "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
00493     SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF",
00494             MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter
00495     SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE);
00496 
00497     m_num_obs = 0;
00498     m_num_aux = 0;
00499     m_use_plifs = false;
00500     m_state_model = NULL;
00501     m_plif_matrix = NULL;
00502     m_num_plif_nodes = 0;
00503 }
00504 
00505 int32_t CHMSVMModel::get_num_aux() const
00506 {
00507     return m_num_aux;
00508 }
00509 
00510 int32_t CHMSVMModel::get_num_aux_con() const
00511 {
00512     return 2*m_num_aux;
00513 }
00514 
00515 void CHMSVMModel::set_use_plifs(bool use_plifs)
00516 {
00517     m_use_plifs = use_plifs;
00518 }
00519 
00520 void CHMSVMModel::init_training()
00521 {
00522     // Shorthands for the number of states, the matrix features and their dimension
00523     int32_t S = m_state_model->get_num_states();
00524     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00525     int32_t D = mf->get_num_features();
00526 
00527     // Transmission and emission weights allocation
00528     m_transmission_weights = SGMatrix< float64_t >(S,S);
00529     if ( m_use_plifs )
00530         m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes);
00531     else
00532         m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
00533 
00534     // Auxiliary variables
00535 
00536     // Shorthand for the number of free states
00537     int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
00538     if ( m_use_plifs )
00539         m_num_aux = free_states*D*(m_num_plif_nodes-1);
00540     else
00541         m_num_aux = free_states*D*(m_num_obs-1);
00542 
00543     if ( m_use_plifs )
00544     {
00545         // Initialize PLiF matrix
00546         m_plif_matrix = new CDynamicObjectArray(S*D);
00547         SG_REF(m_plif_matrix);
00548 
00549         // Determine the x values for the supporting points of the PLiFs
00550 
00551         // Count the number of points per feature, using all the feature vectors
00552         int32_t N = 0;
00553         for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
00554         {
00555             SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
00556             N += feat_vec.num_cols;
00557         }
00558 
00559         // Choose the supporting points so that roughly the same number of points fall in each bin
00560         SGVector< float64_t > a = SGVector< float64_t >::linspace_vec(1, N, m_num_plif_nodes+1);
00561         SGVector< index_t > signal_idxs(m_num_plif_nodes);
00562         for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
00563             signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1;
00564 
00565         SGVector< float64_t > signal(N);
00566         index_t idx; // used to populate signal
00567         for ( int32_t f = 0 ; f < D ; ++f )
00568         {
00569             // Get the points of feature f of all the feature vectors
00570             idx = 0;
00571             for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
00572             {
00573                 SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
00574                 for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
00575                     signal[idx++] = feat_vec(f,j);
00576             }
00577 
00578             signal.qsort();
00579             SGVector< float64_t > limits(m_num_plif_nodes);
00580             for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
00581                 limits[i] = signal[ signal_idxs[i] ];
00582 
00583             // Set the PLiFs' supporting points
00584             for ( int32_t s = 0 ; s < S ; ++s )
00585             {
00586                 CPlif* plif = new CPlif(m_num_plif_nodes);
00587                 plif->set_plif_limits(limits);
00588                 plif->set_min_value(-CMath::INFTY);
00589                 plif->set_max_value(CMath::INFTY);
00590                 m_plif_matrix->push_back(plif);
00591             }
00592         }
00593     }
00594 }
00595 
00596 SGMatrix< float64_t > CHMSVMModel::get_transmission_weights() const
00597 {
00598     return m_transmission_weights;
00599 }
00600 
00601 SGVector< float64_t > CHMSVMModel::get_emission_weights() const
00602 {
00603     return m_emission_weights;
00604 }
00605 
00606 CStateModel* CHMSVMModel::get_state_model() const
00607 {
00608     SG_REF(m_state_model);
00609     return m_state_model;
00610 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation