SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
DynProg.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Written (W) 2008-2009 Jonas Behr
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/structure/DynProg.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/lib/config.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/Alphabet.h>
00019 #include <shogun/structure/Plif.h>
00020 #include <shogun/structure/IntronList.h>
00021 
00022 #include <stdlib.h>
00023 #include <stdio.h>
00024 #include <time.h>
00025 #include <ctype.h>
00026 
00027 using namespace shogun;
00028 
00029 //#define USE_TMP_ARRAYCLASS
00030 //#define DYNPROG_DEBUG
00031 
00032 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00033 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00034 int32_t CDynProg::frame_plifs[3]={4,5,6};
00035 int32_t CDynProg::num_words_default[4]=   {64,256,1024,4096} ;
00036 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00037                                     1,1,1,1,1,1,1,1,
00038                                     0,0,0,0,0,0,0,0,
00039                                     0,0,0,0,0,0,0,0} ;
00040 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00041                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00042 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00043                                        1,1,1,1,1,1,1,1} ; // which string should be used
00044 
00045 CDynProg::CDynProg(int32_t num_svms /*= 8 */)
00046 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00047     m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00048     m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00049     m_end_state_distribution_q_deriv(1),
00050 
00051       // multi svm
00052       m_num_degrees(4),
00053       m_num_svms(num_svms),
00054       m_word_degree(word_degree_default, m_num_degrees, true, true),
00055       m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00056       m_cum_num_words_array(m_cum_num_words.get_array()),
00057       m_num_words(num_words_default, m_num_degrees, true, true),
00058       m_num_words_array(m_num_words.get_array()),
00059       m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00060       m_mod_words_array(m_mod_words.get_array()),
00061       m_sign_words(sign_words_default, m_num_svms, true, true),
00062       m_sign_words_array(m_sign_words.get_array()),
00063       m_string_words(string_words_default, m_num_svms, true, true),
00064       m_string_words_array(m_string_words.get_array()),
00065       //m_svm_pos_start(m_num_degrees),
00066       m_num_unique_words(m_num_degrees),
00067       m_svm_arrays_clean(true),
00068 
00069       m_max_a_id(0), m_observation_matrix(1,1,1),
00070       m_pos(1),
00071       m_seq_len(0),
00072       m_orf_info(1,2),
00073       m_plif_list(1),
00074       m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
00075       m_segment_ids(1),
00076       m_segment_mask(1),
00077       m_my_state_seq(1),
00078       m_my_pos_seq(1),
00079       m_my_scores(1),
00080       m_my_losses(1),
00081       m_scores(1),
00082       m_states(1,1),
00083       m_positions(1,1),
00084 
00085       m_seq_sparse1(NULL),
00086       m_seq_sparse2(NULL),
00087       m_plif_matrices(NULL),
00088 
00089       m_genestr_stop(1),
00090       m_intron_list(NULL),
00091       m_num_intron_plifs(0),
00092       m_lin_feat(1,1), //by Jonas
00093       m_raw_intensities(NULL),
00094       m_probe_pos(NULL),
00095       m_num_probes_cum(NULL),
00096       m_num_lin_feat_plifs_cum(NULL),
00097       m_num_raw_data(0),
00098 
00099       m_long_transitions(true),
00100       m_long_transition_threshold(1000)
00101 {
00102     trans_list_forward = NULL ;
00103     trans_list_forward_cnt = NULL ;
00104     trans_list_forward_val = NULL ;
00105     trans_list_forward_id = NULL ;
00106     trans_list_len = 0 ;
00107 
00108     mem_initialized = true ;
00109 
00110     m_N=1;
00111 
00112     m_raw_intensities = NULL;
00113     m_probe_pos = NULL;
00114     m_num_probes_cum = SG_MALLOC(int32_t, 100);
00115     m_num_probes_cum[0] = 0;
00116     //m_use_tiling=false;
00117     m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100);
00118     m_num_lin_feat_plifs_cum[0] = m_num_svms;
00119     m_num_raw_data = 0;
00120 #ifdef ARRAY_STATISTICS
00121     m_word_degree.set_array_name("word_degree");
00122 #endif
00123 
00124     m_transition_matrix_a_id.set_array_name("transition_matrix_a_id");
00125     m_transition_matrix_a.set_array_name("transition_matrix_a");
00126     m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv");
00127     m_mod_words.set_array_name("mod_words");
00128     m_orf_info.set_array_name("orf_info");
00129     m_segment_sum_weights.set_array_name("segment_sum_weights");
00130     m_dict_weights.set_array_name("dict_weights");
00131     m_states.set_array_name("states");
00132     m_positions.set_array_name("positions");
00133     m_lin_feat.set_array_name("lin_feat");
00134 
00135 
00136     m_observation_matrix.set_array_name("m_observation_matrix");
00137     m_segment_loss.set_array_name("m_segment_loss");
00138     m_seg_loss_obj = new CSegmentLoss();
00139 }
00140 
00141 CDynProg::~CDynProg()
00142 {
00143     if (trans_list_forward_cnt)
00144         SG_FREE(trans_list_forward_cnt);
00145     if (trans_list_forward)
00146     {
00147         for (int32_t i=0; i<trans_list_len; i++)
00148         {
00149             if (trans_list_forward[i])
00150                 SG_FREE(trans_list_forward[i]);
00151         }
00152         SG_FREE(trans_list_forward);
00153     }
00154     if (trans_list_forward_val)
00155     {
00156         for (int32_t i=0; i<trans_list_len; i++)
00157         {
00158             if (trans_list_forward_val[i])
00159                 SG_FREE(trans_list_forward_val[i]);
00160         }
00161         SG_FREE(trans_list_forward_val);
00162     }
00163     if (trans_list_forward_id)
00164     {
00165         for (int32_t i=0; i<trans_list_len; i++)
00166         {
00167             if (trans_list_forward_id[i])
00168                 SG_FREE(trans_list_forward_id[i]);
00169         }
00170         SG_FREE(trans_list_forward_id);
00171     }
00172     if (m_raw_intensities)
00173         SG_FREE(m_raw_intensities);
00174     if (m_probe_pos)
00175         SG_FREE(m_probe_pos);
00176     if (m_num_probes_cum)
00177       SG_FREE(m_num_probes_cum);
00178     if (m_num_lin_feat_plifs_cum)
00179       SG_FREE(m_num_lin_feat_plifs_cum);
00180 
00181     delete m_intron_list;
00182 
00183     SG_UNREF(m_seq_sparse1);
00184     SG_UNREF(m_seq_sparse2);
00185     SG_UNREF(m_plif_matrices);
00186     SG_UNREF(m_seg_loss_obj);
00187 }
00188 
00190 int32_t CDynProg::get_num_svms()
00191 {
00192     return m_num_svms;
00193 }
00194 
00195 void CDynProg::precompute_stop_codons()
00196 {
00197     int32_t length=m_genestr.get_dim1();
00198 
00199     m_genestr_stop.resize_array(length) ;
00200     m_genestr_stop.set_const(0) ;
00201     m_genestr_stop.set_array_name("genestr_stop") ;
00202     {
00203         for (int32_t i=0; i<length-2; i++)
00204             if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
00205                     (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
00206                       (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00207                      ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00208             {
00209                 m_genestr_stop.element(i)=true ;
00210             }
00211             else
00212                 m_genestr_stop.element(i)=false ;
00213         m_genestr_stop.element(length-2)=false ;
00214         m_genestr_stop.element(length-1)=false ;
00215     }
00216 }
00217 
00218 void CDynProg::set_num_states(int32_t p_N)
00219 {
00220     m_N=p_N ;
00221 
00222     m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00223     m_transition_matrix_a.resize_array(m_N,m_N) ;
00224     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00225     m_initial_state_distribution_p.resize_array(m_N) ;
00226     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00227     m_end_state_distribution_q.resize_array(m_N);
00228     m_end_state_distribution_q_deriv.resize_array(m_N) ;
00229 
00230     m_orf_info.resize_array(m_N,2) ;
00231 }
00232 
00233 int32_t CDynProg::get_num_states()
00234 {
00235     return m_N;
00236 }
00237 
00238 void CDynProg::init_tiling_data(
00239     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00240 {
00241     m_num_raw_data++;
00242     m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00243 
00244     int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]);
00245     float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]);
00246 
00247 
00248     if (m_num_raw_data==1){
00249         memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00250         memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00251         //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2)
00252     }else{
00253         memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00254         memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00255         memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00256         memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00257     }
00258     SG_FREE(m_probe_pos);
00259     SG_FREE(m_raw_intensities);
00260     m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes);
00261     m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes);
00262 
00263     //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00264     //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00265 
00266 }
00267 
00268 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00269 {
00270     m_lin_feat.resize_array(p_num_svms, m_seq_len);
00271 
00272     // initialize array
00273     for (int s=0; s<p_num_svms; s++)
00274       for (int p=0; p<m_seq_len; p++)
00275         m_lin_feat.set_element(0.0, s, p) ;
00276 }
00277 
00278 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00279 {
00280     int32_t dim1, dim2;
00281     m_lin_feat.get_array_size(dim1, dim2);
00282     ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1])
00283     ASSERT(dim2==m_seq_len) // == number of candidate positions
00284 
00285 
00286 
00287     float64_t* arr = m_lin_feat.get_array();
00288     float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2);
00289     memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00290     for(int32_t j=0;j<m_seq_len;j++)
00291                 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00292             tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00293 
00294     m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later
00295     SG_FREE(tmp);
00296 
00297     /*for(int32_t j=0;j<5;j++)
00298     {
00299         for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
00300         {
00301             SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j))
00302         }
00303         SG_PRINT("\n")
00304     }
00305     m_lin_feat.get_array_size(dim1,dim2);
00306     SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2)*/
00307 
00308     //SG_PRINT("resize_lin_feat: done\n")
00309 }
00310 
00311 void CDynProg::precompute_tiling_plifs(
00312     CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00313 {
00314     m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00315     float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs);
00316     float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
00317     for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00318         svm_value[i]=0.0;
00319     int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
00320     for (int32_t i=0; i<num_tiling_plifs; i++)
00321     {
00322         tiling_plif[i]=0.0;
00323         CPlif * plif = PEN[tiling_plif_ids[i]];
00324         tiling_rows[i] = plif->get_use_svm();
00325 
00326         ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00327     }
00328     resize_lin_feat(num_tiling_plifs);
00329 
00330 
00331     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00332     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00333     int32_t num=m_num_probes_cum[m_num_raw_data-1];
00334 
00335     for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00336     {
00337         while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00338         {
00339             for (int32_t i=0; i<num_tiling_plifs; i++)
00340             {
00341                 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00342                 CPlif * plif = PEN[tiling_plif_ids[i]];
00343                 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1)
00344                 plif->set_do_calc(true);
00345                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00346                 plif->set_do_calc(false);
00347             }
00348             p_tiling_data++;
00349             p_tiling_pos++;
00350             num++;
00351         }
00352         for (int32_t i=0; i<num_tiling_plifs; i++)
00353             m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00354     }
00355     SG_FREE(svm_value);
00356     SG_FREE(tiling_plif);
00357     SG_FREE(tiling_rows);
00358 }
00359 
00360 void CDynProg::create_word_string()
00361 {
00362     SG_FREE(m_wordstr);
00363     m_wordstr=SG_MALLOC(uint16_t**, 5440);
00364     int32_t k=0;
00365     int32_t genestr_len=m_genestr.get_dim1();
00366 
00367     m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees);
00368     for (int32_t j=0; j<m_num_degrees; j++)
00369     {
00370         m_wordstr[k][j]=NULL ;
00371         {
00372             m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
00373             for (int32_t i=0; i<genestr_len; i++)
00374                 switch (m_genestr[i])
00375                 {
00376                     case 'A':
00377                     case 'a': m_wordstr[k][j][i]=0 ; break ;
00378                     case 'C':
00379                     case 'c': m_wordstr[k][j][i]=1 ; break ;
00380                     case 'G':
00381                     case 'g': m_wordstr[k][j][i]=2 ; break ;
00382                     case 'T':
00383                     case 't': m_wordstr[k][j][i]=3 ; break ;
00384                     default: ASSERT(0)
00385                 }
00386             CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00387         }
00388     }
00389 }
00390 
00391 void CDynProg::precompute_content_values()
00392 {
00393     for (int32_t s=0; s<m_num_svms; s++)
00394       m_lin_feat.set_element(0.0, s, 0);
00395 
00396     for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00397     {
00398         int32_t from_pos = m_pos[p];
00399         int32_t to_pos = m_pos[p+1];
00400         float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms);
00401         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos)
00402 
00403         ASSERT(from_pos<=m_genestr.get_dim1())
00404         ASSERT(to_pos<=m_genestr.get_dim1())
00405 
00406         for (int32_t s=0; s<m_num_svms; s++)
00407             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00408 
00409         for (int32_t i=from_pos; i<to_pos; i++)
00410         {
00411             for (int32_t j=0; j<m_num_degrees; j++)
00412             {
00413                 uint16_t word = m_wordstr[0][j][i] ;
00414                 for (int32_t s=0; s<m_num_svms; s++)
00415                 {
00416                     // check if this k-mer should be considered for this SVM
00417                     if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00418                         continue;
00419                     my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00420                 }
00421             }
00422         }
00423         for (int32_t s=0; s<m_num_svms; s++)
00424         {
00425             float64_t prev = m_lin_feat.get_element(s, p);
00426             //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev)
00427             if (prev<-1e20 || prev>1e20)
00428             {
00429                 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev)
00430                 prev=0 ;
00431             }
00432             m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00433         }
00434         SG_FREE(my_svm_values_unnormalized);
00435     }
00436     //for (int32_t j=0; j<m_num_degrees; j++)
00437     //  SG_FREE(m_wordstr[0][j]);
00438     //SG_FREE(m_wordstr[0]);
00439 }
00440 
00441 void CDynProg::set_p_vector(SGVector<float64_t> p)
00442 {
00443     if (!(p.vlen==m_N))
00444         SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N)
00445 
00446     m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true);
00447 }
00448 
00449 void CDynProg::set_q_vector(SGVector<float64_t> q)
00450 {
00451     if (!(q.vlen==m_N))
00452         SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N)
00453     m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true);
00454 }
00455 
00456 void CDynProg::set_a(SGMatrix<float64_t> a)
00457 {
00458     ASSERT(a.num_cols==m_N)
00459     ASSERT(a.num_rows==m_N)
00460     m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true);
00461     m_transition_matrix_a_deriv.resize_array(m_N, m_N);
00462 }
00463 
00464 void CDynProg::set_a_id(SGMatrix<int32_t> a)
00465 {
00466     ASSERT(a.num_cols==m_N)
00467     ASSERT(a.num_rows==m_N)
00468     m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true);
00469     m_max_a_id = 0;
00470     for (int32_t i=0; i<m_N; i++)
00471     {
00472         for (int32_t j=0; j<m_N; j++)
00473             m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00474     }
00475 }
00476 
00477 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans)
00478 {
00479     int32_t num_trans=a_trans.num_rows;
00480     int32_t num_cols=a_trans.num_cols;
00481 
00482     //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans");
00483 
00484     if (!((num_cols==3) || (num_cols==4)))
00485         SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols)
00486 
00487     SG_FREE(trans_list_forward);
00488     SG_FREE(trans_list_forward_cnt);
00489     SG_FREE(trans_list_forward_val);
00490     SG_FREE(trans_list_forward_id);
00491 
00492     trans_list_forward = NULL ;
00493     trans_list_forward_cnt = NULL ;
00494     trans_list_forward_val = NULL ;
00495     trans_list_len = 0 ;
00496 
00497     m_transition_matrix_a.set_const(0) ;
00498     m_transition_matrix_a_id.set_const(0) ;
00499 
00500     mem_initialized = true ;
00501 
00502     trans_list_forward_cnt=NULL ;
00503     trans_list_len = m_N ;
00504     trans_list_forward = SG_MALLOC(T_STATES*, m_N);
00505     trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N);
00506     trans_list_forward_val = SG_MALLOC(float64_t*, m_N);
00507     trans_list_forward_id = SG_MALLOC(int32_t*, m_N);
00508 
00509     int32_t start_idx=0;
00510     for (int32_t j=0; j<m_N; j++)
00511     {
00512         int32_t old_start_idx=start_idx;
00513 
00514         while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j)
00515         {
00516             start_idx++;
00517 
00518             if (start_idx>1 && start_idx<num_trans)
00519                 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans])
00520         }
00521 
00522         if (start_idx>1 && start_idx<num_trans)
00523             ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans])
00524 
00525         int32_t len=start_idx-old_start_idx;
00526         ASSERT(len>=0)
00527 
00528         trans_list_forward_cnt[j] = 0 ;
00529 
00530         if (len>0)
00531         {
00532             trans_list_forward[j]     = SG_MALLOC(T_STATES, len);
00533             trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
00534             trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
00535         }
00536         else
00537         {
00538             trans_list_forward[j]     = NULL;
00539             trans_list_forward_val[j] = NULL;
00540             trans_list_forward_id[j]  = NULL;
00541         }
00542     }
00543 
00544     for (int32_t i=0; i<num_trans; i++)
00545     {
00546         int32_t from_state   = (int32_t)a_trans.matrix[i] ;
00547         int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ;
00548         float64_t val = a_trans.matrix[i+num_trans*2] ;
00549         int32_t id = 0 ;
00550         if (num_cols==4)
00551             id = (int32_t)a_trans.matrix[i+num_trans*3] ;
00552         //SG_DEBUG("id=%i\n", id)
00553 
00554         ASSERT(to_state>=0 && to_state<m_N)
00555         ASSERT(from_state>=0 && from_state<m_N)
00556 
00557         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00558         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00559         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00560         trans_list_forward_cnt[to_state]++ ;
00561         m_transition_matrix_a.element(from_state, to_state) = val ;
00562         m_transition_matrix_a_id.element(from_state, to_state) = id ;
00563         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state))
00564     } ;
00565 
00566     m_max_a_id = 0 ;
00567     for (int32_t i=0; i<m_N; i++)
00568         for (int32_t j=0; j<m_N; j++)
00569         {
00570             //if (m_transition_matrix_a_id.element(i,j))
00571             //SG_DEBUG("(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j))
00572             m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00573         }
00574     //SG_DEBUG("m_max_a_id=%i\n", m_max_a_id)
00575 }
00576 
00577 
00578 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input)
00579 {
00580     //for (int32_t i=0; i<mod_words_input.num_cols; i++)
00581     //{
00582     //  for (int32_t j=0; j<mod_words_input.num_rows; j++)
00583     //      SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j])
00584     //  SG_PRINT("\n")
00585     //}
00586     m_svm_arrays_clean=false ;
00587 
00588     ASSERT(m_num_svms==mod_words_input.num_rows)
00589     ASSERT(mod_words_input.num_cols==2)
00590 
00591     m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ;
00592     m_mod_words_array = m_mod_words.get_array() ;
00593 
00594     /*SG_DEBUG("m_mod_words=[")
00595     for (int32_t i=0; i<mod_words_input.num_rows; i++)
00596         SG_DEBUG("%i, ", p_mod_words_array[i])
00597         SG_DEBUG("]\n") */
00598 }
00599 
00600 bool CDynProg::check_svm_arrays()
00601 {
00602     //SG_DEBUG("wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings)
00603     if ((m_word_degree.get_dim1()==m_num_degrees) &&
00604             (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00605             (m_num_words.get_dim1()==m_num_degrees) &&
00606             //(word_used.get_dim1()==m_num_degrees) &&
00607             //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) &&
00608             //(word_used.get_dim3()==m_num_strings) &&
00609             //      (svm_values_unnormalized.get_dim1()==m_num_degrees) &&
00610             //      (svm_values_unnormalized.get_dim2()==m_num_svms) &&
00611             //(m_svm_pos_start.get_dim1()==m_num_degrees) &&
00612             (m_num_unique_words.get_dim1()==m_num_degrees) &&
00613             (m_mod_words.get_dim1()==m_num_svms) &&
00614             (m_mod_words.get_dim2()==2) &&
00615             (m_sign_words.get_dim1()==m_num_svms) &&
00616             (m_string_words.get_dim1()==m_num_svms))
00617     {
00618         m_svm_arrays_clean=true ;
00619         return true ;
00620     }
00621     else
00622     {
00623         if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00624             (m_mod_words.get_dim1()==m_num_svms) &&
00625             (m_mod_words.get_dim2()==2) &&
00626             (m_sign_words.get_dim1()==m_num_svms) &&
00627             (m_string_words.get_dim1()==m_num_svms))
00628             SG_PRINT("OK\n")
00629         else
00630             SG_PRINT("not OK\n")
00631 
00632         if (!(m_word_degree.get_dim1()==m_num_degrees))
00633             SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees")
00634         if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00635             SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1")
00636         if (!(m_num_words.get_dim1()==m_num_degrees))
00637             SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees")
00638         //if (!(m_svm_pos_start.get_dim1()==m_num_degrees))
00639         //  SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees")
00640         if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00641             SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees")
00642         if (!(m_mod_words.get_dim1()==m_num_svms))
00643             SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms")
00644         if (!(m_mod_words.get_dim2()==2))
00645             SG_WARNING("SVM array: m_mod_words.get_dim2()!=2")
00646         if (!(m_sign_words.get_dim1()==m_num_svms))
00647             SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms")
00648         if (!(m_string_words.get_dim1()==m_num_svms))
00649             SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms")
00650 
00651         m_svm_arrays_clean=false ;
00652         return false ;
00653     }
00654 }
00655 
00656 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq)
00657 {
00658     if (seq.num_dims!=3)
00659         SG_ERROR("Expected 3-dimensional Matrix\n")
00660 
00661     int32_t N=seq.dims[0];
00662     int32_t cand_pos=seq.dims[1];
00663     int32_t max_num_features=seq.dims[2];
00664 
00665     if (!m_svm_arrays_clean)
00666     {
00667         SG_ERROR("SVM arrays not clean")
00668         return ;
00669     } ;
00670 
00671     ASSERT(N==m_N)
00672     ASSERT(cand_pos==m_seq_len)
00673     ASSERT(m_initial_state_distribution_p.get_dim1()==N)
00674     ASSERT(m_end_state_distribution_q.get_dim1()==N)
00675 
00676     m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ;
00677 }
00678 int32_t CDynProg::get_num_positions()
00679 {
00680     return m_seq_len;
00681 }
00682 
00683 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path)
00684 {
00685     ASSERT(seg_path.num_rows==2)
00686     ASSERT(seg_path.num_cols==m_seq_len)
00687 
00688     if (seg_path.matrix!=NULL)
00689     {
00690         int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len);
00691         float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len);
00692         for (int32_t i=0; i<m_seq_len; i++)
00693         {
00694                 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ;
00695                 segment_mask[i] = seg_path.matrix[2*i+1] ;
00696         }
00697         best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00698         SG_FREE(segment_ids);
00699         SG_FREE(segment_mask);
00700     }
00701     else
00702     {
00703         int32_t *izeros = SG_MALLOC(int32_t, m_seq_len);
00704         float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len);
00705         for (int32_t i=0; i<m_seq_len; i++)
00706         {
00707             izeros[i]=0 ;
00708             dzeros[i]=0.0 ;
00709         }
00710         best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00711         SG_FREE(izeros);
00712         SG_FREE(dzeros);
00713     }
00714 }
00715 
00716 void CDynProg::set_pos(SGVector<int32_t> pos)
00717 {
00718     m_pos.set_array(pos.vector, pos.vlen, true, true) ;
00719     m_seq_len = pos.vlen;
00720 }
00721 
00722 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info)
00723 {
00724     if (orf_info.num_cols!=2)
00725         SG_ERROR("orf_info size incorrect %i!=2\n", orf_info.num_cols)
00726 
00727     m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ;
00728     m_orf_info.set_array_name("orf_info") ;
00729 }
00730 
00731 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00732 {
00733     if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00734         SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n")
00735 
00736     SG_UNREF(m_seq_sparse1);
00737     SG_UNREF(m_seq_sparse2);
00738 
00739     m_seq_sparse1=seq_sparse1;
00740     m_seq_sparse2=seq_sparse2;
00741     SG_REF(m_seq_sparse1);
00742     SG_REF(m_seq_sparse2);
00743 }
00744 
00745 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00746 {
00747     SG_UNREF(m_plif_matrices);
00748 
00749     m_plif_matrices=pm;
00750 
00751     SG_REF(m_plif_matrices);
00752 }
00753 
00754 void CDynProg::set_gene_string(SGVector<char> genestr)
00755 {
00756     ASSERT(genestr.vector)
00757     ASSERT(genestr.vlen>0)
00758 
00759     m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ;
00760 }
00761 
00762 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00763 {
00764     ASSERT(my_state_seq && m_seq_len>0)
00765     m_my_state_seq.resize_array(m_seq_len);
00766     for (int32_t i=0; i<m_seq_len; i++)
00767         m_my_state_seq[i]=my_state_seq[i];
00768 }
00769 
00770 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00771 {
00772     ASSERT(my_pos_seq && m_seq_len>0)
00773     m_my_pos_seq.resize_array(m_seq_len);
00774     for (int32_t i=0; i<m_seq_len; i++)
00775         m_my_pos_seq[i]=my_pos_seq[i];
00776 }
00777 
00778 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights)
00779 {
00780     if (m_num_svms!=dictionary_weights.num_cols)
00781     {
00782         SG_ERROR("m_dict_weights array does not match num_svms=%i!=%i\n",
00783                 m_num_svms, dictionary_weights.num_cols) ;
00784     }
00785 
00786     m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ;
00787 
00788     // initialize, so it does not bother when not used
00789     m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00790     m_segment_loss.set_const(0) ;
00791     m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00792     m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00793     m_segment_ids.set_const(0) ;
00794     m_segment_mask.set_const(0) ;
00795 }
00796 
00797 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss)
00798 {
00799     int32_t m=segment_loss.num_rows;
00800     int32_t n=segment_loss.num_cols;
00801     // here we need two matrices. Store it in one: 2N x N
00802     if (2*m!=n)
00803         SG_ERROR("segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n)
00804 
00805     if (m!=m_max_a_id+1)
00806         SG_ERROR("segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1)
00807 
00808     m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ;
00809     /*for (int32_t i=0; i<n; i++)
00810         for (int32_t j=0; j<n; j++)
00811         SG_DEBUG("loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) */
00812 }
00813 
00814 void CDynProg::best_path_set_segment_ids_mask(
00815     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00816 {
00817 
00818     if (m!=m_observation_matrix.get_dim2())
00819         SG_ERROR("size of segment_ids or segment_mask (%i)  does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2())
00820     int32_t max_id = 0;
00821     for (int32_t i=1;i<m;i++)
00822         max_id = CMath::max(max_id,segment_ids[i]);
00823     //SG_PRINT("max_id: %i, m:%i\n",max_id, m)
00824     m_segment_ids.set_array(segment_ids, m, true, true) ;
00825     m_segment_ids.set_array_name("m_segment_ids");
00826     m_segment_mask.set_array(segment_mask, m, true, true) ;
00827     m_segment_mask.set_array_name("m_segment_mask");
00828 
00829     m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00830     m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00831     m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00832 }
00833 
00834 SGVector<float64_t> CDynProg::get_scores()
00835 {
00836     SGVector<float64_t> scores(m_scores.get_dim1());
00837     memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1()));
00838 
00839     return scores;
00840 }
00841 
00842 SGMatrix<int32_t> CDynProg::get_states()
00843 {
00844     SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2());
00845 
00846     int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() );
00847     memcpy(states.matrix ,m_states.get_array(),sz);
00848 
00849     return states;
00850 }
00851 
00852 SGMatrix<int32_t> CDynProg::get_positions()
00853 {
00854    SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2());
00855 
00856    int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2());
00857    memcpy(positions.matrix, m_positions.get_array(),sz);
00858 
00859    return positions;
00860 }
00861 
00862 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00863 {
00864    ASSERT(scores && seq_len)
00865 
00866    *seq_len=m_my_scores.get_dim1();
00867 
00868    int32_t sz = sizeof(float64_t)*(*seq_len);
00869 
00870    *scores = SG_MALLOC(float64_t, *seq_len);
00871    ASSERT(*scores)
00872 
00873    memcpy(*scores,m_my_scores.get_array(),sz);
00874 }
00875 
00876 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00877 {
00878     ASSERT(losses && seq_len)
00879 
00880     *seq_len=m_my_losses.get_dim1();
00881 
00882    int32_t sz = sizeof(float64_t)*(*seq_len);
00883 
00884    *losses = SG_MALLOC(float64_t, *seq_len);
00885    ASSERT(*losses)
00886 
00887    memcpy(*losses,m_my_losses.get_array(),sz);
00888 }
00889 
00891 
00892 bool CDynProg::extend_orf(
00893     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
00894     int32_t to)
00895 {
00896 #ifdef DYNPROG_TIMING_DETAIL
00897     MyTime.start() ;
00898 #endif
00899 
00900     if (start<0)
00901         start=0 ;
00902     if (to<0)
00903         to=0 ;
00904 
00905     int32_t orf_target = orf_to-orf_from ;
00906     if (orf_target<0) orf_target+=3 ;
00907 
00908     int32_t pos ;
00909     if (last_pos==to)
00910         pos = to-orf_to-3 ;
00911     else
00912         pos=last_pos ;
00913 
00914     if (pos<0)
00915     {
00916 #ifdef DYNPROG_TIMING_DETAIL
00917         MyTime.stop() ;
00918         orf_time += MyTime.time_diff_sec() ;
00919 #endif
00920         return true ;
00921     }
00922 
00923     for (; pos>=start; pos-=3)
00924         if (m_genestr_stop[pos])
00925         {
00926 #ifdef DYNPROG_TIMING_DETAIL
00927             MyTime.stop() ;
00928             orf_time += MyTime.time_diff_sec() ;
00929 #endif
00930             return false ;
00931         }
00932 
00933 
00934     last_pos = CMath::min(pos+3,to-orf_to-3) ;
00935 
00936 #ifdef DYNPROG_TIMING_DETAIL
00937     MyTime.stop() ;
00938     orf_time += MyTime.time_diff_sec() ;
00939 #endif
00940     return true ;
00941 }
00942 
00943 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
00944         int16_t nbest, bool with_loss, bool with_multiple_sequences)
00945     {
00946 
00947     //FIXME we need checks here if all the fields are of right size
00948     //SG_PRINT("m_seq_len: %i\n", m_seq_len)
00949     //SG_PRINT("m_pos[0]: %i\n", m_pos[0])
00950     //SG_PRINT("\n")
00951 
00952     //FIXME these variables can go away when compute_nbest_paths uses them
00953     //instead of the local pointers below
00954     const float64_t* seq_array = m_observation_matrix.get_array();
00955     m_scores.resize_array(nbest) ;
00956     m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00957     m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00958 
00959     for (int32_t i=0; i<nbest; i++)
00960     {
00961         m_scores[i]=-1;
00962         for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
00963         {
00964             m_states.element(i,j)=-1;
00965             m_positions.element(i,j)=-1;
00966         }
00967     }
00968     float64_t* prob_nbest=m_scores.get_array();
00969     int32_t* my_state_seq=m_states.get_array();
00970     int32_t* my_pos_seq=m_positions.get_array();
00971 
00972     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
00973     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
00974     //END FIXME
00975 
00976 
00977 #ifdef DYNPROG_TIMING
00978         segment_init_time = 0.0 ;
00979         segment_pos_time = 0.0 ;
00980         segment_extend_time = 0.0 ;
00981         segment_clean_time = 0.0 ;
00982         orf_time = 0.0 ;
00983         svm_init_time = 0.0 ;
00984         svm_pos_time = 0.0 ;
00985         svm_clean_time = 0.0 ;
00986         inner_loop_time = 0.0 ;
00987         content_svm_values_time = 0.0 ;
00988         content_plifs_time = 0.0 ;
00989         inner_loop_max_time = 0.0 ;
00990         long_transition_time = 0.0 ;
00991 
00992         MyTime2.start() ;
00993 #endif
00994 
00995         if (!m_svm_arrays_clean)
00996         {
00997             SG_ERROR("SVM arrays not clean")
00998             return ;
00999         }
01000 
01001 #ifdef DYNPROG_DEBUG
01002         m_transition_matrix_a.set_array_name("transition_matrix");
01003         m_transition_matrix_a.display_array();
01004         m_mod_words.display_array() ;
01005         m_sign_words.display_array() ;
01006         m_string_words.display_array() ;
01007         //SG_PRINT("use_orf = %i\n", use_orf)
01008 #endif
01009 
01010         int32_t max_look_back = 1000 ;
01011         bool use_svm = false ;
01012 
01013         SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals)
01014 
01015         //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++)
01016       //   SG_PRINT("(%i)%0.2f ",i,seq_array[i])
01017 
01018         CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
01019         PEN.set_array_name("PEN");
01020 
01021         CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d,  CPlifBase*
01022         PEN_state_signals.set_array_name("state_signals");
01023 
01024         CDynamicArray<float64_t> seq(m_N, m_seq_len) ; // 2d
01025         seq.set_array_name("seq") ;
01026         seq.set_const(0) ;
01027 
01028 #ifdef DYNPROG_DEBUG
01029         SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data)
01030         SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs)
01031         SG_PRINT("m_num_svms: %i\n", m_num_svms)
01032         SG_PRINT("m_num_lin_feat_plifs_cum: ")
01033         for (int i=0; i<=m_num_raw_data; i++)
01034             SG_PRINT(" %i  ",m_num_lin_feat_plifs_cum[i])
01035         SG_PRINT("\n")
01036 #endif
01037 
01038         float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01039         { // initialize svm_svalue
01040             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01041                 svm_value[s]=0 ;
01042         }
01043 
01044         { // convert seq_input to seq
01045             // this is independent of the svm values
01046 
01047             CDynamicArray<float64_t> *seq_input=NULL ; // 3d
01048             if (seq_array!=NULL)
01049             {
01050                 //SG_PRINT("using dense seq_array\n")
01051 
01052                 seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01053                 seq_input->set_array_name("seq_input") ;
01054                 //seq_input.display_array() ;
01055 
01056                 ASSERT(m_seq_sparse1==NULL)
01057                 ASSERT(m_seq_sparse2==NULL)
01058             } else
01059             {
01060                 SG_PRINT("using sparse seq_array\n")
01061 
01062                 ASSERT(m_seq_sparse1!=NULL)
01063                 ASSERT(m_seq_sparse2!=NULL)
01064                 ASSERT(max_num_signals==2)
01065             }
01066 
01067             for (int32_t i=0; i<m_N; i++)
01068                 for (int32_t j=0; j<m_seq_len; j++)
01069                     seq.element(i,j) = 0 ;
01070 
01071             for (int32_t i=0; i<m_N; i++)
01072                 for (int32_t j=0; j<m_seq_len; j++)
01073                     for (int32_t k=0; k<max_num_signals; k++)
01074                     {
01075                         if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01076                         {
01077                             // no plif
01078                             if (seq_input!=NULL)
01079                                 seq.element(i,j) = seq_input->element(i,j,k) ;
01080                             else
01081                             {
01082                                 if (k==0)
01083                                     seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01084                                 if (k==1)
01085                                     seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01086                             }
01087                             break ;
01088                         }
01089                         if (PEN_state_signals.element(i,k)!=NULL)
01090                         {
01091                             if (seq_input!=NULL)
01092                             {
01093                                 // just one plif
01094                                 if (CMath::is_finite(seq_input->element(i,j,k)))
01095                                     seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01096                                 else
01097                                     // keep infinity values
01098                                     seq.element(i,j) = seq_input->element(i, j, k) ;
01099                             }
01100                             else
01101                             {
01102                                 if (k==0)
01103                                 {
01104                                     // just one plif
01105                                     if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01106                                         seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01107                                     else
01108                                         // keep infinity values
01109                                         seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01110                                 }
01111                                 if (k==1)
01112                                 {
01113                                     // just one plif
01114                                     if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01115                                         seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01116                                     else
01117                                         // keep infinity values
01118                                         seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01119                                 }
01120                             }
01121                         }
01122                         else
01123                             break ;
01124                     }
01125             delete seq_input;
01126             SG_FREE(svm_value);
01127         }
01128 
01129         // allow longer transitions than look_back
01130         bool long_transitions = m_long_transitions ;
01131         CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ; // 2d
01132         long_transition_content_start_position.set_array_name("long_transition_content_start_position");
01133 #ifdef DYNPROG_DEBUG
01134         CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ; // 2d
01135         long_transition_content_end_position.set_array_name("long_transition_content_end_position");
01136 #endif
01137         CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ; // 2d
01138         long_transition_content_start.set_array_name("long_transition_content_start");
01139 
01140         CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ; // 2d
01141         long_transition_content_scores.set_array_name("long_transition_content_scores");
01142 #ifdef DYNPROG_DEBUG
01143 
01144         CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ; // 2d
01145         long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen");
01146 
01147         CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ; // 2d
01148         long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev");
01149 
01150         CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ; // 2d
01151         long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem");
01152 #endif
01153         CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ; // 2d
01154         long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss");
01155 
01156         if (nbest!=1)
01157         {
01158             SG_ERROR("Long transitions are not supported for nbest!=1")
01159             long_transitions = false ;
01160         }
01161         long_transition_content_scores.set_const(-CMath::INFTY);
01162 #ifdef DYNPROG_DEBUG
01163         long_transition_content_scores_pen.set_const(0) ;
01164         long_transition_content_scores_elem.set_const(0) ;
01165         long_transition_content_scores_prev.set_const(0) ;
01166 #endif
01167         if (with_loss)
01168             long_transition_content_scores_loss.set_const(0) ;
01169         long_transition_content_start.set_const(0) ;
01170         long_transition_content_start_position.set_const(0) ;
01171 #ifdef DYNPROG_DEBUG
01172         long_transition_content_end_position.set_const(0) ;
01173 #endif
01174 
01175         svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01176         { // initialize svm_svalue
01177             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01178                 svm_value[s]=0 ;
01179         }
01180 
01181         CDynamicArray<int32_t> look_back(m_N,m_N) ; // 2d
01182         look_back.set_array_name("look_back");
01183         //CDynamicArray<int32_t> look_back_orig(m_N,m_N) ;
01184         //look_back.set_array_name("look_back_orig");
01185 
01186 
01187         { // determine maximal length of look-back
01188             for (int32_t i=0; i<m_N; i++)
01189                 for (int32_t j=0; j<m_N; j++)
01190                 {
01191                     look_back.set_element(INT_MAX, i, j) ;
01192                     //look_back_orig.set_element(INT_MAX, i, j) ;
01193                 }
01194 
01195             for (int32_t j=0; j<m_N; j++)
01196             {
01197                 // only consider transitions that are actually allowed
01198                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01199                 const T_STATES *elem_list = trans_list_forward[j] ;
01200 
01201                 for (int32_t i=0; i<num_elem; i++)
01202                 {
01203                     T_STATES ii = elem_list[i] ;
01204 
01205                     CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ;
01206                     if (penij==NULL)
01207                     {
01208                         if (long_transitions)
01209                         {
01210                             look_back.set_element(m_long_transition_threshold, j, ii) ;
01211                             //look_back_orig.set_element(m_long_transition_max, j, ii) ;
01212                         }
01213                         continue ;
01214                     }
01215 
01216                     /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */
01217                     if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01218                     {
01219                         look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01220                         //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01221                         if (CMath::ceil(penij->get_max_value()) > max_look_back)
01222                         {
01223                             SG_DEBUG("%d %d -> value: %f\n", ii,j,penij->get_max_value())
01224                             max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01225                         }
01226                     }
01227                     else
01228                     {
01229                         look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01230                         //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
01231                     }
01232 
01233                     if (penij->uses_svm_values())
01234                         use_svm=true ;
01235                 }
01236             }
01237             /* make sure max_look_back is at least as long as a long transition */
01238             if (long_transitions)
01239                 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01240 
01241             /* make sure max_look_back is not longer than the whole string */
01242             max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01243 
01244             int32_t num_long_transitions = 0 ;
01245             for (int32_t i=0; i<m_N; i++)
01246                 for (int32_t j=0; j<m_N; j++)
01247                 {
01248                     if (look_back.get_element(i,j)==m_long_transition_threshold)
01249                         num_long_transitions++ ;
01250                     if (look_back.get_element(i,j)==INT_MAX)
01251                     {
01252                         if (long_transitions)
01253                         {
01254                             look_back.set_element(m_long_transition_threshold, i, j) ;
01255                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01256                         }
01257                         else
01258                         {
01259                             look_back.set_element(max_look_back, i, j) ;
01260                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01261                         }
01262                     }
01263                 }
01264             SG_DEBUG("Using %i long transitions\n", num_long_transitions)
01265         }
01266         //SG_PRINT("max_look_back: %i \n", max_look_back)
01267 
01268         //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1())
01269         SG_DEBUG("use_svm=%i\n", use_svm)
01270 
01271         SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest)
01272         const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01273         SG_DEBUG("look_back_buflen=%i\n", look_back_buflen)
01274         /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01275           look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01276           m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01277           m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/
01278 
01279         //bool is_big = (mem_use>200) || (m_seq_len>5000) ;
01280 
01281         /*if (is_big)
01282           {
01283           SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n",
01284           m_seq_len, m_N, max_look_back, nbest) ;
01285           SG_DEBUG("allocating %1.2fMB of memory\n",
01286           mem_use) ;
01287           }*/
01288         ASSERT(nbest<32000)
01289 
01290         CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ; // 3d
01291         delta.set_array_name("delta");
01292         float64_t* delta_array = delta.get_array() ;
01293         //delta.set_const(0) ;
01294 
01295         CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ; // 3d
01296         psi.set_array_name("psi");
01297         //psi.set_const(0) ;
01298 
01299         CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ; // 3d
01300         ktable.set_array_name("ktable");
01301         //ktable.set_const(0) ;
01302 
01303         CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ; // 3d
01304         ptable.set_array_name("ptable");
01305         //ptable.set_const(0) ;
01306 
01307         CDynamicArray<float64_t> delta_end(nbest) ;
01308         delta_end.set_array_name("delta_end");
01309         //delta_end.set_const(0) ;
01310 
01311         CDynamicArray<T_STATES> path_ends(nbest) ;
01312         path_ends.set_array_name("path_ends");
01313         //path_ends.set_const(0) ;
01314 
01315         CDynamicArray<int16_t> ktable_end(nbest) ;
01316         ktable_end.set_array_name("ktable_end");
01317         //ktable_end.set_const(0) ;
01318 
01319         float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen);
01320         memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01321         int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
01322         memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01323 
01324         CDynamicArray<float64_t> oldtempvv(look_back_buflen) ;
01325         oldtempvv.set_array_name("oldtempvv");
01326 
01327         CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ;
01328         oldtempvv2.set_array_name("oldtempvv2");
01329         //oldtempvv.set_const(0) ;
01330         //oldtempvv.display_size() ;
01331 
01332         CDynamicArray<int32_t> oldtempii(look_back_buflen) ;
01333         oldtempii.set_array_name("oldtempii");
01334 
01335         CDynamicArray<int32_t> oldtempii2(look_back_buflen) ;
01336         oldtempii2.set_array_name("oldtempii2");
01337         //oldtempii.set_const(0) ;
01338 
01339         CDynamicArray<T_STATES> state_seq(m_seq_len) ;
01340         state_seq.set_array_name("state_seq");
01341         //state_seq.set_const(0) ;
01342 
01343         CDynamicArray<int32_t> pos_seq(m_seq_len) ;
01344         pos_seq.set_array_name("pos_seq");
01345         //pos_seq.set_const(0) ;
01346 
01347 
01348         m_dict_weights.set_array_name("dict_weights") ;
01349         m_word_degree.set_array_name("word_degree") ;
01350         m_cum_num_words.set_array_name("cum_num_words") ;
01351         m_num_words.set_array_name("num_words") ;
01352         //word_used.set_array_name("word_used") ;
01353         //svm_values_unnormalized.set_array_name("svm_values_unnormalized") ;
01354         //m_svm_pos_start.set_array_name("svm_pos_start") ;
01355         m_num_unique_words.set_array_name("num_unique_words") ;
01356 
01357         PEN.set_array_name("PEN") ;
01358         seq.set_array_name("seq") ;
01359 
01360         delta.set_array_name("delta") ;
01361         psi.set_array_name("psi") ;
01362         ktable.set_array_name("ktable") ;
01363         ptable.set_array_name("ptable") ;
01364         delta_end.set_array_name("delta_end") ;
01365         path_ends.set_array_name("path_ends") ;
01366         ktable_end.set_array_name("ktable_end") ;
01367 
01368 #ifdef USE_TMP_ARRAYCLASS
01369         fixedtempvv.set_array_name("fixedtempvv") ;
01370         fixedtempii.set_array_name("fixedtempvv") ;
01371 #endif
01372 
01373         oldtempvv.set_array_name("oldtempvv") ;
01374         oldtempvv2.set_array_name("oldtempvv2") ;
01375         oldtempii.set_array_name("oldtempii") ;
01376         oldtempii2.set_array_name("oldtempii2") ;
01377 
01378 
01380 
01381 #ifdef DYNPROG_DEBUG
01382         state_seq.display_size() ;
01383         pos_seq.display_size() ;
01384 
01385         m_dict_weights.display_size() ;
01386         m_word_degree.display_array() ;
01387         m_cum_num_words.display_array() ;
01388         m_num_words.display_array() ;
01389         //word_used.display_size() ;
01390         //svm_values_unnormalized.display_size() ;
01391         //m_svm_pos_start.display_array() ;
01392         m_num_unique_words.display_array() ;
01393 
01394         PEN.display_size() ;
01395         PEN_state_signals.display_size() ;
01396         seq.display_size() ;
01397         m_orf_info.display_size() ;
01398 
01399         //m_genestr_stop.display_size() ;
01400         delta.display_size() ;
01401         psi.display_size() ;
01402         ktable.display_size() ;
01403         ptable.display_size() ;
01404         delta_end.display_size() ;
01405         path_ends.display_size() ;
01406         ktable_end.display_size() ;
01407 
01408 #ifdef USE_TMP_ARRAYCLASS
01409         fixedtempvv.display_size() ;
01410         fixedtempii.display_size() ;
01411 #endif
01412 
01413         //oldtempvv.display_size() ;
01414         //oldtempii.display_size() ;
01415 
01416         state_seq.display_size() ;
01417         pos_seq.display_size() ;
01418 
01419         //seq.set_const(0) ;
01420 
01421 #endif //DYNPROG_DEBUG
01422 
01424 
01425 
01426 
01427         {
01428             for (int32_t s=0; s<m_num_svms; s++)
01429                 ASSERT(m_string_words_array[s]<1)
01430         }
01431 
01432 
01433         //CDynamicArray<int32_t*> trans_matrix_svms(m_N,m_N); // 2d
01434         //CDynamicArray<int32_t> trans_matrix_num_svms(m_N,m_N); // 2d
01435 
01436         { // initialization
01437 
01438             for (T_STATES i=0; i<m_N; i++)
01439             {
01440                 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01441                 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01442                 psi.element(0,i,0)   = 0 ;
01443                 if (nbest>1)
01444                     ktable.element(0,i,0)  = 0 ;
01445                 ptable.element(0,i,0)  = 0 ;
01446                 for (int16_t k=1; k<nbest; k++)
01447                 {
01448                     int32_t dim1, dim2, dim3 ;
01449                     delta.get_array_size(dim1, dim2, dim3) ;
01450                     //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3)
01451                     //delta.element(0, i, k)    = -CMath::INFTY ;
01452                     delta.element(delta_array, 0, i, k, m_seq_len, m_N)    = -CMath::INFTY ;
01453                     psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
01454                     if (nbest>1)
01455                         ktable.element(0,i,k)     = 0 ;
01456                     ptable.element(0,i,k)     = 0 ;
01457                 }
01458                 /*
01459                    for (T_STATES j=0; j<m_N; j++)
01460                    {
01461                    CPlifBase * penalty = PEN.element(i,j) ;
01462                    int32_t num_current_svms=0;
01463                    int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
01464                    if (penalty)
01465                    {
01466                    SG_PRINT("trans %i -> %i \n",i,j)
01467                    penalty->get_used_svms(&num_current_svms, svm_ids);
01468                    trans_matrix_svms.set_element(svm_ids,i,j);
01469                    for (int32_t l=0;l<num_current_svms;l++)
01470                    SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l])
01471                    trans_matrix_num_svms.set_element(num_current_svms,i,j);
01472                    }
01473                    }
01474                    */
01475 
01476             }
01477         }
01478 
01479         SG_DEBUG("START_RECURSION \n\n")
01480 
01481         // recursion
01482         for (int32_t t=1; t<m_seq_len; t++)
01483         {
01484             //if (is_big && t%(1+(m_seq_len/1000))==1)
01485             //  SG_PROGRESS(t, 0, m_seq_len)
01486             //SG_PRINT("%i\n", t)
01487 
01488             for (T_STATES j=0; j<m_N; j++)
01489             {
01490                 if (seq.element(j,t)<=-1e20)
01491                 { // if we cannot observe the symbol here, then we can omit the rest
01492                     for (int16_t k=0; k<nbest; k++)
01493                     {
01494                         delta.element(delta_array, t, j, k, m_seq_len, m_N)    = seq.element(j,t) ;
01495                         psi.element(t,j,k)         = 0 ;
01496                         if (nbest>1)
01497                             ktable.element(t,j,k)  = 0 ;
01498                         ptable.element(t,j,k)      = 0 ;
01499                     }
01500                 }
01501                 else
01502                 {
01503                     const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01504                     const T_STATES *elem_list = trans_list_forward[j] ;
01505                     const float64_t *elem_val      = trans_list_forward_val[j] ;
01506                     const int32_t *elem_id      = trans_list_forward_id[j] ;
01507 
01508                     int32_t fixed_list_len = 0 ;
01509                     float64_t fixedtempvv_ = CMath::INFTY ;
01510                     int32_t fixedtempii_ = 0 ;
01511                     bool fixedtemplong = false ;
01512 
01513                     for (int32_t i=0; i<num_elem; i++)
01514                     {
01515                         T_STATES ii = elem_list[i] ;
01516 
01517                         const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01518 
01519                         /*int32_t look_back = max_look_back ;
01520                           if (0)
01521                           { // find lookback length
01522                           CPlifBase *pen = (CPlifBase*) penalty ;
01523                           if (pen!=NULL)
01524                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01525                           if (look_back>=1e6)
01526                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen)
01527                           ASSERT(look_back<1e6)
01528                           } */
01529 
01530                         int32_t look_back_ = look_back.element(j, ii) ;
01531 
01532                         int32_t orf_from = m_orf_info.element(ii,0) ;
01533                         int32_t orf_to   = m_orf_info.element(j,1) ;
01534                         if((orf_from!=-1)!=(orf_to!=-1))
01535                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
01536                         ASSERT((orf_from!=-1)==(orf_to!=-1))
01537 
01538                         int32_t orf_target = -1 ;
01539                         if (orf_from!=-1)
01540                         {
01541                             orf_target=orf_to-orf_from ;
01542                             if (orf_target<0)
01543                                 orf_target+=3 ;
01544                             ASSERT(orf_target>=0 && orf_target<3)
01545                         }
01546 
01547                         int32_t orf_last_pos = m_pos[t] ;
01548 #ifdef DYNPROG_TIMING
01549                         MyTime3.start() ;
01550 #endif
01551                         int32_t num_ok_pos = 0 ;
01552 
01553                         for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01554                         {
01555                             bool ok ;
01556                             //int32_t plen=t-ts;
01557 
01558                             /*for (int32_t s=0; s<m_num_svms; s++)
01559                               if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
01560                               (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
01561                               {
01562                               SG_DEBUG("s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen])
01563                               }*/
01564 
01565                             if (orf_target==-1)
01566                                 ok=true ;
01567                             else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01568                                 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01569                             else
01570                                 ok=false ;
01571 
01572                             if (ok)
01573                             {
01574 
01575                                 float64_t segment_loss = 0.0 ;
01576                                 if (with_loss)
01577                                 {
01578                                     segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01579                                     //if (segment_loss!=segment_loss2)
01580                                         //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2)
01581                                 }
01583                                 // BEST_PATH_TRANS
01585 
01586                                 int32_t frame = orf_from;//m_orf_info.element(ii,0);
01587                                 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01588 
01589                                 float64_t pen_val = 0.0 ;
01590                                 if (penalty)
01591                                 {
01592 #ifdef DYNPROG_TIMING_DETAIL
01593                                     MyTime.start() ;
01594 #endif
01595                                     pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01596 
01597 #ifdef DYNPROG_TIMING_DETAIL
01598                                     MyTime.stop() ;
01599                                     content_plifs_time += MyTime.time_diff_sec() ;
01600 #endif
01601                                 }
01602 
01603 #ifdef DYNPROG_TIMING_DETAIL
01604                                 MyTime.start() ;
01605 #endif
01606                                 num_ok_pos++ ;
01607 
01608                                 if (nbest==1)
01609                                 {
01610                                     float64_t  val        = elem_val[i] + pen_val ;
01611                                     if (with_loss)
01612                                         val              += segment_loss ;
01613 
01614                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01615 
01616                                     if (mval<fixedtempvv_)
01617                                     {
01618                                         fixedtempvv_ = mval ;
01619                                         fixedtempii_ = ii + ts*m_N;
01620                                         fixed_list_len = 1 ;
01621                                         fixedtemplong = false ;
01622                                     }
01623                                 }
01624                                 else
01625                                 {
01626                                     for (int16_t diff=0; diff<nbest; diff++)
01627                                     {
01628                                         float64_t  val        = elem_val[i]  ;
01629                                         val                  += pen_val ;
01630                                         if (with_loss)
01631                                             val              += segment_loss ;
01632 
01633                                         float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01634 
01635                                         /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
01636                                         /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
01637                                         /* fixed_list_len has the number of elements in fixedtempvv */
01638 
01639                                         if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01640                                         {
01641                                             if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01642                                             {
01643                                                 fixedtempvv[fixed_list_len] = mval ;
01644                                                 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01645                                                 fixed_list_len++ ;
01646                                             }
01647                                             else  // must have mval < fixedtempvv[fixed_list_len-1]
01648                                             {
01649                                                 int32_t addhere = fixed_list_len;
01650                                                 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01651                                                     addhere--;
01652 
01653                                                 // move everything from addhere+1 one forward
01654                                                 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01655                                                 {
01656                                                     fixedtempvv[jj] = fixedtempvv[jj-1];
01657                                                     fixedtempii[jj] = fixedtempii[jj-1];
01658                                                 }
01659 
01660                                                 fixedtempvv[addhere] = mval;
01661                                                 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01662 
01663                                                 if (fixed_list_len < nbest)
01664                                                     fixed_list_len++;
01665                                             }
01666                                         }
01667                                     }
01668                                 }
01669 #ifdef DYNPROG_TIMING_DETAIL
01670                                 MyTime.stop() ;
01671                                 inner_loop_max_time += MyTime.time_diff_sec() ;
01672 #endif
01673                             }
01674                         }
01675 #ifdef DYNPROG_TIMING
01676                         MyTime3.stop() ;
01677                         inner_loop_time += MyTime3.time_diff_sec() ;
01678 #endif
01679                     }
01680                     for (int32_t i=0; i<num_elem; i++)
01681                     {
01682                         T_STATES ii = elem_list[i] ;
01683 
01684                         const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01685 
01686                         /*int32_t look_back = max_look_back ;
01687                           if (0)
01688                           { // find lookback length
01689                           CPlifBase *pen = (CPlifBase*) penalty ;
01690                           if (pen!=NULL)
01691                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01692                           if (look_back>=1e6)
01693                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen)
01694                           ASSERT(look_back<1e6)
01695                           } */
01696 
01697                         int32_t look_back_ = look_back.element(j, ii) ;
01698                         //int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
01699 
01700                         int32_t orf_from = m_orf_info.element(ii,0) ;
01701                         int32_t orf_to   = m_orf_info.element(j,1) ;
01702                         if((orf_from!=-1)!=(orf_to!=-1))
01703                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
01704                         ASSERT((orf_from!=-1)==(orf_to!=-1))
01705 
01706                         int32_t orf_target = -1 ;
01707                         if (orf_from!=-1)
01708                         {
01709                             orf_target=orf_to-orf_from ;
01710                             if (orf_target<0)
01711                                 orf_target+=3 ;
01712                             ASSERT(orf_target>=0 && orf_target<3)
01713                         }
01714 
01715                         //int32_t loss_last_pos = t ;
01716                         //float64_t last_loss = 0.0 ;
01717 
01718 #ifdef DYNPROG_TIMING
01719                         MyTime3.start() ;
01720 #endif
01721 
01722                         /* long transition stuff */
01723                         /* only do this, if
01724                          * this feature is enabled
01725                          * this is not a transition with ORF restrictions
01726                          * the loss is switched off
01727                          * nbest=1
01728                          */
01729 #ifdef DYNPROG_TIMING
01730                         MyTime3.start() ;
01731 #endif
01732                         // long transitions, only when not considering ORFs
01733                         if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
01734                         {
01735 
01736                             // update table for 5' part  of the long segment
01737 
01738                             int32_t start = long_transition_content_start.get_element(ii, j) ;
01739                             int32_t end_5p_part = start ;
01740                             for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
01741                             {
01742                                 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away
01743                                 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
01744                                     end_5p_part++ ;
01745 
01746                                 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t)
01747                                 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold)
01748 
01749                                 float64_t pen_val = 0.0;
01750                                 /* recompute penalty, if necessary */
01751                                 if (penalty)
01752                                 {
01753                                     int32_t frame = m_orf_info.element(ii,0);
01754                                     lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part
01755                                     pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
01756                                 }
01757 
01758                                 /*if (m_pos[start_5p_part]==1003)
01759                                   {
01760                                   SG_PRINT("Part1: %i - %i   vs  %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part])
01761                                   SG_PRINT("Part1: ts=%i  t=%i  start_5p_part=%i  m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len)
01762                                   }*/
01763 
01764                                 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
01765                                 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check
01766 
01767                                 float64_t segment_loss_part1=0.0 ;
01768                                 if (with_loss)
01769                                 {  // this is the loss from the start of the long segment (5' part + middle section)
01770 
01771                                     segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure
01772 
01773                                     mval_trans -= segment_loss_part1 ;
01774                                 }
01775 
01776 
01777                                 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/)
01778                                 {
01779                                     // this restricts the maximal length of segments,
01780                                     // but the current implementation is not valid since the
01781                                     // long transition is discarded without loocking if there
01782                                     // is a second best long transition in between
01783                                     long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
01784                                     long_transition_content_start_position.set_element(0, ii, j) ;
01785                                     if (with_loss)
01786                                         long_transition_content_scores_loss.set_element(0.0, ii, j) ;
01787 #ifdef DYNPROG_DEBUG
01788                                     long_transition_content_scores_pen.set_element(0.0, ii, j) ;
01789                                     long_transition_content_scores_elem.set_element(0.0, ii, j) ;
01790                                     long_transition_content_scores_prev.set_element(0.0, ii, j) ;
01791                                     long_transition_content_end_position.set_element(0, ii, j) ;
01792 #endif
01793                                 }
01794                                 if (with_loss)
01795                                 {
01796                                     float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
01797                                     float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
01798                                     float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
01799                                     long_transition_content_scores.set_element(score, ii, j) ;
01800                                     long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
01801 #ifdef DYNPROG_DEBUG
01802                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01803 #endif
01804 
01805                                 }
01806                                 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
01807                                 {
01808                                     /* then the old long transition is either too far away or worse than the current one */
01809                                     long_transition_content_scores.set_element(-mval_trans, ii, j) ;
01810                                     long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
01811                                     if (with_loss)
01812                                         long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
01813 #ifdef DYNPROG_DEBUG
01814                                     long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
01815                                     long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
01816                                     long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
01817                                     /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) +
01818                                       long_transition_content_scores_elem.get_element(ii, j) +
01819                                       long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/
01820                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01821 #endif
01822                                 }
01823                                 //
01824                                 // this sets the position where the search for better 5'parts is started the next time
01825                                 // whithout this the prediction takes ages
01826                                 //
01827                                 long_transition_content_start.set_element(start_5p_part, ii, j) ;
01828                             }
01829 
01830                             // consider the 3' part at the end of the long segment:
01831                             // * with length = m_long_transition_threshold
01832                             // * content prediction and loss only for this part
01833 
01834                             // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold
01835                             // precompute: only depends on t
01836                             int ts = t;
01837                             while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
01838                                 ts-- ;
01839 
01840                             if (ts>0)
01841                             {
01842                                 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold))
01843 
01844 
01845                                 /* only consider this transition, if the right position was found */
01846                                 float pen_val_3p = 0.0 ;
01847                                 if (penalty)
01848                                 {
01849                                     int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
01850                                     lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01851                                     pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01852                                 }
01853 
01854                                 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
01855 
01856                                 {
01857 #ifdef DYNPROG_DEBUG
01858                                     float64_t segment_loss_part2=0.0 ;
01859                                     float64_t segment_loss_part1=0.0 ;
01860 #endif
01861                                     float64_t segment_loss_total=0.0 ;
01862 
01863                                     if (with_loss)
01864                                     {   // this is the loss for the 3' end fragment of the segment
01865                                         // (the 5' end and the middle section loss is already contained in mval)
01866 
01867 #ifdef DYNPROG_DEBUG
01868                                         // this is an alternative, which should be identical, if the loss is additive
01869                                         segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
01870                                         //mval -= segment_loss_part2 ;
01871                                         segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
01872 #endif
01873                                         segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
01874                                         mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
01875                                     }
01876 
01877 #ifdef DYNPROG_DEBUG
01878                                     if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
01879                                     {
01880                                         SG_PRINT("Part2: %i,%i,%i: val=%1.6f  pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i,  loss=%1.1f/%1.1f (%i,%i)\n",
01881                                                  m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
01882                                                  long_transition_content_scores.get_element(ii, j),
01883                                                  long_transition_content_scores_pen.get_element(ii, j),
01884                                                  long_transition_content_scores_prev.get_element(ii, j),
01885                                                  long_transition_content_scores_elem.get_element(ii, j),
01886                                                  long_transition_content_scores_loss.get_element(ii, j),
01887                                                  m_pos[long_transition_content_start_position.get_element(ii,j)],
01888                                                  m_pos[long_transition_content_end_position.get_element(ii,j)],
01889                                                  m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
01890                                         SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] )
01891                                     }
01892 
01893                                     if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
01894                                     {
01895                                         SG_ERROR("LOSS: total=%1.1f (%i-%i)  part1=%1.1f/%1.1f (%i-%i)  part2=%1.1f (%i-%i)  sum=%1.1f  diff=%1.1f\n",
01896                                                  segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
01897                                                  long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
01898                                                  segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
01899                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
01900                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
01901                                     }
01902 #endif
01903                                 }
01904 
01905                                 // prefer simpler version to guarantee optimality
01906                                 //
01907                                 // original:
01908                                 /* if ((mval < fixedtempvv_) &&
01909                                     (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */
01910                                 if (mval < fixedtempvv_)
01911                                 {
01912                                     /* then the long transition is better than the short one => replace it */
01913                                     int32_t fromtjk =  fixedtempii_ ;
01914                                     /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n",
01915                                       m_pos[t], j,
01916                                       mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii,
01917                                       m_pos[long_transition_content_position.get_element(ii, j)],
01918                                       fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
01919                                     ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong)
01920 
01921                                     fixedtempvv_ = mval ;
01922                                     fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
01923                                     fixed_list_len = 1 ;
01924                                     fixedtemplong = true ;
01925                                 }
01926                             }
01927                         }
01928                     }
01929 #ifdef DYNPROG_TIMING
01930                     MyTime3.stop() ;
01931                     long_transition_time += MyTime3.time_diff_sec() ;
01932 #endif
01933 
01934 
01935                     int32_t numEnt = fixed_list_len;
01936 
01937                     float64_t minusscore;
01938                     int64_t fromtjk;
01939 
01940                     for (int16_t k=0; k<nbest; k++)
01941                     {
01942                         if (k<numEnt)
01943                         {
01944                             if (nbest==1)
01945                             {
01946                                 minusscore = fixedtempvv_ ;
01947                                 fromtjk = fixedtempii_ ;
01948                             }
01949                             else
01950                             {
01951                                 minusscore = fixedtempvv[k];
01952                                 fromtjk = fixedtempii[k];
01953                             }
01954 
01955                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -minusscore + seq.element(j,t);
01956                             psi.element(t,j,k)      = (fromtjk%m_N) ;
01957                             if (nbest>1)
01958                                 ktable.element(t,j,k)   = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
01959                             ptable.element(t,j,k)   = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
01960                         }
01961                         else
01962                         {
01963                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -CMath::INFTY ;
01964                             psi.element(t,j,k)      = 0 ;
01965                             if (nbest>1)
01966                                 ktable.element(t,j,k)     = 0 ;
01967                             ptable.element(t,j,k)     = 0 ;
01968                         }
01969                     }
01970                 }
01971             }
01972         }
01973         { //termination
01974             int32_t list_len = 0 ;
01975             for (int16_t diff=0; diff<nbest; diff++)
01976             {
01977                 for (T_STATES i=0; i<m_N; i++)
01978                 {
01979                     oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
01980                     oldtempii[list_len] = i + diff*m_N ;
01981                     list_len++ ;
01982                 }
01983             }
01984 
01985             CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
01986 
01987             for (int16_t k=0; k<nbest; k++)
01988             {
01989                 delta_end.element(k) = -oldtempvv[k] ;
01990                 path_ends.element(k) = (oldtempii[k]%m_N) ;
01991                 if (nbest>1)
01992                     ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
01993             }
01994 
01995 
01996         }
01997 
01998         { //state sequence backtracking
01999             for (int16_t k=0; k<nbest; k++)
02000             {
02001                 prob_nbest[k]= delta_end.element(k) ;
02002 
02003                 int32_t i         = 0 ;
02004                 state_seq[i]  = path_ends.element(k) ;
02005                 int16_t q   = 0 ;
02006                 if (nbest>1)
02007                     q=ktable_end.element(k) ;
02008                 pos_seq[i]    = m_seq_len-1 ;
02009 
02010                 while (pos_seq[i]>0)
02011                 {
02012                     ASSERT(i+1<m_seq_len)
02013                     //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q)
02014                     state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02015                     pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02016                     if (nbest>1)
02017                         q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02018                     i++ ;
02019                 }
02020                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q)
02021                 int32_t num_states = i+1 ;
02022                 for (i=0; i<num_states;i++)
02023                 {
02024                     my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02025                     my_pos_seq[i+k*m_seq_len]   = pos_seq[num_states-i-1] ;
02026                 }
02027                 if (num_states<m_seq_len)
02028                 {
02029                     my_state_seq[num_states+k*m_seq_len]=-1 ;
02030                     my_pos_seq[num_states+k*m_seq_len]=-1 ;
02031                 }
02032             }
02033         }
02034 
02035         //if (is_big)
02036         //  SG_PRINT("DONE.     \n")
02037 
02038 
02039 #ifdef DYNPROG_TIMING
02040         MyTime2.stop() ;
02041 
02042         //if (is_big)
02043         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  content_svm_values_time=%1.2f  content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec())
02044 #endif
02045 
02046         SG_FREE(fixedtempvv);
02047         SG_FREE(fixedtempii);
02048     }
02049 
02050 
02051 void CDynProg::best_path_trans_deriv(
02052     int32_t *my_state_seq, int32_t *my_pos_seq,
02053     int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02054 {
02055     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02056     m_end_state_distribution_q_deriv.resize_array(m_N) ;
02057     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02058     //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
02059     //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
02060     m_my_scores.resize_array(my_seq_len);
02061     m_my_losses.resize_array(my_seq_len);
02062     float64_t* my_scores=m_my_scores.get_array();
02063     float64_t* my_losses=m_my_losses.get_array();
02064     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02065     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02066 
02067     if (!m_svm_arrays_clean)
02068     {
02069         SG_ERROR("SVM arrays not clean")
02070         return ;
02071     } ;
02072     //SG_PRINT("genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num)
02073     //m_mod_words.display() ;
02074     //m_sign_words.display() ;
02075     //m_string_words.display() ;
02076 
02077     bool use_svm = false ;
02078 
02079     CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
02080     PEN.set_array_name("PEN");
02081 
02082     CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase*
02083     PEN_state_signals.set_array_name("PEN_state_signals");
02084 
02085     CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02086     seq_input.set_array_name("seq_input");
02087 
02088     { // determine whether to use svm outputs and clear derivatives
02089         for (int32_t i=0; i<m_N; i++)
02090             for (int32_t j=0; j<m_N; j++)
02091             {
02092                 CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ;
02093                 if (penij==NULL)
02094                     continue ;
02095 
02096                 if (penij->uses_svm_values())
02097                     use_svm=true ;
02098                 penij->penalty_clear_derivative() ;
02099             }
02100         for (int32_t i=0; i<m_N; i++)
02101             for (int32_t j=0; j<max_num_signals; j++)
02102             {
02103                 CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ;
02104                 if (penij==NULL)
02105                     continue ;
02106                 if (penij->uses_svm_values())
02107                     use_svm=true ;
02108                 penij->penalty_clear_derivative() ;
02109             }
02110     }
02111 
02112     { // set derivatives of p, q and a to zero
02113 
02114         for (int32_t i=0; i<m_N; i++)
02115         {
02116             m_initial_state_distribution_p_deriv.element(i)=0 ;
02117             m_end_state_distribution_q_deriv.element(i)=0 ;
02118             for (int32_t j=0; j<m_N; j++)
02119                 m_transition_matrix_a_deriv.element(i,j)=0 ;
02120         }
02121     }
02122 
02123     { // clear score vector
02124         for (int32_t i=0; i<my_seq_len; i++)
02125         {
02126             my_scores[i]=0.0 ;
02127             my_losses[i]=0.0 ;
02128         }
02129     }
02130 
02131     //int32_t total_len = 0 ;
02132 
02133     //m_transition_matrix_a.display_array() ;
02134     //m_transition_matrix_a_id.display_array() ;
02135 
02136     // compute derivatives for given path
02137     float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02138     float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02139     float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02140     for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02141     {
02142         svm_value[s]=0 ;
02143         svm_value_part1[s]=0 ;
02144         svm_value_part2[s]=0 ;
02145     }
02146 
02147     //#ifdef DYNPROG_DEBUG
02148     float64_t total_score = 0.0 ;
02149     float64_t total_loss = 0.0 ;
02150     //#endif
02151 
02152     ASSERT(my_state_seq[0]>=0)
02153     m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02154     my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02155 
02156     ASSERT(my_state_seq[my_seq_len-1]>=0)
02157     m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02158     my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02159 
02160     //#ifdef DYNPROG_DEBUG
02161     total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02162     //#endif
02163 
02164     SG_DEBUG("m_seq_len=%i\n", my_seq_len)
02165     for (int32_t i=0; i<my_seq_len-1; i++)
02166     {
02167         if (my_state_seq[i+1]==-1)
02168             break ;
02169         int32_t from_state = my_state_seq[i] ;
02170         int32_t to_state   = my_state_seq[i+1] ;
02171         int32_t from_pos   = my_pos_seq[i] ;
02172         int32_t to_pos     = my_pos_seq[i+1] ;
02173 
02174         int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02175         my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02176 
02177 #ifdef DYNPROG_DEBUG
02178 
02179 
02180         if (i>0)// test if segment loss is additive
02181         {
02182             float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02183             float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02184             float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02185             SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3)
02186             if (CMath::abs(loss1+loss2-loss3)>0)
02187             {
02188                 SG_PRINT("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
02189             }
02190         }
02191         io->set_loglevel(M_DEBUG) ;
02192         SG_DEBUG("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
02193 #endif
02194         // increase usage of this transition
02195         m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02196         my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02197         //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state))
02198 #ifdef DYNPROG_DEBUG
02199         SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i])
02200 #endif
02201 
02202         /*int32_t last_svm_pos[m_num_degrees] ;
02203           for (int32_t qq=0; qq<m_num_degrees; qq++)
02204           last_svm_pos[qq]=-1 ;*/
02205 
02206         bool is_long_transition = false ;
02207         if (m_long_transitions)
02208         {
02209             if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02210                 is_long_transition = true ;
02211             if (m_orf_info.element(from_state,0)!=-1)
02212                 is_long_transition = false ;
02213         }
02214 
02215         int32_t from_pos_thresh = from_pos ;
02216         int32_t to_pos_thresh = to_pos ;
02217 
02218         if (use_svm)
02219         {
02220             if (is_long_transition)
02221             {
02222 
02223                 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // *
02224                     from_pos_thresh++ ;
02225                 ASSERT(from_pos_thresh<to_pos)
02226                 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold) // *
02227                 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold)// *
02228 
02229                 int32_t frame = m_orf_info.element(from_state,0);
02230                 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02231 
02232 #ifdef DYNPROG_DEBUG
02233                 SG_PRINT("part1: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1])
02234                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02235                     SG_PRINT("%1.4f  ", svm_value_part1[s])
02236                 SG_PRINT("\n")
02237 #endif
02238 
02239                 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // *
02240                     to_pos_thresh-- ;
02241                 ASSERT(to_pos_thresh>0)
02242                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold)  // *
02243                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold)  // *
02244 
02245                 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02246 
02247 #ifdef DYNPROG_DEBUG
02248                 SG_PRINT("part2: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1])
02249                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02250                     SG_PRINT("%1.4f  ", svm_value_part2[s])
02251                 SG_PRINT("\n")
02252 #endif
02253             }
02254             else
02255             {
02256                 /* normal case */
02257 
02258                 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos])
02259                 int32_t frame = m_orf_info.element(from_state,0);
02260                 if (false)//(frame>=0)
02261                 {
02262                     int32_t num_current_svms=0;
02263                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02264                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame)
02265                     ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
02266                     SG_PRINT("\n")
02267                 }
02268 
02269                 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02270 #ifdef DYNPROG_DEBUG
02271                 SG_PRINT("part2: pos1: %i  pos2: %i   \nsvm_values: ", m_pos[from_pos], m_pos[to_pos])
02272                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02273                     SG_PRINT("%1.4f  ", svm_value[s])
02274                 SG_PRINT("\n")
02275 #endif
02276             }
02277         }
02278 
02279         if (PEN.element(to_state, from_state)!=NULL)
02280         {
02281             float64_t nscore = 0 ;
02282             if (is_long_transition)
02283             {
02284                 float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02285                 float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02286                 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02287             }
02288             else
02289                 nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02290 
02291             if (false)//(nscore<-1e9)
02292                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
02293                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
02294 
02295             my_scores[i] += nscore ;
02296 
02297             for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
02298             {
02299                 svm_value[s]=-CMath::INFTY;
02300                 svm_value_part1[s]=-CMath::INFTY;
02301                 svm_value_part2[s]=-CMath::INFTY;
02302             }
02303 
02304 #ifdef DYNPROG_DEBUG
02305             //SG_DEBUG("%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos])
02306 #endif
02307             if (is_long_transition)
02308             {
02309 #ifdef DYNPROG_DEBUG
02310                 float64_t sum_score = 0.0 ;
02311 
02312                 for (int kk=0; kk<i; kk++)
02313                     sum_score += my_scores[i] ;
02314 
02315                 SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i)  2: %1.6f (%i-%i) \n",
02316                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
02317                         nscore, sum_score,
02318                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
02319                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02320 #endif
02321             }
02322 
02323             if (is_long_transition)
02324             {
02325                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02326                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02327             }
02328             else
02329                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02330 
02331             //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data)
02332 
02333             // for tiling array and rna-seq data every single measurement must be added to the derivative
02334             // in contrast to the content svm predictions where we have a single value per transition;
02335             // content svm predictions have already been added to the derivative, thus we start with d=1
02336             // instead of d=0
02337             if (is_long_transition)
02338             {
02339                 for (int32_t d=1; d<=m_num_raw_data; d++)
02340                 {
02341                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02342                         svm_value[s]=-CMath::INFTY;
02343                     float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02344                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02345                     for (int32_t k=0;k<num_intensities;k++)
02346                     {
02347                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02348                             svm_value[j]=intensities[k];
02349 
02350                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02351 
02352                     }
02353                     num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02354                     for (int32_t k=0;k<num_intensities;k++)
02355                     {
02356                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02357                             svm_value[j]=intensities[k];
02358 
02359                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02360 
02361                     }
02362                     SG_FREE(intensities);
02363 
02364                 }
02365             }
02366             else
02367             {
02368                 for (int32_t d=1; d<=m_num_raw_data; d++)
02369                 {
02370                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02371                         svm_value[s]=-CMath::INFTY;
02372                     float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02373                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02374                     //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities)
02375                     for (int32_t k=0;k<num_intensities;k++)
02376                     {
02377                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02378                             svm_value[j]=intensities[k];
02379 
02380                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
02381 
02382                     }
02383                     SG_FREE(intensities);
02384                 }
02385             }
02386 
02387         }
02388 #ifdef DYNPROG_DEBUG
02389         SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i])
02390 #endif
02391 
02392         //SG_DEBUG("emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0)
02393         for (int32_t k=0; k<max_num_signals; k++)
02394         {
02395             if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02396             {
02397 #ifdef DYNPROG_DEBUG
02398                 SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k))
02399 #endif
02400                 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02401                 //if (seq_input.element(to_state, to_pos, k) !=0)
02402                 //  SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k))
02403                 break ;
02404             }
02405             if (PEN_state_signals.element(to_state, k)!=NULL)
02406             {
02407                 float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter)
02408                 my_scores[i] += nscore ;
02409 #ifdef DYNPROG_DEBUG
02410                 if (false)//(nscore<-1e9)
02411                 {
02412                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
02413                         is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
02414                     for (int x=0; x<23; x++)
02415                     {
02416                         for (int i=-10; i<10; i++)
02417                             SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k))
02418                         SG_PRINT("\n")
02419                     }
02420 
02421                 }
02422 #endif
02423                 //break ;
02424                 //int32_t num_current_svms=0;
02425                 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02426                 //SG_PRINT("PEN_state_signals->id: ")
02427                 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
02428                 //SG_PRINT("\n")
02429                 //if (nscore != 0)
02430                 //SG_PRINT("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k)
02431 #ifdef DYNPROG_DEBUG
02432                 SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k)
02433 #endif
02434                 ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter)
02435             } else
02436                 break ;
02437         }
02438 
02439         //#ifdef DYNPROG_DEBUG
02440         //SG_PRINT("scores[%i]=%f (final) \n", i, my_scores[i])
02441         //SG_PRINT("losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss)
02442         total_score += my_scores[i] ;
02443         total_loss += my_losses[i] ;
02444         //#endif
02445     }
02446     //#ifdef DYNPROG_DEBUG
02447     //SG_PRINT("total score = %f \n", total_score)
02448     //SG_PRINT("total loss = %f \n", total_loss)
02449     //#endif
02450     SG_FREE(svm_value);
02451     SG_FREE(svm_value_part1);
02452     SG_FREE(svm_value_part2);
02453 }
02454 
02455 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02456 {
02457     ASSERT(from_pos<to_pos)
02458     int32_t num_intensities = 0;
02459     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[type-1]];
02460     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02461     int32_t last_pos;
02462     int32_t num = m_num_probes_cum[type-1];
02463     while (*p_tiling_pos<to_pos)
02464     {
02465         if (*p_tiling_pos>=from_pos)
02466         {
02467             intensities[num_intensities] = *p_tiling_data;
02468             num_intensities++;
02469         }
02470         num++;
02471         if (num>=m_num_probes_cum[type])
02472             break;
02473         last_pos = *p_tiling_pos;
02474         p_tiling_pos++;
02475         p_tiling_data++;
02476         ASSERT(last_pos<*p_tiling_pos)
02477     }
02478     return num_intensities;
02479 }
02480 
02481 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02482 {
02483 #ifdef DYNPROG_TIMING_DETAIL
02484     MyTime.start() ;
02485 #endif
02486 //  ASSERT(from_state<to_state)
02487 //  if (!(from_pos<to_pos))
02488 //      SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos)
02489     for (int32_t i=0;i<m_num_svms;i++)
02490     {
02491         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02492         float64_t from_val = m_lin_feat.get_element(i, from_state);
02493         svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02494     }
02495     for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02496     {
02497         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02498         float64_t from_val = m_lin_feat.get_element(i, from_state);
02499         svm_values[i] = to_val-from_val ;
02500     }
02501     if (m_intron_list)
02502     {
02503         int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs);
02504         m_intron_list->get_intron_support(support, from_state, to_state);
02505         int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02506         int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
02507         int32_t cnt = 0;
02508         for (int32_t i=intron_list_start; i<intron_list_end;i++)
02509         {
02510             svm_values[i] = (float64_t) (support[cnt]);
02511             cnt++;
02512         }
02513         //if (to_pos>3990 && to_pos<4010)
02514         //  SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1])
02515         SG_FREE(support);
02516     }
02517     // find the correct row with precomputed frame predictions
02518     if (frame!=-1)
02519     {
02520         svm_values[frame_plifs[0]] = 1e10;
02521         svm_values[frame_plifs[1]] = 1e10;
02522         svm_values[frame_plifs[2]] = 1e10;
02523         int32_t global_frame = from_pos%3;
02524         int32_t row = ((global_frame+frame)%3)+4;
02525         float64_t to_val   = m_lin_feat.get_element(row, to_state);
02526         float64_t from_val = m_lin_feat.get_element(row, from_state);
02527         svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02528     }
02529 #ifdef DYNPROG_TIMING_DETAIL
02530     MyTime.stop() ;
02531     content_svm_values_time += MyTime.time_diff_sec() ;
02532 #endif
02533 }
02534 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02535 {
02536     m_intron_list = intron_list;
02537     m_num_intron_plifs = num_plifs;
02538 }
02539 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation