SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2007 Soeren Sonnenburg 00008 * Written (W) 1999-2007 Gunnar Raetsch 00009 * Written (W) 2008-2009 Jonas Behr 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/structure/DynProg.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/lib/config.h> 00017 #include <shogun/features/StringFeatures.h> 00018 #include <shogun/features/Alphabet.h> 00019 #include <shogun/structure/Plif.h> 00020 #include <shogun/structure/IntronList.h> 00021 00022 #include <stdlib.h> 00023 #include <stdio.h> 00024 #include <time.h> 00025 #include <ctype.h> 00026 00027 using namespace shogun; 00028 00029 //#define USE_TMP_ARRAYCLASS 00030 //#define DYNPROG_DEBUG 00031 00032 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ; 00033 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ; 00034 int32_t CDynProg::frame_plifs[3]={4,5,6}; 00035 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ; 00036 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1, 00037 1,1,1,1,1,1,1,1, 00038 0,0,0,0,0,0,0,0, 00039 0,0,0,0,0,0,0,0} ; 00040 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true, 00041 false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts 00042 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0, 00043 1,1,1,1,1,1,1,1} ; // which string should be used 00044 00045 CDynProg::CDynProg(int32_t num_svms /*= 8 */) 00046 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1), 00047 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1), 00048 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1), 00049 m_end_state_distribution_q_deriv(1), 00050 00051 // multi svm 00052 m_num_degrees(4), 00053 m_num_svms(num_svms), 00054 m_word_degree(word_degree_default, m_num_degrees, true, true), 00055 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true), 00056 m_cum_num_words_array(m_cum_num_words.get_array()), 00057 m_num_words(num_words_default, m_num_degrees, true, true), 00058 m_num_words_array(m_num_words.get_array()), 00059 m_mod_words(mod_words_default, m_num_svms, 2, true, true), 00060 m_mod_words_array(m_mod_words.get_array()), 00061 m_sign_words(sign_words_default, m_num_svms, true, true), 00062 m_sign_words_array(m_sign_words.get_array()), 00063 m_string_words(string_words_default, m_num_svms, true, true), 00064 m_string_words_array(m_string_words.get_array()), 00065 //m_svm_pos_start(m_num_degrees), 00066 m_num_unique_words(m_num_degrees), 00067 m_svm_arrays_clean(true), 00068 00069 m_max_a_id(0), m_observation_matrix(1,1,1), 00070 m_pos(1), 00071 m_seq_len(0), 00072 m_orf_info(1,2), 00073 m_plif_list(1), 00074 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 00075 m_segment_ids(1), 00076 m_segment_mask(1), 00077 m_my_state_seq(1), 00078 m_my_pos_seq(1), 00079 m_my_scores(1), 00080 m_my_losses(1), 00081 m_scores(1), 00082 m_states(1,1), 00083 m_positions(1,1), 00084 00085 m_seq_sparse1(NULL), 00086 m_seq_sparse2(NULL), 00087 m_plif_matrices(NULL), 00088 00089 m_genestr_stop(1), 00090 m_intron_list(NULL), 00091 m_num_intron_plifs(0), 00092 m_lin_feat(1,1), //by Jonas 00093 m_raw_intensities(NULL), 00094 m_probe_pos(NULL), 00095 m_num_probes_cum(NULL), 00096 m_num_lin_feat_plifs_cum(NULL), 00097 m_num_raw_data(0), 00098 00099 m_long_transitions(true), 00100 m_long_transition_threshold(1000) 00101 { 00102 trans_list_forward = NULL ; 00103 trans_list_forward_cnt = NULL ; 00104 trans_list_forward_val = NULL ; 00105 trans_list_forward_id = NULL ; 00106 trans_list_len = 0 ; 00107 00108 mem_initialized = true ; 00109 00110 m_N=1; 00111 00112 m_raw_intensities = NULL; 00113 m_probe_pos = NULL; 00114 m_num_probes_cum = SG_MALLOC(int32_t, 100); 00115 m_num_probes_cum[0] = 0; 00116 //m_use_tiling=false; 00117 m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100); 00118 m_num_lin_feat_plifs_cum[0] = m_num_svms; 00119 m_num_raw_data = 0; 00120 #ifdef ARRAY_STATISTICS 00121 m_word_degree.set_array_name("word_degree"); 00122 #endif 00123 00124 m_transition_matrix_a_id.set_array_name("transition_matrix_a_id"); 00125 m_transition_matrix_a.set_array_name("transition_matrix_a"); 00126 m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv"); 00127 m_mod_words.set_array_name("mod_words"); 00128 m_orf_info.set_array_name("orf_info"); 00129 m_segment_sum_weights.set_array_name("segment_sum_weights"); 00130 m_dict_weights.set_array_name("dict_weights"); 00131 m_states.set_array_name("states"); 00132 m_positions.set_array_name("positions"); 00133 m_lin_feat.set_array_name("lin_feat"); 00134 00135 00136 m_observation_matrix.set_array_name("m_observation_matrix"); 00137 m_segment_loss.set_array_name("m_segment_loss"); 00138 m_seg_loss_obj = new CSegmentLoss(); 00139 } 00140 00141 CDynProg::~CDynProg() 00142 { 00143 if (trans_list_forward_cnt) 00144 SG_FREE(trans_list_forward_cnt); 00145 if (trans_list_forward) 00146 { 00147 for (int32_t i=0; i<trans_list_len; i++) 00148 { 00149 if (trans_list_forward[i]) 00150 SG_FREE(trans_list_forward[i]); 00151 } 00152 SG_FREE(trans_list_forward); 00153 } 00154 if (trans_list_forward_val) 00155 { 00156 for (int32_t i=0; i<trans_list_len; i++) 00157 { 00158 if (trans_list_forward_val[i]) 00159 SG_FREE(trans_list_forward_val[i]); 00160 } 00161 SG_FREE(trans_list_forward_val); 00162 } 00163 if (trans_list_forward_id) 00164 { 00165 for (int32_t i=0; i<trans_list_len; i++) 00166 { 00167 if (trans_list_forward_id[i]) 00168 SG_FREE(trans_list_forward_id[i]); 00169 } 00170 SG_FREE(trans_list_forward_id); 00171 } 00172 if (m_raw_intensities) 00173 SG_FREE(m_raw_intensities); 00174 if (m_probe_pos) 00175 SG_FREE(m_probe_pos); 00176 if (m_num_probes_cum) 00177 SG_FREE(m_num_probes_cum); 00178 if (m_num_lin_feat_plifs_cum) 00179 SG_FREE(m_num_lin_feat_plifs_cum); 00180 00181 delete m_intron_list; 00182 00183 SG_UNREF(m_seq_sparse1); 00184 SG_UNREF(m_seq_sparse2); 00185 SG_UNREF(m_plif_matrices); 00186 SG_UNREF(m_seg_loss_obj); 00187 } 00188 00190 int32_t CDynProg::get_num_svms() 00191 { 00192 return m_num_svms; 00193 } 00194 00195 void CDynProg::precompute_stop_codons() 00196 { 00197 int32_t length=m_genestr.get_dim1(); 00198 00199 m_genestr_stop.resize_array(length) ; 00200 m_genestr_stop.set_const(0) ; 00201 m_genestr_stop.set_array_name("genestr_stop") ; 00202 { 00203 for (int32_t i=0; i<length-2; i++) 00204 if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 00205 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 00206 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) || 00207 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') ))) 00208 { 00209 m_genestr_stop.element(i)=true ; 00210 } 00211 else 00212 m_genestr_stop.element(i)=false ; 00213 m_genestr_stop.element(length-2)=false ; 00214 m_genestr_stop.element(length-1)=false ; 00215 } 00216 } 00217 00218 void CDynProg::set_num_states(int32_t p_N) 00219 { 00220 m_N=p_N ; 00221 00222 m_transition_matrix_a_id.resize_array(m_N,m_N) ; 00223 m_transition_matrix_a.resize_array(m_N,m_N) ; 00224 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 00225 m_initial_state_distribution_p.resize_array(m_N) ; 00226 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 00227 m_end_state_distribution_q.resize_array(m_N); 00228 m_end_state_distribution_q_deriv.resize_array(m_N) ; 00229 00230 m_orf_info.resize_array(m_N,2) ; 00231 } 00232 00233 int32_t CDynProg::get_num_states() 00234 { 00235 return m_N; 00236 } 00237 00238 void CDynProg::init_tiling_data( 00239 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes) 00240 { 00241 m_num_raw_data++; 00242 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes; 00243 00244 int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]); 00245 float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]); 00246 00247 00248 if (m_num_raw_data==1){ 00249 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00250 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00251 //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2) 00252 }else{ 00253 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 00254 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t)); 00255 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t)); 00256 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t)); 00257 } 00258 SG_FREE(m_probe_pos); 00259 SG_FREE(m_raw_intensities); 00260 m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes); 00261 m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes); 00262 00263 //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00264 //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00265 00266 } 00267 00268 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms) 00269 { 00270 m_lin_feat.resize_array(p_num_svms, m_seq_len); 00271 00272 // initialize array 00273 for (int s=0; s<p_num_svms; s++) 00274 for (int p=0; p<m_seq_len; p++) 00275 m_lin_feat.set_element(0.0, s, p) ; 00276 } 00277 00278 void CDynProg::resize_lin_feat(const int32_t num_new_feat) 00279 { 00280 int32_t dim1, dim2; 00281 m_lin_feat.get_array_size(dim1, dim2); 00282 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]) 00283 ASSERT(dim2==m_seq_len) // == number of candidate positions 00284 00285 00286 00287 float64_t* arr = m_lin_feat.get_array(); 00288 float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2); 00289 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ; 00290 for(int32_t j=0;j<m_seq_len;j++) 00291 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++) 00292 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k]; 00293 00294 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 00295 SG_FREE(tmp); 00296 00297 /*for(int32_t j=0;j<5;j++) 00298 { 00299 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++) 00300 { 00301 SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j)) 00302 } 00303 SG_PRINT("\n") 00304 } 00305 m_lin_feat.get_array_size(dim1,dim2); 00306 SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2)*/ 00307 00308 //SG_PRINT("resize_lin_feat: done\n") 00309 } 00310 00311 void CDynProg::precompute_tiling_plifs( 00312 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs) 00313 { 00314 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs; 00315 float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs); 00316 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 00317 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++) 00318 svm_value[i]=0.0; 00319 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs); 00320 for (int32_t i=0; i<num_tiling_plifs; i++) 00321 { 00322 tiling_plif[i]=0.0; 00323 CPlif * plif = PEN[tiling_plif_ids[i]]; 00324 tiling_rows[i] = plif->get_use_svm(); 00325 00326 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i) 00327 } 00328 resize_lin_feat(num_tiling_plifs); 00329 00330 00331 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]]; 00332 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]]; 00333 int32_t num=m_num_probes_cum[m_num_raw_data-1]; 00334 00335 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++) 00336 { 00337 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx]) 00338 { 00339 for (int32_t i=0; i<num_tiling_plifs; i++) 00340 { 00341 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data; 00342 CPlif * plif = PEN[tiling_plif_ids[i]]; 00343 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1) 00344 plif->set_do_calc(true); 00345 tiling_plif[i]+=plif->lookup_penalty(0,svm_value); 00346 plif->set_do_calc(false); 00347 } 00348 p_tiling_data++; 00349 p_tiling_pos++; 00350 num++; 00351 } 00352 for (int32_t i=0; i<num_tiling_plifs; i++) 00353 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx); 00354 } 00355 SG_FREE(svm_value); 00356 SG_FREE(tiling_plif); 00357 SG_FREE(tiling_rows); 00358 } 00359 00360 void CDynProg::create_word_string() 00361 { 00362 SG_FREE(m_wordstr); 00363 m_wordstr=SG_MALLOC(uint16_t**, 5440); 00364 int32_t k=0; 00365 int32_t genestr_len=m_genestr.get_dim1(); 00366 00367 m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees); 00368 for (int32_t j=0; j<m_num_degrees; j++) 00369 { 00370 m_wordstr[k][j]=NULL ; 00371 { 00372 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len); 00373 for (int32_t i=0; i<genestr_len; i++) 00374 switch (m_genestr[i]) 00375 { 00376 case 'A': 00377 case 'a': m_wordstr[k][j][i]=0 ; break ; 00378 case 'C': 00379 case 'c': m_wordstr[k][j][i]=1 ; break ; 00380 case 'G': 00381 case 'g': m_wordstr[k][j][i]=2 ; break ; 00382 case 'T': 00383 case 't': m_wordstr[k][j][i]=3 ; break ; 00384 default: ASSERT(0) 00385 } 00386 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ; 00387 } 00388 } 00389 } 00390 00391 void CDynProg::precompute_content_values() 00392 { 00393 for (int32_t s=0; s<m_num_svms; s++) 00394 m_lin_feat.set_element(0.0, s, 0); 00395 00396 for (int32_t p=0 ; p<m_seq_len-1 ; p++) 00397 { 00398 int32_t from_pos = m_pos[p]; 00399 int32_t to_pos = m_pos[p+1]; 00400 float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms); 00401 //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos) 00402 00403 ASSERT(from_pos<=m_genestr.get_dim1()) 00404 ASSERT(to_pos<=m_genestr.get_dim1()) 00405 00406 for (int32_t s=0; s<m_num_svms; s++) 00407 my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p); 00408 00409 for (int32_t i=from_pos; i<to_pos; i++) 00410 { 00411 for (int32_t j=0; j<m_num_degrees; j++) 00412 { 00413 uint16_t word = m_wordstr[0][j][i] ; 00414 for (int32_t s=0; s<m_num_svms; s++) 00415 { 00416 // check if this k-mer should be considered for this SVM 00417 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1)) 00418 continue; 00419 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ; 00420 } 00421 } 00422 } 00423 for (int32_t s=0; s<m_num_svms; s++) 00424 { 00425 float64_t prev = m_lin_feat.get_element(s, p); 00426 //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) 00427 if (prev<-1e20 || prev>1e20) 00428 { 00429 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) 00430 prev=0 ; 00431 } 00432 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1); 00433 } 00434 SG_FREE(my_svm_values_unnormalized); 00435 } 00436 //for (int32_t j=0; j<m_num_degrees; j++) 00437 // SG_FREE(m_wordstr[0][j]); 00438 //SG_FREE(m_wordstr[0]); 00439 } 00440 00441 void CDynProg::set_p_vector(SGVector<float64_t> p) 00442 { 00443 if (!(p.vlen==m_N)) 00444 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N) 00445 00446 m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true); 00447 } 00448 00449 void CDynProg::set_q_vector(SGVector<float64_t> q) 00450 { 00451 if (!(q.vlen==m_N)) 00452 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N) 00453 m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true); 00454 } 00455 00456 void CDynProg::set_a(SGMatrix<float64_t> a) 00457 { 00458 ASSERT(a.num_cols==m_N) 00459 ASSERT(a.num_rows==m_N) 00460 m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true); 00461 m_transition_matrix_a_deriv.resize_array(m_N, m_N); 00462 } 00463 00464 void CDynProg::set_a_id(SGMatrix<int32_t> a) 00465 { 00466 ASSERT(a.num_cols==m_N) 00467 ASSERT(a.num_rows==m_N) 00468 m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true); 00469 m_max_a_id = 0; 00470 for (int32_t i=0; i<m_N; i++) 00471 { 00472 for (int32_t j=0; j<m_N; j++) 00473 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)); 00474 } 00475 } 00476 00477 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans) 00478 { 00479 int32_t num_trans=a_trans.num_rows; 00480 int32_t num_cols=a_trans.num_cols; 00481 00482 //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans"); 00483 00484 if (!((num_cols==3) || (num_cols==4))) 00485 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols) 00486 00487 SG_FREE(trans_list_forward); 00488 SG_FREE(trans_list_forward_cnt); 00489 SG_FREE(trans_list_forward_val); 00490 SG_FREE(trans_list_forward_id); 00491 00492 trans_list_forward = NULL ; 00493 trans_list_forward_cnt = NULL ; 00494 trans_list_forward_val = NULL ; 00495 trans_list_len = 0 ; 00496 00497 m_transition_matrix_a.set_const(0) ; 00498 m_transition_matrix_a_id.set_const(0) ; 00499 00500 mem_initialized = true ; 00501 00502 trans_list_forward_cnt=NULL ; 00503 trans_list_len = m_N ; 00504 trans_list_forward = SG_MALLOC(T_STATES*, m_N); 00505 trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N); 00506 trans_list_forward_val = SG_MALLOC(float64_t*, m_N); 00507 trans_list_forward_id = SG_MALLOC(int32_t*, m_N); 00508 00509 int32_t start_idx=0; 00510 for (int32_t j=0; j<m_N; j++) 00511 { 00512 int32_t old_start_idx=start_idx; 00513 00514 while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j) 00515 { 00516 start_idx++; 00517 00518 if (start_idx>1 && start_idx<num_trans) 00519 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]) 00520 } 00521 00522 if (start_idx>1 && start_idx<num_trans) 00523 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]) 00524 00525 int32_t len=start_idx-old_start_idx; 00526 ASSERT(len>=0) 00527 00528 trans_list_forward_cnt[j] = 0 ; 00529 00530 if (len>0) 00531 { 00532 trans_list_forward[j] = SG_MALLOC(T_STATES, len); 00533 trans_list_forward_val[j] = SG_MALLOC(float64_t, len); 00534 trans_list_forward_id[j] = SG_MALLOC(int32_t, len); 00535 } 00536 else 00537 { 00538 trans_list_forward[j] = NULL; 00539 trans_list_forward_val[j] = NULL; 00540 trans_list_forward_id[j] = NULL; 00541 } 00542 } 00543 00544 for (int32_t i=0; i<num_trans; i++) 00545 { 00546 int32_t from_state = (int32_t)a_trans.matrix[i] ; 00547 int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ; 00548 float64_t val = a_trans.matrix[i+num_trans*2] ; 00549 int32_t id = 0 ; 00550 if (num_cols==4) 00551 id = (int32_t)a_trans.matrix[i+num_trans*3] ; 00552 //SG_DEBUG("id=%i\n", id) 00553 00554 ASSERT(to_state>=0 && to_state<m_N) 00555 ASSERT(from_state>=0 && from_state<m_N) 00556 00557 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ; 00558 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ; 00559 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ; 00560 trans_list_forward_cnt[to_state]++ ; 00561 m_transition_matrix_a.element(from_state, to_state) = val ; 00562 m_transition_matrix_a_id.element(from_state, to_state) = id ; 00563 //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state)) 00564 } ; 00565 00566 m_max_a_id = 0 ; 00567 for (int32_t i=0; i<m_N; i++) 00568 for (int32_t j=0; j<m_N; j++) 00569 { 00570 //if (m_transition_matrix_a_id.element(i,j)) 00571 //SG_DEBUG("(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) 00572 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ; 00573 } 00574 //SG_DEBUG("m_max_a_id=%i\n", m_max_a_id) 00575 } 00576 00577 00578 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input) 00579 { 00580 //for (int32_t i=0; i<mod_words_input.num_cols; i++) 00581 //{ 00582 // for (int32_t j=0; j<mod_words_input.num_rows; j++) 00583 // SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j]) 00584 // SG_PRINT("\n") 00585 //} 00586 m_svm_arrays_clean=false ; 00587 00588 ASSERT(m_num_svms==mod_words_input.num_rows) 00589 ASSERT(mod_words_input.num_cols==2) 00590 00591 m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ; 00592 m_mod_words_array = m_mod_words.get_array() ; 00593 00594 /*SG_DEBUG("m_mod_words=[") 00595 for (int32_t i=0; i<mod_words_input.num_rows; i++) 00596 SG_DEBUG("%i, ", p_mod_words_array[i]) 00597 SG_DEBUG("]\n") */ 00598 } 00599 00600 bool CDynProg::check_svm_arrays() 00601 { 00602 //SG_DEBUG("wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings) 00603 if ((m_word_degree.get_dim1()==m_num_degrees) && 00604 (m_cum_num_words.get_dim1()==m_num_degrees+1) && 00605 (m_num_words.get_dim1()==m_num_degrees) && 00606 //(word_used.get_dim1()==m_num_degrees) && 00607 //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) && 00608 //(word_used.get_dim3()==m_num_strings) && 00609 // (svm_values_unnormalized.get_dim1()==m_num_degrees) && 00610 // (svm_values_unnormalized.get_dim2()==m_num_svms) && 00611 //(m_svm_pos_start.get_dim1()==m_num_degrees) && 00612 (m_num_unique_words.get_dim1()==m_num_degrees) && 00613 (m_mod_words.get_dim1()==m_num_svms) && 00614 (m_mod_words.get_dim2()==2) && 00615 (m_sign_words.get_dim1()==m_num_svms) && 00616 (m_string_words.get_dim1()==m_num_svms)) 00617 { 00618 m_svm_arrays_clean=true ; 00619 return true ; 00620 } 00621 else 00622 { 00623 if ((m_num_unique_words.get_dim1()==m_num_degrees) && 00624 (m_mod_words.get_dim1()==m_num_svms) && 00625 (m_mod_words.get_dim2()==2) && 00626 (m_sign_words.get_dim1()==m_num_svms) && 00627 (m_string_words.get_dim1()==m_num_svms)) 00628 SG_PRINT("OK\n") 00629 else 00630 SG_PRINT("not OK\n") 00631 00632 if (!(m_word_degree.get_dim1()==m_num_degrees)) 00633 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") 00634 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1)) 00635 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") 00636 if (!(m_num_words.get_dim1()==m_num_degrees)) 00637 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") 00638 //if (!(m_svm_pos_start.get_dim1()==m_num_degrees)) 00639 // SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") 00640 if (!(m_num_unique_words.get_dim1()==m_num_degrees)) 00641 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") 00642 if (!(m_mod_words.get_dim1()==m_num_svms)) 00643 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") 00644 if (!(m_mod_words.get_dim2()==2)) 00645 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") 00646 if (!(m_sign_words.get_dim1()==m_num_svms)) 00647 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") 00648 if (!(m_string_words.get_dim1()==m_num_svms)) 00649 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") 00650 00651 m_svm_arrays_clean=false ; 00652 return false ; 00653 } 00654 } 00655 00656 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq) 00657 { 00658 if (seq.num_dims!=3) 00659 SG_ERROR("Expected 3-dimensional Matrix\n") 00660 00661 int32_t N=seq.dims[0]; 00662 int32_t cand_pos=seq.dims[1]; 00663 int32_t max_num_features=seq.dims[2]; 00664 00665 if (!m_svm_arrays_clean) 00666 { 00667 SG_ERROR("SVM arrays not clean") 00668 return ; 00669 } ; 00670 00671 ASSERT(N==m_N) 00672 ASSERT(cand_pos==m_seq_len) 00673 ASSERT(m_initial_state_distribution_p.get_dim1()==N) 00674 ASSERT(m_end_state_distribution_q.get_dim1()==N) 00675 00676 m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ; 00677 } 00678 int32_t CDynProg::get_num_positions() 00679 { 00680 return m_seq_len; 00681 } 00682 00683 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path) 00684 { 00685 ASSERT(seg_path.num_rows==2) 00686 ASSERT(seg_path.num_cols==m_seq_len) 00687 00688 if (seg_path.matrix!=NULL) 00689 { 00690 int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len); 00691 float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len); 00692 for (int32_t i=0; i<m_seq_len; i++) 00693 { 00694 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ; 00695 segment_mask[i] = seg_path.matrix[2*i+1] ; 00696 } 00697 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ; 00698 SG_FREE(segment_ids); 00699 SG_FREE(segment_mask); 00700 } 00701 else 00702 { 00703 int32_t *izeros = SG_MALLOC(int32_t, m_seq_len); 00704 float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len); 00705 for (int32_t i=0; i<m_seq_len; i++) 00706 { 00707 izeros[i]=0 ; 00708 dzeros[i]=0.0 ; 00709 } 00710 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ; 00711 SG_FREE(izeros); 00712 SG_FREE(dzeros); 00713 } 00714 } 00715 00716 void CDynProg::set_pos(SGVector<int32_t> pos) 00717 { 00718 m_pos.set_array(pos.vector, pos.vlen, true, true) ; 00719 m_seq_len = pos.vlen; 00720 } 00721 00722 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info) 00723 { 00724 if (orf_info.num_cols!=2) 00725 SG_ERROR("orf_info size incorrect %i!=2\n", orf_info.num_cols) 00726 00727 m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ; 00728 m_orf_info.set_array_name("orf_info") ; 00729 } 00730 00731 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2) 00732 { 00733 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2)) 00734 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n") 00735 00736 SG_UNREF(m_seq_sparse1); 00737 SG_UNREF(m_seq_sparse2); 00738 00739 m_seq_sparse1=seq_sparse1; 00740 m_seq_sparse2=seq_sparse2; 00741 SG_REF(m_seq_sparse1); 00742 SG_REF(m_seq_sparse2); 00743 } 00744 00745 void CDynProg::set_plif_matrices(CPlifMatrix* pm) 00746 { 00747 SG_UNREF(m_plif_matrices); 00748 00749 m_plif_matrices=pm; 00750 00751 SG_REF(m_plif_matrices); 00752 } 00753 00754 void CDynProg::set_gene_string(SGVector<char> genestr) 00755 { 00756 ASSERT(genestr.vector) 00757 ASSERT(genestr.vlen>0) 00758 00759 m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ; 00760 } 00761 00762 void CDynProg::set_my_state_seq(int32_t* my_state_seq) 00763 { 00764 ASSERT(my_state_seq && m_seq_len>0) 00765 m_my_state_seq.resize_array(m_seq_len); 00766 for (int32_t i=0; i<m_seq_len; i++) 00767 m_my_state_seq[i]=my_state_seq[i]; 00768 } 00769 00770 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq) 00771 { 00772 ASSERT(my_pos_seq && m_seq_len>0) 00773 m_my_pos_seq.resize_array(m_seq_len); 00774 for (int32_t i=0; i<m_seq_len; i++) 00775 m_my_pos_seq[i]=my_pos_seq[i]; 00776 } 00777 00778 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights) 00779 { 00780 if (m_num_svms!=dictionary_weights.num_cols) 00781 { 00782 SG_ERROR("m_dict_weights array does not match num_svms=%i!=%i\n", 00783 m_num_svms, dictionary_weights.num_cols) ; 00784 } 00785 00786 m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ; 00787 00788 // initialize, so it does not bother when not used 00789 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ; 00790 m_segment_loss.set_const(0) ; 00791 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ; 00792 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ; 00793 m_segment_ids.set_const(0) ; 00794 m_segment_mask.set_const(0) ; 00795 } 00796 00797 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss) 00798 { 00799 int32_t m=segment_loss.num_rows; 00800 int32_t n=segment_loss.num_cols; 00801 // here we need two matrices. Store it in one: 2N x N 00802 if (2*m!=n) 00803 SG_ERROR("segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) 00804 00805 if (m!=m_max_a_id+1) 00806 SG_ERROR("segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) 00807 00808 m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ; 00809 /*for (int32_t i=0; i<n; i++) 00810 for (int32_t j=0; j<n; j++) 00811 SG_DEBUG("loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) */ 00812 } 00813 00814 void CDynProg::best_path_set_segment_ids_mask( 00815 int32_t* segment_ids, float64_t* segment_mask, int32_t m) 00816 { 00817 00818 if (m!=m_observation_matrix.get_dim2()) 00819 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2()) 00820 int32_t max_id = 0; 00821 for (int32_t i=1;i<m;i++) 00822 max_id = CMath::max(max_id,segment_ids[i]); 00823 //SG_PRINT("max_id: %i, m:%i\n",max_id, m) 00824 m_segment_ids.set_array(segment_ids, m, true, true) ; 00825 m_segment_ids.set_array_name("m_segment_ids"); 00826 m_segment_mask.set_array(segment_mask, m, true, true) ; 00827 m_segment_mask.set_array_name("m_segment_mask"); 00828 00829 m_seg_loss_obj->set_segment_mask(&m_segment_mask); 00830 m_seg_loss_obj->set_segment_ids(&m_segment_ids); 00831 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len); 00832 } 00833 00834 SGVector<float64_t> CDynProg::get_scores() 00835 { 00836 SGVector<float64_t> scores(m_scores.get_dim1()); 00837 memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1())); 00838 00839 return scores; 00840 } 00841 00842 SGMatrix<int32_t> CDynProg::get_states() 00843 { 00844 SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2()); 00845 00846 int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() ); 00847 memcpy(states.matrix ,m_states.get_array(),sz); 00848 00849 return states; 00850 } 00851 00852 SGMatrix<int32_t> CDynProg::get_positions() 00853 { 00854 SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2()); 00855 00856 int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2()); 00857 memcpy(positions.matrix, m_positions.get_array(),sz); 00858 00859 return positions; 00860 } 00861 00862 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len) 00863 { 00864 ASSERT(scores && seq_len) 00865 00866 *seq_len=m_my_scores.get_dim1(); 00867 00868 int32_t sz = sizeof(float64_t)*(*seq_len); 00869 00870 *scores = SG_MALLOC(float64_t, *seq_len); 00871 ASSERT(*scores) 00872 00873 memcpy(*scores,m_my_scores.get_array(),sz); 00874 } 00875 00876 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len) 00877 { 00878 ASSERT(losses && seq_len) 00879 00880 *seq_len=m_my_losses.get_dim1(); 00881 00882 int32_t sz = sizeof(float64_t)*(*seq_len); 00883 00884 *losses = SG_MALLOC(float64_t, *seq_len); 00885 ASSERT(*losses) 00886 00887 memcpy(*losses,m_my_losses.get_array(),sz); 00888 } 00889 00891 00892 bool CDynProg::extend_orf( 00893 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, 00894 int32_t to) 00895 { 00896 #ifdef DYNPROG_TIMING_DETAIL 00897 MyTime.start() ; 00898 #endif 00899 00900 if (start<0) 00901 start=0 ; 00902 if (to<0) 00903 to=0 ; 00904 00905 int32_t orf_target = orf_to-orf_from ; 00906 if (orf_target<0) orf_target+=3 ; 00907 00908 int32_t pos ; 00909 if (last_pos==to) 00910 pos = to-orf_to-3 ; 00911 else 00912 pos=last_pos ; 00913 00914 if (pos<0) 00915 { 00916 #ifdef DYNPROG_TIMING_DETAIL 00917 MyTime.stop() ; 00918 orf_time += MyTime.time_diff_sec() ; 00919 #endif 00920 return true ; 00921 } 00922 00923 for (; pos>=start; pos-=3) 00924 if (m_genestr_stop[pos]) 00925 { 00926 #ifdef DYNPROG_TIMING_DETAIL 00927 MyTime.stop() ; 00928 orf_time += MyTime.time_diff_sec() ; 00929 #endif 00930 return false ; 00931 } 00932 00933 00934 last_pos = CMath::min(pos+3,to-orf_to-3) ; 00935 00936 #ifdef DYNPROG_TIMING_DETAIL 00937 MyTime.stop() ; 00938 orf_time += MyTime.time_diff_sec() ; 00939 #endif 00940 return true ; 00941 } 00942 00943 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf, 00944 int16_t nbest, bool with_loss, bool with_multiple_sequences) 00945 { 00946 00947 //FIXME we need checks here if all the fields are of right size 00948 //SG_PRINT("m_seq_len: %i\n", m_seq_len) 00949 //SG_PRINT("m_pos[0]: %i\n", m_pos[0]) 00950 //SG_PRINT("\n") 00951 00952 //FIXME these variables can go away when compute_nbest_paths uses them 00953 //instead of the local pointers below 00954 const float64_t* seq_array = m_observation_matrix.get_array(); 00955 m_scores.resize_array(nbest) ; 00956 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00957 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00958 00959 for (int32_t i=0; i<nbest; i++) 00960 { 00961 m_scores[i]=-1; 00962 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++) 00963 { 00964 m_states.element(i,j)=-1; 00965 m_positions.element(i,j)=-1; 00966 } 00967 } 00968 float64_t* prob_nbest=m_scores.get_array(); 00969 int32_t* my_state_seq=m_states.get_array(); 00970 int32_t* my_pos_seq=m_positions.get_array(); 00971 00972 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 00973 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 00974 //END FIXME 00975 00976 00977 #ifdef DYNPROG_TIMING 00978 segment_init_time = 0.0 ; 00979 segment_pos_time = 0.0 ; 00980 segment_extend_time = 0.0 ; 00981 segment_clean_time = 0.0 ; 00982 orf_time = 0.0 ; 00983 svm_init_time = 0.0 ; 00984 svm_pos_time = 0.0 ; 00985 svm_clean_time = 0.0 ; 00986 inner_loop_time = 0.0 ; 00987 content_svm_values_time = 0.0 ; 00988 content_plifs_time = 0.0 ; 00989 inner_loop_max_time = 0.0 ; 00990 long_transition_time = 0.0 ; 00991 00992 MyTime2.start() ; 00993 #endif 00994 00995 if (!m_svm_arrays_clean) 00996 { 00997 SG_ERROR("SVM arrays not clean") 00998 return ; 00999 } 01000 01001 #ifdef DYNPROG_DEBUG 01002 m_transition_matrix_a.set_array_name("transition_matrix"); 01003 m_transition_matrix_a.display_array(); 01004 m_mod_words.display_array() ; 01005 m_sign_words.display_array() ; 01006 m_string_words.display_array() ; 01007 //SG_PRINT("use_orf = %i\n", use_orf) 01008 #endif 01009 01010 int32_t max_look_back = 1000 ; 01011 bool use_svm = false ; 01012 01013 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) 01014 01015 //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++) 01016 // SG_PRINT("(%i)%0.2f ",i,seq_array[i]) 01017 01018 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase* 01019 PEN.set_array_name("PEN"); 01020 01021 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase* 01022 PEN_state_signals.set_array_name("state_signals"); 01023 01024 CDynamicArray<float64_t> seq(m_N, m_seq_len) ; // 2d 01025 seq.set_array_name("seq") ; 01026 seq.set_const(0) ; 01027 01028 #ifdef DYNPROG_DEBUG 01029 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data) 01030 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs) 01031 SG_PRINT("m_num_svms: %i\n", m_num_svms) 01032 SG_PRINT("m_num_lin_feat_plifs_cum: ") 01033 for (int i=0; i<=m_num_raw_data; i++) 01034 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]) 01035 SG_PRINT("\n") 01036 #endif 01037 01038 float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01039 { // initialize svm_svalue 01040 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01041 svm_value[s]=0 ; 01042 } 01043 01044 { // convert seq_input to seq 01045 // this is independent of the svm values 01046 01047 CDynamicArray<float64_t> *seq_input=NULL ; // 3d 01048 if (seq_array!=NULL) 01049 { 01050 //SG_PRINT("using dense seq_array\n") 01051 01052 seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ; 01053 seq_input->set_array_name("seq_input") ; 01054 //seq_input.display_array() ; 01055 01056 ASSERT(m_seq_sparse1==NULL) 01057 ASSERT(m_seq_sparse2==NULL) 01058 } else 01059 { 01060 SG_PRINT("using sparse seq_array\n") 01061 01062 ASSERT(m_seq_sparse1!=NULL) 01063 ASSERT(m_seq_sparse2!=NULL) 01064 ASSERT(max_num_signals==2) 01065 } 01066 01067 for (int32_t i=0; i<m_N; i++) 01068 for (int32_t j=0; j<m_seq_len; j++) 01069 seq.element(i,j) = 0 ; 01070 01071 for (int32_t i=0; i<m_N; i++) 01072 for (int32_t j=0; j<m_seq_len; j++) 01073 for (int32_t k=0; k<max_num_signals; k++) 01074 { 01075 if ((PEN_state_signals.element(i,k)==NULL) && (k==0)) 01076 { 01077 // no plif 01078 if (seq_input!=NULL) 01079 seq.element(i,j) = seq_input->element(i,j,k) ; 01080 else 01081 { 01082 if (k==0) 01083 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ; 01084 if (k==1) 01085 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ; 01086 } 01087 break ; 01088 } 01089 if (PEN_state_signals.element(i,k)!=NULL) 01090 { 01091 if (seq_input!=NULL) 01092 { 01093 // just one plif 01094 if (CMath::is_finite(seq_input->element(i,j,k))) 01095 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ; 01096 else 01097 // keep infinity values 01098 seq.element(i,j) = seq_input->element(i, j, k) ; 01099 } 01100 else 01101 { 01102 if (k==0) 01103 { 01104 // just one plif 01105 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j))) 01106 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ; 01107 else 01108 // keep infinity values 01109 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ; 01110 } 01111 if (k==1) 01112 { 01113 // just one plif 01114 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j))) 01115 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ; 01116 else 01117 // keep infinity values 01118 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ; 01119 } 01120 } 01121 } 01122 else 01123 break ; 01124 } 01125 delete seq_input; 01126 SG_FREE(svm_value); 01127 } 01128 01129 // allow longer transitions than look_back 01130 bool long_transitions = m_long_transitions ; 01131 CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ; // 2d 01132 long_transition_content_start_position.set_array_name("long_transition_content_start_position"); 01133 #ifdef DYNPROG_DEBUG 01134 CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ; // 2d 01135 long_transition_content_end_position.set_array_name("long_transition_content_end_position"); 01136 #endif 01137 CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ; // 2d 01138 long_transition_content_start.set_array_name("long_transition_content_start"); 01139 01140 CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ; // 2d 01141 long_transition_content_scores.set_array_name("long_transition_content_scores"); 01142 #ifdef DYNPROG_DEBUG 01143 01144 CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ; // 2d 01145 long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen"); 01146 01147 CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ; // 2d 01148 long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev"); 01149 01150 CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ; // 2d 01151 long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem"); 01152 #endif 01153 CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ; // 2d 01154 long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss"); 01155 01156 if (nbest!=1) 01157 { 01158 SG_ERROR("Long transitions are not supported for nbest!=1") 01159 long_transitions = false ; 01160 } 01161 long_transition_content_scores.set_const(-CMath::INFTY); 01162 #ifdef DYNPROG_DEBUG 01163 long_transition_content_scores_pen.set_const(0) ; 01164 long_transition_content_scores_elem.set_const(0) ; 01165 long_transition_content_scores_prev.set_const(0) ; 01166 #endif 01167 if (with_loss) 01168 long_transition_content_scores_loss.set_const(0) ; 01169 long_transition_content_start.set_const(0) ; 01170 long_transition_content_start_position.set_const(0) ; 01171 #ifdef DYNPROG_DEBUG 01172 long_transition_content_end_position.set_const(0) ; 01173 #endif 01174 01175 svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01176 { // initialize svm_svalue 01177 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01178 svm_value[s]=0 ; 01179 } 01180 01181 CDynamicArray<int32_t> look_back(m_N,m_N) ; // 2d 01182 look_back.set_array_name("look_back"); 01183 //CDynamicArray<int32_t> look_back_orig(m_N,m_N) ; 01184 //look_back.set_array_name("look_back_orig"); 01185 01186 01187 { // determine maximal length of look-back 01188 for (int32_t i=0; i<m_N; i++) 01189 for (int32_t j=0; j<m_N; j++) 01190 { 01191 look_back.set_element(INT_MAX, i, j) ; 01192 //look_back_orig.set_element(INT_MAX, i, j) ; 01193 } 01194 01195 for (int32_t j=0; j<m_N; j++) 01196 { 01197 // only consider transitions that are actually allowed 01198 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01199 const T_STATES *elem_list = trans_list_forward[j] ; 01200 01201 for (int32_t i=0; i<num_elem; i++) 01202 { 01203 T_STATES ii = elem_list[i] ; 01204 01205 CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ; 01206 if (penij==NULL) 01207 { 01208 if (long_transitions) 01209 { 01210 look_back.set_element(m_long_transition_threshold, j, ii) ; 01211 //look_back_orig.set_element(m_long_transition_max, j, ii) ; 01212 } 01213 continue ; 01214 } 01215 01216 /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */ 01217 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions)) 01218 { 01219 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01220 //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01221 if (CMath::ceil(penij->get_max_value()) > max_look_back) 01222 { 01223 SG_DEBUG("%d %d -> value: %f\n", ii,j,penij->get_max_value()) 01224 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value())); 01225 } 01226 } 01227 else 01228 { 01229 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ; 01230 //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ; 01231 } 01232 01233 if (penij->uses_svm_values()) 01234 use_svm=true ; 01235 } 01236 } 01237 /* make sure max_look_back is at least as long as a long transition */ 01238 if (long_transitions) 01239 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ; 01240 01241 /* make sure max_look_back is not longer than the whole string */ 01242 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ; 01243 01244 int32_t num_long_transitions = 0 ; 01245 for (int32_t i=0; i<m_N; i++) 01246 for (int32_t j=0; j<m_N; j++) 01247 { 01248 if (look_back.get_element(i,j)==m_long_transition_threshold) 01249 num_long_transitions++ ; 01250 if (look_back.get_element(i,j)==INT_MAX) 01251 { 01252 if (long_transitions) 01253 { 01254 look_back.set_element(m_long_transition_threshold, i, j) ; 01255 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01256 } 01257 else 01258 { 01259 look_back.set_element(max_look_back, i, j) ; 01260 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01261 } 01262 } 01263 } 01264 SG_DEBUG("Using %i long transitions\n", num_long_transitions) 01265 } 01266 //SG_PRINT("max_look_back: %i \n", max_look_back) 01267 01268 //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) 01269 SG_DEBUG("use_svm=%i\n", use_svm) 01270 01271 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest) 01272 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ; 01273 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) 01274 /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+ 01275 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+ 01276 m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+ 01277 m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/ 01278 01279 //bool is_big = (mem_use>200) || (m_seq_len>5000) ; 01280 01281 /*if (is_big) 01282 { 01283 SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 01284 m_seq_len, m_N, max_look_back, nbest) ; 01285 SG_DEBUG("allocating %1.2fMB of memory\n", 01286 mem_use) ; 01287 }*/ 01288 ASSERT(nbest<32000) 01289 01290 CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ; // 3d 01291 delta.set_array_name("delta"); 01292 float64_t* delta_array = delta.get_array() ; 01293 //delta.set_const(0) ; 01294 01295 CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ; // 3d 01296 psi.set_array_name("psi"); 01297 //psi.set_const(0) ; 01298 01299 CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ; // 3d 01300 ktable.set_array_name("ktable"); 01301 //ktable.set_const(0) ; 01302 01303 CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ; // 3d 01304 ptable.set_array_name("ptable"); 01305 //ptable.set_const(0) ; 01306 01307 CDynamicArray<float64_t> delta_end(nbest) ; 01308 delta_end.set_array_name("delta_end"); 01309 //delta_end.set_const(0) ; 01310 01311 CDynamicArray<T_STATES> path_ends(nbest) ; 01312 path_ends.set_array_name("path_ends"); 01313 //path_ends.set_const(0) ; 01314 01315 CDynamicArray<int16_t> ktable_end(nbest) ; 01316 ktable_end.set_array_name("ktable_end"); 01317 //ktable_end.set_const(0) ; 01318 01319 float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen); 01320 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ; 01321 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen); 01322 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ; 01323 01324 CDynamicArray<float64_t> oldtempvv(look_back_buflen) ; 01325 oldtempvv.set_array_name("oldtempvv"); 01326 01327 CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ; 01328 oldtempvv2.set_array_name("oldtempvv2"); 01329 //oldtempvv.set_const(0) ; 01330 //oldtempvv.display_size() ; 01331 01332 CDynamicArray<int32_t> oldtempii(look_back_buflen) ; 01333 oldtempii.set_array_name("oldtempii"); 01334 01335 CDynamicArray<int32_t> oldtempii2(look_back_buflen) ; 01336 oldtempii2.set_array_name("oldtempii2"); 01337 //oldtempii.set_const(0) ; 01338 01339 CDynamicArray<T_STATES> state_seq(m_seq_len) ; 01340 state_seq.set_array_name("state_seq"); 01341 //state_seq.set_const(0) ; 01342 01343 CDynamicArray<int32_t> pos_seq(m_seq_len) ; 01344 pos_seq.set_array_name("pos_seq"); 01345 //pos_seq.set_const(0) ; 01346 01347 01348 m_dict_weights.set_array_name("dict_weights") ; 01349 m_word_degree.set_array_name("word_degree") ; 01350 m_cum_num_words.set_array_name("cum_num_words") ; 01351 m_num_words.set_array_name("num_words") ; 01352 //word_used.set_array_name("word_used") ; 01353 //svm_values_unnormalized.set_array_name("svm_values_unnormalized") ; 01354 //m_svm_pos_start.set_array_name("svm_pos_start") ; 01355 m_num_unique_words.set_array_name("num_unique_words") ; 01356 01357 PEN.set_array_name("PEN") ; 01358 seq.set_array_name("seq") ; 01359 01360 delta.set_array_name("delta") ; 01361 psi.set_array_name("psi") ; 01362 ktable.set_array_name("ktable") ; 01363 ptable.set_array_name("ptable") ; 01364 delta_end.set_array_name("delta_end") ; 01365 path_ends.set_array_name("path_ends") ; 01366 ktable_end.set_array_name("ktable_end") ; 01367 01368 #ifdef USE_TMP_ARRAYCLASS 01369 fixedtempvv.set_array_name("fixedtempvv") ; 01370 fixedtempii.set_array_name("fixedtempvv") ; 01371 #endif 01372 01373 oldtempvv.set_array_name("oldtempvv") ; 01374 oldtempvv2.set_array_name("oldtempvv2") ; 01375 oldtempii.set_array_name("oldtempii") ; 01376 oldtempii2.set_array_name("oldtempii2") ; 01377 01378 01380 01381 #ifdef DYNPROG_DEBUG 01382 state_seq.display_size() ; 01383 pos_seq.display_size() ; 01384 01385 m_dict_weights.display_size() ; 01386 m_word_degree.display_array() ; 01387 m_cum_num_words.display_array() ; 01388 m_num_words.display_array() ; 01389 //word_used.display_size() ; 01390 //svm_values_unnormalized.display_size() ; 01391 //m_svm_pos_start.display_array() ; 01392 m_num_unique_words.display_array() ; 01393 01394 PEN.display_size() ; 01395 PEN_state_signals.display_size() ; 01396 seq.display_size() ; 01397 m_orf_info.display_size() ; 01398 01399 //m_genestr_stop.display_size() ; 01400 delta.display_size() ; 01401 psi.display_size() ; 01402 ktable.display_size() ; 01403 ptable.display_size() ; 01404 delta_end.display_size() ; 01405 path_ends.display_size() ; 01406 ktable_end.display_size() ; 01407 01408 #ifdef USE_TMP_ARRAYCLASS 01409 fixedtempvv.display_size() ; 01410 fixedtempii.display_size() ; 01411 #endif 01412 01413 //oldtempvv.display_size() ; 01414 //oldtempii.display_size() ; 01415 01416 state_seq.display_size() ; 01417 pos_seq.display_size() ; 01418 01419 //seq.set_const(0) ; 01420 01421 #endif //DYNPROG_DEBUG 01422 01424 01425 01426 01427 { 01428 for (int32_t s=0; s<m_num_svms; s++) 01429 ASSERT(m_string_words_array[s]<1) 01430 } 01431 01432 01433 //CDynamicArray<int32_t*> trans_matrix_svms(m_N,m_N); // 2d 01434 //CDynamicArray<int32_t> trans_matrix_num_svms(m_N,m_N); // 2d 01435 01436 { // initialization 01437 01438 for (T_STATES i=0; i<m_N; i++) 01439 { 01440 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01441 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01442 psi.element(0,i,0) = 0 ; 01443 if (nbest>1) 01444 ktable.element(0,i,0) = 0 ; 01445 ptable.element(0,i,0) = 0 ; 01446 for (int16_t k=1; k<nbest; k++) 01447 { 01448 int32_t dim1, dim2, dim3 ; 01449 delta.get_array_size(dim1, dim2, dim3) ; 01450 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) 01451 //delta.element(0, i, k) = -CMath::INFTY ; 01452 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ; 01453 psi.element(0,i,0) = 0 ; // <--- what's this for? 01454 if (nbest>1) 01455 ktable.element(0,i,k) = 0 ; 01456 ptable.element(0,i,k) = 0 ; 01457 } 01458 /* 01459 for (T_STATES j=0; j<m_N; j++) 01460 { 01461 CPlifBase * penalty = PEN.element(i,j) ; 01462 int32_t num_current_svms=0; 01463 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 01464 if (penalty) 01465 { 01466 SG_PRINT("trans %i -> %i \n",i,j) 01467 penalty->get_used_svms(&num_current_svms, svm_ids); 01468 trans_matrix_svms.set_element(svm_ids,i,j); 01469 for (int32_t l=0;l<num_current_svms;l++) 01470 SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]) 01471 trans_matrix_num_svms.set_element(num_current_svms,i,j); 01472 } 01473 } 01474 */ 01475 01476 } 01477 } 01478 01479 SG_DEBUG("START_RECURSION \n\n") 01480 01481 // recursion 01482 for (int32_t t=1; t<m_seq_len; t++) 01483 { 01484 //if (is_big && t%(1+(m_seq_len/1000))==1) 01485 // SG_PROGRESS(t, 0, m_seq_len) 01486 //SG_PRINT("%i\n", t) 01487 01488 for (T_STATES j=0; j<m_N; j++) 01489 { 01490 if (seq.element(j,t)<=-1e20) 01491 { // if we cannot observe the symbol here, then we can omit the rest 01492 for (int16_t k=0; k<nbest; k++) 01493 { 01494 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ; 01495 psi.element(t,j,k) = 0 ; 01496 if (nbest>1) 01497 ktable.element(t,j,k) = 0 ; 01498 ptable.element(t,j,k) = 0 ; 01499 } 01500 } 01501 else 01502 { 01503 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01504 const T_STATES *elem_list = trans_list_forward[j] ; 01505 const float64_t *elem_val = trans_list_forward_val[j] ; 01506 const int32_t *elem_id = trans_list_forward_id[j] ; 01507 01508 int32_t fixed_list_len = 0 ; 01509 float64_t fixedtempvv_ = CMath::INFTY ; 01510 int32_t fixedtempii_ = 0 ; 01511 bool fixedtemplong = false ; 01512 01513 for (int32_t i=0; i<num_elem; i++) 01514 { 01515 T_STATES ii = elem_list[i] ; 01516 01517 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ; 01518 01519 /*int32_t look_back = max_look_back ; 01520 if (0) 01521 { // find lookback length 01522 CPlifBase *pen = (CPlifBase*) penalty ; 01523 if (pen!=NULL) 01524 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01525 if (look_back>=1e6) 01526 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) 01527 ASSERT(look_back<1e6) 01528 } */ 01529 01530 int32_t look_back_ = look_back.element(j, ii) ; 01531 01532 int32_t orf_from = m_orf_info.element(ii,0) ; 01533 int32_t orf_to = m_orf_info.element(j,1) ; 01534 if((orf_from!=-1)!=(orf_to!=-1)) 01535 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) 01536 ASSERT((orf_from!=-1)==(orf_to!=-1)) 01537 01538 int32_t orf_target = -1 ; 01539 if (orf_from!=-1) 01540 { 01541 orf_target=orf_to-orf_from ; 01542 if (orf_target<0) 01543 orf_target+=3 ; 01544 ASSERT(orf_target>=0 && orf_target<3) 01545 } 01546 01547 int32_t orf_last_pos = m_pos[t] ; 01548 #ifdef DYNPROG_TIMING 01549 MyTime3.start() ; 01550 #endif 01551 int32_t num_ok_pos = 0 ; 01552 01553 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--) 01554 { 01555 bool ok ; 01556 //int32_t plen=t-ts; 01557 01558 /*for (int32_t s=0; s<m_num_svms; s++) 01559 if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) || 01560 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6)) 01561 { 01562 SG_DEBUG("s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]) 01563 }*/ 01564 01565 if (orf_target==-1) 01566 ok=true ; 01567 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target) 01568 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ; 01569 else 01570 ok=false ; 01571 01572 if (ok) 01573 { 01574 01575 float64_t segment_loss = 0.0 ; 01576 if (with_loss) 01577 { 01578 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]); 01579 //if (segment_loss!=segment_loss2) 01580 //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2) 01581 } 01583 // BEST_PATH_TRANS 01585 01586 int32_t frame = orf_from;//m_orf_info.element(ii,0); 01587 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01588 01589 float64_t pen_val = 0.0 ; 01590 if (penalty) 01591 { 01592 #ifdef DYNPROG_TIMING_DETAIL 01593 MyTime.start() ; 01594 #endif 01595 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01596 01597 #ifdef DYNPROG_TIMING_DETAIL 01598 MyTime.stop() ; 01599 content_plifs_time += MyTime.time_diff_sec() ; 01600 #endif 01601 } 01602 01603 #ifdef DYNPROG_TIMING_DETAIL 01604 MyTime.start() ; 01605 #endif 01606 num_ok_pos++ ; 01607 01608 if (nbest==1) 01609 { 01610 float64_t val = elem_val[i] + pen_val ; 01611 if (with_loss) 01612 val += segment_loss ; 01613 01614 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ; 01615 01616 if (mval<fixedtempvv_) 01617 { 01618 fixedtempvv_ = mval ; 01619 fixedtempii_ = ii + ts*m_N; 01620 fixed_list_len = 1 ; 01621 fixedtemplong = false ; 01622 } 01623 } 01624 else 01625 { 01626 for (int16_t diff=0; diff<nbest; diff++) 01627 { 01628 float64_t val = elem_val[i] ; 01629 val += pen_val ; 01630 if (with_loss) 01631 val += segment_loss ; 01632 01633 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ; 01634 01635 /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */ 01636 /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/ 01637 /* fixed_list_len has the number of elements in fixedtempvv */ 01638 01639 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1]))) 01640 { 01641 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) ) 01642 { 01643 fixedtempvv[fixed_list_len] = mval ; 01644 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest; 01645 fixed_list_len++ ; 01646 } 01647 else // must have mval < fixedtempvv[fixed_list_len-1] 01648 { 01649 int32_t addhere = fixed_list_len; 01650 while ((addhere > 0) && (mval < fixedtempvv[addhere-1])) 01651 addhere--; 01652 01653 // move everything from addhere+1 one forward 01654 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--) 01655 { 01656 fixedtempvv[jj] = fixedtempvv[jj-1]; 01657 fixedtempii[jj] = fixedtempii[jj-1]; 01658 } 01659 01660 fixedtempvv[addhere] = mval; 01661 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest; 01662 01663 if (fixed_list_len < nbest) 01664 fixed_list_len++; 01665 } 01666 } 01667 } 01668 } 01669 #ifdef DYNPROG_TIMING_DETAIL 01670 MyTime.stop() ; 01671 inner_loop_max_time += MyTime.time_diff_sec() ; 01672 #endif 01673 } 01674 } 01675 #ifdef DYNPROG_TIMING 01676 MyTime3.stop() ; 01677 inner_loop_time += MyTime3.time_diff_sec() ; 01678 #endif 01679 } 01680 for (int32_t i=0; i<num_elem; i++) 01681 { 01682 T_STATES ii = elem_list[i] ; 01683 01684 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ; 01685 01686 /*int32_t look_back = max_look_back ; 01687 if (0) 01688 { // find lookback length 01689 CPlifBase *pen = (CPlifBase*) penalty ; 01690 if (pen!=NULL) 01691 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01692 if (look_back>=1e6) 01693 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) 01694 ASSERT(look_back<1e6) 01695 } */ 01696 01697 int32_t look_back_ = look_back.element(j, ii) ; 01698 //int32_t look_back_orig_ = look_back_orig.element(j, ii) ; 01699 01700 int32_t orf_from = m_orf_info.element(ii,0) ; 01701 int32_t orf_to = m_orf_info.element(j,1) ; 01702 if((orf_from!=-1)!=(orf_to!=-1)) 01703 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) 01704 ASSERT((orf_from!=-1)==(orf_to!=-1)) 01705 01706 int32_t orf_target = -1 ; 01707 if (orf_from!=-1) 01708 { 01709 orf_target=orf_to-orf_from ; 01710 if (orf_target<0) 01711 orf_target+=3 ; 01712 ASSERT(orf_target>=0 && orf_target<3) 01713 } 01714 01715 //int32_t loss_last_pos = t ; 01716 //float64_t last_loss = 0.0 ; 01717 01718 #ifdef DYNPROG_TIMING 01719 MyTime3.start() ; 01720 #endif 01721 01722 /* long transition stuff */ 01723 /* only do this, if 01724 * this feature is enabled 01725 * this is not a transition with ORF restrictions 01726 * the loss is switched off 01727 * nbest=1 01728 */ 01729 #ifdef DYNPROG_TIMING 01730 MyTime3.start() ; 01731 #endif 01732 // long transitions, only when not considering ORFs 01733 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold ) 01734 { 01735 01736 // update table for 5' part of the long segment 01737 01738 int32_t start = long_transition_content_start.get_element(ii, j) ; 01739 int32_t end_5p_part = start ; 01740 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++) 01741 { 01742 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away 01743 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold) 01744 end_5p_part++ ; 01745 01746 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) 01747 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) 01748 01749 float64_t pen_val = 0.0; 01750 /* recompute penalty, if necessary */ 01751 if (penalty) 01752 { 01753 int32_t frame = m_orf_info.element(ii,0); 01754 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part 01755 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ; 01756 } 01757 01758 /*if (m_pos[start_5p_part]==1003) 01759 { 01760 SG_PRINT("Part1: %i - %i vs %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) 01761 SG_PRINT("Part1: ts=%i t=%i start_5p_part=%i m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) 01762 }*/ 01763 01764 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ; 01765 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check 01766 01767 float64_t segment_loss_part1=0.0 ; 01768 if (with_loss) 01769 { // this is the loss from the start of the long segment (5' part + middle section) 01770 01771 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure 01772 01773 mval_trans -= segment_loss_part1 ; 01774 } 01775 01776 01777 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/) 01778 { 01779 // this restricts the maximal length of segments, 01780 // but the current implementation is not valid since the 01781 // long transition is discarded without loocking if there 01782 // is a second best long transition in between 01783 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ; 01784 long_transition_content_start_position.set_element(0, ii, j) ; 01785 if (with_loss) 01786 long_transition_content_scores_loss.set_element(0.0, ii, j) ; 01787 #ifdef DYNPROG_DEBUG 01788 long_transition_content_scores_pen.set_element(0.0, ii, j) ; 01789 long_transition_content_scores_elem.set_element(0.0, ii, j) ; 01790 long_transition_content_scores_prev.set_element(0.0, ii, j) ; 01791 long_transition_content_end_position.set_element(0, ii, j) ; 01792 #endif 01793 } 01794 if (with_loss) 01795 { 01796 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ; 01797 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]); 01798 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ; 01799 long_transition_content_scores.set_element(score, ii, j) ; 01800 long_transition_content_scores_loss.set_element(new_loss, ii, j) ; 01801 #ifdef DYNPROG_DEBUG 01802 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01803 #endif 01804 01805 } 01806 if (-long_transition_content_scores.get_element(ii, j) > mval_trans ) 01807 { 01808 /* then the old long transition is either too far away or worse than the current one */ 01809 long_transition_content_scores.set_element(-mval_trans, ii, j) ; 01810 long_transition_content_start_position.set_element(start_5p_part, ii, j) ; 01811 if (with_loss) 01812 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ; 01813 #ifdef DYNPROG_DEBUG 01814 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ; 01815 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ; 01816 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ; 01817 /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) + 01818 long_transition_content_scores_elem.get_element(ii, j) + 01819 long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/ 01820 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01821 #endif 01822 } 01823 // 01824 // this sets the position where the search for better 5'parts is started the next time 01825 // whithout this the prediction takes ages 01826 // 01827 long_transition_content_start.set_element(start_5p_part, ii, j) ; 01828 } 01829 01830 // consider the 3' part at the end of the long segment: 01831 // * with length = m_long_transition_threshold 01832 // * content prediction and loss only for this part 01833 01834 // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 01835 // precompute: only depends on t 01836 int ts = t; 01837 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold) 01838 ts-- ; 01839 01840 if (ts>0) 01841 { 01842 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) 01843 01844 01845 /* only consider this transition, if the right position was found */ 01846 float pen_val_3p = 0.0 ; 01847 if (penalty) 01848 { 01849 int32_t frame = orf_from ; //m_orf_info.element(ii, 0); 01850 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01851 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01852 } 01853 01854 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ; 01855 01856 { 01857 #ifdef DYNPROG_DEBUG 01858 float64_t segment_loss_part2=0.0 ; 01859 float64_t segment_loss_part1=0.0 ; 01860 #endif 01861 float64_t segment_loss_total=0.0 ; 01862 01863 if (with_loss) 01864 { // this is the loss for the 3' end fragment of the segment 01865 // (the 5' end and the middle section loss is already contained in mval) 01866 01867 #ifdef DYNPROG_DEBUG 01868 // this is an alternative, which should be identical, if the loss is additive 01869 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]); 01870 //mval -= segment_loss_part2 ; 01871 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]); 01872 #endif 01873 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]); 01874 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ; 01875 } 01876 01877 #ifdef DYNPROG_DEBUG 01878 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561) 01879 { 01880 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n", 01881 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 01882 long_transition_content_scores.get_element(ii, j), 01883 long_transition_content_scores_pen.get_element(ii, j), 01884 long_transition_content_scores_prev.get_element(ii, j), 01885 long_transition_content_scores_elem.get_element(ii, j), 01886 long_transition_content_scores_loss.get_element(ii, j), 01887 m_pos[long_transition_content_start_position.get_element(ii,j)], 01888 m_pos[long_transition_content_end_position.get_element(ii,j)], 01889 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ; 01890 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] ) 01891 } 01892 01893 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3) 01894 { 01895 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n", 01896 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 01897 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)], 01898 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 01899 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 01900 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ; 01901 } 01902 #endif 01903 } 01904 01905 // prefer simpler version to guarantee optimality 01906 // 01907 // original: 01908 /* if ((mval < fixedtempvv_) && 01909 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */ 01910 if (mval < fixedtempvv_) 01911 { 01912 /* then the long transition is better than the short one => replace it */ 01913 int32_t fromtjk = fixedtempii_ ; 01914 /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 01915 m_pos[t], j, 01916 mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 01917 m_pos[long_transition_content_position.get_element(ii, j)], 01918 fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/ 01919 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) 01920 01921 fixedtempvv_ = mval ; 01922 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ; 01923 fixed_list_len = 1 ; 01924 fixedtemplong = true ; 01925 } 01926 } 01927 } 01928 } 01929 #ifdef DYNPROG_TIMING 01930 MyTime3.stop() ; 01931 long_transition_time += MyTime3.time_diff_sec() ; 01932 #endif 01933 01934 01935 int32_t numEnt = fixed_list_len; 01936 01937 float64_t minusscore; 01938 int64_t fromtjk; 01939 01940 for (int16_t k=0; k<nbest; k++) 01941 { 01942 if (k<numEnt) 01943 { 01944 if (nbest==1) 01945 { 01946 minusscore = fixedtempvv_ ; 01947 fromtjk = fixedtempii_ ; 01948 } 01949 else 01950 { 01951 minusscore = fixedtempvv[k]; 01952 fromtjk = fixedtempii[k]; 01953 } 01954 01955 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t); 01956 psi.element(t,j,k) = (fromtjk%m_N) ; 01957 if (nbest>1) 01958 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ; 01959 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ; 01960 } 01961 else 01962 { 01963 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ; 01964 psi.element(t,j,k) = 0 ; 01965 if (nbest>1) 01966 ktable.element(t,j,k) = 0 ; 01967 ptable.element(t,j,k) = 0 ; 01968 } 01969 } 01970 } 01971 } 01972 } 01973 { //termination 01974 int32_t list_len = 0 ; 01975 for (int16_t diff=0; diff<nbest; diff++) 01976 { 01977 for (T_STATES i=0; i<m_N; i++) 01978 { 01979 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ; 01980 oldtempii[list_len] = i + diff*m_N ; 01981 list_len++ ; 01982 } 01983 } 01984 01985 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ; 01986 01987 for (int16_t k=0; k<nbest; k++) 01988 { 01989 delta_end.element(k) = -oldtempvv[k] ; 01990 path_ends.element(k) = (oldtempii[k]%m_N) ; 01991 if (nbest>1) 01992 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ; 01993 } 01994 01995 01996 } 01997 01998 { //state sequence backtracking 01999 for (int16_t k=0; k<nbest; k++) 02000 { 02001 prob_nbest[k]= delta_end.element(k) ; 02002 02003 int32_t i = 0 ; 02004 state_seq[i] = path_ends.element(k) ; 02005 int16_t q = 0 ; 02006 if (nbest>1) 02007 q=ktable_end.element(k) ; 02008 pos_seq[i] = m_seq_len-1 ; 02009 02010 while (pos_seq[i]>0) 02011 { 02012 ASSERT(i+1<m_seq_len) 02013 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) 02014 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q); 02015 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ; 02016 if (nbest>1) 02017 q = ktable.element(pos_seq[i], state_seq[i], q) ; 02018 i++ ; 02019 } 02020 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) 02021 int32_t num_states = i+1 ; 02022 for (i=0; i<num_states;i++) 02023 { 02024 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ; 02025 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ; 02026 } 02027 if (num_states<m_seq_len) 02028 { 02029 my_state_seq[num_states+k*m_seq_len]=-1 ; 02030 my_pos_seq[num_states+k*m_seq_len]=-1 ; 02031 } 02032 } 02033 } 02034 02035 //if (is_big) 02036 // SG_PRINT("DONE. \n") 02037 02038 02039 #ifdef DYNPROG_TIMING 02040 MyTime2.stop() ; 02041 02042 //if (is_big) 02043 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) 02044 #endif 02045 02046 SG_FREE(fixedtempvv); 02047 SG_FREE(fixedtempii); 02048 } 02049 02050 02051 void CDynProg::best_path_trans_deriv( 02052 int32_t *my_state_seq, int32_t *my_pos_seq, 02053 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) 02054 { 02055 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 02056 m_end_state_distribution_q_deriv.resize_array(m_N) ; 02057 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 02058 //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ; 02059 //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ; 02060 m_my_scores.resize_array(my_seq_len); 02061 m_my_losses.resize_array(my_seq_len); 02062 float64_t* my_scores=m_my_scores.get_array(); 02063 float64_t* my_losses=m_my_losses.get_array(); 02064 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 02065 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 02066 02067 if (!m_svm_arrays_clean) 02068 { 02069 SG_ERROR("SVM arrays not clean") 02070 return ; 02071 } ; 02072 //SG_PRINT("genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) 02073 //m_mod_words.display() ; 02074 //m_sign_words.display() ; 02075 //m_string_words.display() ; 02076 02077 bool use_svm = false ; 02078 02079 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase* 02080 PEN.set_array_name("PEN"); 02081 02082 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase* 02083 PEN_state_signals.set_array_name("PEN_state_signals"); 02084 02085 CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 02086 seq_input.set_array_name("seq_input"); 02087 02088 { // determine whether to use svm outputs and clear derivatives 02089 for (int32_t i=0; i<m_N; i++) 02090 for (int32_t j=0; j<m_N; j++) 02091 { 02092 CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ; 02093 if (penij==NULL) 02094 continue ; 02095 02096 if (penij->uses_svm_values()) 02097 use_svm=true ; 02098 penij->penalty_clear_derivative() ; 02099 } 02100 for (int32_t i=0; i<m_N; i++) 02101 for (int32_t j=0; j<max_num_signals; j++) 02102 { 02103 CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ; 02104 if (penij==NULL) 02105 continue ; 02106 if (penij->uses_svm_values()) 02107 use_svm=true ; 02108 penij->penalty_clear_derivative() ; 02109 } 02110 } 02111 02112 { // set derivatives of p, q and a to zero 02113 02114 for (int32_t i=0; i<m_N; i++) 02115 { 02116 m_initial_state_distribution_p_deriv.element(i)=0 ; 02117 m_end_state_distribution_q_deriv.element(i)=0 ; 02118 for (int32_t j=0; j<m_N; j++) 02119 m_transition_matrix_a_deriv.element(i,j)=0 ; 02120 } 02121 } 02122 02123 { // clear score vector 02124 for (int32_t i=0; i<my_seq_len; i++) 02125 { 02126 my_scores[i]=0.0 ; 02127 my_losses[i]=0.0 ; 02128 } 02129 } 02130 02131 //int32_t total_len = 0 ; 02132 02133 //m_transition_matrix_a.display_array() ; 02134 //m_transition_matrix_a_id.display_array() ; 02135 02136 // compute derivatives for given path 02137 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02138 float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02139 float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02140 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02141 { 02142 svm_value[s]=0 ; 02143 svm_value_part1[s]=0 ; 02144 svm_value_part2[s]=0 ; 02145 } 02146 02147 //#ifdef DYNPROG_DEBUG 02148 float64_t total_score = 0.0 ; 02149 float64_t total_loss = 0.0 ; 02150 //#endif 02151 02152 ASSERT(my_state_seq[0]>=0) 02153 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ; 02154 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ; 02155 02156 ASSERT(my_state_seq[my_seq_len-1]>=0) 02157 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ; 02158 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]); 02159 02160 //#ifdef DYNPROG_DEBUG 02161 total_score += my_scores[0] + my_scores[my_seq_len-1] ; 02162 //#endif 02163 02164 SG_DEBUG("m_seq_len=%i\n", my_seq_len) 02165 for (int32_t i=0; i<my_seq_len-1; i++) 02166 { 02167 if (my_state_seq[i+1]==-1) 02168 break ; 02169 int32_t from_state = my_state_seq[i] ; 02170 int32_t to_state = my_state_seq[i+1] ; 02171 int32_t from_pos = my_pos_seq[i] ; 02172 int32_t to_pos = my_pos_seq[i+1] ; 02173 02174 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ; 02175 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id); 02176 02177 #ifdef DYNPROG_DEBUG 02178 02179 02180 if (i>0)// test if segment loss is additive 02181 { 02182 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id); 02183 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id); 02184 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id); 02185 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3) 02186 if (CMath::abs(loss1+loss2-loss3)>0) 02187 { 02188 SG_PRINT("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) 02189 } 02190 } 02191 io->set_loglevel(M_DEBUG) ; 02192 SG_DEBUG("%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) 02193 #endif 02194 // increase usage of this transition 02195 m_transition_matrix_a_deriv.element(from_state, to_state)++ ; 02196 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ; 02197 //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state)) 02198 #ifdef DYNPROG_DEBUG 02199 SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i]) 02200 #endif 02201 02202 /*int32_t last_svm_pos[m_num_degrees] ; 02203 for (int32_t qq=0; qq<m_num_degrees; qq++) 02204 last_svm_pos[qq]=-1 ;*/ 02205 02206 bool is_long_transition = false ; 02207 if (m_long_transitions) 02208 { 02209 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold) 02210 is_long_transition = true ; 02211 if (m_orf_info.element(from_state,0)!=-1) 02212 is_long_transition = false ; 02213 } 02214 02215 int32_t from_pos_thresh = from_pos ; 02216 int32_t to_pos_thresh = to_pos ; 02217 02218 if (use_svm) 02219 { 02220 if (is_long_transition) 02221 { 02222 02223 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // * 02224 from_pos_thresh++ ; 02225 ASSERT(from_pos_thresh<to_pos) 02226 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold) // * 02227 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold)// * 02228 02229 int32_t frame = m_orf_info.element(from_state,0); 02230 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame); 02231 02232 #ifdef DYNPROG_DEBUG 02233 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) 02234 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02235 SG_PRINT("%1.4f ", svm_value_part1[s]) 02236 SG_PRINT("\n") 02237 #endif 02238 02239 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // * 02240 to_pos_thresh-- ; 02241 ASSERT(to_pos_thresh>0) 02242 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) // * 02243 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) // * 02244 02245 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame); 02246 02247 #ifdef DYNPROG_DEBUG 02248 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) 02249 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02250 SG_PRINT("%1.4f ", svm_value_part2[s]) 02251 SG_PRINT("\n") 02252 #endif 02253 } 02254 else 02255 { 02256 /* normal case */ 02257 02258 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]) 02259 int32_t frame = m_orf_info.element(from_state,0); 02260 if (false)//(frame>=0) 02261 { 02262 int32_t num_current_svms=0; 02263 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02264 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame) 02265 ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids); 02266 SG_PRINT("\n") 02267 } 02268 02269 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame); 02270 #ifdef DYNPROG_DEBUG 02271 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) 02272 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02273 SG_PRINT("%1.4f ", svm_value[s]) 02274 SG_PRINT("\n") 02275 #endif 02276 } 02277 } 02278 02279 if (PEN.element(to_state, from_state)!=NULL) 02280 { 02281 float64_t nscore = 0 ; 02282 if (is_long_transition) 02283 { 02284 float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ; 02285 float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ; 02286 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ; 02287 } 02288 else 02289 nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ; 02290 02291 if (false)//(nscore<-1e9) 02292 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n", 02293 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ; 02294 02295 my_scores[i] += nscore ; 02296 02297 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/ 02298 { 02299 svm_value[s]=-CMath::INFTY; 02300 svm_value_part1[s]=-CMath::INFTY; 02301 svm_value_part2[s]=-CMath::INFTY; 02302 } 02303 02304 #ifdef DYNPROG_DEBUG 02305 //SG_DEBUG("%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) 02306 #endif 02307 if (is_long_transition) 02308 { 02309 #ifdef DYNPROG_DEBUG 02310 float64_t sum_score = 0.0 ; 02311 02312 for (int kk=0; kk<i; kk++) 02313 sum_score += my_scores[i] ; 02314 02315 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n", 02316 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 02317 nscore, sum_score, 02318 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 02319 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ; 02320 #endif 02321 } 02322 02323 if (is_long_transition) 02324 { 02325 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ; 02326 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ; 02327 } 02328 else 02329 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ; 02330 02331 //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) 02332 02333 // for tiling array and rna-seq data every single measurement must be added to the derivative 02334 // in contrast to the content svm predictions where we have a single value per transition; 02335 // content svm predictions have already been added to the derivative, thus we start with d=1 02336 // instead of d=0 02337 if (is_long_transition) 02338 { 02339 for (int32_t d=1; d<=m_num_raw_data; d++) 02340 { 02341 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02342 svm_value[s]=-CMath::INFTY; 02343 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02344 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d); 02345 for (int32_t k=0;k<num_intensities;k++) 02346 { 02347 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02348 svm_value[j]=intensities[k]; 02349 02350 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02351 02352 } 02353 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d); 02354 for (int32_t k=0;k<num_intensities;k++) 02355 { 02356 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02357 svm_value[j]=intensities[k]; 02358 02359 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02360 02361 } 02362 SG_FREE(intensities); 02363 02364 } 02365 } 02366 else 02367 { 02368 for (int32_t d=1; d<=m_num_raw_data; d++) 02369 { 02370 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02371 svm_value[s]=-CMath::INFTY; 02372 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02373 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d); 02374 //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities) 02375 for (int32_t k=0;k<num_intensities;k++) 02376 { 02377 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02378 svm_value[j]=intensities[k]; 02379 02380 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ; 02381 02382 } 02383 SG_FREE(intensities); 02384 } 02385 } 02386 02387 } 02388 #ifdef DYNPROG_DEBUG 02389 SG_DEBUG("%i. scores[i]=%f\n", i, my_scores[i]) 02390 #endif 02391 02392 //SG_DEBUG("emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) 02393 for (int32_t k=0; k<max_num_signals; k++) 02394 { 02395 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0)) 02396 { 02397 #ifdef DYNPROG_DEBUG 02398 SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) 02399 #endif 02400 my_scores[i] += seq_input.element(to_state, to_pos, k) ; 02401 //if (seq_input.element(to_state, to_pos, k) !=0) 02402 // SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k)) 02403 break ; 02404 } 02405 if (PEN_state_signals.element(to_state, k)!=NULL) 02406 { 02407 float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter) 02408 my_scores[i] += nscore ; 02409 #ifdef DYNPROG_DEBUG 02410 if (false)//(nscore<-1e9) 02411 { 02412 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n", 02413 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ; 02414 for (int x=0; x<23; x++) 02415 { 02416 for (int i=-10; i<10; i++) 02417 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k)) 02418 SG_PRINT("\n") 02419 } 02420 02421 } 02422 #endif 02423 //break ; 02424 //int32_t num_current_svms=0; 02425 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02426 //SG_PRINT("PEN_state_signals->id: ") 02427 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids); 02428 //SG_PRINT("\n") 02429 //if (nscore != 0) 02430 //SG_PRINT("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) 02431 #ifdef DYNPROG_DEBUG 02432 SG_DEBUG("%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) 02433 #endif 02434 ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter) 02435 } else 02436 break ; 02437 } 02438 02439 //#ifdef DYNPROG_DEBUG 02440 //SG_PRINT("scores[%i]=%f (final) \n", i, my_scores[i]) 02441 //SG_PRINT("losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) 02442 total_score += my_scores[i] ; 02443 total_loss += my_losses[i] ; 02444 //#endif 02445 } 02446 //#ifdef DYNPROG_DEBUG 02447 //SG_PRINT("total score = %f \n", total_score) 02448 //SG_PRINT("total loss = %f \n", total_loss) 02449 //#endif 02450 SG_FREE(svm_value); 02451 SG_FREE(svm_value_part1); 02452 SG_FREE(svm_value_part2); 02453 } 02454 02455 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type) 02456 { 02457 ASSERT(from_pos<to_pos) 02458 int32_t num_intensities = 0; 02459 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]]; 02460 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]]; 02461 int32_t last_pos; 02462 int32_t num = m_num_probes_cum[type-1]; 02463 while (*p_tiling_pos<to_pos) 02464 { 02465 if (*p_tiling_pos>=from_pos) 02466 { 02467 intensities[num_intensities] = *p_tiling_data; 02468 num_intensities++; 02469 } 02470 num++; 02471 if (num>=m_num_probes_cum[type]) 02472 break; 02473 last_pos = *p_tiling_pos; 02474 p_tiling_pos++; 02475 p_tiling_data++; 02476 ASSERT(last_pos<*p_tiling_pos) 02477 } 02478 return num_intensities; 02479 } 02480 02481 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame) 02482 { 02483 #ifdef DYNPROG_TIMING_DETAIL 02484 MyTime.start() ; 02485 #endif 02486 // ASSERT(from_state<to_state) 02487 // if (!(from_pos<to_pos)) 02488 // SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos) 02489 for (int32_t i=0;i<m_num_svms;i++) 02490 { 02491 float64_t to_val = m_lin_feat.get_element(i, to_state); 02492 float64_t from_val = m_lin_feat.get_element(i, from_state); 02493 svm_values[i] = (to_val-from_val)/(to_pos-from_pos); 02494 } 02495 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++) 02496 { 02497 float64_t to_val = m_lin_feat.get_element(i, to_state); 02498 float64_t from_val = m_lin_feat.get_element(i, from_state); 02499 svm_values[i] = to_val-from_val ; 02500 } 02501 if (m_intron_list) 02502 { 02503 int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs); 02504 m_intron_list->get_intron_support(support, from_state, to_state); 02505 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data]; 02506 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 02507 int32_t cnt = 0; 02508 for (int32_t i=intron_list_start; i<intron_list_end;i++) 02509 { 02510 svm_values[i] = (float64_t) (support[cnt]); 02511 cnt++; 02512 } 02513 //if (to_pos>3990 && to_pos<4010) 02514 // SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]) 02515 SG_FREE(support); 02516 } 02517 // find the correct row with precomputed frame predictions 02518 if (frame!=-1) 02519 { 02520 svm_values[frame_plifs[0]] = 1e10; 02521 svm_values[frame_plifs[1]] = 1e10; 02522 svm_values[frame_plifs[2]] = 1e10; 02523 int32_t global_frame = from_pos%3; 02524 int32_t row = ((global_frame+frame)%3)+4; 02525 float64_t to_val = m_lin_feat.get_element(row, to_state); 02526 float64_t from_val = m_lin_feat.get_element(row, from_state); 02527 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos); 02528 } 02529 #ifdef DYNPROG_TIMING_DETAIL 02530 MyTime.stop() ; 02531 content_svm_values_time += MyTime.time_diff_sec() ; 02532 #endif 02533 } 02534 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs) 02535 { 02536 m_intron_list = intron_list; 02537 m_num_intron_plifs = num_plifs; 02538 } 02539