SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2009 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _TRIE_H___ 00013 #define _TRIE_H___ 00014 00015 #include <string.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/base/DynArray.h> 00019 #include <shogun/mathematics/Math.h> 00020 #include <shogun/base/SGObject.h> 00021 00022 namespace shogun 00023 { 00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00025 00026 // sentinel is 0xFFFFFFFC or float -2 00027 #define NO_CHILD ((int32_t)-1073741824) 00028 00029 #define WEIGHTS_IN_TRIE 00030 //#define TRIE_CHECK_EVERYTHING 00031 00032 #ifdef TRIE_CHECK_EVERYTHING 00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x) 00034 #else 00035 #define TRIE_ASSERT_EVERYTHING(x) 00036 #endif 00037 00038 //#define TRIE_ASSERT(x) ASSERT(x) 00039 #define TRIE_ASSERT(x) 00040 00041 #define TRIE_TERMINAL_CHARACTER 7 00042 00044 struct ConsensusEntry 00045 { 00047 uint64_t string; 00049 float32_t score; 00051 int32_t bt; 00052 }; 00053 00055 struct POIMTrie 00056 { 00058 float64_t weight; 00059 #ifdef TRIE_CHECK_EVERYTHING 00060 00061 bool has_seq; 00063 bool has_floats; 00064 #endif 00065 union 00066 { 00068 float32_t child_weights[4]; 00070 int32_t children[4]; 00072 uint8_t seq[16] ; 00073 }; 00074 00076 float64_t S; 00078 float64_t L; 00080 float64_t R; 00081 }; 00082 00084 struct DNATrie 00085 { 00087 float64_t weight; 00088 #ifdef TRIE_CHECK_EVERYTHING 00089 00090 bool has_seq; 00092 bool has_floats; 00093 #endif 00094 union 00095 { 00097 float32_t child_weights[4]; 00099 int32_t children[4]; 00101 uint8_t seq[16] ; 00102 }; 00103 }; 00104 00106 struct TreeParseInfo { 00108 int32_t num_sym; 00110 int32_t num_feat; 00112 int32_t p; 00114 int32_t k; 00116 int32_t* nofsKmers; 00118 float64_t* margFactors; 00120 int32_t* x; 00122 int32_t* substrs; 00124 int32_t y0; 00126 float64_t* C_k; 00128 float64_t* L_k; 00130 float64_t* R_k; 00131 }; 00132 00133 #endif // DOXYGEN_SHOULD_SKIP_THIS 00134 00135 template <class Trie> class CTrie; 00136 00137 #define IGNORE_IN_CLASSLIST 00138 00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject 00157 { 00158 public: 00160 CTrie(); 00161 00168 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true); 00169 00171 CTrie(const CTrie & to_copy); 00172 virtual ~CTrie(); 00173 00175 const CTrie & operator=(const CTrie & to_copy); 00176 00184 bool compare_traverse( 00185 int32_t node, const CTrie & other, int32_t other_node); 00186 00192 bool compare(const CTrie & other); 00193 00200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const; 00201 00208 int32_t find_deepest_node( 00209 int32_t start_node, int32_t &deepest_node) const; 00210 00215 void display_node(int32_t node) const; 00216 00218 void destroy(); 00219 00224 void set_degree(int32_t d); 00225 00232 void create(int32_t len, bool p_use_compact_terminal_nodes=true); 00233 00239 void delete_trees(bool p_use_compact_terminal_nodes=true); 00240 00251 void add_to_trie( 00252 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha, 00253 float64_t *weights, bool degree_times_position_weights); 00254 00261 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth); 00262 00268 float64_t* compute_abs_weights(int32_t &len); 00269 00282 float64_t compute_by_tree_helper( 00283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00284 int32_t weight_pos, float64_t * weights, 00285 bool degree_times_position_weights) ; 00286 00301 void compute_by_tree_helper( 00302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 00303 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 00304 int32_t mkl_stepsize, float64_t * weights, 00305 bool degree_times_position_weights); 00306 00321 void compute_scoring_helper( 00322 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 00323 int32_t max_degree, int32_t num_feat, int32_t num_sym, 00324 int32_t sym_offset, int32_t offs, float64_t* result); 00325 00338 void add_example_to_tree_mismatch_recursion( 00339 int32_t tree, int32_t i, float64_t alpha, int32_t *vec, 00340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, 00341 int32_t max_mismatch, float64_t * weights); 00342 00352 void traverse( 00353 int32_t tree, const int32_t p, struct TreeParseInfo info, 00354 const int32_t depth, int32_t* const x, const int32_t k); 00355 00365 void count( 00366 const float64_t w, const int32_t depth, 00367 const struct TreeParseInfo info, const int32_t p, int32_t* x, 00368 const int32_t k); 00369 00376 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights); 00377 00386 float64_t get_cumulative_score( 00387 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights); 00388 00398 void fill_backtracking_table_recursion( 00399 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 00400 DynArray<ConsensusEntry>* table, float64_t* weights); 00401 00410 void fill_backtracking_table( 00411 int32_t pos, DynArray<ConsensusEntry>* prev, 00412 DynArray<ConsensusEntry>* cur, bool cumulative, 00413 float64_t* weights); 00414 00420 void POIMs_extract_W(float64_t* const* const W, const int32_t K); 00421 00426 void POIMs_precalc_SLR(const float64_t* const distrib); 00427 00437 void POIMs_get_SLR( 00438 const int32_t parentIdx, const int32_t sym, const int32_t depth, 00439 float64_t* S, float64_t* L, float64_t* R); 00440 00447 void POIMs_add_SLR( 00448 float64_t* const* const poims, const int32_t K, 00449 const int32_t debug); 00450 00455 inline bool get_use_compact_terminal_nodes() 00456 { 00457 return use_compact_terminal_nodes ; 00458 } 00459 00465 inline void set_use_compact_terminal_nodes( 00466 bool p_use_compact_terminal_nodes) 00467 { 00468 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 00469 } 00470 00475 inline int32_t get_num_used_nodes() 00476 { 00477 return TreeMemPtr; 00478 } 00479 00484 inline void set_position_weights(float64_t* p_position_weights) 00485 { 00486 position_weights=p_position_weights; 00487 } 00488 00493 inline int32_t get_node(bool last_node=false) 00494 { 00495 int32_t ret = TreeMemPtr++; 00496 check_treemem() ; 00497 00498 if (last_node) 00499 { 00500 for (int32_t q=0; q<4; q++) 00501 TreeMem[ret].child_weights[q]=0.0; 00502 } 00503 else 00504 { 00505 for (int32_t q=0; q<4; q++) 00506 TreeMem[ret].children[q]=NO_CHILD; 00507 } 00508 #ifdef TRIE_CHECK_EVERYTHING 00509 TreeMem[ret].has_seq=false ; 00510 TreeMem[ret].has_floats=false ; 00511 #endif 00512 TreeMem[ret].weight=0.0; 00513 return ret ; 00514 } 00515 00517 inline void check_treemem() 00518 { 00519 if (TreeMemPtr+10 < TreeMemPtrMax) 00520 return; 00521 SG_DEBUG("Extending TreeMem from %i to %i elements\n", 00522 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2)); 00523 int32_t old_sz=TreeMemPtrMax; 00524 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2); 00525 TreeMem = SG_REALLOC(Trie, TreeMem, old_sz, TreeMemPtrMax); 00526 } 00527 00532 inline void set_weights_in_tree(bool weights_in_tree_) 00533 { 00534 weights_in_tree = weights_in_tree_; 00535 } 00536 00541 inline bool get_weights_in_tree() 00542 { 00543 return weights_in_tree; 00544 } 00545 00555 void POIMs_extract_W_helper( 00556 const int32_t nodeIdx, const int32_t depth, const int32_t offset, 00557 const int32_t y0, float64_t* const* const W, const int32_t K); 00558 00571 void POIMs_calc_SLR_helper1( 00572 const float64_t* const distrib, const int32_t i, 00573 const int32_t nodeIdx, int32_t left_tries_idx[4], 00574 const int32_t depth, int32_t const lastSym, float64_t* S, 00575 float64_t* L, float64_t* R); 00576 00577 00588 void POIMs_calc_SLR_helper2( 00589 const float64_t* const distrib, const int32_t i, 00590 const int32_t nodeIdx, int32_t left_tries_idx[4], 00591 const int32_t depth, float64_t* S, float64_t* L, float64_t* R); 00592 00603 void POIMs_add_SLR_helper1( 00604 const int32_t nodeIdx, const int32_t depth,const int32_t i, 00605 const int32_t y0, float64_t* const* const poims, const int32_t K, 00606 const int32_t debug); 00607 00621 void POIMs_add_SLR_helper2( 00622 float64_t* const* const poims, const int32_t K, const int32_t k, 00623 const int32_t i, const int32_t y, const float64_t valW, 00624 const float64_t valS, const float64_t valL, const float64_t valR, 00625 const int32_t debug); 00626 00628 virtual const char* get_name() const { return "Trie"; } 00629 00630 public: 00632 int32_t NUM_SYMS; 00633 00634 protected: 00636 int32_t length; 00638 int32_t * trees; 00639 00641 int32_t degree; 00643 float64_t* position_weights; 00644 00646 Trie* TreeMem; 00648 int32_t TreeMemPtr; 00650 int32_t TreeMemPtrMax; 00652 bool use_compact_terminal_nodes; 00653 00655 bool weights_in_tree; 00656 00658 int32_t* nofsKmers; 00659 }; 00660 template <class Trie> 00661 CTrie<Trie>::CTrie() 00662 : CSGObject(), degree(0), position_weights(NULL), 00663 use_compact_terminal_nodes(false), 00664 weights_in_tree(true) 00665 { 00666 00667 TreeMemPtrMax=0; 00668 TreeMemPtr=0; 00669 TreeMem=NULL; 00670 00671 length=0; 00672 trees=NULL; 00673 00674 NUM_SYMS=4; 00675 } 00676 00677 template <class Trie> 00678 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes) 00679 : CSGObject(), degree(d), position_weights(NULL), 00680 use_compact_terminal_nodes(p_use_compact_terminal_nodes), 00681 weights_in_tree(true) 00682 { 00683 TreeMemPtrMax=1024*1024/sizeof(Trie); 00684 TreeMemPtr=0; 00685 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax); 00686 00687 length=0; 00688 trees=NULL; 00689 00690 NUM_SYMS=4; 00691 } 00692 00693 template <class Trie> 00694 CTrie<Trie>::CTrie(const CTrie & to_copy) 00695 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL), 00696 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes) 00697 { 00698 if (to_copy.position_weights!=NULL) 00699 { 00700 position_weights = to_copy.position_weights; 00701 /*SG_MALLOC(float64_t, to_copy.length); 00702 for (int32_t i=0; i<to_copy.length; i++) 00703 position_weights[i]=to_copy.position_weights[i]; */ 00704 } 00705 else 00706 position_weights=NULL; 00707 00708 TreeMemPtrMax=to_copy.TreeMemPtrMax; 00709 TreeMemPtr=to_copy.TreeMemPtr; 00710 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax); 00711 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)); 00712 00713 length=to_copy.length; 00714 trees=SG_MALLOC(int32_t, length); 00715 for (int32_t i=0; i<length; i++) 00716 trees[i]=to_copy.trees[i]; 00717 00718 NUM_SYMS=4; 00719 } 00720 00721 template <class Trie> 00722 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy) 00723 { 00724 degree=to_copy.degree ; 00725 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ; 00726 00727 SG_FREE(position_weights); 00728 position_weights=NULL ; 00729 if (to_copy.position_weights!=NULL) 00730 { 00731 position_weights=to_copy.position_weights ; 00732 /*position_weights = SG_MALLOC(float64_t, to_copy.length); 00733 for (int32_t i=0; i<to_copy.length; i++) 00734 position_weights[i]=to_copy.position_weights[i] ;*/ 00735 } 00736 else 00737 position_weights=NULL ; 00738 00739 TreeMemPtrMax=to_copy.TreeMemPtrMax ; 00740 TreeMemPtr=to_copy.TreeMemPtr ; 00741 SG_FREE(TreeMem) ; 00742 TreeMem = SG_MALLOC(Trie, TreeMemPtrMax); 00743 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ; 00744 00745 length = to_copy.length ; 00746 if (trees) 00747 SG_FREE(trees); 00748 trees=SG_MALLOC(int32_t, length); 00749 for (int32_t i=0; i<length; i++) 00750 trees[i]=to_copy.trees[i] ; 00751 00752 return *this ; 00753 } 00754 00755 template <class Trie> 00756 int32_t CTrie<Trie>::find_deepest_node( 00757 int32_t start_node, int32_t& deepest_node) const 00758 { 00759 #ifdef TRIE_CHECK_EVERYTHING 00760 int32_t ret=0 ; 00761 SG_DEBUG("start_node=%i\n", start_node) 00762 00763 if (start_node==NO_CHILD) 00764 { 00765 for (int32_t i=0; i<length; i++) 00766 { 00767 int32_t my_deepest_node ; 00768 int32_t depth=find_deepest_node(i, my_deepest_node) ; 00769 SG_DEBUG("start_node %i depth=%i\n", i, depth) 00770 if (depth>ret) 00771 { 00772 deepest_node=my_deepest_node ; 00773 ret=depth ; 00774 } 00775 } 00776 return ret ; 00777 } 00778 00779 if (TreeMem[start_node].has_seq) 00780 { 00781 for (int32_t q=0; q<16; q++) 00782 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER) 00783 ret++ ; 00784 deepest_node=start_node ; 00785 return ret ; 00786 } 00787 if (TreeMem[start_node].has_floats) 00788 { 00789 deepest_node=start_node ; 00790 return 1 ; 00791 } 00792 00793 for (int32_t q=0; q<4; q++) 00794 { 00795 int32_t my_deepest_node ; 00796 if (TreeMem[start_node].children[q]==NO_CHILD) 00797 continue ; 00798 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ; 00799 if (depth>ret) 00800 { 00801 deepest_node=my_deepest_node ; 00802 ret=depth ; 00803 } 00804 } 00805 return ret ; 00806 #else 00807 SG_ERROR("not implemented\n") 00808 return 0 ; 00809 #endif 00810 } 00811 00812 template <class Trie> 00813 int32_t CTrie<Trie>::compact_nodes( 00814 int32_t start_node, int32_t depth, float64_t * weights) 00815 { 00816 SG_ERROR("code buggy\n") 00817 00818 int32_t ret=0 ; 00819 00820 if (start_node==NO_CHILD) 00821 { 00822 for (int32_t i=0; i<length; i++) 00823 compact_nodes(i,1, weights) ; 00824 return 0 ; 00825 } 00826 if (start_node<0) 00827 return -1 ; 00828 00829 if (depth==degree-1) 00830 { 00831 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) 00832 int32_t num_used=0 ; 00833 for (int32_t q=0; q<4; q++) 00834 if (TreeMem[start_node].child_weights[q]!=0.0) 00835 num_used++ ; 00836 if (num_used>1) 00837 return -1 ; 00838 return 1 ; 00839 } 00840 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) 00841 00842 int32_t num_used = 0 ; 00843 int32_t q_used=-1 ; 00844 00845 for (int32_t q=0; q<4; q++) 00846 { 00847 if (TreeMem[start_node].children[q]==NO_CHILD) 00848 continue ; 00849 num_used++ ; 00850 q_used=q ; 00851 } 00852 if (num_used>1) 00853 { 00854 if (depth>=degree-2) 00855 return -1 ; 00856 for (int32_t q=0; q<4; q++) 00857 { 00858 if (TreeMem[start_node].children[q]==NO_CHILD) 00859 continue ; 00860 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ; 00861 if (num<=2) 00862 continue ; 00863 int32_t node=get_node() ; 00864 00865 int32_t last_node=TreeMem[start_node].children[q] ; 00866 if (weights_in_tree) 00867 { 00868 ASSERT(weights[depth]!=0.0) 00869 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ; 00870 } 00871 else 00872 TreeMem[node].weight=TreeMem[last_node].weight ; 00873 00874 #ifdef TRIE_CHECK_EVERYTHING 00875 TreeMem[node].has_seq=true ; 00876 #endif 00877 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 00878 for (int32_t n=0; n<num; n++) 00879 { 00880 ASSERT(depth+n+1<=degree-1) 00881 ASSERT(last_node!=NO_CHILD) 00882 if (depth+n+1==degree-1) 00883 { 00884 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) 00885 int32_t k ; 00886 for (k=0; k<4; k++) 00887 if (TreeMem[last_node].child_weights[k]!=0.0) 00888 break ; 00889 if (k==4) 00890 break ; 00891 TreeMem[node].seq[n]=k ; 00892 break ; 00893 } 00894 else 00895 { 00896 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) 00897 int32_t k ; 00898 for (k=0; k<4; k++) 00899 if (TreeMem[last_node].children[k]!=NO_CHILD) 00900 break ; 00901 if (k==4) 00902 break ; 00903 TreeMem[node].seq[n]=k ; 00904 last_node=TreeMem[last_node].children[k] ; 00905 } 00906 } 00907 TreeMem[start_node].children[q]=-node ; 00908 } 00909 return -1 ; 00910 } 00911 if (num_used==0) 00912 return 0 ; 00913 00914 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ; 00915 if (ret<0) 00916 return ret ; 00917 return ret+1 ; 00918 } 00919 00920 00921 template <class Trie> 00922 bool CTrie<Trie>::compare_traverse( 00923 int32_t node, const CTrie<Trie> & other, int32_t other_node) 00924 { 00925 SG_DEBUG("checking nodes %i and %i\n", node, other_node) 00926 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5) 00927 { 00928 SG_DEBUG("CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) 00929 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") 00930 display_node(node) ; 00931 SG_DEBUG("============================================================\n") 00932 other.display_node(other_node) ; 00933 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") 00934 return false ; 00935 } 00936 00937 #ifdef TRIE_CHECK_EVERYTHING 00938 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq) 00939 { 00940 SG_DEBUG("CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) 00941 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") 00942 display_node(node) ; 00943 SG_DEBUG("============================================================\n") 00944 other.display_node(other_node) ; 00945 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") 00946 return false ; 00947 } 00948 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats) 00949 { 00950 SG_DEBUG("CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) 00951 return false ; 00952 } 00953 if (other.TreeMem[other_node].has_floats) 00954 { 00955 for (int32_t q=0; q<4; q++) 00956 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5) 00957 { 00958 SG_DEBUG("CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) 00959 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") 00960 display_node(node) ; 00961 SG_DEBUG("============================================================\n") 00962 other.display_node(other_node) ; 00963 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") 00964 return false ; 00965 } 00966 } 00967 if (other.TreeMem[other_node].has_seq) 00968 { 00969 for (int32_t q=0; q<16; q++) 00970 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4))) 00971 { 00972 SG_DEBUG("CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) 00973 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") 00974 display_node(node) ; 00975 SG_DEBUG("============================================================\n") 00976 other.display_node(other_node) ; 00977 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") 00978 return false ; 00979 } 00980 } 00981 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats) 00982 { 00983 for (int32_t q=0; q<4; q++) 00984 { 00985 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD)) 00986 continue ; 00987 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD)) 00988 { 00989 SG_DEBUG("CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) 00990 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") 00991 display_node(node) ; 00992 SG_DEBUG("============================================================\n") 00993 other.display_node(other_node) ; 00994 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") 00995 return false ; 00996 } 00997 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q]))) 00998 return false ; 00999 } 01000 } 01001 #else 01002 SG_ERROR("not implemented\n") 01003 #endif 01004 01005 return true ; 01006 } 01007 01008 template <class Trie> 01009 bool CTrie<Trie>::compare(const CTrie<Trie> & other) 01010 { 01011 bool ret=true ; 01012 for (int32_t i=0; i<length; i++) 01013 if (!compare_traverse(trees[i], other, other.trees[i])) 01014 return false ; 01015 else 01016 SG_DEBUG("two tries at %i identical\n", i) 01017 01018 return ret ; 01019 } 01020 01021 template <class Trie> 01022 bool CTrie<Trie>::find_node( 01023 int32_t node, int32_t * trace, int32_t& trace_len) const 01024 { 01025 #ifdef TRIE_CHECK_EVERYTHING 01026 ASSERT(trace_len-1>=0) 01027 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax)) 01028 if (TreeMem[trace[trace_len-1]].has_seq) 01029 return false ; 01030 if (TreeMem[trace[trace_len-1]].has_floats) 01031 return false ; 01032 01033 for (int32_t q=0; q<4; q++) 01034 { 01035 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD) 01036 continue ; 01037 int32_t tl=trace_len+1 ; 01038 if (TreeMem[trace[trace_len-1]].children[q]>=0) 01039 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ; 01040 else 01041 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ; 01042 01043 if (trace[trace_len]==node) 01044 { 01045 trace_len=tl ; 01046 return true ; 01047 } 01048 if (find_node(node, trace, tl)) 01049 { 01050 trace_len=tl ; 01051 return true ; 01052 } 01053 } 01054 trace_len=0 ; 01055 return false ; 01056 #else 01057 SG_ERROR("not implemented\n") 01058 return false ; 01059 #endif 01060 } 01061 01062 template <class Trie> 01063 void CTrie<Trie>::display_node(int32_t node) const 01064 { 01065 #ifdef TRIE_CHECK_EVERYTHING 01066 int32_t * trace=SG_MALLOC(int32_t, 2*degree); 01067 int32_t trace_len=-1 ; 01068 bool found = false ; 01069 int32_t tree=-1 ; 01070 for (tree=0; tree<length; tree++) 01071 { 01072 trace[0]=trees[tree] ; 01073 trace_len=1 ; 01074 found=find_node(node, trace, trace_len) ; 01075 if (found) 01076 break ; 01077 } 01078 ASSERT(found) 01079 SG_PRINT("position %i trace: ", tree) 01080 01081 for (int32_t i=0; i<trace_len-1; i++) 01082 { 01083 int32_t branch=-1 ; 01084 for (int32_t q=0; q<4; q++) 01085 if (abs(TreeMem[trace[i]].children[q])==trace[i+1]) 01086 { 01087 branch=q; 01088 break ; 01089 } 01090 ASSERT(branch!=-1) 01091 char acgt[5]="ACGT" ; 01092 SG_PRINT("%c", acgt[branch]) 01093 } 01094 SG_PRINT("\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) 01095 if (TreeMem[node].has_floats) 01096 { 01097 for (int32_t q=0; q<4; q++) 01098 SG_PRINT("child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) 01099 } 01100 if (TreeMem[node].has_seq) 01101 { 01102 for (int32_t q=0; q<16; q++) 01103 SG_PRINT("seq[%i] = %i\n", q, TreeMem[node].seq[q]) 01104 } 01105 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats) 01106 { 01107 for (int32_t q=0; q<4; q++) 01108 { 01109 if (TreeMem[node].children[q]!=NO_CHILD) 01110 { 01111 SG_PRINT("children[%i] = %i -> \n", q, TreeMem[node].children[q]) 01112 display_node(abs(TreeMem[node].children[q])) ; 01113 } 01114 else 01115 SG_PRINT("children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) 01116 } 01117 01118 } 01119 01120 SG_FREE(trace); 01121 #else 01122 SG_ERROR("not implemented\n") 01123 #endif 01124 } 01125 01126 01127 template <class Trie> CTrie<Trie>::~CTrie() 01128 { 01129 destroy() ; 01130 01131 SG_FREE(TreeMem) ; 01132 } 01133 01134 template <class Trie> void CTrie<Trie>::destroy() 01135 { 01136 if (trees!=NULL) 01137 { 01138 delete_trees(); 01139 for (int32_t i=0; i<length; i++) 01140 trees[i] = NO_CHILD; 01141 SG_FREE(trees); 01142 01143 TreeMemPtr=0; 01144 length=0; 01145 trees=NULL; 01146 } 01147 } 01148 01149 template <class Trie> void CTrie<Trie>::set_degree(int32_t d) 01150 { 01151 delete_trees(get_use_compact_terminal_nodes()); 01152 degree=d; 01153 } 01154 01155 template <class Trie> void CTrie<Trie>::create( 01156 int32_t len, bool p_use_compact_terminal_nodes) 01157 { 01158 destroy(); 01159 01160 trees=SG_MALLOC(int32_t, len); 01161 TreeMemPtr=0 ; 01162 for (int32_t i=0; i<len; i++) 01163 trees[i]=get_node(degree==1); 01164 length = len ; 01165 01166 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01167 } 01168 01169 01170 template <class Trie> void CTrie<Trie>::delete_trees( 01171 bool p_use_compact_terminal_nodes) 01172 { 01173 if (trees==NULL) 01174 return; 01175 01176 TreeMemPtr=0 ; 01177 for (int32_t i=0; i<length; i++) 01178 trees[i]=get_node(degree==1); 01179 01180 use_compact_terminal_nodes=p_use_compact_terminal_nodes ; 01181 } 01182 01183 template <class Trie> 01184 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth) 01185 { 01186 float64_t ret=0 ; 01187 01188 if (tree==NO_CHILD) 01189 return 0 ; 01190 TRIE_ASSERT(tree>=0) 01191 01192 if (depth==degree-2) 01193 { 01194 ret+=(TreeMem[tree].weight) ; 01195 01196 for (int32_t k=0; k<4; k++) 01197 ret+=(TreeMem[tree].child_weights[k]) ; 01198 01199 return ret ; 01200 } 01201 01202 ret+=(TreeMem[tree].weight) ; 01203 01204 for (int32_t i=0; i<4; i++) 01205 if (TreeMem[tree].children[i]!=NO_CHILD) 01206 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ; 01207 01208 return ret ; 01209 } 01210 01211 01212 template <class Trie> 01213 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len) 01214 { 01215 float64_t * sum=SG_MALLOC(float64_t, length*4); 01216 for (int32_t i=0; i<length*4; i++) 01217 sum[i]=0 ; 01218 len=length ; 01219 01220 for (int32_t i=0; i<length; i++) 01221 { 01222 TRIE_ASSERT(trees[i]!=NO_CHILD) 01223 for (int32_t k=0; k<4; k++) 01224 { 01225 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ; 01226 } 01227 } 01228 01229 return sum ; 01230 } 01231 01232 template <class Trie> 01233 void CTrie<Trie>::add_example_to_tree_mismatch_recursion( 01234 int32_t tree, int32_t i, float64_t alpha, 01235 int32_t *vec, int32_t len_rem, 01236 int32_t degree_rec, int32_t mismatch_rec, 01237 int32_t max_mismatch, float64_t * weights) 01238 { 01239 if (tree==NO_CHILD) 01240 tree=trees[i] ; 01241 TRIE_ASSERT(tree!=NO_CHILD) 01242 01243 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree)) 01244 return ; 01245 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ; 01246 01247 int32_t subtree = NO_CHILD ; 01248 01249 if (degree_rec==degree-1) 01250 { 01251 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) 01252 if (weights_in_tree) 01253 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]; 01254 else 01255 if (weights[degree_rec]!=0.0) 01256 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01257 if (mismatch_rec+1<=max_mismatch) 01258 for (int32_t o=0; o<3; o++) 01259 { 01260 if (weights_in_tree) 01261 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01262 else 01263 if (weights[degree_rec]!=0.0) 01264 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01265 } 01266 return ; 01267 } 01268 else 01269 { 01270 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01271 if (TreeMem[tree].children[vec[0]]!=NO_CHILD) 01272 { 01273 subtree=TreeMem[tree].children[vec[0]] ; 01274 if (weights_in_tree) 01275 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]; 01276 else 01277 if (weights[degree_rec]!=0.0) 01278 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec]; 01279 } 01280 else 01281 { 01282 int32_t tmp = get_node(degree_rec==degree-2); 01283 ASSERT(tmp>=0) 01284 TreeMem[tree].children[vec[0]]=tmp ; 01285 subtree=tmp ; 01286 #ifdef TRIE_CHECK_EVERYTHING 01287 if (degree_rec==degree-2) 01288 TreeMem[subtree].has_floats=true ; 01289 #endif 01290 if (weights_in_tree) 01291 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ; 01292 else 01293 if (weights[degree_rec]!=0.0) 01294 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ; 01295 else 01296 TreeMem[subtree].weight = 0.0 ; 01297 } 01298 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01299 &vec[1], len_rem-1, 01300 degree_rec+1, mismatch_rec, max_mismatch, weights) ; 01301 01302 if (mismatch_rec+1<=max_mismatch) 01303 { 01304 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01305 for (int32_t o=0; o<3; o++) 01306 { 01307 int32_t ot = other[vec[0]][o] ; 01308 if (TreeMem[tree].children[ot]!=NO_CHILD) 01309 { 01310 subtree=TreeMem[tree].children[ot] ; 01311 if (weights_in_tree) 01312 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]; 01313 else 01314 if (weights[degree_rec]!=0.0) 01315 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec]; 01316 } 01317 else 01318 { 01319 int32_t tmp = get_node(degree_rec==degree-2); 01320 ASSERT(tmp>=0) 01321 TreeMem[tree].children[ot]=tmp ; 01322 subtree=tmp ; 01323 #ifdef TRIE_CHECK_EVERYTHING 01324 if (degree_rec==degree-2) 01325 TreeMem[subtree].has_floats=true ; 01326 #endif 01327 01328 if (weights_in_tree) 01329 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ; 01330 else 01331 if (weights[degree_rec]!=0.0) 01332 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ; 01333 else 01334 TreeMem[subtree].weight = 0.0 ; 01335 } 01336 01337 add_example_to_tree_mismatch_recursion(subtree, i, alpha, 01338 &vec[1], len_rem-1, 01339 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ; 01340 } 01341 } 01342 } 01343 } 01344 01345 template <class Trie> 01346 void CTrie<Trie>::compute_scoring_helper( 01347 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, 01348 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset, 01349 int32_t offs, float64_t* result) 01350 { 01351 if (i+j<num_feat) 01352 { 01353 float64_t decay=1.0; //no decay by default 01354 //if (j>d) 01355 // decay=pow(0.5,j); //marginalize out lower order matches 01356 01357 if (j<degree-1) 01358 { 01359 for (int32_t k=0; k<num_sym; k++) 01360 { 01361 if (TreeMem[tree].children[k]!=NO_CHILD) 01362 { 01363 int32_t child=TreeMem[tree].children[k]; 01364 //continue recursion if not yet at max_degree, else add to result 01365 if (d<max_degree-1) 01366 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01367 else 01368 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight; 01369 01371 if (d==0) 01372 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result); 01373 } 01374 } 01375 } 01376 else if (j==degree-1) 01377 { 01378 for (int32_t k=0; k<num_sym; k++) 01379 { 01380 //continue recursion if not yet at max_degree, else add to result 01381 if (d<max_degree-1 && i<num_feat-1) 01382 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result); 01383 else 01384 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k]; 01385 } 01386 } 01387 } 01388 } 01389 01390 template <class Trie> 01391 void CTrie<Trie>::traverse( 01392 int32_t tree, const int32_t p, struct TreeParseInfo info, 01393 const int32_t depth, int32_t* const x, const int32_t k) 01394 { 01395 const int32_t num_sym = info.num_sym; 01396 const int32_t y0 = info.y0; 01397 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] ); 01398 //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] ); 01399 //if( !( info.y0 == temp ) ) { 01400 // printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth ); 01401 //} 01402 //ASSERT( info.y0 == temp ) 01403 int32_t sym; 01404 ASSERT( depth < degree ) 01405 //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] ) 01406 if (depth<degree-1) 01407 { 01408 for( sym=0; sym<num_sym; ++sym ) { 01409 const int32_t childNum = TreeMem[tree].children[ sym ]; 01410 if( childNum != NO_CHILD ) { 01411 int32_t child = childNum ; 01412 x[depth] = sym; 01413 info.substrs[depth+1] = y0 + sym; 01414 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01415 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ) 01416 count( TreeMem[child].weight, depth, info, p, x, k ); 01417 traverse( child, p, info, depth+1, x, k ); 01418 x[depth] = -1; 01419 } 01420 } 01421 } 01422 else if( depth == degree-1 ) 01423 { 01424 for( sym=0; sym<num_sym; ++sym ) { 01425 const float64_t w = TreeMem[tree].child_weights[ sym ]; 01426 if( w != 0.0 ) { 01427 x[depth] = sym; 01428 info.substrs[depth+1] = y0 + sym; 01429 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym; 01430 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) ) 01431 count( w, depth, info, p, x, k ); 01432 x[depth] = -1; 01433 } 01434 } 01435 } 01436 //info.substrs[depth+1] = -1; 01437 //info.y0 = temp; 01438 } 01439 01440 template <class Trie> 01441 void CTrie<Trie>::count( 01442 const float64_t w, const int32_t depth, const struct TreeParseInfo info, 01443 const int32_t p, int32_t* x, const int32_t k) 01444 { 01445 ASSERT( fabs(w) < 1e10 ) 01446 ASSERT( x[depth] >= 0 ) 01447 ASSERT( x[depth+1] < 0 ) 01448 if ( depth < k ) { 01449 return; 01450 } 01451 //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) ) 01452 const int32_t nofKmers = info.nofsKmers[k]; 01453 const float64_t margWeight = w * info.margFactors[ depth-k ]; 01454 const int32_t m_a = depth - k + 1; 01455 const int32_t m_b = info.num_feat - p; 01456 const int32_t m = ( m_a < m_b ) ? m_a : m_b; 01457 // all proper k-substrings 01458 const int32_t offset0 = nofKmers * p; 01459 register int32_t i; 01460 register int32_t offset; 01461 offset = offset0; 01462 for( i = 0; i < m; ++i ) { 01463 const int32_t y = info.substrs[i+k+1]; 01464 info.C_k[ y + offset ] += margWeight; 01465 offset += nofKmers; 01466 } 01467 if( depth > k ) { 01468 // k-prefix 01469 const int32_t offsR = info.substrs[k+1] + offset0; 01470 info.R_k[offsR] += margWeight; 01471 // k-suffix 01472 if( p+depth-k < info.num_feat ) { 01473 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k); 01474 info.L_k[offsL] += margWeight; 01475 } 01476 } 01477 // # N.x = substring represented by N 01478 // # N.d = length of N.x 01479 // # N.s = starting position of N.x 01480 // # N.w = weight for feature represented by N 01481 // if( N.d >= k ) 01482 // margContrib = w / 4^(N.d-k) 01483 // for i = 1 to (N.d-k+1) 01484 // y = N.x[i:(i+k-1)] # overlapped k-mer 01485 // C_k[ N.s+i-1, y ] += margContrib 01486 // end; 01487 // if( N.d > k ) 01488 // L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib # j-suffix of N.x 01489 // R_k[ N.s, N.x[1:k] ] += margContrib # j-prefix of N.x 01490 // end; 01491 // end; 01492 } 01493 01494 template <class Trie> 01495 void CTrie<Trie>::add_to_trie( 01496 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha, 01497 float64_t *weights, bool degree_times_position_weights) 01498 { 01499 int32_t tree = trees[i] ; 01500 //ASSERT(seq_offset==0) 01501 01502 int32_t max_depth = 0 ; 01503 float64_t* weights_column ; 01504 if (degree_times_position_weights) 01505 weights_column = &weights[(i+seq_offset)*degree] ; 01506 else 01507 weights_column = weights ; 01508 01509 if (weights_in_tree) 01510 { 01511 for (int32_t j=0; (j<degree) && (i+j<length); j++) 01512 if (CMath::abs(weights_column[j]*alpha)>0) 01513 max_depth = j+1 ; 01514 } 01515 else 01516 // don't use the weights 01517 max_depth=degree ; 01518 01519 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++) 01520 { 01521 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) 01522 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD)) 01523 { 01524 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0) 01525 { 01526 // special treatment of the next nodes 01527 TRIE_ASSERT(j >= degree-16) 01528 // get the right element 01529 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01530 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01531 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ; 01532 01533 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) 01534 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) 01535 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) 01536 01537 // check whether the same string is stored 01538 int32_t mismatch_pos = -1 ; 01539 { 01540 int32_t k ; 01541 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++) 01542 { 01543 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) 01544 // ### 01545 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER)) 01546 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ; 01547 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) 01548 TRIE_ASSERT(k<16) 01549 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k]) 01550 { 01551 mismatch_pos=k ; 01552 break ; 01553 } 01554 } 01555 } 01556 01557 // what happens when the .seq sequence is longer than vec? should we branch??? 01558 01559 if (mismatch_pos==-1) 01560 // if so, then just increase the weight by alpha and stop 01561 TreeMem[node].weight+=alpha ; 01562 else 01563 // otherwise 01564 // 1. replace current node with new node 01565 // 2. create new nodes until mismatching positon 01566 // 2. add a branch with old string (old node) and the new string (new node) 01567 { 01568 // replace old node 01569 int32_t last_node=tree ; 01570 01571 // create new nodes until mismatch 01572 int32_t k ; 01573 for (k=0; k<mismatch_pos; k++) 01574 { 01575 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) 01576 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) 01577 01578 int32_t tmp=get_node(); 01579 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ; 01580 last_node=tmp ; 01581 if (weights_in_tree) 01582 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ; 01583 else 01584 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ; 01585 TRIE_ASSERT(j+k!=degree-1) 01586 } 01587 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER)) 01588 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ; 01589 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) 01590 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) 01591 01592 if (j+k==degree-1) 01593 { 01594 // init child weights with zero if after dropping out 01595 // of the k<mismatch_pos loop we are one level below degree 01596 // (keep this even after get_node() change!) 01597 for (int32_t q=0; q<4; q++) 01598 TreeMem[last_node].child_weights[q]=0.0 ; 01599 if (weights_in_tree) 01600 { 01601 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01602 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ; 01603 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ; 01604 } 01605 else 01606 { 01607 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01608 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ; 01609 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ; 01610 } 01611 01612 #ifdef TRIE_CHECK_EVERYTHING 01613 TreeMem[last_node].has_floats=true ; 01614 #endif 01615 } 01616 else 01617 { 01618 // the branch for the existing string 01619 if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER 01620 { 01621 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ; 01622 01623 // move string by mismatch_pos positions 01624 for (int32_t q=0; q<16; q++) 01625 { 01626 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length)) 01627 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ; 01628 else 01629 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ; 01630 } 01631 #ifdef TRIE_CHECK_EVERYTHING 01632 TreeMem[node].has_seq=true ; 01633 #endif 01634 } 01635 01636 // the new branch 01637 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) 01638 int32_t tmp = get_node() ; 01639 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ; 01640 last_node=tmp ; 01641 01642 TreeMem[last_node].weight = alpha ; 01643 #ifdef TRIE_CHECK_EVERYTHING 01644 TreeMem[last_node].has_seq = true ; 01645 #endif 01646 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01647 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++) 01648 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ; 01649 } 01650 } 01651 break ; 01652 } 01653 else 01654 { 01655 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ; 01656 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) 01657 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01658 if (weights_in_tree) 01659 TreeMem[tree].weight += alpha*weights_column[j]; 01660 else 01661 TreeMem[tree].weight += alpha ; 01662 } 01663 } 01664 else if (j==degree-1) 01665 { 01666 // special treatment of the last node 01667 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01668 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) 01669 if (weights_in_tree) 01670 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ; 01671 else 01672 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha; 01673 01674 break; 01675 } 01676 else 01677 { 01678 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ; 01679 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01680 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01681 01682 int32_t tmp = get_node((j==degree-2) && (!use_seq)); 01683 if (use_seq) 01684 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ; 01685 else 01686 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ; 01687 tree=tmp ; 01688 01689 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) 01690 #ifdef TRIE_CHECK_EVERYTHING 01691 TreeMem[tree].has_seq = use_seq ; 01692 #endif 01693 if (use_seq) 01694 { 01695 TreeMem[tree].weight = alpha ; 01696 // important to have the terminal characters (see ###) 01697 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ; 01698 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++) 01699 { 01700 TRIE_ASSERT(q<16) 01701 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ; 01702 } 01703 break ; 01704 } 01705 else 01706 { 01707 if (weights_in_tree) 01708 TreeMem[tree].weight = alpha*weights_column[j] ; 01709 else 01710 TreeMem[tree].weight = alpha ; 01711 #ifdef TRIE_CHECK_EVERYTHING 01712 if (j==degree-2) 01713 TreeMem[tree].has_floats = true ; 01714 #endif 01715 } 01716 } 01717 } 01718 } 01719 01720 template <class Trie> 01721 float64_t CTrie<Trie>::compute_by_tree_helper( 01722 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01723 int32_t weight_pos, float64_t* weights, 01724 bool degree_times_position_weights) 01725 { 01726 int32_t tree = trees[tree_pos] ; 01727 01728 if ((position_weights!=NULL) && (position_weights[weight_pos]==0)) 01729 return 0.0; 01730 01731 float64_t *weights_column=NULL ; 01732 if (degree_times_position_weights) 01733 weights_column=&weights[weight_pos*degree] ; 01734 else // weights is a vector (1 x degree) 01735 weights_column=weights ; 01736 01737 float64_t sum=0 ; 01738 for (int32_t j=0; seq_pos+j < len; j++) 01739 { 01740 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) 01741 01742 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01743 { 01744 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01745 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01746 { 01747 tree = - TreeMem[tree].children[vec[seq_pos+j]]; 01748 TRIE_ASSERT(tree>=0) 01749 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) 01750 float64_t this_weight=0.0 ; 01751 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01752 { 01753 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) 01754 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01755 break ; 01756 this_weight += weights_column[j+k] ; 01757 } 01758 sum += TreeMem[tree].weight * this_weight ; 01759 break ; 01760 } 01761 else 01762 { 01763 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01764 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01765 if (weights_in_tree) 01766 sum += TreeMem[tree].weight ; 01767 else 01768 sum += TreeMem[tree].weight * weights_column[j] ; 01769 } ; 01770 } 01771 else 01772 { 01773 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01774 if (j==degree-1) 01775 { 01776 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) 01777 if (weights_in_tree) 01778 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01779 else 01780 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ; 01781 } 01782 else 01783 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) 01784 01785 break; 01786 } 01787 } 01788 01789 if (position_weights!=NULL) 01790 return sum*position_weights[weight_pos] ; 01791 else 01792 return sum ; 01793 } 01794 01795 template <class Trie> 01796 void CTrie<Trie>::compute_by_tree_helper( 01797 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos, 01798 int32_t weight_pos, float64_t* LevelContrib, float64_t factor, 01799 int32_t mkl_stepsize, float64_t * weights, 01800 bool degree_times_position_weights) 01801 { 01802 int32_t tree = trees[tree_pos] ; 01803 if (factor==0) 01804 return ; 01805 01806 if (position_weights!=NULL) 01807 { 01808 factor *= position_weights[weight_pos] ; 01809 if (factor==0) 01810 return ; 01811 if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree) 01812 { 01813 for (int32_t j=0; seq_pos+j<len; j++) 01814 { 01815 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01816 { 01817 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01818 { 01819 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01820 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) 01821 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01822 { 01823 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01824 break ; 01825 if (weights_in_tree) 01826 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01827 else 01828 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01829 } 01830 break ; 01831 } 01832 else 01833 { 01834 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01835 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01836 if (weights_in_tree) 01837 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01838 else 01839 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01840 } 01841 } 01842 else 01843 { 01844 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01845 if (j==degree-1) 01846 { 01847 if (weights_in_tree) 01848 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01849 else 01850 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01851 } 01852 } 01853 } 01854 } 01855 else // with position_weigths, weights is a matrix (len x degree) 01856 { 01857 for (int32_t j=0; seq_pos+j<len; j++) 01858 { 01859 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01860 { 01861 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01862 { 01863 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01864 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) 01865 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01866 { 01867 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01868 break ; 01869 if (weights_in_tree) 01870 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01871 else 01872 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01873 } 01874 break ; 01875 } 01876 else 01877 { 01878 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01879 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01880 if (weights_in_tree) 01881 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ; 01882 else 01883 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ; 01884 } 01885 } 01886 else 01887 { 01888 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01889 if (j==degree-1) 01890 { 01891 if (weights_in_tree) 01892 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01893 else 01894 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ; 01895 } 01896 break ; 01897 } 01898 } 01899 } 01900 } 01901 else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree) 01902 { 01903 for (int32_t j=0; seq_pos+j<len; j++) 01904 { 01905 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01906 { 01907 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01908 { 01909 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01910 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) 01911 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01912 { 01913 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01914 break ; 01915 if (weights_in_tree) 01916 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01917 else 01918 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ; 01919 } 01920 break ; 01921 } 01922 else 01923 { 01924 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01925 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01926 if (weights_in_tree) 01927 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ; 01928 else 01929 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ; 01930 } 01931 } 01932 else 01933 { 01934 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01935 if (j==degree-1) 01936 { 01937 if (weights_in_tree) 01938 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ; 01939 else 01940 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ; 01941 } 01942 break ; 01943 } 01944 } 01945 } 01946 else // no position_weigths, weights is a matrix (len x degree) 01947 { 01948 /*if (!position_mask) 01949 { 01950 position_mask = SG_MALLOC(bool, len); 01951 for (int32_t i=0; i<len; i++) 01952 { 01953 position_mask[i]=false ; 01954 01955 for (int32_t j=0; j<degree; j++) 01956 if (weights[i*degree+j]!=0.0) 01957 { 01958 position_mask[i]=true ; 01959 break ; 01960 } 01961 } 01962 } 01963 if (position_mask[weight_pos]==0) 01964 return ;*/ 01965 01966 for (int32_t j=0; seq_pos+j<len; j++) 01967 { 01968 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD)) 01969 { 01970 if (TreeMem[tree].children[vec[seq_pos+j]]<0) 01971 { 01972 tree = -TreeMem[tree].children[vec[seq_pos+j]]; 01973 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) 01974 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++) 01975 { 01976 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k]) 01977 break ; 01978 if (weights_in_tree) 01979 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ; 01980 else 01981 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ; 01982 } 01983 break ; 01984 } 01985 else 01986 { 01987 tree=TreeMem[tree].children[vec[seq_pos+j]]; 01988 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01989 if (weights_in_tree) 01990 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ; 01991 else 01992 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ; 01993 } 01994 } 01995 else 01996 { 01997 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) 01998 if (j==degree-1) 01999 { 02000 if (weights_in_tree) 02001 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ; 02002 else 02003 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ; 02004 } 02005 break ; 02006 } 02007 } 02008 } 02009 } 02010 02011 template <class Trie> 02012 void CTrie<Trie>::fill_backtracking_table_recursion( 02013 Trie* tree, int32_t depth, uint64_t seq, float64_t value, 02014 DynArray<ConsensusEntry>* table, float64_t* weights) 02015 { 02016 float64_t w=1.0; 02017 02018 if (weights_in_tree || depth==0) 02019 value+=tree->weight; 02020 else 02021 { 02022 w=weights[degree-1]; 02023 value+=weights[depth-1]*tree->weight; 02024 } 02025 02026 if (degree-1==depth) 02027 { 02028 for (int32_t sym=0; sym<4; sym++) 02029 { 02030 float64_t v=w*tree->child_weights[sym]; 02031 if (v!=0.0) 02032 { 02033 ConsensusEntry entry; 02034 entry.bt=-1; 02035 entry.score=value+v; 02036 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02037 02038 table->append_element(entry); 02039 } 02040 } 02041 } 02042 else 02043 { 02044 for (int32_t sym=0; sym<4; sym++) 02045 { 02046 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1)); 02047 if (tree->children[sym] != NO_CHILD) 02048 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights); 02049 } 02050 } 02051 } 02052 02053 template <class Trie> 02054 float64_t CTrie<Trie>::get_cumulative_score( 02055 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights) 02056 { 02057 float64_t result=0.0; 02058 02059 //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq) 02060 02061 for (int32_t i=pos; i<pos+deg && i<length; i++) 02062 { 02063 //SG_PRINT("loop %d\n", i) 02064 Trie* tree = &TreeMem[trees[i]]; 02065 02066 for (int32_t d=0; d<deg-i+pos; d++) 02067 { 02068 //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos))) 02069 ASSERT(d-1<degree) 02070 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3); 02071 02072 float64_t w=1.0; 02073 if (!weights_in_tree) 02074 w=weights[d]; 02075 02076 ASSERT(tree->children[sym] != NO_CHILD) 02077 tree=&TreeMem[tree->children[sym]]; 02078 result+=w*tree->weight; 02079 } 02080 } 02081 //SG_PRINT("cum: %f\n", result) 02082 return result; 02083 } 02084 02085 template <class Trie> 02086 void CTrie<Trie>::fill_backtracking_table( 02087 int32_t pos, DynArray<ConsensusEntry>* prev, 02088 DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights) 02089 { 02090 ASSERT(pos>=0 && pos<length) 02091 ASSERT(!use_compact_terminal_nodes) 02092 02093 Trie* t = &TreeMem[trees[pos]]; 02094 02095 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights); 02096 02097 02098 if (cumulative) 02099 { 02100 int32_t num_cur=cur->get_num_elements(); 02101 for (int32_t i=0; i<num_cur; i++) 02102 { 02103 ConsensusEntry entry=cur->get_element(i); 02104 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights); 02105 cur->set_element(entry,i); 02106 //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt) 02107 } 02108 } 02109 02110 //if previous tree exists find maximum scoring path 02111 //for each element in cur and update bt table 02112 if (prev) 02113 { 02114 int32_t num_cur=cur->get_num_elements(); 02115 int32_t num_prev=prev->get_num_elements(); 02116 02117 for (int32_t i=0; i<num_cur; i++) 02118 { 02119 //uint64_t str_cur_old= cur->get_element(i).string; 02120 uint64_t str_cur= cur->get_element(i).string >> 2; 02121 //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur) 02122 02123 int32_t bt=-1; 02124 float64_t max_score=0.0; 02125 02126 for (int32_t j=0; j<num_prev; j++) 02127 { 02128 //uint64_t str_prev_old= prev->get_element(j).string; 02129 uint64_t mask= 02130 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1)))); 02131 uint64_t str_prev= mask & prev->get_element(j).string; 02132 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask) 02133 02134 if (str_cur == str_prev) 02135 { 02136 float64_t sc=prev->get_element(j).score+cur->get_element(i).score; 02137 if (bt==-1 || sc>max_score) 02138 { 02139 bt=j; 02140 max_score=sc; 02141 02142 //SG_PRINT("new_max[%i,%i] = %f\n", j,i, max_score) 02143 } 02144 } 02145 } 02146 02147 ASSERT(bt!=-1) 02148 ConsensusEntry entry; 02149 entry.bt=bt; 02150 entry.score=max_score; 02151 entry.string=cur->get_element(i).string; 02152 cur->set_element(entry, i); 02153 //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt) 02154 } 02155 } 02156 } 02157 } 02158 #endif // _TRIE_H___