SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Trie.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2009 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/mathematics/Math.h>
00020 #include <shogun/base/SGObject.h>
00021 
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 
00026 // sentinel is 0xFFFFFFFC or float -2
00027 #define NO_CHILD ((int32_t)-1073741824)
00028 
00029 #define WEIGHTS_IN_TRIE
00030 //#define TRIE_CHECK_EVERYTHING
00031 
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x)
00036 #endif
00037 
00038 //#define TRIE_ASSERT(x) ASSERT(x)
00039 #define TRIE_ASSERT(x)
00040 
00041 #define TRIE_TERMINAL_CHARACTER  7
00042 
00044 struct ConsensusEntry
00045 {
00047     uint64_t string;
00049     float32_t score;
00051     int32_t bt;
00052 };
00053 
00055 struct POIMTrie
00056 {
00058     float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060 
00061     bool has_seq;
00063     bool has_floats;
00064 #endif
00065     union
00066     {
00068         float32_t child_weights[4];
00070         int32_t children[4];
00072         uint8_t seq[16] ;
00073     };
00074 
00076     float64_t S;
00078     float64_t L;
00080     float64_t R;
00081 };
00082 
00084 struct DNATrie
00085 {
00087     float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089 
00090     bool has_seq;
00092     bool has_floats;
00093 #endif
00094     union
00095     {
00097         float32_t child_weights[4];
00099         int32_t children[4];
00101         uint8_t seq[16] ;
00102     };
00103 };
00104 
00106 struct TreeParseInfo {
00108     int32_t num_sym;
00110     int32_t num_feat;
00112     int32_t p;
00114     int32_t k;
00116     int32_t* nofsKmers;
00118     float64_t* margFactors;
00120     int32_t* x;
00122     int32_t* substrs;
00124     int32_t y0;
00126     float64_t* C_k;
00128     float64_t* L_k;
00130     float64_t* R_k;
00131 };
00132 
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134 
00135 template <class Trie> class CTrie;
00136 
00137 #define IGNORE_IN_CLASSLIST
00138 
00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject
00157 {
00158     public:
00160         CTrie();
00161 
00168         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00169 
00171         CTrie(const CTrie & to_copy);
00172         virtual ~CTrie();
00173 
00175         const CTrie & operator=(const CTrie & to_copy);
00176 
00184         bool compare_traverse(
00185             int32_t node, const CTrie & other, int32_t other_node);
00186 
00192         bool compare(const CTrie & other);
00193 
00200         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00201 
00208         int32_t find_deepest_node(
00209             int32_t start_node, int32_t &deepest_node) const;
00210 
00215         void display_node(int32_t node) const;
00216 
00218         void destroy();
00219 
00224         void set_degree(int32_t d);
00225 
00232         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00233 
00239         void delete_trees(bool p_use_compact_terminal_nodes=true);
00240 
00251         void add_to_trie(
00252             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00253             float64_t *weights, bool degree_times_position_weights);
00254 
00261         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00262 
00268         float64_t* compute_abs_weights(int32_t &len);
00269 
00282         float64_t compute_by_tree_helper(
00283             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00284             int32_t weight_pos, float64_t * weights,
00285             bool degree_times_position_weights) ;
00286 
00301         void compute_by_tree_helper(
00302             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00303             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00304             int32_t mkl_stepsize, float64_t * weights,
00305             bool degree_times_position_weights);
00306 
00321         void compute_scoring_helper(
00322             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00323             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00324             int32_t sym_offset, int32_t offs, float64_t* result);
00325 
00338         void add_example_to_tree_mismatch_recursion(
00339             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00340             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00341             int32_t max_mismatch, float64_t * weights);
00342 
00352         void traverse(
00353             int32_t tree, const int32_t p, struct TreeParseInfo info,
00354             const int32_t depth, int32_t* const x, const int32_t k);
00355 
00365         void count(
00366             const float64_t w, const int32_t depth,
00367             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00368             const int32_t k);
00369 
00376         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00377 
00386         float64_t get_cumulative_score(
00387             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00388 
00398         void fill_backtracking_table_recursion(
00399             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00400             DynArray<ConsensusEntry>* table, float64_t* weights);
00401 
00410         void fill_backtracking_table(
00411             int32_t pos, DynArray<ConsensusEntry>* prev,
00412             DynArray<ConsensusEntry>* cur, bool cumulative,
00413             float64_t* weights);
00414 
00420         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00421 
00426         void POIMs_precalc_SLR(const float64_t* const distrib);
00427 
00437         void POIMs_get_SLR(
00438             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00439             float64_t* S, float64_t* L, float64_t* R);
00440 
00447         void POIMs_add_SLR(
00448             float64_t* const* const poims, const int32_t K,
00449             const int32_t debug);
00450 
00455         inline bool get_use_compact_terminal_nodes()
00456         {
00457             return use_compact_terminal_nodes ;
00458         }
00459 
00465         inline void set_use_compact_terminal_nodes(
00466             bool p_use_compact_terminal_nodes)
00467         {
00468             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00469         }
00470 
00475         inline int32_t get_num_used_nodes()
00476         {
00477             return TreeMemPtr;
00478         }
00479 
00484         inline void set_position_weights(float64_t* p_position_weights)
00485         {
00486             position_weights=p_position_weights;
00487         }
00488 
00493         inline int32_t get_node(bool last_node=false)
00494         {
00495             int32_t ret = TreeMemPtr++;
00496             check_treemem() ;
00497 
00498             if (last_node)
00499             {
00500                 for (int32_t q=0; q<4; q++)
00501                     TreeMem[ret].child_weights[q]=0.0;
00502             }
00503             else
00504             {
00505                 for (int32_t q=0; q<4; q++)
00506                     TreeMem[ret].children[q]=NO_CHILD;
00507             }
00508 #ifdef TRIE_CHECK_EVERYTHING
00509             TreeMem[ret].has_seq=false ;
00510             TreeMem[ret].has_floats=false ;
00511 #endif
00512             TreeMem[ret].weight=0.0;
00513             return ret ;
00514         }
00515 
00517         inline void check_treemem()
00518         {
00519             if (TreeMemPtr+10 < TreeMemPtrMax)
00520                 return;
00521             SG_DEBUG("Extending TreeMem from %i to %i elements\n",
00522                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00523             int32_t old_sz=TreeMemPtrMax;
00524             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00525             TreeMem = SG_REALLOC(Trie, TreeMem, old_sz, TreeMemPtrMax);
00526         }
00527 
00532         inline void set_weights_in_tree(bool weights_in_tree_)
00533         {
00534             weights_in_tree = weights_in_tree_;
00535         }
00536 
00541         inline bool get_weights_in_tree()
00542         {
00543             return weights_in_tree;
00544         }
00545 
00555         void POIMs_extract_W_helper(
00556             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00557             const int32_t y0, float64_t* const* const W, const int32_t K);
00558 
00571         void POIMs_calc_SLR_helper1(
00572             const float64_t* const distrib, const int32_t i,
00573             const int32_t nodeIdx, int32_t left_tries_idx[4],
00574             const int32_t depth, int32_t const lastSym, float64_t* S,
00575             float64_t* L, float64_t* R);
00576 
00577 
00588         void POIMs_calc_SLR_helper2(
00589             const float64_t* const distrib, const int32_t i,
00590             const int32_t nodeIdx, int32_t left_tries_idx[4],
00591             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00592 
00603         void POIMs_add_SLR_helper1(
00604             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00605             const int32_t y0, float64_t* const* const poims, const int32_t K,
00606             const int32_t debug);
00607 
00621         void POIMs_add_SLR_helper2(
00622             float64_t* const* const poims, const int32_t K, const int32_t k,
00623             const int32_t i, const int32_t y, const float64_t valW,
00624             const float64_t valS, const float64_t valL, const float64_t valR,
00625             const int32_t debug);
00626 
00628         virtual const char* get_name() const { return "Trie"; }
00629 
00630     public:
00632         int32_t NUM_SYMS;
00633 
00634     protected:
00636         int32_t length;
00638         int32_t * trees;
00639 
00641         int32_t degree;
00643         float64_t*  position_weights;
00644 
00646         Trie* TreeMem;
00648         int32_t TreeMemPtr;
00650         int32_t TreeMemPtrMax;
00652         bool use_compact_terminal_nodes;
00653 
00655         bool weights_in_tree;
00656 
00658         int32_t* nofsKmers;
00659 };
00660     template <class Trie>
00661     CTrie<Trie>::CTrie()
00662     : CSGObject(), degree(0), position_weights(NULL),
00663         use_compact_terminal_nodes(false),
00664         weights_in_tree(true)
00665     {
00666 
00667         TreeMemPtrMax=0;
00668         TreeMemPtr=0;
00669         TreeMem=NULL;
00670 
00671         length=0;
00672         trees=NULL;
00673 
00674         NUM_SYMS=4;
00675     }
00676 
00677     template <class Trie>
00678     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00679     : CSGObject(), degree(d), position_weights(NULL),
00680         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00681         weights_in_tree(true)
00682     {
00683         TreeMemPtrMax=1024*1024/sizeof(Trie);
00684         TreeMemPtr=0;
00685         TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00686 
00687         length=0;
00688         trees=NULL;
00689 
00690         NUM_SYMS=4;
00691     }
00692 
00693     template <class Trie>
00694     CTrie<Trie>::CTrie(const CTrie & to_copy)
00695     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00696         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00697     {
00698         if (to_copy.position_weights!=NULL)
00699         {
00700             position_weights = to_copy.position_weights;
00701             /*SG_MALLOC(float64_t, to_copy.length);
00702               for (int32_t i=0; i<to_copy.length; i++)
00703               position_weights[i]=to_copy.position_weights[i]; */
00704         }
00705         else
00706             position_weights=NULL;
00707 
00708         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00709         TreeMemPtr=to_copy.TreeMemPtr;
00710         TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00711         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00712 
00713         length=to_copy.length;
00714         trees=SG_MALLOC(int32_t, length);
00715         for (int32_t i=0; i<length; i++)
00716             trees[i]=to_copy.trees[i];
00717 
00718         NUM_SYMS=4;
00719     }
00720 
00721     template <class Trie>
00722 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00723 {
00724     degree=to_copy.degree ;
00725     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00726 
00727     SG_FREE(position_weights);
00728     position_weights=NULL ;
00729     if (to_copy.position_weights!=NULL)
00730     {
00731         position_weights=to_copy.position_weights ;
00732         /*position_weights = SG_MALLOC(float64_t, to_copy.length);
00733           for (int32_t i=0; i<to_copy.length; i++)
00734           position_weights[i]=to_copy.position_weights[i] ;*/
00735     }
00736     else
00737         position_weights=NULL ;
00738 
00739     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00740     TreeMemPtr=to_copy.TreeMemPtr ;
00741     SG_FREE(TreeMem) ;
00742     TreeMem = SG_MALLOC(Trie, TreeMemPtrMax);
00743     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00744 
00745     length = to_copy.length ;
00746     if (trees)
00747         SG_FREE(trees);
00748     trees=SG_MALLOC(int32_t, length);
00749     for (int32_t i=0; i<length; i++)
00750         trees[i]=to_copy.trees[i] ;
00751 
00752     return *this ;
00753 }
00754 
00755 template <class Trie>
00756 int32_t CTrie<Trie>::find_deepest_node(
00757     int32_t start_node, int32_t& deepest_node) const
00758 {
00759 #ifdef TRIE_CHECK_EVERYTHING
00760     int32_t ret=0 ;
00761     SG_DEBUG("start_node=%i\n", start_node)
00762 
00763     if (start_node==NO_CHILD)
00764     {
00765         for (int32_t i=0; i<length; i++)
00766         {
00767             int32_t my_deepest_node ;
00768             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00769             SG_DEBUG("start_node %i depth=%i\n", i, depth)
00770             if (depth>ret)
00771             {
00772                 deepest_node=my_deepest_node ;
00773                 ret=depth ;
00774             }
00775         }
00776         return ret ;
00777     }
00778 
00779     if (TreeMem[start_node].has_seq)
00780     {
00781         for (int32_t q=0; q<16; q++)
00782             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00783                 ret++ ;
00784         deepest_node=start_node ;
00785         return ret ;
00786     }
00787     if (TreeMem[start_node].has_floats)
00788     {
00789         deepest_node=start_node ;
00790         return 1 ;
00791     }
00792 
00793     for (int32_t q=0; q<4; q++)
00794     {
00795         int32_t my_deepest_node ;
00796         if (TreeMem[start_node].children[q]==NO_CHILD)
00797             continue ;
00798         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00799         if (depth>ret)
00800         {
00801             deepest_node=my_deepest_node ;
00802             ret=depth ;
00803         }
00804     }
00805     return ret ;
00806 #else
00807     SG_ERROR("not implemented\n")
00808     return 0 ;
00809 #endif
00810 }
00811 
00812     template <class Trie>
00813 int32_t CTrie<Trie>::compact_nodes(
00814     int32_t start_node, int32_t depth, float64_t * weights)
00815 {
00816     SG_ERROR("code buggy\n")
00817 
00818     int32_t ret=0 ;
00819 
00820     if (start_node==NO_CHILD)
00821     {
00822         for (int32_t i=0; i<length; i++)
00823             compact_nodes(i,1, weights) ;
00824         return 0 ;
00825     }
00826     if (start_node<0)
00827         return -1 ;
00828 
00829     if (depth==degree-1)
00830     {
00831         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats)
00832         int32_t num_used=0 ;
00833         for (int32_t q=0; q<4; q++)
00834             if (TreeMem[start_node].child_weights[q]!=0.0)
00835                 num_used++ ;
00836         if (num_used>1)
00837             return -1 ;
00838         return 1 ;
00839     }
00840     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats)
00841 
00842     int32_t num_used = 0 ;
00843     int32_t q_used=-1 ;
00844 
00845     for (int32_t q=0; q<4; q++)
00846     {
00847         if (TreeMem[start_node].children[q]==NO_CHILD)
00848             continue ;
00849         num_used++ ;
00850         q_used=q ;
00851     }
00852     if (num_used>1)
00853     {
00854         if (depth>=degree-2)
00855             return -1 ;
00856         for (int32_t q=0; q<4; q++)
00857         {
00858             if (TreeMem[start_node].children[q]==NO_CHILD)
00859                 continue ;
00860             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00861             if (num<=2)
00862                 continue ;
00863             int32_t node=get_node() ;
00864 
00865             int32_t last_node=TreeMem[start_node].children[q] ;
00866             if (weights_in_tree)
00867             {
00868                 ASSERT(weights[depth]!=0.0)
00869                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00870             }
00871             else
00872                 TreeMem[node].weight=TreeMem[last_node].weight ;
00873 
00874 #ifdef TRIE_CHECK_EVERYTHING
00875             TreeMem[node].has_seq=true ;
00876 #endif
00877             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00878             for (int32_t n=0; n<num; n++)
00879             {
00880                 ASSERT(depth+n+1<=degree-1)
00881                 ASSERT(last_node!=NO_CHILD)
00882                 if (depth+n+1==degree-1)
00883                 {
00884                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats)
00885                     int32_t  k ;
00886                     for (k=0; k<4; k++)
00887                         if (TreeMem[last_node].child_weights[k]!=0.0)
00888                             break ;
00889                     if (k==4)
00890                         break ;
00891                     TreeMem[node].seq[n]=k ;
00892                     break ;
00893                 }
00894                 else
00895                 {
00896                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats)
00897                     int32_t k ;
00898                     for (k=0; k<4; k++)
00899                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00900                             break ;
00901                     if (k==4)
00902                         break ;
00903                     TreeMem[node].seq[n]=k ;
00904                     last_node=TreeMem[last_node].children[k] ;
00905                 }
00906             }
00907             TreeMem[start_node].children[q]=-node ;
00908         }
00909         return -1 ;
00910     }
00911     if (num_used==0)
00912         return 0 ;
00913 
00914     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00915     if (ret<0)
00916         return ret ;
00917     return ret+1 ;
00918 }
00919 
00920 
00921     template <class Trie>
00922 bool CTrie<Trie>::compare_traverse(
00923     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00924 {
00925     SG_DEBUG("checking nodes %i and %i\n", node, other_node)
00926     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00927     {
00928         SG_DEBUG("CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight)
00929         SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
00930         display_node(node) ;
00931         SG_DEBUG("============================================================\n")
00932         other.display_node(other_node) ;
00933         SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
00934         return false ;
00935     }
00936 
00937 #ifdef TRIE_CHECK_EVERYTHING
00938     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00939     {
00940         SG_DEBUG("CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq)
00941         SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
00942         display_node(node) ;
00943         SG_DEBUG("============================================================\n")
00944         other.display_node(other_node) ;
00945         SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
00946         return false ;
00947     }
00948     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00949     {
00950         SG_DEBUG("CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats)
00951         return false ;
00952     }
00953     if (other.TreeMem[other_node].has_floats)
00954     {
00955         for (int32_t q=0; q<4; q++)
00956             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00957             {
00958                 SG_DEBUG("CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q])
00959                 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
00960                 display_node(node) ;
00961                 SG_DEBUG("============================================================\n")
00962                 other.display_node(other_node) ;
00963                 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
00964                 return false ;
00965             }
00966     }
00967     if (other.TreeMem[other_node].has_seq)
00968     {
00969         for (int32_t q=0; q<16; q++)
00970             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00971             {
00972                 SG_DEBUG("CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q])
00973                 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
00974                 display_node(node) ;
00975                 SG_DEBUG("============================================================\n")
00976                 other.display_node(other_node) ;
00977                 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
00978                 return false ;
00979             }
00980     }
00981     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00982     {
00983         for (int32_t q=0; q<4; q++)
00984         {
00985             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00986                 continue ;
00987             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00988             {
00989                 SG_DEBUG("CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q])
00990                 SG_DEBUG(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
00991                 display_node(node) ;
00992                 SG_DEBUG("============================================================\n")
00993                 other.display_node(other_node) ;
00994                 SG_DEBUG("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
00995                 return false ;
00996             }
00997             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00998                 return false ;
00999         }
01000     }
01001 #else
01002     SG_ERROR("not implemented\n")
01003 #endif
01004 
01005     return true ;
01006 }
01007 
01008     template <class Trie>
01009 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
01010 {
01011     bool ret=true ;
01012     for (int32_t i=0; i<length; i++)
01013         if (!compare_traverse(trees[i], other, other.trees[i]))
01014             return false ;
01015         else
01016             SG_DEBUG("two tries at %i identical\n", i)
01017 
01018     return ret ;
01019 }
01020 
01021 template <class Trie>
01022 bool CTrie<Trie>::find_node(
01023     int32_t node, int32_t * trace, int32_t& trace_len) const
01024 {
01025 #ifdef TRIE_CHECK_EVERYTHING
01026     ASSERT(trace_len-1>=0)
01027     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01028         if (TreeMem[trace[trace_len-1]].has_seq)
01029             return false ;
01030     if (TreeMem[trace[trace_len-1]].has_floats)
01031         return false ;
01032 
01033     for (int32_t q=0; q<4; q++)
01034     {
01035         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01036             continue ;
01037         int32_t tl=trace_len+1 ;
01038         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01039             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01040         else
01041             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01042 
01043         if (trace[trace_len]==node)
01044         {
01045             trace_len=tl ;
01046             return true ;
01047         }
01048         if (find_node(node, trace, tl))
01049         {
01050             trace_len=tl ;
01051             return true ;
01052         }
01053     }
01054     trace_len=0 ;
01055     return false ;
01056 #else
01057     SG_ERROR("not implemented\n")
01058     return false ;
01059 #endif
01060 }
01061 
01062 template <class Trie>
01063 void CTrie<Trie>::display_node(int32_t node) const
01064 {
01065 #ifdef TRIE_CHECK_EVERYTHING
01066     int32_t * trace=SG_MALLOC(int32_t, 2*degree);
01067     int32_t trace_len=-1 ;
01068     bool found = false ;
01069     int32_t tree=-1 ;
01070     for (tree=0; tree<length; tree++)
01071     {
01072         trace[0]=trees[tree] ;
01073         trace_len=1 ;
01074         found=find_node(node, trace, trace_len) ;
01075         if (found)
01076             break ;
01077     }
01078     ASSERT(found)
01079     SG_PRINT("position %i  trace: ", tree)
01080 
01081     for (int32_t i=0; i<trace_len-1; i++)
01082     {
01083         int32_t branch=-1 ;
01084         for (int32_t q=0; q<4; q++)
01085             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01086             {
01087                 branch=q;
01088                 break ;
01089             }
01090         ASSERT(branch!=-1)
01091         char acgt[5]="ACGT" ;
01092         SG_PRINT("%c", acgt[branch])
01093     }
01094     SG_PRINT("\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats)
01095     if (TreeMem[node].has_floats)
01096     {
01097         for (int32_t q=0; q<4; q++)
01098             SG_PRINT("child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q])
01099     }
01100     if (TreeMem[node].has_seq)
01101     {
01102         for (int32_t q=0; q<16; q++)
01103             SG_PRINT("seq[%i] = %i\n", q, TreeMem[node].seq[q])
01104     }
01105     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01106     {
01107         for (int32_t q=0; q<4; q++)
01108         {
01109             if (TreeMem[node].children[q]!=NO_CHILD)
01110             {
01111                 SG_PRINT("children[%i] = %i -> \n", q, TreeMem[node].children[q])
01112                 display_node(abs(TreeMem[node].children[q])) ;
01113             }
01114             else
01115                 SG_PRINT("children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q])
01116         }
01117 
01118     }
01119 
01120     SG_FREE(trace);
01121 #else
01122     SG_ERROR("not implemented\n")
01123 #endif
01124 }
01125 
01126 
01127 template <class Trie> CTrie<Trie>::~CTrie()
01128 {
01129     destroy() ;
01130 
01131     SG_FREE(TreeMem) ;
01132 }
01133 
01134 template <class Trie> void CTrie<Trie>::destroy()
01135 {
01136     if (trees!=NULL)
01137     {
01138         delete_trees();
01139         for (int32_t i=0; i<length; i++)
01140             trees[i] = NO_CHILD;
01141         SG_FREE(trees);
01142 
01143         TreeMemPtr=0;
01144         length=0;
01145         trees=NULL;
01146     }
01147 }
01148 
01149 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01150 {
01151     delete_trees(get_use_compact_terminal_nodes());
01152     degree=d;
01153 }
01154 
01155 template <class Trie> void CTrie<Trie>::create(
01156     int32_t len, bool p_use_compact_terminal_nodes)
01157 {
01158     destroy();
01159 
01160     trees=SG_MALLOC(int32_t, len);
01161     TreeMemPtr=0 ;
01162     for (int32_t i=0; i<len; i++)
01163         trees[i]=get_node(degree==1);
01164     length = len ;
01165 
01166     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01167 }
01168 
01169 
01170 template <class Trie> void CTrie<Trie>::delete_trees(
01171     bool p_use_compact_terminal_nodes)
01172 {
01173     if (trees==NULL)
01174         return;
01175 
01176     TreeMemPtr=0 ;
01177     for (int32_t i=0; i<length; i++)
01178         trees[i]=get_node(degree==1);
01179 
01180     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01181 }
01182 
01183     template <class Trie>
01184 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01185 {
01186     float64_t ret=0 ;
01187 
01188     if (tree==NO_CHILD)
01189         return 0 ;
01190     TRIE_ASSERT(tree>=0)
01191 
01192     if (depth==degree-2)
01193     {
01194         ret+=(TreeMem[tree].weight) ;
01195 
01196         for (int32_t k=0; k<4; k++)
01197             ret+=(TreeMem[tree].child_weights[k]) ;
01198 
01199         return ret ;
01200     }
01201 
01202     ret+=(TreeMem[tree].weight) ;
01203 
01204     for (int32_t i=0; i<4; i++)
01205         if (TreeMem[tree].children[i]!=NO_CHILD)
01206             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01207 
01208     return ret ;
01209 }
01210 
01211 
01212     template <class Trie>
01213 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01214 {
01215     float64_t * sum=SG_MALLOC(float64_t, length*4);
01216     for (int32_t i=0; i<length*4; i++)
01217         sum[i]=0 ;
01218     len=length ;
01219 
01220     for (int32_t i=0; i<length; i++)
01221     {
01222         TRIE_ASSERT(trees[i]!=NO_CHILD)
01223         for (int32_t k=0; k<4; k++)
01224         {
01225             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01226         }
01227     }
01228 
01229     return sum ;
01230 }
01231 
01232     template <class Trie>
01233 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01234     int32_t tree,  int32_t i, float64_t alpha,
01235         int32_t *vec, int32_t len_rem,
01236         int32_t degree_rec, int32_t mismatch_rec,
01237         int32_t max_mismatch, float64_t * weights)
01238 {
01239     if (tree==NO_CHILD)
01240         tree=trees[i] ;
01241     TRIE_ASSERT(tree!=NO_CHILD)
01242 
01243     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01244         return ;
01245     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01246 
01247     int32_t subtree = NO_CHILD ;
01248 
01249     if (degree_rec==degree-1)
01250     {
01251         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
01252         if (weights_in_tree)
01253             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01254         else
01255             if (weights[degree_rec]!=0.0)
01256                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01257         if (mismatch_rec+1<=max_mismatch)
01258             for (int32_t o=0; o<3; o++)
01259             {
01260                 if (weights_in_tree)
01261                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01262                 else
01263                     if (weights[degree_rec]!=0.0)
01264                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01265             }
01266         return ;
01267     }
01268     else
01269     {
01270         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01271         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01272         {
01273             subtree=TreeMem[tree].children[vec[0]] ;
01274             if (weights_in_tree)
01275                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01276             else
01277                 if (weights[degree_rec]!=0.0)
01278                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01279         }
01280         else
01281         {
01282             int32_t tmp = get_node(degree_rec==degree-2);
01283             ASSERT(tmp>=0)
01284             TreeMem[tree].children[vec[0]]=tmp ;
01285             subtree=tmp ;
01286 #ifdef TRIE_CHECK_EVERYTHING
01287             if (degree_rec==degree-2)
01288                 TreeMem[subtree].has_floats=true ;
01289 #endif
01290             if (weights_in_tree)
01291                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01292             else
01293                 if (weights[degree_rec]!=0.0)
01294                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01295                 else
01296                     TreeMem[subtree].weight = 0.0 ;
01297         }
01298         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01299                 &vec[1], len_rem-1,
01300                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01301 
01302         if (mismatch_rec+1<=max_mismatch)
01303         {
01304             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01305             for (int32_t o=0; o<3; o++)
01306             {
01307                 int32_t ot = other[vec[0]][o] ;
01308                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01309                 {
01310                     subtree=TreeMem[tree].children[ot] ;
01311                     if (weights_in_tree)
01312                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01313                     else
01314                         if (weights[degree_rec]!=0.0)
01315                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01316                 }
01317                 else
01318                 {
01319                     int32_t tmp = get_node(degree_rec==degree-2);
01320                     ASSERT(tmp>=0)
01321                     TreeMem[tree].children[ot]=tmp ;
01322                     subtree=tmp ;
01323 #ifdef TRIE_CHECK_EVERYTHING
01324                     if (degree_rec==degree-2)
01325                         TreeMem[subtree].has_floats=true ;
01326 #endif
01327 
01328                     if (weights_in_tree)
01329                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01330                     else
01331                         if (weights[degree_rec]!=0.0)
01332                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01333                         else
01334                             TreeMem[subtree].weight = 0.0 ;
01335                 }
01336 
01337                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01338                         &vec[1], len_rem-1,
01339                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01340             }
01341         }
01342     }
01343 }
01344 
01345     template <class Trie>
01346 void CTrie<Trie>::compute_scoring_helper(
01347     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01348     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01349     int32_t offs, float64_t* result)
01350 {
01351     if (i+j<num_feat)
01352     {
01353         float64_t decay=1.0; //no decay by default
01354         //if (j>d)
01355         //  decay=pow(0.5,j); //marginalize out lower order matches
01356 
01357         if (j<degree-1)
01358         {
01359             for (int32_t k=0; k<num_sym; k++)
01360             {
01361                 if (TreeMem[tree].children[k]!=NO_CHILD)
01362                 {
01363                     int32_t child=TreeMem[tree].children[k];
01364                     //continue recursion if not yet at max_degree, else add to result
01365                     if (d<max_degree-1)
01366                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01367                     else
01368                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01369 
01371                     if (d==0)
01372                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01373                 }
01374             }
01375         }
01376         else if (j==degree-1)
01377         {
01378             for (int32_t k=0; k<num_sym; k++)
01379             {
01380                 //continue recursion if not yet at max_degree, else add to result
01381                 if (d<max_degree-1 && i<num_feat-1)
01382                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01383                 else
01384                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01385             }
01386         }
01387     }
01388 }
01389 
01390     template <class Trie>
01391 void CTrie<Trie>::traverse(
01392     int32_t tree, const int32_t p, struct TreeParseInfo info,
01393     const int32_t depth, int32_t* const x, const int32_t k)
01394 {
01395     const int32_t num_sym = info.num_sym;
01396     const int32_t y0 = info.y0;
01397     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01398     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01399     //if( !( info.y0 == temp ) ) {
01400     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01401     //}
01402     //ASSERT( info.y0 == temp )
01403     int32_t sym;
01404     ASSERT( depth < degree )
01405     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] )
01406     if (depth<degree-1)
01407     {
01408         for( sym=0; sym<num_sym; ++sym ) {
01409             const int32_t childNum = TreeMem[tree].children[ sym ];
01410             if( childNum != NO_CHILD ) {
01411                 int32_t child = childNum ;
01412                 x[depth] = sym;
01413                 info.substrs[depth+1] = y0 + sym;
01414                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01415                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) )
01416                 count( TreeMem[child].weight, depth, info, p, x, k );
01417                 traverse( child, p, info, depth+1, x, k );
01418                 x[depth] = -1;
01419             }
01420         }
01421     }
01422     else if( depth == degree-1 )
01423     {
01424         for( sym=0; sym<num_sym; ++sym ) {
01425             const float64_t w = TreeMem[tree].child_weights[ sym ];
01426             if( w != 0.0 ) {
01427                 x[depth] = sym;
01428                 info.substrs[depth+1] = y0 + sym;
01429                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01430                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) )
01431                 count( w, depth, info, p, x, k );
01432                 x[depth] = -1;
01433             }
01434         }
01435     }
01436     //info.substrs[depth+1] = -1;
01437     //info.y0 = temp;
01438 }
01439 
01440     template <class Trie>
01441 void CTrie<Trie>::count(
01442     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01443     const int32_t p, int32_t* x, const int32_t k)
01444 {
01445     ASSERT( fabs(w) < 1e10 )
01446     ASSERT( x[depth] >= 0 )
01447     ASSERT( x[depth+1] < 0 )
01448     if ( depth < k ) {
01449         return;
01450     }
01451     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) )
01452     const int32_t nofKmers = info.nofsKmers[k];
01453     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01454     const int32_t m_a = depth - k + 1;
01455     const int32_t m_b = info.num_feat - p;
01456     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01457     // all proper k-substrings
01458     const int32_t offset0 = nofKmers * p;
01459     register int32_t i;
01460     register int32_t offset;
01461     offset = offset0;
01462     for( i = 0; i < m; ++i ) {
01463         const int32_t y = info.substrs[i+k+1];
01464         info.C_k[ y + offset ] += margWeight;
01465         offset += nofKmers;
01466     }
01467     if( depth > k ) {
01468         // k-prefix
01469         const int32_t offsR = info.substrs[k+1] + offset0;
01470         info.R_k[offsR] += margWeight;
01471         // k-suffix
01472         if( p+depth-k < info.num_feat ) {
01473             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01474             info.L_k[offsL] += margWeight;
01475         }
01476     }
01477     //    # N.x = substring represented by N
01478     //    # N.d = length of N.x
01479     //    # N.s = starting position of N.x
01480     //    # N.w = weight for feature represented by N
01481     //    if( N.d >= k )
01482     //      margContrib = w / 4^(N.d-k)
01483     //      for i = 1 to (N.d-k+1)
01484     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01485     //        C_k[ N.s+i-1, y ] += margContrib
01486     //      end;
01487     //      if( N.d > k )
01488     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01489     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01490     //      end;
01491     //    end;
01492 }
01493 
01494     template <class Trie>
01495 void CTrie<Trie>::add_to_trie(
01496     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01497     float64_t *weights, bool degree_times_position_weights)
01498 {
01499     int32_t tree = trees[i] ;
01500     //ASSERT(seq_offset==0)
01501 
01502     int32_t max_depth = 0 ;
01503     float64_t* weights_column ;
01504     if (degree_times_position_weights)
01505         weights_column = &weights[(i+seq_offset)*degree] ;
01506     else
01507         weights_column = weights ;
01508 
01509     if (weights_in_tree)
01510     {
01511         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01512             if (CMath::abs(weights_column[j]*alpha)>0)
01513                 max_depth = j+1 ;
01514     }
01515     else
01516         // don't use the weights
01517         max_depth=degree ;
01518 
01519     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01520     {
01521         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4))
01522         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01523         {
01524             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01525             {
01526                 // special treatment of the next nodes
01527                 TRIE_ASSERT(j >= degree-16)
01528                 // get the right element
01529                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01530                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01531                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01532 
01533                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax))
01534                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq)
01535                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats)
01536 
01537                 // check whether the same string is stored
01538                 int32_t mismatch_pos = -1 ;
01539                 {
01540                     int32_t k ;
01541                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01542                     {
01543                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
01544                         // ###
01545                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01546                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01547                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER))
01548                         TRIE_ASSERT(k<16)
01549                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01550                         {
01551                             mismatch_pos=k ;
01552                             break ;
01553                         }
01554                     }
01555                 }
01556 
01557                 // what happens when the .seq sequence is longer than vec? should we branch???
01558 
01559                 if (mismatch_pos==-1)
01560                     // if so, then just increase the weight by alpha and stop
01561                     TreeMem[node].weight+=alpha ;
01562                 else
01563                     // otherwise
01564                     // 1. replace current node with new node
01565                     // 2. create new nodes until mismatching positon
01566                     // 2. add a branch with old string (old node) and the new string (new node)
01567                 {
01568                     // replace old node
01569                     int32_t last_node=tree ;
01570 
01571                     // create new nodes until mismatch
01572                     int32_t k ;
01573                     for (k=0; k<mismatch_pos; k++)
01574                     {
01575                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
01576                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k])
01577 
01578                         int32_t tmp=get_node();
01579                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01580                         last_node=tmp ;
01581                         if (weights_in_tree)
01582                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01583                         else
01584                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01585                         TRIE_ASSERT(j+k!=degree-1)
01586                     }
01587                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01588                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01589                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER))
01590                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos])
01591 
01592                     if (j+k==degree-1)
01593                     {
01594                         // init child weights with zero if after dropping out
01595                         // of the k<mismatch_pos loop we are one level below degree
01596                         // (keep this even after get_node() change!)
01597                         for (int32_t q=0; q<4; q++)
01598                             TreeMem[last_node].child_weights[q]=0.0 ;
01599                         if (weights_in_tree)
01600                         {
01601                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01602                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01603                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01604                         }
01605                         else
01606                         {
01607                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01608                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01609                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01610                         }
01611 
01612 #ifdef TRIE_CHECK_EVERYTHING
01613                         TreeMem[last_node].has_floats=true ;
01614 #endif
01615                     }
01616                     else
01617                     {
01618                         // the branch for the existing string
01619                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01620                         {
01621                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01622 
01623                             // move string by mismatch_pos positions
01624                             for (int32_t q=0; q<16; q++)
01625                             {
01626                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01627                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01628                                 else
01629                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01630                             }
01631 #ifdef TRIE_CHECK_EVERYTHING
01632                             TreeMem[node].has_seq=true ;
01633 #endif
01634                         }
01635 
01636                         // the new branch
01637                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4))
01638                         int32_t tmp = get_node() ;
01639                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01640                         last_node=tmp ;
01641 
01642                         TreeMem[last_node].weight = alpha ;
01643 #ifdef TRIE_CHECK_EVERYTHING
01644                         TreeMem[last_node].has_seq = true ;
01645 #endif
01646                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01647                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01648                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01649                     }
01650                 }
01651                 break ;
01652             }
01653             else
01654             {
01655                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01656                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax))
01657                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01658                 if (weights_in_tree)
01659                     TreeMem[tree].weight += alpha*weights_column[j];
01660                 else
01661                     TreeMem[tree].weight += alpha ;
01662             }
01663         }
01664         else if (j==degree-1)
01665         {
01666             // special treatment of the last node
01667             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01668             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
01669             if (weights_in_tree)
01670                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01671             else
01672                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01673 
01674             break;
01675         }
01676         else
01677         {
01678             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01679             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01680             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01681 
01682             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01683             if (use_seq)
01684                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01685             else
01686                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01687             tree=tmp ;
01688 
01689             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax))
01690 #ifdef TRIE_CHECK_EVERYTHING
01691             TreeMem[tree].has_seq = use_seq ;
01692 #endif
01693             if (use_seq)
01694             {
01695                 TreeMem[tree].weight = alpha ;
01696                 // important to have the terminal characters (see ###)
01697                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01698                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01699                 {
01700                     TRIE_ASSERT(q<16)
01701                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01702                 }
01703                 break ;
01704             }
01705             else
01706             {
01707                 if (weights_in_tree)
01708                     TreeMem[tree].weight = alpha*weights_column[j] ;
01709                 else
01710                     TreeMem[tree].weight = alpha ;
01711 #ifdef TRIE_CHECK_EVERYTHING
01712                 if (j==degree-2)
01713                     TreeMem[tree].has_floats = true ;
01714 #endif
01715             }
01716         }
01717     }
01718 }
01719 
01720     template <class Trie>
01721 float64_t CTrie<Trie>::compute_by_tree_helper(
01722     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01723         int32_t weight_pos, float64_t* weights,
01724         bool degree_times_position_weights)
01725 {
01726     int32_t tree = trees[tree_pos] ;
01727 
01728     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01729         return 0.0;
01730 
01731     float64_t *weights_column=NULL ;
01732     if (degree_times_position_weights)
01733         weights_column=&weights[weight_pos*degree] ;
01734     else // weights is a vector (1 x degree)
01735         weights_column=weights ;
01736 
01737     float64_t sum=0 ;
01738     for (int32_t j=0; seq_pos+j < len; j++)
01739     {
01740         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0))
01741 
01742         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01743         {
01744             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01745             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01746             {
01747                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01748                 TRIE_ASSERT(tree>=0)
01749                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
01750                 float64_t this_weight=0.0 ;
01751                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01752                 {
01753                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0))
01754                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01755                         break ;
01756                     this_weight += weights_column[j+k] ;
01757                 }
01758                 sum += TreeMem[tree].weight * this_weight ;
01759                 break ;
01760             }
01761             else
01762             {
01763                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01764                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01765                 if (weights_in_tree)
01766                     sum += TreeMem[tree].weight ;
01767                 else
01768                     sum += TreeMem[tree].weight * weights_column[j] ;
01769             } ;
01770         }
01771         else
01772         {
01773             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01774             if (j==degree-1)
01775             {
01776                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
01777                 if (weights_in_tree)
01778                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01779                 else
01780                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01781             }
01782             else
01783                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
01784 
01785             break;
01786         }
01787     }
01788 
01789     if (position_weights!=NULL)
01790         return sum*position_weights[weight_pos] ;
01791     else
01792         return sum ;
01793 }
01794 
01795     template <class Trie>
01796 void CTrie<Trie>::compute_by_tree_helper(
01797     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01798     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01799     int32_t mkl_stepsize, float64_t * weights,
01800     bool degree_times_position_weights)
01801 {
01802     int32_t tree = trees[tree_pos] ;
01803     if (factor==0)
01804         return ;
01805 
01806     if (position_weights!=NULL)
01807     {
01808         factor *= position_weights[weight_pos] ;
01809         if (factor==0)
01810             return ;
01811         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01812         {
01813             for (int32_t j=0; seq_pos+j<len; j++)
01814             {
01815                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01816                 {
01817                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01818                     {
01819                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01820                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
01821                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01822                         {
01823                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01824                                 break ;
01825                             if (weights_in_tree)
01826                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01827                             else
01828                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01829                         }
01830                         break ;
01831                     }
01832                     else
01833                     {
01834                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01835                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01836                         if (weights_in_tree)
01837                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01838                         else
01839                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01840                     }
01841                 }
01842                 else
01843                 {
01844                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01845                     if (j==degree-1)
01846                     {
01847                         if (weights_in_tree)
01848                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01849                         else
01850                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01851                     }
01852                 }
01853             }
01854         }
01855         else // with position_weigths, weights is a matrix (len x degree)
01856         {
01857             for (int32_t j=0; seq_pos+j<len; j++)
01858             {
01859                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01860                 {
01861                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01862                     {
01863                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01864                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
01865                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01866                         {
01867                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01868                                 break ;
01869                             if (weights_in_tree)
01870                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01871                             else
01872                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01873                         }
01874                         break ;
01875                     }
01876                     else
01877                     {
01878                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01879                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01880                         if (weights_in_tree)
01881                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01882                         else
01883                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01884                     }
01885                 }
01886                 else
01887                 {
01888                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01889                     if (j==degree-1)
01890                     {
01891                         if (weights_in_tree)
01892                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01893                         else
01894                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01895                     }
01896                     break ;
01897                 }
01898             }
01899         }
01900     }
01901     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01902     {
01903         for (int32_t j=0; seq_pos+j<len; j++)
01904         {
01905             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01906             {
01907                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01908                 {
01909                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01910                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
01911                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01912                     {
01913                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01914                             break ;
01915                         if (weights_in_tree)
01916                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01917                         else
01918                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01919                     }
01920                     break ;
01921                 }
01922                 else
01923                 {
01924                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01925                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01926                     if (weights_in_tree)
01927                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01928                     else
01929                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01930                 }
01931             }
01932             else
01933             {
01934                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01935                 if (j==degree-1)
01936                 {
01937                     if (weights_in_tree)
01938                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01939                     else
01940                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01941                 }
01942                 break ;
01943             }
01944         }
01945     }
01946     else // no position_weigths, weights is a matrix (len x degree)
01947     {
01948         /*if (!position_mask)
01949           {
01950           position_mask = SG_MALLOC(bool, len);
01951           for (int32_t i=0; i<len; i++)
01952           {
01953           position_mask[i]=false ;
01954 
01955           for (int32_t j=0; j<degree; j++)
01956           if (weights[i*degree+j]!=0.0)
01957           {
01958           position_mask[i]=true ;
01959           break ;
01960           }
01961           }
01962           }
01963           if (position_mask[weight_pos]==0)
01964           return ;*/
01965 
01966         for (int32_t j=0; seq_pos+j<len; j++)
01967         {
01968             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01969             {
01970                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01971                 {
01972                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01973                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
01974                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01975                     {
01976                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01977                             break ;
01978                         if (weights_in_tree)
01979                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01980                         else
01981                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01982                     }
01983                     break ;
01984                 }
01985                 else
01986                 {
01987                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01988                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01989                     if (weights_in_tree)
01990                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01991                     else
01992                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01993                 }
01994             }
01995             else
01996             {
01997                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
01998                 if (j==degree-1)
01999                 {
02000                     if (weights_in_tree)
02001                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
02002                     else
02003                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
02004                 }
02005                 break ;
02006             }
02007         }
02008     }
02009 }
02010 
02011     template <class Trie>
02012 void CTrie<Trie>::fill_backtracking_table_recursion(
02013     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
02014     DynArray<ConsensusEntry>* table, float64_t* weights)
02015 {
02016     float64_t w=1.0;
02017 
02018     if (weights_in_tree || depth==0)
02019         value+=tree->weight;
02020     else
02021     {
02022         w=weights[degree-1];
02023         value+=weights[depth-1]*tree->weight;
02024     }
02025 
02026     if (degree-1==depth)
02027     {
02028         for (int32_t sym=0; sym<4; sym++)
02029         {
02030             float64_t v=w*tree->child_weights[sym];
02031             if (v!=0.0)
02032             {
02033                 ConsensusEntry entry;
02034                 entry.bt=-1;
02035                 entry.score=value+v;
02036                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02037 
02038                 table->append_element(entry);
02039             }
02040         }
02041     }
02042     else
02043     {
02044         for (int32_t sym=0; sym<4; sym++)
02045         {
02046             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02047             if (tree->children[sym] != NO_CHILD)
02048                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02049         }
02050     }
02051 }
02052 
02053     template <class Trie>
02054 float64_t CTrie<Trie>::get_cumulative_score(
02055     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02056 {
02057     float64_t result=0.0;
02058 
02059     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq)
02060 
02061     for (int32_t i=pos; i<pos+deg && i<length; i++)
02062     {
02063         //SG_PRINT("loop %d\n", i)
02064         Trie* tree = &TreeMem[trees[i]];
02065 
02066         for (int32_t d=0; d<deg-i+pos; d++)
02067         {
02068             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)))
02069             ASSERT(d-1<degree)
02070             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02071 
02072             float64_t w=1.0;
02073             if (!weights_in_tree)
02074                 w=weights[d];
02075 
02076             ASSERT(tree->children[sym] != NO_CHILD)
02077             tree=&TreeMem[tree->children[sym]];
02078             result+=w*tree->weight;
02079         }
02080     }
02081     //SG_PRINT("cum: %f\n", result)
02082     return result;
02083 }
02084 
02085     template <class Trie>
02086 void CTrie<Trie>::fill_backtracking_table(
02087     int32_t pos, DynArray<ConsensusEntry>* prev,
02088     DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02089 {
02090     ASSERT(pos>=0 && pos<length)
02091     ASSERT(!use_compact_terminal_nodes)
02092 
02093     Trie* t = &TreeMem[trees[pos]];
02094 
02095     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02096 
02097 
02098     if (cumulative)
02099     {
02100         int32_t num_cur=cur->get_num_elements();
02101         for (int32_t i=0; i<num_cur; i++)
02102         {
02103             ConsensusEntry entry=cur->get_element(i);
02104             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02105             cur->set_element(entry,i);
02106             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt)
02107         }
02108     }
02109 
02110     //if previous tree exists find maximum scoring path
02111     //for each element in cur and update bt table
02112     if (prev)
02113     {
02114         int32_t num_cur=cur->get_num_elements();
02115         int32_t num_prev=prev->get_num_elements();
02116 
02117         for (int32_t i=0; i<num_cur; i++)
02118         {
02119             //uint64_t str_cur_old= cur->get_element(i).string;
02120             uint64_t str_cur= cur->get_element(i).string >> 2;
02121             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur)
02122 
02123             int32_t bt=-1;
02124             float64_t max_score=0.0;
02125 
02126             for (int32_t j=0; j<num_prev; j++)
02127             {
02128                 //uint64_t str_prev_old= prev->get_element(j).string;
02129                 uint64_t mask=
02130                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02131                 uint64_t str_prev=  mask & prev->get_element(j).string;
02132                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask)
02133 
02134                 if (str_cur == str_prev)
02135                 {
02136                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02137                     if (bt==-1 || sc>max_score)
02138                     {
02139                         bt=j;
02140                         max_score=sc;
02141 
02142                         //SG_PRINT("new_max[%i,%i] = %f\n", j,i, max_score)
02143                     }
02144                 }
02145             }
02146 
02147             ASSERT(bt!=-1)
02148             ConsensusEntry entry;
02149             entry.bt=bt;
02150             entry.score=max_score;
02151             entry.string=cur->get_element(i).string;
02152             cur->set_element(entry, i);
02153             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt)
02154         }
02155     }
02156 }
02157 }
02158 #endif // _TRIE_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation