SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Alphabet.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006-2009 Soeren Sonnenburg
00008  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <string.h>
00012 #include <math.h>
00013 #include <ctype.h>
00014 
00015 #include <shogun/features/Alphabet.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/base/Parameter.h>
00018 
00019 using namespace shogun;
00020 
00021 //define numbers for the bases
00022 const uint8_t CAlphabet::B_A=0;
00023 const uint8_t CAlphabet::B_C=1;
00024 const uint8_t CAlphabet::B_G=2;
00025 const uint8_t CAlphabet::B_T=3;
00026 const uint8_t CAlphabet::B_0=4;
00027 const uint8_t CAlphabet::MAPTABLE_UNDEF=0xff;
00028 const char* CAlphabet::alphabet_names[18]={
00029     "DNA","RAWDNA", "RNA", "PROTEIN", "BINARY", "ALPHANUM",
00030     "CUBE", "RAW", "IUPAC_NUCLEIC_ACID", "IUPAC_AMINO_ACID",
00031     "NONE", "DIGIT", "DIGIT2", "RAWDIGIT", "RAWDIGIT2", "UNKNOWN",
00032     "SNP", "RAWSNP"};
00033 
00034 
00035 CAlphabet::CAlphabet()
00036   : CSGObject()
00037 {
00038     init();
00039 }
00040 
00041 CAlphabet::CAlphabet(char* al, int32_t len)
00042 : CSGObject()
00043 {
00044     init();
00045 
00046     EAlphabet alpha=NONE;
00047 
00048     if (len>=(int32_t) strlen("DNA") && !strncmp(al, "DNA", strlen("DNA")))
00049         alpha = DNA;
00050     else if (len>=(int32_t) strlen("RAWDNA") && !strncmp(al, "RAWDNA", strlen("RAWDNA")))
00051         alpha = RAWDNA;
00052     else if (len>=(int32_t) strlen("RNA") && !strncmp(al, "RNA", strlen("RNA")))
00053         alpha = RNA;
00054     else if (len>=(int32_t) strlen("PROTEIN") && !strncmp(al, "PROTEIN", strlen("PROTEIN")))
00055         alpha = PROTEIN;
00056     else if (len>=(int32_t) strlen("BINARY") && !strncmp(al, "BINARY", strlen("IBINARY")))
00057         alpha = BINARY;
00058     else if (len>=(int32_t) strlen("ALPHANUM") && !strncmp(al, "ALPHANUM", strlen("ALPHANUM")))
00059         alpha = ALPHANUM;
00060     else if (len>=(int32_t) strlen("CUBE") && !strncmp(al, "CUBE", strlen("CUBE")))
00061         alpha = CUBE;
00062     else if (len>=(int32_t) strlen("DIGIT2") && !strncmp(al, "DIGIT2", strlen("DIGIT2")))
00063         alpha = DIGIT2;
00064     else if (len>=(int32_t) strlen("DIGIT") && !strncmp(al, "DIGIT", strlen("DIGIT")))
00065         alpha = DIGIT;
00066     else if (len>=(int32_t) strlen("RAWDIGIT2") && !strncmp(al, "RAWDIGIT2", strlen("RAWDIGIT2")))
00067         alpha = RAWDIGIT2;
00068     else if (len>=(int32_t) strlen("RAWDIGIT") && !strncmp(al, "RAWDIGIT", strlen("RAWDIGIT")))
00069         alpha = RAWDIGIT;
00070     else if (len>=(int32_t) strlen("SNP") && !strncmp(al, "SNP", strlen("SNP")))
00071         alpha = SNP;
00072     else if (len>=(int32_t) strlen("RAWSNP") && !strncmp(al, "RAWSNP", strlen("RAWSNP")))
00073         alpha = RAWSNP;
00074     else if ((len>=(int32_t) strlen("BYTE") && !strncmp(al, "BYTE", strlen("BYTE"))) ||
00075             (len>=(int32_t) strlen("RAW") && !strncmp(al, "RAW", strlen("RAW"))))
00076         alpha = RAWBYTE;
00077     else if (len>=(int32_t) strlen("IUPAC_NUCLEIC_ACID") && !strncmp(al, "IUPAC_NUCLEIC_ACID", strlen("IUPAC_NUCLEIC_ACID")))
00078         alpha = IUPAC_NUCLEIC_ACID;
00079     else if (len>=(int32_t) strlen("IUPAC_AMINO_ACID") && !strncmp(al, "IUPAC_AMINO_ACID", strlen("IUPAC_AMINO_ACID")))
00080         alpha = IUPAC_AMINO_ACID;
00081     else {
00082       SG_ERROR("unknown alphabet %s\n", al)
00083    }
00084 
00085     set_alphabet(alpha);
00086 }
00087 
00088 CAlphabet::CAlphabet(EAlphabet alpha)
00089 : CSGObject()
00090 {
00091     init();
00092     set_alphabet(alpha);
00093 }
00094 
00095 CAlphabet::CAlphabet(CAlphabet* a)
00096 : CSGObject()
00097 {
00098     init();
00099     REQUIRE(a, "No Alphabet specified!\n");
00100     set_alphabet(a->get_alphabet());
00101     copy_histogram(a);
00102 }
00103 
00104 CAlphabet::~CAlphabet()
00105 {
00106 }
00107 
00108 bool CAlphabet::set_alphabet(EAlphabet alpha)
00109 {
00110     bool result=true;
00111     alphabet=alpha;
00112 
00113     switch (alphabet)
00114     {
00115         case DNA:
00116         case RAWDNA:
00117             num_symbols = 4;
00118             break;
00119         case RNA:
00120             num_symbols = 4;
00121             break;
00122         case PROTEIN:
00123             num_symbols = 26;
00124             break;
00125         case BINARY:
00126             num_symbols = 2;
00127             break;
00128         case ALPHANUM:
00129             num_symbols = 36;
00130             break;
00131         case CUBE:
00132             num_symbols = 6;
00133             break;
00134         case RAWBYTE:
00135             num_symbols = 256;
00136             break;
00137         case IUPAC_NUCLEIC_ACID:
00138             num_symbols = 16;
00139             break;
00140         case IUPAC_AMINO_ACID:
00141             num_symbols = 23;
00142             break;
00143         case NONE:
00144             num_symbols = 0;
00145             break;
00146         case DIGIT2:
00147             num_symbols = 3;
00148             break;
00149         case DIGIT:
00150             num_symbols = 10;
00151             break;
00152         case RAWDIGIT2:
00153             num_symbols = 3;
00154             break;
00155         case RAWDIGIT:
00156             num_symbols = 10;
00157             break;
00158         case SNP:
00159             num_symbols = 5;
00160             break;
00161         case RAWSNP:
00162             num_symbols = 5;
00163             break;
00164         default:
00165             num_symbols = 0;
00166             result=false;
00167             break;
00168     }
00169 
00170     num_bits=(int32_t) ceil(log((float64_t) num_symbols)/log((float64_t) 2));
00171     init_map_table();
00172     clear_histogram();
00173 
00174     SG_DEBUG("initialised alphabet %s\n", get_alphabet_name(alphabet))
00175 
00176     return result;
00177 }
00178 
00179 void CAlphabet::init_map_table()
00180 {
00181     for (int32_t i=0; i<(1<<(8*sizeof(uint8_t))); i++)
00182     {
00183         maptable_to_bin[i] = MAPTABLE_UNDEF;
00184         maptable_to_char[i] = MAPTABLE_UNDEF;
00185         valid_chars[i] = false;
00186     }
00187 
00188     switch (alphabet)
00189     {
00190         case RAWDIGIT:
00191             for (uint8_t i=0; i<=9; i++)
00192             {
00193                 valid_chars[i]=true;
00194                 maptable_to_bin[i]=i;
00195                 maptable_to_char[i]=i;
00196             }
00197             break;
00198 
00199         case RAWDIGIT2:
00200             for (uint8_t i=0; i<=2; i++)
00201             {
00202                 valid_chars[i]=true;
00203                 maptable_to_bin[i]=i;
00204                 maptable_to_char[i]=i;
00205             }
00206             break;
00207 
00208         case DIGIT:
00209             valid_chars[(uint8_t) '0']=true;
00210             valid_chars[(uint8_t) '1']=true;
00211             valid_chars[(uint8_t) '2']=true;
00212             valid_chars[(uint8_t) '3']=true;
00213             valid_chars[(uint8_t) '4']=true;
00214             valid_chars[(uint8_t) '5']=true;
00215             valid_chars[(uint8_t) '6']=true;
00216             valid_chars[(uint8_t) '7']=true;
00217             valid_chars[(uint8_t) '8']=true;
00218             valid_chars[(uint8_t) '9']=true;    //Translation '0-9' -> 0-9
00219 
00220             maptable_to_bin[(uint8_t) '0']=0;
00221             maptable_to_bin[(uint8_t) '1']=1;
00222             maptable_to_bin[(uint8_t) '2']=2;
00223             maptable_to_bin[(uint8_t) '3']=3;
00224             maptable_to_bin[(uint8_t) '4']=4;
00225             maptable_to_bin[(uint8_t) '5']=5;
00226             maptable_to_bin[(uint8_t) '6']=6;
00227             maptable_to_bin[(uint8_t) '7']=7;
00228             maptable_to_bin[(uint8_t) '8']=8;
00229             maptable_to_bin[(uint8_t) '9']=9;   //Translation '0-9' -> 0-9
00230 
00231             maptable_to_char[(uint8_t) 0]='0';
00232             maptable_to_char[(uint8_t) 1]='1';
00233             maptable_to_char[(uint8_t) 2]='2';
00234             maptable_to_char[(uint8_t) 3]='3';
00235             maptable_to_char[(uint8_t) 4]='4';
00236             maptable_to_char[(uint8_t) 5]='5';
00237             maptable_to_char[(uint8_t) 6]='6';
00238             maptable_to_char[(uint8_t) 7]='7';
00239             maptable_to_char[(uint8_t) 8]='8';
00240             maptable_to_char[(uint8_t) 9]='9';  //Translation 0-9 -> '0-9'
00241             break;
00242 
00243         case DIGIT2:
00244             valid_chars[(uint8_t) '0']=true;
00245             valid_chars[(uint8_t) '1']=true;
00246             valid_chars[(uint8_t) '2']=true; //Translation '0-2' -> 0-2
00247 
00248             maptable_to_bin[(uint8_t) '0']=0;
00249             maptable_to_bin[(uint8_t) '1']=1;
00250             maptable_to_bin[(uint8_t) '2']=2;   //Translation '0-2' -> 0-2
00251 
00252             maptable_to_char[(uint8_t) 0]='0';
00253             maptable_to_char[(uint8_t) 1]='1';
00254             maptable_to_char[(uint8_t) 2]='2'; //Translation 0-2 -> '0-2'
00255             break;
00256 
00257         case CUBE:
00258             valid_chars[(uint8_t) '1']=true;
00259             valid_chars[(uint8_t) '2']=true;
00260             valid_chars[(uint8_t) '3']=true;
00261             valid_chars[(uint8_t) '4']=true;
00262             valid_chars[(uint8_t) '5']=true;
00263             valid_chars[(uint8_t) '6']=true;    //Translation '123456' -> 012345
00264 
00265             maptable_to_bin[(uint8_t) '1']=0;
00266             maptable_to_bin[(uint8_t) '2']=1;
00267             maptable_to_bin[(uint8_t) '3']=2;
00268             maptable_to_bin[(uint8_t) '4']=3;
00269             maptable_to_bin[(uint8_t) '5']=4;
00270             maptable_to_bin[(uint8_t) '6']=5;   //Translation '123456' -> 012345
00271 
00272             maptable_to_char[(uint8_t) 0]='1';
00273             maptable_to_char[(uint8_t) 1]='2';
00274             maptable_to_char[(uint8_t) 2]='3';
00275             maptable_to_char[(uint8_t) 3]='4';
00276             maptable_to_char[(uint8_t) 4]='5';
00277             maptable_to_char[(uint8_t) 5]='6';  //Translation 012345->'123456'
00278             break;
00279 
00280         case PROTEIN:
00281             {
00282                 int32_t skip=0 ;
00283                 for (int32_t i=0; i<21; i++)
00284                 {
00285                     if (i==1) skip++ ;
00286                     if (i==8) skip++ ;
00287                     if (i==12) skip++ ;
00288                     if (i==17) skip++ ;
00289                     valid_chars['A'+i+skip]=true;
00290                     maptable_to_bin['A'+i+skip]=i ;
00291                     maptable_to_char[i]='A'+i+skip ;
00292                 } ;                   //Translation 012345->acde...xy -- the protein code
00293             } ;
00294             break;
00295 
00296         case BINARY:
00297             valid_chars[(uint8_t) '0']=true;
00298             valid_chars[(uint8_t) '1']=true;
00299 
00300             maptable_to_bin[(uint8_t) '0']=0;
00301             maptable_to_bin[(uint8_t) '1']=1;
00302 
00303             maptable_to_char[0]=(uint8_t) '0';
00304             maptable_to_char[1]=(uint8_t) '1';
00305             break;
00306 
00307         case ALPHANUM:
00308             {
00309                 for (int32_t i=0; i<26; i++)
00310                 {
00311                     valid_chars[(uint8_t) 'A'+i]=true;
00312                     maptable_to_bin[(uint8_t) 'A'+i]=i ;
00313                     maptable_to_char[i]='A'+i ;
00314                 } ;
00315                 for (int32_t i=0; i<10; i++)
00316                 {
00317                     valid_chars[(uint8_t) '0'+i]=true;
00318                     maptable_to_bin[(uint8_t) '0'+i]=26+i ;
00319                     maptable_to_char[26+i]='0'+i ;
00320                 } ;        //Translation 012345->acde...xy0123456789
00321             } ;
00322             break;
00323 
00324         case RAWBYTE:
00325             {
00326                 //identity
00327                 for (int32_t i=0; i<256; i++)
00328                 {
00329                     valid_chars[i]=true;
00330                     maptable_to_bin[i]=i;
00331                     maptable_to_char[i]=i;
00332                 }
00333             }
00334             break;
00335 
00336         case DNA:
00337             valid_chars[(uint8_t) 'A']=true;
00338             valid_chars[(uint8_t) 'C']=true;
00339             valid_chars[(uint8_t) 'G']=true;
00340             valid_chars[(uint8_t) 'T']=true;
00341 
00342             maptable_to_bin[(uint8_t) 'A']=B_A;
00343             maptable_to_bin[(uint8_t) 'C']=B_C;
00344             maptable_to_bin[(uint8_t) 'G']=B_G;
00345             maptable_to_bin[(uint8_t) 'T']=B_T;
00346 
00347             maptable_to_char[B_A]='A';
00348             maptable_to_char[B_C]='C';
00349             maptable_to_char[B_G]='G';
00350             maptable_to_char[B_T]='T';
00351             break;
00352         case RAWDNA:
00353             {
00354                 //identity
00355                 for (int32_t i=0; i<4; i++)
00356                 {
00357                     valid_chars[i]=true;
00358                     maptable_to_bin[i]=i;
00359                     maptable_to_char[i]=i;
00360                 }
00361             }
00362             break;
00363 
00364         case SNP:
00365             valid_chars[(uint8_t) 'A']=true;
00366             valid_chars[(uint8_t) 'C']=true;
00367             valid_chars[(uint8_t) 'G']=true;
00368             valid_chars[(uint8_t) 'T']=true;
00369             valid_chars[(uint8_t) '0']=true;
00370 
00371             maptable_to_bin[(uint8_t) 'A']=B_A;
00372             maptable_to_bin[(uint8_t) 'C']=B_C;
00373             maptable_to_bin[(uint8_t) 'G']=B_G;
00374             maptable_to_bin[(uint8_t) 'T']=B_T;
00375             maptable_to_bin[(uint8_t) '0']=B_0;
00376 
00377             maptable_to_char[B_A]='A';
00378             maptable_to_char[B_C]='C';
00379             maptable_to_char[B_G]='G';
00380             maptable_to_char[B_T]='T';
00381             maptable_to_char[B_0]='0';
00382             break;
00383         case RAWSNP:
00384             {
00385                 //identity
00386                 for (int32_t i=0; i<5; i++)
00387                 {
00388                     valid_chars[i]=true;
00389                     maptable_to_bin[i]=i;
00390                     maptable_to_char[i]=i;
00391                 }
00392             }
00393             break;
00394 
00395         case RNA:
00396             valid_chars[(uint8_t) 'A']=true;
00397             valid_chars[(uint8_t) 'C']=true;
00398             valid_chars[(uint8_t) 'G']=true;
00399             valid_chars[(uint8_t) 'U']=true;
00400 
00401             maptable_to_bin[(uint8_t) 'A']=B_A;
00402             maptable_to_bin[(uint8_t) 'C']=B_C;
00403             maptable_to_bin[(uint8_t) 'G']=B_G;
00404             maptable_to_bin[(uint8_t) 'U']=B_T;
00405 
00406             maptable_to_char[B_A]='A';
00407             maptable_to_char[B_C]='C';
00408             maptable_to_char[B_G]='G';
00409             maptable_to_char[B_T]='U';
00410             break;
00411 
00412         case IUPAC_NUCLEIC_ACID:
00413             valid_chars[(uint8_t) 'A']=true; // A   Adenine
00414             valid_chars[(uint8_t) 'C']=true; // C   Cytosine
00415             valid_chars[(uint8_t) 'G']=true; // G   Guanine
00416             valid_chars[(uint8_t) 'T']=true; // T   Thymine
00417             valid_chars[(uint8_t) 'U']=true; // U   Uracil
00418             valid_chars[(uint8_t) 'R']=true; // R   Purine (A or G)
00419             valid_chars[(uint8_t) 'Y']=true; // Y   Pyrimidine (C, T, or U)
00420             valid_chars[(uint8_t) 'M']=true; // M   C or A
00421             valid_chars[(uint8_t) 'K']=true; // K   T, U, or G
00422             valid_chars[(uint8_t) 'W']=true; // W   T, U, or A
00423             valid_chars[(uint8_t) 'S']=true; // S   C or G
00424             valid_chars[(uint8_t) 'B']=true; // B   C, T, U, or G (not A)
00425             valid_chars[(uint8_t) 'D']=true; // D   A, T, U, or G (not C)
00426             valid_chars[(uint8_t) 'H']=true; // H   A, T, U, or C (not G)
00427             valid_chars[(uint8_t) 'V']=true; // V   A, C, or G (not T, not U)
00428             valid_chars[(uint8_t) 'N']=true; // N   Any base (A, C, G, T, or U)
00429 
00430             maptable_to_bin[(uint8_t) 'A']=0; // A  Adenine
00431             maptable_to_bin[(uint8_t) 'C']=1; // C  Cytosine
00432             maptable_to_bin[(uint8_t) 'G']=2; // G  Guanine
00433             maptable_to_bin[(uint8_t) 'T']=3; // T  Thymine
00434             maptable_to_bin[(uint8_t) 'U']=4; // U  Uracil
00435             maptable_to_bin[(uint8_t) 'R']=5; // R  Purine (A or G)
00436             maptable_to_bin[(uint8_t) 'Y']=6; // Y  Pyrimidine (C, T, or U)
00437             maptable_to_bin[(uint8_t) 'M']=7; // M  C or A
00438             maptable_to_bin[(uint8_t) 'K']=8; // K  T, U, or G
00439             maptable_to_bin[(uint8_t) 'W']=9; // W  T, U, or A
00440             maptable_to_bin[(uint8_t) 'S']=10; // S C or G
00441             maptable_to_bin[(uint8_t) 'B']=11; // B C, T, U, or G (not A)
00442             maptable_to_bin[(uint8_t) 'D']=12; // D A, T, U, or G (not C)
00443             maptable_to_bin[(uint8_t) 'H']=13; // H A, T, U, or C (not G)
00444             maptable_to_bin[(uint8_t) 'V']=14; // V A, C, or G (not T, not U)
00445             maptable_to_bin[(uint8_t) 'N']=15; // N Any base (A, C, G, T, or U)
00446 
00447             maptable_to_char[0]=(uint8_t) 'A'; // A Adenine
00448             maptable_to_char[1]=(uint8_t) 'C'; // C Cytosine
00449             maptable_to_char[2]=(uint8_t) 'G'; // G Guanine
00450             maptable_to_char[3]=(uint8_t) 'T'; // T Thymine
00451             maptable_to_char[4]=(uint8_t) 'U'; // U Uracil
00452             maptable_to_char[5]=(uint8_t) 'R'; // R Purine (A or G)
00453             maptable_to_char[6]=(uint8_t) 'Y'; // Y Pyrimidine (C, T, or U)
00454             maptable_to_char[7]=(uint8_t) 'M'; // M C or A
00455             maptable_to_char[8]=(uint8_t) 'K'; // K T, U, or G
00456             maptable_to_char[9]=(uint8_t) 'W'; // W T, U, or A
00457             maptable_to_char[10]=(uint8_t) 'S'; // S    C or G
00458             maptable_to_char[11]=(uint8_t) 'B'; // B    C, T, U, or G (not A)
00459             maptable_to_char[12]=(uint8_t) 'D'; // D    A, T, U, or G (not C)
00460             maptable_to_char[13]=(uint8_t) 'H'; // H    A, T, U, or C (not G)
00461             maptable_to_char[14]=(uint8_t) 'V'; // V    A, C, or G (not T, not U)
00462             maptable_to_char[15]=(uint8_t) 'N'; // N    Any base (A, C, G, T, or U)
00463             break;
00464 
00465         case IUPAC_AMINO_ACID:
00466             valid_chars[(uint8_t) 'A']=true; //A    Ala Alanine
00467             valid_chars[(uint8_t) 'R']=true; //R    Arg Arginine
00468             valid_chars[(uint8_t) 'N']=true; //N    Asn Asparagine
00469             valid_chars[(uint8_t) 'D']=true; //D    Asp Aspartic acid
00470             valid_chars[(uint8_t) 'C']=true; //C    Cys Cysteine
00471             valid_chars[(uint8_t) 'Q']=true; //Q    Gln Glutamine
00472             valid_chars[(uint8_t) 'E']=true; //E    Glu Glutamic acid
00473             valid_chars[(uint8_t) 'G']=true; //G    Gly Glycine
00474             valid_chars[(uint8_t) 'H']=true; //H    His Histidine
00475             valid_chars[(uint8_t) 'I']=true; //I    Ile Isoleucine
00476             valid_chars[(uint8_t) 'L']=true; //L    Leu Leucine
00477             valid_chars[(uint8_t) 'K']=true; //K    Lys Lysine
00478             valid_chars[(uint8_t) 'M']=true; //M    Met Methionine
00479             valid_chars[(uint8_t) 'F']=true; //F    Phe Phenylalanine
00480             valid_chars[(uint8_t) 'P']=true; //P    Pro Proline
00481             valid_chars[(uint8_t) 'S']=true; //S    Ser Serine
00482             valid_chars[(uint8_t) 'T']=true; //T    Thr Threonine
00483             valid_chars[(uint8_t) 'W']=true; //W    Trp Tryptophan
00484             valid_chars[(uint8_t) 'Y']=true; //Y    Tyr Tyrosine
00485             valid_chars[(uint8_t) 'V']=true; //V    Val Valine
00486             valid_chars[(uint8_t) 'B']=true; //B    Asx Aspartic acid or Asparagine
00487             valid_chars[(uint8_t) 'Z']=true; //Z    Glx Glutamine or Glutamic acid
00488             valid_chars[(uint8_t) 'X']=true; //X    Xaa Any amino acid
00489 
00490             maptable_to_bin[(uint8_t) 'A']=0;  //A  Ala Alanine
00491             maptable_to_bin[(uint8_t) 'R']=1;  //R  Arg Arginine
00492             maptable_to_bin[(uint8_t) 'N']=2;  //N  Asn Asparagine
00493             maptable_to_bin[(uint8_t) 'D']=3;  //D  Asp Aspartic acid
00494             maptable_to_bin[(uint8_t) 'C']=4;  //C  Cys Cysteine
00495             maptable_to_bin[(uint8_t) 'Q']=5;  //Q  Gln Glutamine
00496             maptable_to_bin[(uint8_t) 'E']=6;  //E  Glu Glutamic acid
00497             maptable_to_bin[(uint8_t) 'G']=7;  //G  Gly Glycine
00498             maptable_to_bin[(uint8_t) 'H']=8;  //H  His Histidine
00499             maptable_to_bin[(uint8_t) 'I']=9;  //I  Ile Isoleucine
00500             maptable_to_bin[(uint8_t) 'L']=10; //L  Leu Leucine
00501             maptable_to_bin[(uint8_t) 'K']=11; //K  Lys Lysine
00502             maptable_to_bin[(uint8_t) 'M']=12; //M  Met Methionine
00503             maptable_to_bin[(uint8_t) 'F']=13; //F  Phe Phenylalanine
00504             maptable_to_bin[(uint8_t) 'P']=14; //P  Pro Proline
00505             maptable_to_bin[(uint8_t) 'S']=15; //S  Ser Serine
00506             maptable_to_bin[(uint8_t) 'T']=16; //T  Thr Threonine
00507             maptable_to_bin[(uint8_t) 'W']=17; //W  Trp Tryptophan
00508             maptable_to_bin[(uint8_t) 'Y']=18; //Y  Tyr Tyrosine
00509             maptable_to_bin[(uint8_t) 'V']=19; //V  Val Valine
00510             maptable_to_bin[(uint8_t) 'B']=20; //B  Asx Aspartic acid or Asparagine
00511             maptable_to_bin[(uint8_t) 'Z']=21; //Z  Glx Glutamine or Glutamic acid
00512             maptable_to_bin[(uint8_t) 'X']=22; //X  Xaa Any amino acid
00513 
00514             maptable_to_char[0]=(uint8_t) 'A';  //A Ala Alanine
00515             maptable_to_char[1]=(uint8_t) 'R';  //R Arg Arginine
00516             maptable_to_char[2]=(uint8_t) 'N';  //N Asn Asparagine
00517             maptable_to_char[3]=(uint8_t) 'D';  //D Asp Aspartic acid
00518             maptable_to_char[4]=(uint8_t) 'C';  //C Cys Cysteine
00519             maptable_to_char[5]=(uint8_t) 'Q';  //Q Gln Glutamine
00520             maptable_to_char[6]=(uint8_t) 'E';  //E Glu Glutamic acid
00521             maptable_to_char[7]=(uint8_t) 'G';  //G Gly Glycine
00522             maptable_to_char[8]=(uint8_t) 'H';  //H His Histidine
00523             maptable_to_char[9]=(uint8_t) 'I';  //I Ile Isoleucine
00524             maptable_to_char[10]=(uint8_t) 'L'; //L Leu Leucine
00525             maptable_to_char[11]=(uint8_t) 'K'; //K Lys Lysine
00526             maptable_to_char[12]=(uint8_t) 'M'; //M Met Methionine
00527             maptable_to_char[13]=(uint8_t) 'F'; //F Phe Phenylalanine
00528             maptable_to_char[14]=(uint8_t) 'P'; //P Pro Proline
00529             maptable_to_char[15]=(uint8_t) 'S'; //S Ser Serine
00530             maptable_to_char[16]=(uint8_t) 'T'; //T Thr Threonine
00531             maptable_to_char[17]=(uint8_t) 'W'; //W Trp Tryptophan
00532             maptable_to_char[18]=(uint8_t) 'Y'; //Y Tyr Tyrosine
00533             maptable_to_char[19]=(uint8_t) 'V'; //V Val Valine
00534             maptable_to_char[20]=(uint8_t) 'B'; //B Asx Aspartic acid or Asparagine
00535             maptable_to_char[21]=(uint8_t) 'Z'; //Z Glx Glutamine or Glutamic acid
00536             maptable_to_char[22]=(uint8_t) 'X'; //X Xaa Any amino acid
00537             break;
00538 
00539         default:
00540             break; //leave uninitialised
00541     };
00542 }
00543 
00544 void CAlphabet::clear_histogram()
00545 {
00546     memset(histogram, 0, sizeof(histogram));
00547     print_histogram();
00548 }
00549 
00550 int32_t CAlphabet::get_max_value_in_histogram()
00551 {
00552     int32_t max_sym=-1;
00553     for (int32_t i=(int32_t) (1 <<(sizeof(uint8_t)*8))-1;i>=0; i--)
00554     {
00555         if (histogram[i])
00556         {
00557             max_sym=i;
00558             break;
00559         }
00560     }
00561 
00562     return max_sym;
00563 }
00564 
00565 int32_t CAlphabet::get_num_symbols_in_histogram()
00566 {
00567     int32_t num_sym=0;
00568     for (int32_t i=0; i<(int32_t) (1 <<(sizeof(uint8_t)*8)); i++)
00569     {
00570         if (histogram[i])
00571             num_sym++;
00572     }
00573 
00574     return num_sym;
00575 }
00576 
00577 int32_t CAlphabet::get_num_bits_in_histogram()
00578 {
00579     int32_t num_sym=get_num_symbols_in_histogram();
00580     if (num_sym>0)
00581         return (int32_t) ceil(log((float64_t) num_sym)/log((float64_t) 2));
00582     else
00583         return 0;
00584 }
00585 
00586 
00587 void CAlphabet::print_histogram()
00588 {
00589     for (int32_t i=0; i<(int32_t) (1 <<(sizeof(uint8_t)*8)); i++)
00590     {
00591         if (histogram[i])
00592         {
00593             if (isprint(i))
00594                 SG_PRINT("hist['%c']=%lld", i, histogram[i])
00595             else if (i == '\t')
00596                 SG_PRINT("hist['\\t']=%lld", histogram[i])
00597             else if (i == '\n')
00598                 SG_PRINT("hist['\\n']=%lld", histogram[i])
00599             else if (i == '\r')
00600                 SG_PRINT("hist['\\r']=%lld", histogram[i])
00601             else
00602                 SG_PRINT("hist[%d]=%lld", i, histogram[i])
00603 
00604             if (!valid_chars[i])
00605                 SG_PRINT(" - Character not in Alphabet.\n")
00606             else
00607                 SG_PRINT("\n");
00608         }
00609     }
00610 }
00611 
00612 SGVector<int64_t> CAlphabet::get_histogram()
00613 {
00614     return SGVector<int64_t>(&histogram[0], 1 << (sizeof(uint8_t)*8), false);
00615 }
00616 
00617 bool CAlphabet::check_alphabet(bool print_error)
00618 {
00619     bool result = true;
00620 
00621     for (int32_t i=0; i<(int32_t) (1 <<(sizeof(uint8_t)*8)); i++)
00622     {
00623         if (histogram[i]>0 && valid_chars[i]==0)
00624         {
00625             result=false;
00626             break;
00627         }
00628     }
00629 
00630     if (!result && print_error)
00631     {
00632         print_histogram();
00633         SG_ERROR("ALPHABET does not contain all symbols in histogram\n")
00634     }
00635 
00636     return result;
00637 }
00638 
00639 bool CAlphabet::check_alphabet_size(bool print_error)
00640 {
00641     if (get_num_bits_in_histogram() > get_num_bits())
00642     {
00643         if (print_error)
00644         {
00645             print_histogram();
00646             fprintf(stderr, "get_num_bits_in_histogram()=%i > get_num_bits()=%i\n", get_num_bits_in_histogram(), get_num_bits()) ;
00647          SG_ERROR("ALPHABET too small to contain all symbols in histogram\n")
00648         }
00649         return false;
00650     }
00651     else
00652         return true;
00653 
00654 }
00655 
00656 void CAlphabet::copy_histogram(CAlphabet* a)
00657 {
00658     SGVector<int64_t> h=a->get_histogram();
00659 
00660     if (h.vlen != sizeof(histogram)/sizeof(histogram[0]))
00661     {
00662         SG_ERROR("Histogram has %d elements, but %d elements where expected\n",
00663                 h.vlen, sizeof(histogram)/sizeof(histogram[0]));
00664     }
00665 
00666     memcpy(histogram, h.vector, sizeof(histogram));
00667 }
00668 
00669 const char* CAlphabet::get_alphabet_name(EAlphabet alphabet)
00670 {
00671 
00672     int32_t idx;
00673     switch (alphabet)
00674     {
00675         case DNA:
00676             idx=0;
00677             break;
00678         case RAWDNA:
00679             idx=1;
00680             break;
00681         case RNA:
00682             idx=2;
00683             break;
00684         case PROTEIN:
00685             idx=3;
00686             break;
00687         case BINARY:
00688             idx=4;
00689             break;
00690         case ALPHANUM:
00691             idx=5;
00692             break;
00693         case CUBE:
00694             idx=6;
00695             break;
00696         case RAWBYTE:
00697             idx=7;
00698             break;
00699         case IUPAC_NUCLEIC_ACID:
00700             idx=8;
00701             break;
00702         case IUPAC_AMINO_ACID:
00703             idx=9;
00704             break;
00705         case NONE:
00706             idx=10;
00707             break;
00708         case DIGIT:
00709             idx=11;
00710             break;
00711         case DIGIT2:
00712             idx=12;
00713             break;
00714         default:
00715             idx=13;
00716             break;
00717     }
00718     return alphabet_names[idx];
00719 }
00720 
00721 void CAlphabet::init()
00722 {
00723     alphabet = NONE;
00724     num_symbols = 0;
00725     num_bits = 0;
00726 
00727     memset(valid_chars, 0, sizeof (valid_chars));
00728     memset(maptable_to_bin, 0, sizeof (maptable_to_bin));
00729     memset(maptable_to_char, 0, sizeof (maptable_to_char));
00730     memset(histogram, 0, sizeof (histogram));
00731 
00732 
00733     m_parameters->add((machine_int_t*) &alphabet, "alphabet",
00734                       "Alphabet enum.");
00735     m_parameters->add(&num_symbols, "num_symbols",
00736                       "Number of symbols.");
00737     m_parameters->add(&num_bits, "num_bits",
00738                       "Number of bits.");
00739 
00740     /* We don't need to serialize the mapping tables / they can be computed
00741      * after de-serializing. Lets not serialize the histogram for now. Doesn't
00742      * really make sense.  */
00743 
00744     /* m_parameters->add_histogram(&histogram, sizeof(histogram),
00745             "histogram",
00746             "Histogram."); */
00747 }
00748 
00749 void CAlphabet::load_serializable_post() throw (ShogunException)
00750 {
00751     CSGObject::load_serializable_post();
00752 
00753     init_map_table();
00754 }
00755 
00756 
00757 namespace shogun
00758 {
00759 template <class ST>
00760 void CAlphabet::translate_from_single_order(ST* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
00761 {
00762     int32_t i,j;
00763     ST value=0;
00764 
00765     for (i=sequence_length-1; i>= p_order-1; i--) //convert interval of size T
00766     {
00767         value=0;
00768         for (j=i; j>=i-p_order+1; j--)
00769             value= (value >> max_val) | (obs[j] << (max_val * (p_order-1)));
00770 
00771         obs[i]= (ST) value;
00772     }
00773 
00774     for (i=p_order-2;i>=0;i--)
00775     {
00776         if (i>=sequence_length)
00777             continue;
00778 
00779         value=0;
00780         for (j=i; j>=i-p_order+1; j--)
00781         {
00782             value= (value >> max_val);
00783             if (j>=0 && j<sequence_length)
00784                 value|=obs[j] << (max_val * (p_order-1));
00785         }
00786         obs[i]=value;
00787     }
00788 
00789     // TODO we should get rid of this loop!
00790     if (start>0)
00791     {
00792         for (i=start; i<sequence_length; i++)
00793             obs[i-start]=obs[i];
00794     }
00795 }
00796 
00797 template <class ST>
00798 void CAlphabet::translate_from_single_order_reversed(ST* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
00799 {
00800     int32_t i,j;
00801     ST value=0;
00802 
00803     for (i=sequence_length-1; i>= p_order-1; i--) //convert interval of size T
00804     {
00805         value=0;
00806         for (j=i; j>=i-p_order+1; j--)
00807             value= (value << max_val) | obs[j];
00808 
00809         obs[i]= (ST) value;
00810     }
00811 
00812     for (i=p_order-2;i>=0;i--)
00813     {
00814         if (i>=sequence_length)
00815             continue;
00816 
00817         value=0;
00818         for (j=i; j>=i-p_order+1; j--)
00819         {
00820             value= (value << max_val);
00821             if (j>=0 && j<sequence_length)
00822                 value|=obs[j];
00823         }
00824         obs[i]=value;
00825     }
00826 
00827     // TODO we should get rid of this loop!
00828     if (start>0)
00829     {
00830         for (i=start; i<sequence_length; i++)
00831             obs[i-start]=obs[i];
00832     }
00833 }
00834 
00835 template <class ST>
00836 void CAlphabet::translate_from_single_order(ST* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00837 {
00838     ASSERT(gap>=0)
00839 
00840     const int32_t start_gap=(p_order-gap)/2;
00841     const int32_t end_gap=start_gap+gap;
00842 
00843     int32_t i,j;
00844     ST value=0;
00845 
00846     // almost all positions
00847     for (i=sequence_length-1; i>=p_order-1; i--) //convert interval of size T
00848     {
00849         value=0;
00850         for (j=i; j>=i-p_order+1; j--)
00851         {
00852             if (i-j<start_gap)
00853             {
00854                 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
00855             }
00856             else if (i-j>=end_gap)
00857             {
00858                 value= (value >> max_val) | (obs[j] << (max_val * (p_order-1-gap)));
00859             }
00860         }
00861         obs[i]= (ST) value;
00862     }
00863 
00864     // the remaining `order` positions
00865     for (i=p_order-2;i>=0;i--)
00866     {
00867         if (i>=sequence_length)
00868             continue;
00869 
00870         value=0;
00871         for (j=i; j>=i-p_order+1; j--)
00872         {
00873             if (i-j<start_gap)
00874             {
00875                 value= (value >> max_val);
00876                 if (j>=0 && j<sequence_length)
00877                     value|=obs[j] << (max_val * (p_order-1-gap));
00878             }
00879             else if (i-j>=end_gap)
00880             {
00881                 value= (value >> max_val);
00882                 if (j>=0 && j<sequence_length)
00883                     value|=obs[j] << (max_val * (p_order-1-gap));
00884             }
00885         }
00886         obs[i]=value;
00887     }
00888 
00889     // TODO we should get rid of this loop!
00890     if (start>0)
00891     {
00892         for (i=start; i<sequence_length; i++)
00893             obs[i-start]=obs[i];
00894     }
00895 }
00896 
00897 template <class ST>
00898 void CAlphabet::translate_from_single_order_reversed(ST* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00899 {
00900     ASSERT(gap>=0)
00901 
00902     const int32_t start_gap=(p_order-gap)/2;
00903     const int32_t end_gap=start_gap+gap;
00904 
00905     int32_t i,j;
00906     ST value=0;
00907 
00908     // almost all positions
00909     for (i=sequence_length-1; i>=p_order-1; i--) //convert interval of size T
00910     {
00911         value=0;
00912         for (j=i; j>=i-p_order+1; j--)
00913         {
00914             if (i-j<start_gap)
00915                 value= (value << max_val) | obs[j];
00916             else if (i-j>=end_gap)
00917                 value= (value << max_val) | obs[j];
00918         }
00919         obs[i]= (ST) value;
00920     }
00921 
00922     // the remaining `order` positions
00923     for (i=p_order-2;i>=0;i--)
00924     {
00925         if (i>=sequence_length)
00926             continue;
00927 
00928         value=0;
00929         for (j=i; j>=i-p_order+1; j--)
00930         {
00931             if (i-j<start_gap)
00932             {
00933                 value= value << max_val;
00934                 if (j>=0 && j<sequence_length)
00935                     value|=obs[j];
00936             }
00937             else if (i-j>=end_gap)
00938             {
00939                 value= value << max_val;
00940                 if (j>=0 && j<sequence_length)
00941                     value|=obs[j];
00942             }
00943         }
00944         obs[i]=value;
00945     }
00946 
00947     // TODO we should get rid of this loop!
00948     if (start>0)
00949     {
00950         for (i=start; i<sequence_length; i++)
00951             obs[i-start]=obs[i];
00952     }
00953 }
00954 
00955 template<> void CAlphabet::translate_from_single_order(float32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00956 {
00957 }
00958 
00959 template<> void CAlphabet::translate_from_single_order(float64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00960 {
00961 }
00962 
00963 template<> void CAlphabet::translate_from_single_order(floatmax_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00964 {
00965 }
00966 
00967 template<> void CAlphabet::translate_from_single_order_reversed(float32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00968 {
00969 }
00970 
00971 template<> void CAlphabet::translate_from_single_order_reversed(float64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00972 {
00973 }
00974 
00975 template<> void CAlphabet::translate_from_single_order_reversed(floatmax_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
00976 {
00977 }
00978 
00979 template void CAlphabet::translate_from_single_order<bool>(bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00980 template void CAlphabet::translate_from_single_order<char>(char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00981 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00982 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00983 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00984 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00985 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00986 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00987 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00988 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
00989 
00990 template void CAlphabet::translate_from_single_order<bool>(bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00991 template void CAlphabet::translate_from_single_order<char>(char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00992 template void CAlphabet::translate_from_single_order<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00993 template void CAlphabet::translate_from_single_order<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00994 template void CAlphabet::translate_from_single_order<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00995 template void CAlphabet::translate_from_single_order<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00996 template void CAlphabet::translate_from_single_order<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00997 template void CAlphabet::translate_from_single_order<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00998 template void CAlphabet::translate_from_single_order<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
00999 template void CAlphabet::translate_from_single_order<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01000 
01001 template void CAlphabet::translate_from_single_order_reversed<bool>(bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01002 template void CAlphabet::translate_from_single_order_reversed<char>(char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01003 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01004 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01005 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01006 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01007 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01008 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01009 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01010 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val);
01011 
01012 template void CAlphabet::translate_from_single_order_reversed<bool>(bool* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01013 template void CAlphabet::translate_from_single_order_reversed<char>(char* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01014 template void CAlphabet::translate_from_single_order_reversed<int8_t>(int8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01015 template void CAlphabet::translate_from_single_order_reversed<uint8_t>(uint8_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01016 template void CAlphabet::translate_from_single_order_reversed<int16_t>(int16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01017 template void CAlphabet::translate_from_single_order_reversed<uint16_t>(uint16_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01018 template void CAlphabet::translate_from_single_order_reversed<int32_t>(int32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01019 template void CAlphabet::translate_from_single_order_reversed<uint32_t>(uint32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01020 template void CAlphabet::translate_from_single_order_reversed<int64_t>(int64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01021 template void CAlphabet::translate_from_single_order_reversed<uint64_t>(uint64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01022 template void CAlphabet::translate_from_single_order_reversed<float32_t>(float32_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01023 template void CAlphabet::translate_from_single_order_reversed<float64_t>(float64_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01024 template void CAlphabet::translate_from_single_order_reversed<floatmax_t>(floatmax_t* obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap);
01025 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation