Marsyas  0.6.0-alpha
/usr/src/RPM/BUILD/marsyas-0.6.0/src/marsyas/expr/ExScanner.h
Go to the documentation of this file.
00001 /*
00002 ** Copyright (C) 1998-2007 George Tzanetakis <gtzan@cs.uvic.ca>
00003 **
00004 ** This program is free software; you can redistribute it and/or modify
00005 ** it under the terms of the GNU General Public License as published by
00006 ** the Free Software Foundation; either version 2 of the License, or
00007 ** (at your option) any later version.
00008 **
00009 ** This program is distributed in the hope that it will be useful,
00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 ** GNU General Public License for more details.
00013 **
00014 ** You should have received a copy of the GNU General Public License
00015 ** along with this program; if not, write to the Free Software
00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017 */
00018 
00019 #if !defined(MARSYAS_EX_SCANNER_H)
00020 #define MARSYAS_EX_SCANNER_H
00021 
00022 #include <marsyas/common_header.h>
00023 
00024 #include <limits.h>
00025 #include <stdio.h>
00026 #include <stdlib.h>
00027 #include <string.h>
00028 #include <cstring>
00029 
// Pick the bounded sprintf variant offered by the compiler in use. Every choice is
// called as coco_sprintf(buffer, size, format, ...).
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#  define coco_sprintf sprintf_s
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
#  define coco_sprintf _snprintf
#elif defined(__GNUC__)
#  define coco_sprintf snprintf
#else
#  error unknown compiler!
#endif

// Largest character code; Buffer::EoF is defined as this value plus one.
#define COCO_WCHAR_MAX 65535
// Both sizes below are 64 KiB, expressed in bytes.
#define MAX_BUFFER_LENGTH (64 * 1024)
#define HEAP_BLOCK_SIZE (64 * 1024)
00043 
// String helpers used by the scanner. Only the declarations appear in this header.
// The previous comment labelled these "wide character" helpers, but every signature
// below operates on plain char strings.
//
// NOTE(review): the coco_string_create* functions presumably return a freshly
// allocated string that the caller releases with coco_string_delete -- confirm
// against the definitions.

// Creation
char* coco_string_create(const char* value);
char* coco_string_create(const char* value, int startIndex, int length);
char* coco_string_create_upper(char* data);
char* coco_string_create_lower(char* data);
char* coco_string_create_append(const char* data1, const char* data2);
char* coco_string_create_append(const char* data, const char value);

// Release; takes the pointer by reference.
void  coco_string_delete(char*& data);

// Queries
int   coco_string_length(const char* data);
bool  coco_string_endswith(char* data, char* value);
int   coco_string_indexof(char* data, char value);
int   coco_string_lastindexof(char* data, char value);

// In-place merge; takes the destination pointer by reference.
void  coco_string_merge(char*& data, char* value);

// Comparison and hashing (KeywordMap uses coco_string_equal and coco_string_hash).
bool  coco_string_equal(char* data1, char* data2);
int   coco_string_compareto(char* data1, char* data2);
int   coco_string_hash(char* data);

00060 
00061 
00062 namespace Marsyas {
00063 
// One lexical token handed out by the scanner.
// The constructor and destructor are defined outside this header.
// NOTE(review): whether ~Token() releases val cannot be seen from here -- confirm
// before copying or sharing Token objects.
class Token
{
public:
  int    kind;   // token kind
  int    pos;    // position of the token in the source text (first position is 0)
  int    col;    // column of the token (first column is 0)
  int    line;   // line of the token (first line is 1)
  char*  val;    // text value of the token
  Token* next;   // link to the following token; peeked tokens are kept in a linked list (ML 2005-03-11)

  Token();
  ~Token();
};
00078 
00079 class Buffer {
00080 private:
00081   char *buf;          // input buffer
00082   int bufStart;       // position of first byte in buffer relative to input stream
00083   int bufLen;         // length of buffer
00084   int fileLen;        // length of input stream
00085   int pos;            // current position in buffer
00086   FILE* stream;      // input stream (seekable)
00087   bool isUserStream;  // was the stream opened by the user?
00088 
00089 public:
00090   static const int EoF = COCO_WCHAR_MAX + 1;
00091 
00092   Buffer(FILE* s, bool isUserStream);
00093   Buffer(const char* s);
00094   Buffer(Buffer *b);
00095   virtual ~Buffer();
00096 
00097   virtual void Close();
00098   virtual int Read();
00099   virtual int Peek();
00100   virtual char* GetString(int beg, int end);
00101   virtual int GetPos();
00102   virtual void SetPos(int value);
00103 };
00104 
00105 class UTF8Buffer : public Buffer {
00106 public:
00107   UTF8Buffer(Buffer *b) : Buffer(b) {};
00108   virtual int Read();
00109 };
00110 
//-----------------------------------------------------------------------------------
// StartStates  -- maps characters to start states of tokens
//
// Chained hash table with a fixed number of buckets, keyed by an int character code.
// state() returns 0 for a key that was never set.
//-----------------------------------------------------------------------------------
class StartStates {
private:
  // One key -> state entry; entries that share a bucket form a singly linked list.
  class Elem {
  public:
    int key, val;
    Elem *next;
    Elem(int key, int val) { this->key = key; this->val = val; next = NULL; }
  };

  enum { BUCKET_COUNT = 128 };

  Elem **tab;

  // Bucket index for a key. The remainder is taken on the unsigned value because
  // `key % 128` is negative for a negative key and would index outside tab.
  // For non-negative keys the result equals `key % 128`.
  static int bucket(int key) {
    return static_cast<int>(static_cast<unsigned int>(key) %
                            static_cast<unsigned int>(BUCKET_COUNT));
  }

  // Not copyable: a copy would share tab and both destructors would free it.
  // Declared but intentionally never defined.
  StartStates(const StartStates&);
  StartStates& operator=(const StartStates&);

public:
  // Creates an empty table (every bucket is NULL).
  StartStates() {
    tab = new Elem*[BUCKET_COUNT];
    memset(tab, 0, BUCKET_COUNT * sizeof(Elem*));
  }

  // Frees every entry of every bucket, then the bucket array itself.
  virtual ~StartStates() {
    for (int i = 0; i < BUCKET_COUNT; ++i) {
      Elem *e = tab[i];
      while (e != NULL) {
        Elem *following = e->next;
        delete e;
        e = following;
      }
    }
    delete [] tab;
  }

  // Associates val with key. The new entry is placed at the head of its bucket, so
  // it takes precedence over an older entry with the same key.
  void set(int key, int val) {
    Elem *e = new Elem(key, val);
    const int k = bucket(key);
    e->next = tab[k];
    tab[k] = e;
  }

  // Returns the state stored for key, or 0 when the key is absent.
  int state(int key) {
    Elem *e = tab[bucket(key)];
    while (e != NULL && e->key != key) e = e->next;
    return e == NULL ? 0 : e->val;
  }
};
00151 
00152 //-------------------------------------------------------------------------------------------
00153 // KeywordMap  -- maps strings to integers (identifiers to keyword kinds)
00154 //-------------------------------------------------------------------------------------------
00155 
00156 class KeywordMap {
00157 private:
00158   class Elem {
00159   public:
00160     char* key;
00161     int val;
00162     Elem *next;
00163     Elem(char* key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; }
00164     virtual ~Elem() { coco_string_delete(key); }
00165   };
00166 
00167   Elem **tab;
00168 
00169 public:
00170   KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); }
00171   virtual ~KeywordMap() {
00172     for (int i = 0; i < 128; ++i) {
00173       Elem *e = tab[i];
00174       while (e != NULL) {
00175         Elem *next = e->next;
00176         delete e;
00177         e = next;
00178       }
00179     }
00180     delete [] tab;
00181   }
00182 
00183   void set(char* key, int val) {
00184     Elem *e = new Elem(key, val);
00185     int k = coco_string_hash(key) % 128;
00186     e->next = tab[k]; tab[k] = e;
00187   }
00188 
00189   int get(char* key, int defaultVal) {
00190     Elem *e = tab[coco_string_hash(key) % 128];
00191     while (e != NULL && !coco_string_equal(e->key, key)) e = e->next;
00192     return e == NULL ? defaultVal : e->val;
00193   }
00194 };
00195 
00196 class ExScanner {
00197 private:
00198   char EOL;
00199   int eofSym;
00200   int noSym;
00201   int maxT;
00202   int charSetSize;
00203   StartStates start;
00204   KeywordMap keywords;
00205 
00206   Token *t;         // current token
00207   char* tval;    // text of current token
00208   int tvalLength;   // length of text of current token
00209   int tlen;         // length of current token
00210 
00211   Token *tokens;    // list of tokens already peeked (first token is a dummy)
00212   Token *pt;        // current peek token
00213   Token* tail;      // first token in list for deleting
00214   int ch;           // current input character
00215 
00216   int pos;          // byte position of current character
00217   int line;         // line number of current character
00218   int col;          // column number of current character
00219   int oldEols;      // EOLs that appeared in a comment;
00220 
00221   Token* CreateToken();
00222 
00223   void Init();
00224   void NextCh();
00225   void AddCh();
00226   bool Comment0();
00227   bool Comment1();
00228 
00229   Token* NextToken();
00230 
00231 
00232 public:
00233   Buffer *buffer;   // scanner buffer
00234   ExScanner(char* data, int i);
00235   ExScanner(char* fileName);
00236   ExScanner(FILE* s);
00237   ExScanner();
00238   ~ExScanner();
00239   Token* Scan();
00240   Token* Peek();
00241   void ResetPeek();
00242   void setString(const char* s);
00243 
00244 }; // end ExScanner
00245 
00246 }; // namespace
00247 
00248 #endif // !defined(MARSYAS_EX_SCANNER_H)
00249