Marsyas
0.6.0-alpha
|
00001 /* 00002 ** Copyright (C) 1998-2007 George Tzanetakis <gtzan@cs.uvic.ca> 00003 ** 00004 ** This program is free software; you can redistribute it and/or modify 00005 ** it under the terms of the GNU General Public License as published by 00006 ** the Free Software Foundation; either version 2 of the License, or 00007 ** (at your option) any later version. 00008 ** 00009 ** This program is distributed in the hope that it will be useful, 00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 ** GNU General Public License for more details. 00013 ** 00014 ** You should have received a copy of the GNU General Public License 00015 ** along with this program; if not, write to the Free Software 00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00017 */ 00018 00019 #if !defined(MARSYAS_EX_SCANNER_H) 00020 #define MARSYAS_EX_SCANNER_H 00021 00022 #include <marsyas/common_header.h> 00023 00024 #include <limits.h> 00025 #include <stdio.h> 00026 #include <stdlib.h> 00027 #include <string.h> 00028 #include <cstring> 00029 00030 #if _MSC_VER >= 1400 00031 #define coco_sprintf sprintf_s 00032 #elif _MSC_VER >= 1300 00033 #define coco_sprintf _snprintf 00034 #elif defined __GNUC__ 00035 #define coco_sprintf snprintf 00036 #else 00037 #error unknown compiler! 00038 #endif 00039 00040 #define COCO_WCHAR_MAX 65535 00041 #define MAX_BUFFER_LENGTH (64*1024) 00042 #define HEAP_BLOCK_SIZE (64*1024) 00043 00044 // string handling, wide character 00045 char* coco_string_create(const char* value); 00046 char* coco_string_create(const char* value , int startIndex, int length); 00047 char* coco_string_create_upper(char* data); 00048 char* coco_string_create_lower(char* data); 00049 char* coco_string_create_append(const char* data1, const char* data2); 00050 char* coco_string_create_append(const char* data, const char value); 00051 void coco_string_delete(char* &data); 00052 int coco_string_length(const char* data); 00053 bool coco_string_endswith(char* data, char* value); 00054 int coco_string_indexof(char* data, char value); 00055 int coco_string_lastindexof(char* data, char value); 00056 void coco_string_merge(char* &data, char* value); 00057 bool coco_string_equal(char* data1, char* data2); 00058 int coco_string_compareto(char* data1, char* data2); 00059 int coco_string_hash(char* data); 00060 00061 00062 namespace Marsyas { 00063 00064 class Token 00065 { 00066 public: 00067 int kind; // token kind 00068 int pos; // token position in the source text (starting at 0) 00069 int col; // token column (starting at 0) 00070 int line; // token line (starting at 1) 00071 char* val; // token value 00072 Token *next; // ML 2005-03-11 Peek tokens are kept in linked list 00073 00074 Token(); 00075 ~Token(); 00076 00077 }; 00078 00079 class Buffer { 00080 private: 00081 char *buf; // input buffer 00082 int bufStart; // position of first byte in buffer relative to input stream 00083 int bufLen; // length of buffer 00084 int fileLen; // length of input stream 00085 int pos; // current position in buffer 00086 FILE* stream; // input stream (seekable) 00087 bool isUserStream; // was the stream opened by the user? 00088 00089 public: 00090 static const int EoF = COCO_WCHAR_MAX + 1; 00091 00092 Buffer(FILE* s, bool isUserStream); 00093 Buffer(const char* s); 00094 Buffer(Buffer *b); 00095 virtual ~Buffer(); 00096 00097 virtual void Close(); 00098 virtual int Read(); 00099 virtual int Peek(); 00100 virtual char* GetString(int beg, int end); 00101 virtual int GetPos(); 00102 virtual void SetPos(int value); 00103 }; 00104 00105 class UTF8Buffer : public Buffer { 00106 public: 00107 UTF8Buffer(Buffer *b) : Buffer(b) {}; 00108 virtual int Read(); 00109 }; 00110 00111 //----------------------------------------------------------------------------------- 00112 // StartStates -- maps charactes to start states of tokens 00113 //----------------------------------------------------------------------------------- 00114 class StartStates { 00115 private: 00116 class Elem { 00117 public: 00118 int key, val; 00119 Elem *next; 00120 Elem(int key, int val) { this->key = key; this->val = val; next = NULL; } 00121 }; 00122 00123 Elem **tab; 00124 00125 public: 00126 StartStates() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } 00127 virtual ~StartStates() { 00128 for (int i = 0; i < 128; ++i) { 00129 Elem *e = tab[i]; 00130 while (e != NULL) { 00131 Elem *next = e->next; 00132 delete e; 00133 e = next; 00134 } 00135 } 00136 delete [] tab; 00137 } 00138 00139 void set(int key, int val) { 00140 Elem *e = new Elem(key, val); 00141 int k = key % 128; 00142 e->next = tab[k]; tab[k] = e; 00143 } 00144 00145 int state(int key) { 00146 Elem *e = tab[key % 128]; 00147 while (e != NULL && e->key != key) e = e->next; 00148 return e == NULL ? 0 : e->val; 00149 } 00150 }; 00151 00152 //------------------------------------------------------------------------------------------- 00153 // KeywordMap -- maps strings to integers (identifiers to keyword kinds) 00154 //------------------------------------------------------------------------------------------- 00155 00156 class KeywordMap { 00157 private: 00158 class Elem { 00159 public: 00160 char* key; 00161 int val; 00162 Elem *next; 00163 Elem(char* key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; } 00164 virtual ~Elem() { coco_string_delete(key); } 00165 }; 00166 00167 Elem **tab; 00168 00169 public: 00170 KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } 00171 virtual ~KeywordMap() { 00172 for (int i = 0; i < 128; ++i) { 00173 Elem *e = tab[i]; 00174 while (e != NULL) { 00175 Elem *next = e->next; 00176 delete e; 00177 e = next; 00178 } 00179 } 00180 delete [] tab; 00181 } 00182 00183 void set(char* key, int val) { 00184 Elem *e = new Elem(key, val); 00185 int k = coco_string_hash(key) % 128; 00186 e->next = tab[k]; tab[k] = e; 00187 } 00188 00189 int get(char* key, int defaultVal) { 00190 Elem *e = tab[coco_string_hash(key) % 128]; 00191 while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; 00192 return e == NULL ? defaultVal : e->val; 00193 } 00194 }; 00195 00196 class ExScanner { 00197 private: 00198 char EOL; 00199 int eofSym; 00200 int noSym; 00201 int maxT; 00202 int charSetSize; 00203 StartStates start; 00204 KeywordMap keywords; 00205 00206 Token *t; // current token 00207 char* tval; // text of current token 00208 int tvalLength; // length of text of current token 00209 int tlen; // length of current token 00210 00211 Token *tokens; // list of tokens already peeked (first token is a dummy) 00212 Token *pt; // current peek token 00213 Token* tail; // first token in list for deleting 00214 int ch; // current input character 00215 00216 int pos; // byte position of current character 00217 int line; // line number of current character 00218 int col; // column number of current character 00219 int oldEols; // EOLs that appeared in a comment; 00220 00221 Token* CreateToken(); 00222 00223 void Init(); 00224 void NextCh(); 00225 void AddCh(); 00226 bool Comment0(); 00227 bool Comment1(); 00228 00229 Token* NextToken(); 00230 00231 00232 public: 00233 Buffer *buffer; // scanner buffer 00234 ExScanner(char* data, int i); 00235 ExScanner(char* fileName); 00236 ExScanner(FILE* s); 00237 ExScanner(); 00238 ~ExScanner(); 00239 Token* Scan(); 00240 Token* Peek(); 00241 void ResetPeek(); 00242 void setString(const char* s); 00243 00244 }; // end ExScanner 00245 00246 }; // namespace 00247 00248 #endif // !defined(MARSYAS_EX_SCANNER_H) 00249