SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evangelos Anagnostopoulos 00008 * Copyright (C) 2013 Evangelos Anagnostopoulos 00009 */ 00010 00011 #ifndef _TOKENIZER__H__ 00012 #define _TOKENIZER__H__ 00013 00014 #include <shogun/base/SGObject.h> 00015 #include <shogun/lib/SGString.h> 00016 #include <shogun/lib/SGVector.h> 00017 00018 namespace shogun 00019 { 00020 class CSGObject; 00021 template<class T> class SGVector; 00022 00027 class CTokenizer: public CSGObject 00028 { 00029 public: 00031 CTokenizer(); 00032 00034 CTokenizer(const CTokenizer& orig); 00035 00037 virtual ~CTokenizer() { }; 00038 00043 virtual void set_text(SGVector<char> txt); 00044 00050 virtual bool has_next()=0; 00051 00058 virtual index_t next_token_idx(index_t& start)=0; 00059 00064 virtual CTokenizer* get_copy()=0; 00065 00066 private: 00067 void init(); 00068 00069 protected: 00071 SGVector<char> text; 00072 }; 00073 } 00074 00075 #endif /* _TOKENIZER__H__ */