SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Tokenizer.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evangelos Anagnostopoulos
00008  * Copyright (C) 2013 Evangelos Anagnostopoulos
00009  */
00010 
00011 #ifndef _TOKENIZER__H__
00012 #define _TOKENIZER__H__
00013 
00014 #include <shogun/base/SGObject.h>
00015 #include <shogun/lib/SGString.h>
00016 #include <shogun/lib/SGVector.h>
00017 
00018 namespace shogun
00019 {
00020 class CSGObject;
00021 template<class T> class SGVector;
00022 
/**
 * Abstract tokenizer interface derived from CSGObject.
 *
 * The class stores a character vector (`text`) and declares three pure
 * virtual members (has_next(), next_token_idx(), get_copy()), so it cannot be
 * instantiated directly; concrete subclasses must implement those members.
 * Only declarations are visible in this header -- the bodies of the
 * constructors, set_text() and init() are defined elsewhere.
 */
00027 class CTokenizer: public CSGObject
00028 {
00029 public:
          /** Default constructor (defined out of line). */
00031     CTokenizer();
00032 
          /**
           * Copy constructor (defined out of line).
           *
           * @param orig tokenizer to copy from
           */
00034     CTokenizer(const CTokenizer& orig);
00035 
          /** Virtual destructor with an empty body, defined inline. */
00037     virtual ~CTokenizer() { };
00038 
          /**
           * Supply the text to work on. Virtual, so subclasses may override it.
           *
           * NOTE(review): presumably stores `txt` in the `text` member -- the
           * implementation is not part of this header; confirm in the .cpp.
           *
           * @param txt character vector, passed by value
           */
00043     virtual void set_text(SGVector<char> txt);
00044 
          /**
           * Pure virtual; must be implemented by subclasses.
           *
           * NOTE(review): by its name this presumably reports whether further
           * tokens remain in the text -- confirm against the implementers.
           *
           * @return a boolean; exact meaning is defined by the subclass
           */
00050     virtual bool has_next()=0;
00051 
          /**
           * Pure virtual; must be implemented by subclasses.
           *
           * `start` is a non-const reference, so an implementation is able to
           * write a value back to the caller through it.
           *
           * NOTE(review): presumably `start` receives the index where the next
           * token begins and the return value marks where it ends; whether that
           * end is inclusive or exclusive cannot be told from here -- confirm.
           *
           * @param start index passed by reference (in/out capable)
           * @return an index; exact meaning is defined by the subclass
           */
00058     virtual index_t next_token_idx(index_t& start)=0;
00059 
          /**
           * Pure virtual; must be implemented by subclasses.
           *
           * NOTE(review): presumably returns a newly created copy of this
           * tokenizer; ownership of the returned pointer is not expressed by
           * the signature -- confirm how callers are expected to release it.
           *
           * @return pointer to a CTokenizer
           */
00064     virtual CTokenizer* get_copy()=0;
00065 
00066 private:
          /** Private helper, defined out of line; not callable by subclasses. */
00067     void init();
00068 
00069 protected:
          /** Character vector held by the tokenizer; accessible to subclasses. */
00071     SGVector<char> text;
00072 };
00073 }
00074 
00075 #endif  /* _TOKENIZER__H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation