// SHOGUN v3.2.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evgeniy Andreev (gsomix) 00008 */ 00009 00010 #include <shogun/io/LineReader.h> 00011 #include <cstdio> 00012 00013 using namespace shogun; 00014 00015 CLineReader::CLineReader() 00016 { 00017 init(); 00018 00019 m_buffer=new CCircularBuffer(); 00020 } 00021 00022 CLineReader::CLineReader(FILE* stream, CTokenizer* tokenizer) 00023 { 00024 init(); 00025 00026 m_stream=stream; 00027 m_max_token_length=10*1024*1024; 00028 00029 SG_REF(tokenizer); 00030 m_tokenizer=tokenizer; 00031 00032 m_buffer=new CCircularBuffer(m_max_token_length); 00033 m_buffer->set_tokenizer(m_tokenizer); 00034 } 00035 00036 CLineReader::CLineReader(int32_t max_token_length, FILE* stream, CTokenizer* tokenizer) 00037 { 00038 init(); 00039 00040 m_stream=stream; 00041 m_max_token_length=max_token_length; 00042 00043 SG_REF(tokenizer); 00044 m_tokenizer=tokenizer; 00045 00046 m_buffer=new CCircularBuffer(m_max_token_length); 00047 m_buffer->set_tokenizer(m_tokenizer); 00048 } 00049 00050 CLineReader::~CLineReader() 00051 { 00052 SG_UNREF(m_tokenizer); 00053 SG_UNREF(m_buffer); 00054 } 00055 00056 bool CLineReader::has_next() 00057 { 00058 if (m_stream==NULL || m_max_token_length==0 || m_tokenizer==NULL) 00059 { 00060 SG_ERROR("CLineReader::has_next():: Class is not initialized\n"); 00061 return false; 00062 } 00063 00064 if (ferror(m_stream)) 00065 { 00066 SG_ERROR("CLineReader::has_next():: Error reading file\n"); 00067 return false; 00068 } 00069 00070 if (feof(m_stream) && (m_buffer->num_bytes_contained()<=0 || !m_buffer->has_next())) 00071 return false; // nothing to read 00072 00073 return true; 00074 } 00075 00076 void CLineReader::skip_line() 00077 { 00078 int32_t bytes_to_skip=0; 
00079 m_next_token_length=read(bytes_to_skip); 00080 if (m_next_token_length==-1) 00081 return; 00082 else 00083 m_buffer->skip_characters(bytes_to_skip); 00084 } 00085 00086 SGVector<char> CLineReader::read_line() 00087 { 00088 SGVector<char> line; 00089 00090 int32_t bytes_to_skip=0; 00091 m_next_token_length=read(bytes_to_skip); 00092 if (m_next_token_length==-1) 00093 line=SGVector<char>(); 00094 else 00095 { 00096 m_buffer->skip_characters(bytes_to_skip); 00097 line=read_token(m_next_token_length-bytes_to_skip); 00098 } 00099 00100 return line; 00101 } 00102 00103 void CLineReader::reset() 00104 { 00105 rewind(m_stream); 00106 m_buffer->clear(); 00107 } 00108 00109 void CLineReader::set_tokenizer(CTokenizer* tokenizer) 00110 { 00111 SG_REF(tokenizer); 00112 SG_UNREF(m_tokenizer); 00113 m_tokenizer=tokenizer; 00114 00115 m_buffer->set_tokenizer(tokenizer); 00116 } 00117 00118 void CLineReader::init() 00119 { 00120 m_buffer=NULL; 00121 m_tokenizer=NULL; 00122 m_stream=NULL; 00123 00124 m_max_token_length=0; 00125 m_next_token_length=-1; 00126 } 00127 00128 int32_t CLineReader::read(int32_t& bytes_to_skip) 00129 { 00130 int32_t line_end=0; 00131 int32_t bytes_to_read=0; 00132 int32_t temp_bytes_to_skip=0; 00133 00134 while (1) 00135 { 00136 if (bytes_to_skip==line_end) 00137 line_end=m_buffer->next_token_idx(bytes_to_skip); 00138 else 00139 line_end=m_buffer->next_token_idx(temp_bytes_to_skip); 00140 00141 if (m_buffer->num_bytes_contained()!=0 && line_end<m_buffer->num_bytes_contained()) 00142 return line_end; 00143 else if (m_buffer->available()==0) 00144 return -1; // we need some limit in case file does not contain delimiter 00145 00146 // if there is no delimiter in buffer 00147 // try get more data from stream 00148 // and write it into buffer 00149 if (m_buffer->available() < m_max_token_length) 00150 bytes_to_read=m_buffer->available(); 00151 else 00152 bytes_to_read=m_max_token_length; 00153 00154 if (feof(m_stream)) 00155 return line_end; 00156 else 
00157 m_buffer->push(m_stream, bytes_to_read); 00158 00159 if (ferror(m_stream)) 00160 { 00161 SG_ERROR("CLineReader::read(int32_t&):: Error reading file\n"); 00162 return -1; 00163 } 00164 } 00165 } 00166 00167 SGVector<char> CLineReader::read_token(int32_t line_len) 00168 { 00169 SGVector<char> line; 00170 00171 if (line_len==0) 00172 line=SGVector<char>(); 00173 else 00174 line=m_buffer->pop(line_len); 00175 00176 return line; 00177 }