SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2013 Evgeniy Andreev (gsomix) 00008 */ 00009 00010 #include <shogun/lib/CircularBuffer.h> 00011 00012 #include <cstdio> 00013 #include <cstring> 00014 00015 using namespace shogun; 00016 00017 CCircularBuffer::CCircularBuffer() 00018 { 00019 init(); 00020 } 00021 00022 CCircularBuffer::CCircularBuffer(int32_t buffer_size) 00023 { 00024 init(); 00025 00026 m_buffer=SGVector<char>(buffer_size); 00027 m_buffer_end=m_buffer.vector+m_buffer.vlen; 00028 00029 m_begin_pos=m_buffer.vector; 00030 m_end_pos=m_begin_pos; 00031 00032 m_bytes_available=m_buffer.vlen; 00033 } 00034 00035 CCircularBuffer::~CCircularBuffer() 00036 { 00037 SG_UNREF(m_tokenizer); 00038 } 00039 00040 void CCircularBuffer::set_tokenizer(CTokenizer* tokenizer) 00041 { 00042 SG_REF(tokenizer); 00043 SG_UNREF(m_tokenizer); 00044 m_tokenizer=tokenizer; 00045 } 00046 00047 int32_t CCircularBuffer::push(SGVector<char> source) 00048 { 00049 if (source.vector==NULL || source.vlen==0) 00050 { 00051 SG_ERROR("CCircularBuffer::push(SGVector<char>):: Invalid parameters! Source shouldn't be NULL or zero sized\n"); 00052 return -1; 00053 } 00054 00055 int32_t bytes_to_write; 00056 if (source.vlen>m_bytes_available) 00057 bytes_to_write=m_bytes_available; 00058 else 00059 bytes_to_write=source.vlen; 00060 00061 if (bytes_to_write==0) 00062 return 0; 00063 00064 // determine which part of the memory block is free to read 00065 if (m_end_pos>=m_begin_pos) 00066 { 00067 int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector); 00068 if (bytes_to_memory_end<bytes_to_write) 00069 { 00070 // we need write as at end of memory block and at begin 00071 // because logical structure of buffer is ring 00072 int32_t first_chunk_size=bytes_to_memory_end; 00073 int32_t second_chunk_size=bytes_to_write-first_chunk_size; 00074 00075 bytes_to_write=append_chunk(source.vector, first_chunk_size, false); 00076 bytes_to_write+=append_chunk(source.vector+first_chunk_size, second_chunk_size, true); 00077 } 00078 else 00079 { 00080 bytes_to_write=append_chunk(source.vector, bytes_to_write, false); 00081 } 00082 } 00083 else 00084 { 00085 bytes_to_write=append_chunk(source.vector, bytes_to_write, false); 00086 } 00087 00088 return bytes_to_write; 00089 } 00090 00091 int32_t CCircularBuffer::push(FILE* source, int32_t source_size) 00092 { 00093 if (source==NULL || source_size==0) 00094 { 00095 SG_ERROR("CCircularBuffer::push(FILE*, int32_t):: Invalid parameters! Source shouldn't be NULL or zero sized\n"); 00096 return -1; 00097 } 00098 00099 int32_t bytes_to_write; 00100 if (source_size>m_bytes_available) 00101 bytes_to_write=m_bytes_available; 00102 else 00103 bytes_to_write=source_size; 00104 00105 if (bytes_to_write==0) 00106 return 0; 00107 00108 // determine which part of the memory block is free to read 00109 if (m_end_pos>=m_begin_pos) 00110 { 00111 int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector); 00112 if (bytes_to_memory_end<bytes_to_write) 00113 { 00114 // we need write as at end of memory block and at begin 00115 // because logical structure of buffer is ring 00116 int32_t first_chunk_size=bytes_to_memory_end; 00117 int32_t second_chunk_size=bytes_to_write-first_chunk_size; 00118 00119 bytes_to_write=append_chunk(source, first_chunk_size, false); 00120 bytes_to_write+=append_chunk(source, second_chunk_size, true); 00121 } 00122 else 00123 { 00124 bytes_to_write=append_chunk(source, bytes_to_write, false); 00125 } 00126 } 00127 else 00128 { 00129 bytes_to_write=append_chunk(source, bytes_to_write, false); 00130 } 00131 00132 return bytes_to_write; 00133 } 00134 00135 SGVector<char> CCircularBuffer::pop(int32_t num_bytes) 00136 { 00137 SGVector<char> result; 00138 00139 int32_t bytes_to_read; 00140 if (num_bytes>m_bytes_count) 00141 bytes_to_read=m_bytes_count; 00142 else 00143 bytes_to_read=num_bytes; 00144 00145 if (bytes_to_read==0) 00146 return 0; 00147 00148 // determine which part of the memory block will be read 00149 if (m_begin_pos>=m_end_pos) 00150 { 00151 int32_t bytes_to_memory_end=m_buffer.vlen-(m_begin_pos-m_buffer.vector); 00152 if (bytes_to_memory_end<bytes_to_read) 00153 { 00154 // read continious block from end of memory and from begin 00155 int32_t first_chunk_size=bytes_to_memory_end; 00156 int32_t second_chunk_size=bytes_to_read-first_chunk_size; 00157 00158 detach_chunk(&result.vector, &result.vlen, 0, first_chunk_size, false); 00159 detach_chunk(&result.vector, &result.vlen, first_chunk_size, second_chunk_size, true); 00160 } 00161 else 00162 { 00163 detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false); 00164 } 00165 } 00166 else 00167 { 00168 detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false); 00169 } 00170 00171 return result; 00172 } 00173 00174 bool CCircularBuffer::has_next() 00175 { 00176 if (m_tokenizer==NULL) 00177 { 00178 SG_ERROR("CCircularBuffer::has_next():: Tokenizer is not initialized\n"); 00179 return false; 00180 } 00181 00182 if (m_bytes_count==0) 00183 return false; 00184 00185 int32_t head_length=m_buffer_end-m_begin_pos; 00186 00187 // determine position of finder pointer in memory block 00188 if (m_last_idx<head_length) 00189 { 00190 if (m_end_pos>=m_begin_pos && m_bytes_available!=0) 00191 { 00192 return has_next_locally(m_begin_pos+m_last_idx, m_end_pos); 00193 } 00194 else 00195 { 00196 bool temp=false; 00197 temp=has_next_locally(m_begin_pos+m_last_idx, m_buffer_end); 00198 00199 if (temp) 00200 return temp; 00201 00202 return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos); 00203 } 00204 } 00205 else 00206 { 00207 return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos); 00208 } 00209 00210 return false; 00211 } 00212 00213 index_t CCircularBuffer::next_token_idx(index_t &start) 00214 { 00215 index_t end; 00216 00217 if (m_tokenizer==NULL) 00218 { 00219 SG_ERROR("CCircularBuffer::next_token_idx(index_t&):: Tokenizer is not initialized\n"); 00220 return 0; 00221 } 00222 00223 if (m_bytes_count==0) 00224 return m_bytes_count; 00225 00226 int32_t tail_length=m_end_pos-m_buffer.vector; 00227 int32_t head_length=m_buffer_end-m_begin_pos; 00228 00229 // determine position of finder pointer in memory block 00230 if (m_last_idx<head_length) 00231 { 00232 if (m_end_pos>=m_begin_pos && m_bytes_available!=0) 00233 { 00234 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_end_pos); 00235 if (end<=m_bytes_count) 00236 return end; 00237 } 00238 else 00239 { 00240 index_t temp_start; 00241 00242 // in this case we should find first at end of memory block 00243 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_buffer_end); 00244 00245 if (end<head_length) 00246 return end; 00247 00248 // and then at begin 00249 end=next_token_idx_locally(temp_start, m_buffer.vector+m_last_idx-head_length, m_end_pos); 00250 00251 if (start>=head_length) 00252 start=temp_start; 00253 00254 return end; 00255 } 00256 } 00257 else 00258 { 00259 end=next_token_idx_locally(start, m_buffer.vector+m_last_idx-head_length, m_end_pos); 00260 if (end-head_length<=tail_length) 00261 return end; 00262 } 00263 00264 start=0; 00265 return start; 00266 } 00267 00268 void CCircularBuffer::skip_characters(int32_t num_chars) 00269 { 00270 move_pointer(&m_begin_pos, m_begin_pos+num_chars); 00271 00272 m_last_idx-=num_chars; 00273 if (m_last_idx<0) 00274 m_last_idx=0; 00275 00276 m_bytes_available+=num_chars; 00277 m_bytes_count-=num_chars; 00278 } 00279 00280 void CCircularBuffer::clear() 00281 { 00282 m_begin_pos=m_buffer.vector; 00283 m_end_pos=m_begin_pos; 00284 00285 m_last_idx=0; 00286 m_bytes_available=m_buffer.vlen; 00287 m_bytes_count=0; 00288 } 00289 00290 void CCircularBuffer::init() 00291 { 00292 m_buffer=SGVector<char>(); 00293 m_buffer_end=NULL; 00294 m_tokenizer=NULL; 00295 00296 m_begin_pos=NULL; 00297 m_end_pos=NULL; 00298 00299 m_last_idx=0; 00300 m_bytes_available=0; 00301 m_bytes_count=0; 00302 } 00303 00304 int32_t CCircularBuffer::append_chunk(const char* source, int32_t source_size, 00305 bool from_buffer_begin) 00306 { 00307 if (source==NULL || source_size==0) 00308 { 00309 SG_ERROR("CCircularBuffer::append_chunk(const char*, int32_t, bool):: Invalid parameters!\ 00310 Source shouldn't be NULL or zero sized\n"); 00311 return -1; 00312 } 00313 00314 if (from_buffer_begin) 00315 m_end_pos=m_buffer.vector; 00316 00317 memcpy(m_end_pos, source, source_size); 00318 move_pointer(&m_end_pos, m_end_pos+source_size); 00319 00320 m_bytes_available-=source_size; 00321 m_bytes_count+=source_size; 00322 00323 return source_size; 00324 } 00325 00326 int32_t CCircularBuffer::append_chunk(FILE* source, int32_t source_size, 00327 bool from_buffer_begin) 00328 { 00329 int32_t actually_read=fread(m_end_pos, sizeof(char), source_size, source); 00330 00331 if (from_buffer_begin && actually_read==source_size) 00332 m_end_pos=m_buffer.vector; 00333 move_pointer(&m_end_pos, m_end_pos+actually_read); 00334 00335 m_bytes_available-=actually_read; 00336 m_bytes_count+=actually_read; 00337 00338 return actually_read; 00339 } 00340 00341 void CCircularBuffer::detach_chunk(char** dest, int32_t* dest_size, int32_t dest_offset, int32_t num_bytes, 00342 bool from_buffer_begin) 00343 { 00344 if (dest==NULL || dest_size==NULL) 00345 { 00346 SG_ERROR("CCircularBuffer::detach_chunk(...):: Invalid parameters! Pointers are NULL\n"); 00347 return; 00348 } 00349 00350 if (*dest==NULL) 00351 { 00352 *dest=SG_MALLOC(char, num_bytes+dest_offset); 00353 *dest_size=num_bytes+dest_offset; 00354 } 00355 00356 if (*dest_size<num_bytes+dest_offset) 00357 { 00358 *dest=SG_REALLOC(char, *dest, *dest_size, num_bytes+dest_offset); 00359 *dest_size=num_bytes+dest_offset; 00360 } 00361 00362 if (from_buffer_begin) 00363 m_begin_pos=m_buffer.vector; 00364 00365 memcpy(*dest+dest_offset, m_begin_pos, num_bytes); 00366 move_pointer(&m_begin_pos, m_begin_pos+num_bytes); 00367 00368 m_last_idx-=num_bytes; 00369 if (m_last_idx<0) 00370 m_last_idx=0; 00371 00372 m_bytes_available+=num_bytes; 00373 m_bytes_count-=num_bytes; 00374 } 00375 00376 bool CCircularBuffer::has_next_locally(char* part_begin, char* part_end) 00377 { 00378 int32_t num_bytes_to_search=part_end-part_begin; 00379 00380 SGVector<char> buffer_part(part_begin, num_bytes_to_search, false); 00381 m_tokenizer->set_text(buffer_part); 00382 00383 return m_tokenizer->has_next(); 00384 } 00385 00386 index_t CCircularBuffer::next_token_idx_locally(index_t &start, char* part_begin, char* part_end) 00387 { 00388 index_t end=0; 00389 int32_t num_bytes_to_search=part_end-part_begin; 00390 if (num_bytes_to_search<=0) 00391 { 00392 start=0; 00393 return m_last_idx; 00394 } 00395 00396 SGVector<char> buffer_part(part_begin, num_bytes_to_search, false); 00397 m_tokenizer->set_text(buffer_part); 00398 00399 end=m_tokenizer->next_token_idx(start); 00400 00401 start+=m_last_idx; 00402 m_last_idx+=end; 00403 00404 if (end==num_bytes_to_search) 00405 return m_last_idx; 00406 else 00407 return m_last_idx++; 00408 } 00409 00410 void CCircularBuffer::move_pointer(char** pointer, char* new_position) 00411 { 00412 *pointer=new_position; 00413 if (*pointer>=m_buffer.vector+m_buffer.vlen) 00414 *pointer=m_buffer.vector; 00415 }