SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
CircularBuffer.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2013 Evgeniy Andreev (gsomix)
00008  */
00009 
00010 #include <shogun/lib/CircularBuffer.h>
00011 
00012 #include <cstdio>
00013 #include <cstring>
00014 
00015 using namespace shogun;
00016 
00017 CCircularBuffer::CCircularBuffer()
00018 {
00019     init();
00020 }
00021 
00022 CCircularBuffer::CCircularBuffer(int32_t buffer_size)
00023 {
00024     init();
00025 
00026     m_buffer=SGVector<char>(buffer_size);
00027     m_buffer_end=m_buffer.vector+m_buffer.vlen;
00028 
00029     m_begin_pos=m_buffer.vector;
00030     m_end_pos=m_begin_pos;
00031 
00032     m_bytes_available=m_buffer.vlen;
00033 }
00034 
00035 CCircularBuffer::~CCircularBuffer()
00036 {
00037     SG_UNREF(m_tokenizer);
00038 }
00039 
00040 void CCircularBuffer::set_tokenizer(CTokenizer* tokenizer)
00041 {
00042     SG_REF(tokenizer);
00043     SG_UNREF(m_tokenizer);
00044     m_tokenizer=tokenizer;
00045 }
00046 
00047 int32_t CCircularBuffer::push(SGVector<char> source)
00048 {
00049     if (source.vector==NULL || source.vlen==0)
00050     {
00051         SG_ERROR("CCircularBuffer::push(SGVector<char>):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
00052         return -1;
00053     }
00054 
00055     int32_t bytes_to_write;
00056     if (source.vlen>m_bytes_available)
00057         bytes_to_write=m_bytes_available;
00058     else
00059         bytes_to_write=source.vlen;
00060 
00061     if (bytes_to_write==0)
00062         return 0;
00063 
00064     // determine which part of the memory block is free to read
00065     if (m_end_pos>=m_begin_pos)
00066     {
00067         int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector);
00068         if (bytes_to_memory_end<bytes_to_write)
00069         {
00070             // we need write as at end of memory block and at begin
00071             // because logical structure of buffer is ring
00072             int32_t first_chunk_size=bytes_to_memory_end;
00073             int32_t second_chunk_size=bytes_to_write-first_chunk_size;
00074 
00075             bytes_to_write=append_chunk(source.vector, first_chunk_size, false);
00076             bytes_to_write+=append_chunk(source.vector+first_chunk_size, second_chunk_size, true);
00077         }
00078         else
00079         {
00080             bytes_to_write=append_chunk(source.vector, bytes_to_write, false);
00081         }
00082     }
00083     else
00084     {
00085         bytes_to_write=append_chunk(source.vector, bytes_to_write, false);
00086     }
00087 
00088     return bytes_to_write;
00089 }
00090 
00091 int32_t CCircularBuffer::push(FILE* source, int32_t source_size)
00092 {
00093     if (source==NULL || source_size==0)
00094     {
00095         SG_ERROR("CCircularBuffer::push(FILE*, int32_t):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
00096         return -1;
00097     }
00098 
00099     int32_t bytes_to_write;
00100     if (source_size>m_bytes_available)
00101         bytes_to_write=m_bytes_available;
00102     else
00103         bytes_to_write=source_size;
00104 
00105     if (bytes_to_write==0)
00106         return 0;
00107 
00108     // determine which part of the memory block is free to read
00109     if (m_end_pos>=m_begin_pos)
00110     {
00111         int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector);
00112         if (bytes_to_memory_end<bytes_to_write)
00113         {
00114             // we need write as at end of memory block and at begin
00115             // because logical structure of buffer is ring
00116             int32_t first_chunk_size=bytes_to_memory_end;
00117             int32_t second_chunk_size=bytes_to_write-first_chunk_size;
00118 
00119             bytes_to_write=append_chunk(source, first_chunk_size, false);
00120             bytes_to_write+=append_chunk(source, second_chunk_size, true);
00121         }
00122         else
00123         {
00124             bytes_to_write=append_chunk(source, bytes_to_write, false);
00125         }
00126     }
00127     else
00128     {
00129         bytes_to_write=append_chunk(source, bytes_to_write, false);
00130     }
00131 
00132     return bytes_to_write;
00133 }
00134 
00135 SGVector<char> CCircularBuffer::pop(int32_t num_bytes)
00136 {
00137     SGVector<char> result;
00138 
00139     int32_t bytes_to_read;
00140     if (num_bytes>m_bytes_count)
00141         bytes_to_read=m_bytes_count;
00142     else
00143         bytes_to_read=num_bytes;
00144 
00145     if (bytes_to_read==0)
00146         return 0;
00147 
00148     // determine which part of the memory block will be read
00149     if (m_begin_pos>=m_end_pos)
00150     {
00151         int32_t bytes_to_memory_end=m_buffer.vlen-(m_begin_pos-m_buffer.vector);
00152         if (bytes_to_memory_end<bytes_to_read)
00153         {
00154             // read continious block from end of memory and from begin
00155             int32_t first_chunk_size=bytes_to_memory_end;
00156             int32_t second_chunk_size=bytes_to_read-first_chunk_size;
00157 
00158             detach_chunk(&result.vector, &result.vlen, 0, first_chunk_size, false);
00159             detach_chunk(&result.vector, &result.vlen, first_chunk_size, second_chunk_size, true);
00160         }
00161         else
00162         {
00163             detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false);
00164         }
00165     }
00166     else
00167     {
00168         detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false);
00169     }
00170 
00171     return result;
00172 }
00173 
00174 bool CCircularBuffer::has_next()
00175 {
00176     if (m_tokenizer==NULL)
00177     {
00178         SG_ERROR("CCircularBuffer::has_next():: Tokenizer is not initialized\n");
00179         return false;
00180     }
00181 
00182     if (m_bytes_count==0)
00183         return false;
00184 
00185     int32_t head_length=m_buffer_end-m_begin_pos;
00186 
00187     // determine position of finder pointer in memory block
00188     if (m_last_idx<head_length)
00189     {
00190         if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
00191         {
00192             return has_next_locally(m_begin_pos+m_last_idx, m_end_pos);
00193         }
00194         else
00195         {
00196             bool temp=false;
00197             temp=has_next_locally(m_begin_pos+m_last_idx, m_buffer_end);
00198 
00199             if (temp)
00200                 return temp;
00201 
00202             return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos);
00203         }
00204     }
00205     else
00206     {
00207         return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos);
00208     }
00209 
00210     return false;
00211 }
00212 
00213 index_t CCircularBuffer::next_token_idx(index_t &start)
00214 {
00215     index_t end;
00216 
00217     if (m_tokenizer==NULL)
00218     {
00219         SG_ERROR("CCircularBuffer::next_token_idx(index_t&):: Tokenizer is not initialized\n");
00220         return 0;
00221     }
00222 
00223     if (m_bytes_count==0)
00224         return m_bytes_count;
00225 
00226     int32_t tail_length=m_end_pos-m_buffer.vector;
00227     int32_t head_length=m_buffer_end-m_begin_pos;
00228 
00229     // determine position of finder pointer in memory block
00230     if (m_last_idx<head_length)
00231     {
00232         if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
00233         {
00234             end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_end_pos);
00235             if (end<=m_bytes_count)
00236                 return end;
00237         }
00238         else
00239         {
00240             index_t temp_start;
00241 
00242             // in this case we should find first at end of memory block
00243             end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_buffer_end);
00244 
00245             if (end<head_length)
00246                 return end;
00247 
00248             // and then at begin
00249             end=next_token_idx_locally(temp_start, m_buffer.vector+m_last_idx-head_length, m_end_pos);
00250 
00251             if (start>=head_length)
00252                 start=temp_start;
00253 
00254             return end;
00255         }
00256     }
00257     else
00258     {
00259         end=next_token_idx_locally(start, m_buffer.vector+m_last_idx-head_length, m_end_pos);
00260         if (end-head_length<=tail_length)
00261             return end;
00262     }
00263 
00264     start=0;
00265     return start;
00266 }
00267 
00268 void CCircularBuffer::skip_characters(int32_t num_chars)
00269 {
00270     move_pointer(&m_begin_pos, m_begin_pos+num_chars);
00271 
00272     m_last_idx-=num_chars;
00273     if (m_last_idx<0)
00274         m_last_idx=0;
00275 
00276     m_bytes_available+=num_chars;
00277     m_bytes_count-=num_chars;
00278 }
00279 
00280 void CCircularBuffer::clear()
00281 {
00282     m_begin_pos=m_buffer.vector;
00283     m_end_pos=m_begin_pos;
00284 
00285     m_last_idx=0;
00286     m_bytes_available=m_buffer.vlen;
00287     m_bytes_count=0;
00288 }
00289 
00290 void CCircularBuffer::init()
00291 {
00292     m_buffer=SGVector<char>();
00293     m_buffer_end=NULL;
00294     m_tokenizer=NULL;
00295 
00296     m_begin_pos=NULL;
00297     m_end_pos=NULL;
00298 
00299     m_last_idx=0;
00300     m_bytes_available=0;
00301     m_bytes_count=0;
00302 }
00303 
00304 int32_t CCircularBuffer::append_chunk(const char* source, int32_t source_size,
00305                     bool from_buffer_begin)
00306 {
00307     if (source==NULL || source_size==0)
00308     {
00309         SG_ERROR("CCircularBuffer::append_chunk(const char*, int32_t, bool):: Invalid parameters!\
00310                 Source shouldn't be NULL or zero sized\n");
00311         return -1;
00312     }
00313 
00314     if (from_buffer_begin)
00315         m_end_pos=m_buffer.vector;
00316 
00317     memcpy(m_end_pos, source, source_size);
00318     move_pointer(&m_end_pos, m_end_pos+source_size);
00319 
00320     m_bytes_available-=source_size;
00321     m_bytes_count+=source_size;
00322 
00323     return source_size;
00324 }
00325 
00326 int32_t CCircularBuffer::append_chunk(FILE* source, int32_t source_size,
00327                     bool from_buffer_begin)
00328 {
00329     int32_t actually_read=fread(m_end_pos, sizeof(char), source_size, source);
00330 
00331     if (from_buffer_begin && actually_read==source_size)
00332         m_end_pos=m_buffer.vector;
00333     move_pointer(&m_end_pos, m_end_pos+actually_read);
00334 
00335     m_bytes_available-=actually_read;
00336     m_bytes_count+=actually_read;
00337 
00338     return actually_read;
00339 }
00340 
00341 void CCircularBuffer::detach_chunk(char** dest, int32_t* dest_size, int32_t dest_offset, int32_t num_bytes,
00342                     bool from_buffer_begin)
00343 {
00344     if (dest==NULL || dest_size==NULL)
00345     {
00346         SG_ERROR("CCircularBuffer::detach_chunk(...):: Invalid parameters! Pointers are NULL\n");
00347         return;
00348     }
00349 
00350     if (*dest==NULL)
00351     {
00352         *dest=SG_MALLOC(char, num_bytes+dest_offset);
00353         *dest_size=num_bytes+dest_offset;
00354     }
00355 
00356     if (*dest_size<num_bytes+dest_offset)
00357     {
00358         *dest=SG_REALLOC(char, *dest, *dest_size, num_bytes+dest_offset);
00359         *dest_size=num_bytes+dest_offset;
00360     }
00361 
00362     if (from_buffer_begin)
00363         m_begin_pos=m_buffer.vector;
00364 
00365     memcpy(*dest+dest_offset, m_begin_pos, num_bytes);
00366     move_pointer(&m_begin_pos, m_begin_pos+num_bytes);
00367 
00368     m_last_idx-=num_bytes;
00369     if (m_last_idx<0)
00370         m_last_idx=0;
00371 
00372     m_bytes_available+=num_bytes;
00373     m_bytes_count-=num_bytes;
00374 }
00375 
00376 bool CCircularBuffer::has_next_locally(char* part_begin, char* part_end)
00377 {
00378     int32_t num_bytes_to_search=part_end-part_begin;
00379 
00380     SGVector<char> buffer_part(part_begin, num_bytes_to_search, false);
00381     m_tokenizer->set_text(buffer_part);
00382 
00383     return m_tokenizer->has_next();
00384 }
00385 
00386 index_t CCircularBuffer::next_token_idx_locally(index_t &start, char* part_begin, char* part_end)
00387 {
00388     index_t end=0;
00389     int32_t num_bytes_to_search=part_end-part_begin;
00390     if (num_bytes_to_search<=0)
00391     {
00392         start=0;
00393         return m_last_idx;
00394     }
00395 
00396     SGVector<char> buffer_part(part_begin, num_bytes_to_search, false);
00397     m_tokenizer->set_text(buffer_part);
00398 
00399     end=m_tokenizer->next_token_idx(start);
00400 
00401     start+=m_last_idx;
00402     m_last_idx+=end;
00403 
00404     if (end==num_bytes_to_search)
00405         return m_last_idx;
00406     else
00407         return m_last_idx++;
00408 }
00409 
00410 void CCircularBuffer::move_pointer(char** pointer, char* new_position)
00411 {
00412     *pointer=new_position;
00413     if (*pointer>=m_buffer.vector+m_buffer.vlen)
00414         *pointer=m_buffer.vector;
00415 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation