SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
IOBuffer.cpp
Go to the documentation of this file.
00001 /*
00002   Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003   embodied in the content of this file are licensed under the BSD
00004   (revised) open source license.
00005 
00006   Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
00007 
00008   This program is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012 
00013   Shogun adjustments (w) 2011 Shashwat Lal Das
00014 */
00015 
00016 #include <string.h>
00017 #include <shogun/io/IOBuffer.h>
00018 
00019 using namespace shogun;
00020 
00021 CIOBuffer::CIOBuffer()
00022 {
00023     init();
00024 }
00025 
00026 CIOBuffer::CIOBuffer(int fd)
00027 {
00028     init();
00029     working_file = fd;
00030 }
00031 
00032 CIOBuffer::~CIOBuffer()
00033 {
00034 }
00035 
00036 void CIOBuffer::init()
00037 {
00038     size_t s = 1 << 16;
00039     space.reserve(s);
00040     endloaded = space.begin;
00041     working_file=-1;
00042 }
00043 
00044 void CIOBuffer::use_file(int fd)
00045 {
00046     working_file = fd;
00047 }
00048 
00049 int CIOBuffer::open_file(const char* name, char flag)
00050 {
00051     int ret=1;
00052     switch(flag)
00053     {
00054     case 'r':
00055         working_file = open(name, O_RDONLY|O_LARGEFILE);
00056         break;
00057 
00058     case 'w':
00059         working_file = open(name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
00060         break;
00061 
00062     default:
00063         SG_ERROR("Unknown file operation. Something other than 'r'/'w' specified.\n")
00064         ret = 0;
00065     }
00066     return ret;
00067 }
00068 
00069 void CIOBuffer::reset_file()
00070 {
00071     lseek(working_file, 0, SEEK_SET);
00072     endloaded = space.begin;
00073     space.end = space.begin;
00074 }
00075 
00076 void CIOBuffer::set(char *p)
00077 {
00078     space.end = p;
00079 }
00080 
00081 ssize_t CIOBuffer::read_file(void* buf, size_t nbytes)
00082 {
00083     return read(working_file, buf, nbytes);
00084 }
00085 
00086 size_t CIOBuffer::fill()
00087 {
00088     if (space.end_array - endloaded == 0)
00089     {
00090         size_t offset = endloaded - space.begin;
00091         space.reserve(2 * (space.end_array - space.begin));
00092         endloaded = space.begin+offset;
00093     }
00094     ssize_t num_read = read_file(endloaded, space.end_array - endloaded);
00095     if (num_read >= 0)
00096     {
00097         endloaded = endloaded+num_read;
00098         return num_read;
00099     }
00100     else
00101         return 0;
00102 }
00103 
00104 ssize_t CIOBuffer::write_file(const void* buf, size_t nbytes)
00105 {
00106     return write(working_file, buf, nbytes);
00107 }
00108 
00109 void CIOBuffer::flush()
00110 {
00111     if (working_file>=0)
00112     {
00113         if (write_file(space.begin, space.index()) != (int) space.index())
00114             SG_ERROR("Error, failed to write example!\n")
00115     }
00116     space.end = space.begin;
00117     fsync(working_file);
00118 }
00119 
00120 bool CIOBuffer::close_file()
00121 {
00122     if (working_file < 0)
00123         return false;
00124     else
00125     {
00126         int r = close(working_file);
00127         if (r < 0)
00128             SG_ERROR("Error closing the file!\n")
00129         return true;
00130     }
00131 }
00132 
00133 ssize_t CIOBuffer::readto(char* &pointer, char terminal)
00134 {
00135 //Return a pointer to the bytes before the terminal.  Must be less
00136 //than the buffer size.
00137     pointer = space.end;
00138     while (pointer != endloaded && *pointer != terminal)
00139         pointer++;
00140     if (pointer != endloaded)
00141     {
00142         size_t n = pointer - space.end;
00143         space.end = pointer+1;
00144         pointer -= n;
00145         return n;
00146     }
00147     else
00148     {
00149         if (endloaded == space.end_array)
00150         {
00151             size_t left = endloaded - space.end;
00152             memmove(space.begin, space.end, left);
00153             space.end = space.begin;
00154             endloaded = space.begin+left;
00155             pointer = endloaded;
00156         }
00157         if (fill() > 0)// more bytes are read.
00158             return readto(pointer,terminal);
00159         else //no more bytes to read, return nothing.
00160             return 0;
00161     }
00162 }
00163 
00164 void CIOBuffer::buf_write(char* &pointer, int n)
00165 {
00166     if (space.end + n <= space.end_array)
00167     {
00168         pointer = space.end;
00169         space.end += n;
00170     }
00171     else // Time to dump the file
00172     {
00173         if (space.end != space.begin)
00174             flush();
00175         else // Array is short, so increase size.
00176         {
00177             space.reserve(2 * (space.end_array - space.begin));
00178             endloaded = space.begin;
00179         }
00180         buf_write(pointer,n);
00181     }
00182 }
00183 
00184 unsigned int CIOBuffer::buf_read(char* &pointer, int n)
00185 {
00186     // Return a pointer to the next n bytes.
00187     // n must be smaller than the maximum size.
00188     if (space.end + n <= endloaded)
00189     {
00190         pointer = space.end;
00191         space.end += n;
00192         return n;
00193     }
00194     else // out of bytes, so refill.
00195     {
00196         if (space.end != space.begin) //There exists room to shift.
00197         {
00198             // Out of buffer so swap to beginning.
00199             int left = endloaded - space.end;
00200             memmove(space.begin, space.end, left);
00201             space.end = space.begin;
00202             endloaded = space.begin+left;
00203         }
00204         if (fill() > 0)
00205             return buf_read(pointer,n);// more bytes are read.
00206         else
00207         {
00208             // No more bytes to read, return all that we have left.
00209             pointer = space.end;
00210             space.end = endloaded;
00211             return endloaded - pointer;
00212         }
00213     }
00214 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation