SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
MemoryMappedFile.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #ifndef __MEMORYMAPPEDFILE_H__
00012 #define __MEMORYMAPPEDFILE_H__
00013 
00014 #include <shogun/io/SGIO.h>
00015 #include <shogun/base/SGObject.h>
00016 
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <sys/mman.h>
00020 #include <sys/stat.h>
00021 #include <sys/types.h>
00022 #include <fcntl.h>
00023 #include <unistd.h>
00024 
00025 namespace shogun
00026 {
00031 template <class T> class CMemoryMappedFile : public CSGObject
00032 {
00033     public:
00035         CMemoryMappedFile() :CSGObject()
00036         {
00037             SG_UNSTABLE("CMemoryMappedFile::CMemoryMappedFile()",
00038                         "\n");
00039 
00040             fd = 0;
00041             length = 0;
00042             address = NULL;
00043             rw = 'r';
00044             last_written_byte = 0;
00045 
00046             set_generic<T>();
00047         }
00048 
00062         CMemoryMappedFile(const char* fname, char flag='r', int64_t fsize=0)
00063         : CSGObject()
00064         {
00065             REQUIRE(flag=='w' || flag=='r', "Only 'r' and 'w' flags are allowed")
00066 
00067             last_written_byte=0;
00068             rw=flag;
00069 
00070             int open_flags=O_RDONLY;
00071             int mmap_prot=PROT_READ;
00072             int mmap_flags=MAP_PRIVATE;
00073 
00074             if (rw=='w')
00075             {
00076                 open_flags=O_RDWR | O_CREAT;
00077                 mmap_prot=PROT_READ|PROT_WRITE;
00078                 mmap_flags=MAP_SHARED;
00079             }
00080 
00081             fd = open(fname, open_flags, S_IRWXU | S_IRWXG | S_IRWXO);
00082             if (fd == -1)
00083                 SG_ERROR("Error opening file\n")
00084 
00085             if (rw=='w' && fsize)
00086             {
00087                 uint8_t byte=0;
00088                 if (lseek(fd, fsize, SEEK_SET) != fsize || write(fd, &byte, 1) != 1)
00089                     SG_ERROR("Error creating file of size %ld bytes\n", fsize)
00090             }
00091 
00092             struct stat sb;
00093             if (fstat(fd, &sb) == -1)
00094                 SG_ERROR("Error determining file size\n")
00095 
00096             length = sb.st_size;
00097             address = mmap(NULL, length, mmap_prot, mmap_flags, fd, 0);
00098             if (address == MAP_FAILED)
00099                 SG_ERROR("Error mapping file")
00100 
00101                 set_generic<T>();
00102         }
00103 
00105         virtual ~CMemoryMappedFile()
00106         {
00107             munmap(address, length);
00108             if (rw=='w' && last_written_byte && ftruncate(fd, last_written_byte) == -1)
00109 
00110             {
00111                 close(fd);
00112                 SG_ERROR("Error Truncating file to %ld bytes\n", last_written_byte)
00113             }
00114             close(fd);
00115         }
00116 
00126         inline T* get_map()
00127         {
00128             return (T*) address;
00129         }
00130 
00135         uint64_t get_length()
00136         {
00137             return length/sizeof(T);
00138         }
00139 
00144         uint64_t get_size()
00145         {
00146             return length;
00147         }
00148 
00160         char* get_line(uint64_t& len, uint64_t& offs)
00161         {
00162             char* s = (char*) address;
00163             for (uint64_t i=offs; i<length; i++)
00164             {
00165                 if (s[i] == '\n')
00166                 {
00167                     char* line=&s[offs];
00168                     len=i-offs;
00169                     offs=i+1;
00170                     return line;
00171                 }
00172             }
00173 
00174             len=0;
00175             offs=length;
00176             return NULL;
00177         }
00178 
00189         void write_line(const char* line, uint64_t len, uint64_t& offs)
00190         {
00191             char* s = ((char*) address) + offs;
00192             if (len+1+offs > length)
00193                 SG_ERROR("Writing beyond size of file\n")
00194 
00195             for (uint64_t i=0; i<len; i++)
00196                 s[i] = line[i];
00197 
00198             s[len]='\n';
00199             offs+=length+1;
00200             last_written_byte=offs-1;
00201         }
00202 
00214         inline void set_truncate_size(uint64_t sz=0)
00215         {
00216             last_written_byte=sz;
00217         }
00218 
00223         int32_t get_num_lines()
00224         {
00225             char* s = (char*) address;
00226             int32_t linecount=0;
00227             for (uint64_t i=0; i<length; i++)
00228             {
00229                 if (s[i] == '\n')
00230                     linecount++;
00231             }
00232 
00233             return linecount;
00234         }
00235 
00243         inline T operator[](uint64_t index) const
00244         {
00245           return ((T*)address)[index];
00246         }
00247 
00255         inline T operator[](int32_t index) const
00256         {
00257           return ((T*)address)[index];
00258         }
00259 
00261         virtual const char* get_name() const { return "MemoryMappedFile"; }
00262 
00263     protected:
00265         int fd;
00267         uint64_t length;
00269         void* address;
00271         char rw;
00272 
00274         uint64_t last_written_byte;
00275 };
00276 }
00277 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation