SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Compressor.cpp
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Berlin Institute of Technology
00009  */
00010 #include <shogun/lib/Compressor.h>
00011 #include <shogun/lib/SGVector.h>
00012 #include <shogun/mathematics/Math.h>
00013 #include <string.h>
00014 
00015 #ifdef USE_LZO
00016 #include <lzo/lzoconf.h>
00017 #include <lzo/lzoutil.h>
00018 #include <lzo/lzo1x.h>
00019 #endif
00020 
00021 #ifdef USE_GZIP
00022 #include <zlib.h>
00023 #endif
00024 
00025 #ifdef USE_BZIP2
00026 #include <bzlib.h>
00027 #endif
00028 
00029 #ifdef USE_LZMA
00030 #include <lzma.h>
00031 #endif
00032 
00033 #ifdef USE_SNAPPY
00034 #include <snappy.h>
00035 #endif
00036 
00037 using namespace shogun;
00038 
00039 CCompressor::CCompressor()
00040     :CSGObject(), compression_type(UNCOMPRESSED)
00041 {
00042     SG_UNSTABLE("CCompressor::CCompressor()", "\n")
00043 }
00044 
00045 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
00046         uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
00047 {
00048     uint64_t initial_buffer_size=0;
00049 
00050     if (uncompressed_size==0)
00051     {
00052         compressed=NULL;
00053         compressed_size=0;
00054         return;
00055     }
00056 
00057     switch (compression_type)
00058     {
00059         case UNCOMPRESSED:
00060             {
00061                 initial_buffer_size=uncompressed_size;
00062                 compressed_size=uncompressed_size;
00063                 compressed=SG_MALLOC(uint8_t, compressed_size);
00064                 memcpy(compressed, uncompressed, uncompressed_size);
00065                 break;
00066             }
00067 #ifdef USE_LZO
00068         case LZO:
00069             {
00070                 if (lzo_init() != LZO_E_OK)
00071                     SG_ERROR("Error initializing LZO Compression\n")
00072 
00073                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00074                 if (!lzo_wrkmem)
00075                     SG_ERROR("Error allocating LZO workmem\n")
00076 
00077                 initial_buffer_size=uncompressed_size +
00078                     uncompressed_size / 16+ 64 + 3;
00079 
00080                 compressed_size=initial_buffer_size;
00081                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00082 
00083                 lzo_uint lzo_size=compressed_size;
00084 
00085                 int ret;
00086                 if (level<9)
00087                 {
00088                     ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
00089                                 compressed, &lzo_size, lzo_wrkmem);
00090                 }
00091                 else
00092                 {
00093                     ret=lzo1x_999_compress(uncompressed, uncompressed_size,
00094                                 compressed, &lzo_size, lzo_wrkmem);
00095                 }
00096 
00097                 compressed_size=lzo_size;
00098                 lzo_free(lzo_wrkmem);
00099 
00100                 if (ret!= LZO_E_OK)
00101                     SG_ERROR("Error lzo-compressing data\n")
00102 
00103                 break;
00104             }
00105 #endif
00106 #ifdef USE_GZIP
00107         case GZIP:
00108             {
00109                 initial_buffer_size=1.001*uncompressed_size + 12;
00110                 compressed_size=initial_buffer_size;
00111                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00112                 uLongf gz_size=compressed_size;
00113 
00114                 if (compress2(compressed, &gz_size, uncompressed,
00115                             uncompressed_size, level) != Z_OK)
00116                 {
00117                     SG_ERROR("Error gzip-compressing data\n")
00118                 }
00119                 compressed_size=gz_size;
00120                 break;
00121             }
00122 #endif
00123 #ifdef USE_BZIP2
00124         case BZIP2:
00125             {
00126                 bz_stream strm;
00127                 strm.bzalloc=NULL;
00128                 strm.bzfree=NULL;
00129                 strm.opaque=NULL;
00130                 initial_buffer_size=1.01*uncompressed_size + 600;
00131                 compressed_size=initial_buffer_size;
00132                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00133                 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
00134                     SG_ERROR("Error initializing bzip2 compressor\n")
00135 
00136                 strm.next_in=(char*) uncompressed;
00137                 strm.avail_in=(unsigned int) uncompressed_size;
00138                 strm.next_out=(char*) compressed;
00139                 strm.avail_out=(unsigned int) compressed_size;
00140                 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
00141                     SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n")
00142 
00143                 int ret=0;
00144                 while (true)
00145                 {
00146                     ret=BZ2_bzCompress(&strm, BZ_FINISH);
00147                     if (ret==BZ_FINISH_OK)
00148                         continue;
00149                     if (ret==BZ_STREAM_END)
00150                         break;
00151                     else
00152                         SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n")
00153                 }
00154                 BZ2_bzCompressEnd(&strm);
00155                 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
00156                 break;
00157             }
00158 #endif
00159 #ifdef USE_LZMA
00160         case LZMA:
00161             {
00162                 lzma_stream strm = LZMA_STREAM_INIT;
00163                 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
00164                 compressed_size=initial_buffer_size;
00165                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00166                 strm.next_in=uncompressed;
00167                 strm.avail_in=(size_t) uncompressed_size;
00168                 strm.next_out=compressed;
00169                 strm.avail_out=(size_t) compressed_size;
00170 
00171                 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
00172                     SG_ERROR("Error initializing lzma compressor\n")
00173                 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
00174                     SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n")
00175 
00176                 lzma_ret ret;
00177                 while (true)
00178                 {
00179                     ret=lzma_code(&strm, LZMA_FINISH);
00180                     if (ret==LZMA_OK)
00181                         continue;
00182                     if (ret==LZMA_STREAM_END)
00183                         break;
00184                     else
00185                         SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n")
00186                 }
00187                 lzma_end(&strm);
00188                 compressed_size=strm.total_out;
00189                 break;
00190             }
00191 #endif
00192 #ifdef USE_SNAPPY
00193         case SNAPPY:
00194             {
00195                 compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size));
00196                 size_t output_length;
00197                 snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length);
00198                 compressed_size=(uint64_t) output_length;
00199                 break;
00200             }
00201 #endif
00202         default:
00203             SG_ERROR("Unknown compression type\n")
00204     }
00205 
00206     if (compressed)
00207         compressed = SG_REALLOC(uint8_t, compressed, initial_buffer_size, compressed_size);
00208 }
00209 
00210 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
00211         uint8_t* uncompressed, uint64_t& uncompressed_size)
00212 {
00213     if (compressed_size==0)
00214     {
00215         uncompressed_size=0;
00216         return;
00217     }
00218 
00219     switch (compression_type)
00220     {
00221         case UNCOMPRESSED:
00222             {
00223                 ASSERT(uncompressed_size>=compressed_size)
00224                 uncompressed_size=compressed_size;
00225                 memcpy(uncompressed, compressed, uncompressed_size);
00226                 break;
00227             }
00228 #ifdef USE_LZO
00229         case LZO:
00230             {
00231                 if (lzo_init() != LZO_E_OK)
00232                     SG_ERROR("Error initializing LZO Compression\n")
00233 
00234                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00235                 if (!lzo_wrkmem)
00236                     SG_ERROR("Error allocating LZO workmem\n")
00237 
00238                 lzo_uint lzo_size=uncompressed_size;
00239                 if (lzo1x_decompress(compressed, compressed_size, uncompressed,
00240                             &lzo_size, NULL) != LZO_E_OK)
00241                 {
00242                     SG_ERROR("Error uncompressing lzo-data\n")
00243                 }
00244                 uncompressed_size=lzo_size;
00245 
00246                 lzo_free(lzo_wrkmem);
00247                 break;
00248             }
00249 #endif
00250 #ifdef USE_GZIP
00251         case GZIP:
00252             {
00253                 uLongf gz_size=uncompressed_size;
00254                 if (uncompress(uncompressed, &gz_size, compressed,
00255                             compressed_size) != Z_OK)
00256                 {
00257                     SG_ERROR("Error uncompressing gzip-data\n")
00258                 }
00259                 uncompressed_size=gz_size;
00260                 break;
00261             }
00262 #endif
00263 #ifdef USE_BZIP2
00264         case BZIP2:
00265             {
00266                 bz_stream strm;
00267                 strm.bzalloc=NULL;
00268                 strm.bzfree=NULL;
00269                 strm.opaque=NULL;
00270                 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
00271                     SG_ERROR("Error initializing bzip2 decompressor\n")
00272                 strm.next_in=(char*) compressed;
00273                 strm.avail_in=(unsigned int) compressed_size;
00274                 strm.next_out=(char*) uncompressed;
00275                 strm.avail_out=(unsigned int) uncompressed_size;
00276                 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
00277                     SG_ERROR("Error uncompressing bzip2-data\n")
00278                 BZ2_bzDecompressEnd(&strm);
00279                 break;
00280             }
00281 #endif
00282 #ifdef USE_LZMA
00283         case LZMA:
00284             {
00285                 lzma_stream strm = LZMA_STREAM_INIT;
00286                 strm.next_in=compressed;
00287                 strm.avail_in=(size_t) compressed_size;
00288                 strm.next_out=uncompressed;
00289                 strm.avail_out=(size_t) uncompressed_size;
00290 
00291                 uint64_t memory_limit=lzma_easy_decoder_memusage(9);
00292 
00293                 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
00294                     SG_ERROR("Error initializing lzma decompressor\n")
00295                 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
00296                     SG_ERROR("Error decompressing lzma data\n")
00297                 lzma_end(&strm);
00298                 break;
00299             }
00300 #endif
00301 #ifdef USE_SNAPPY
00302         case SNAPPY:
00303             {
00304                 size_t uncompressed_length;
00305                 if (!snappy::GetUncompressedLength( (char*) compressed,
00306                         (size_t) compressed_size, &uncompressed_length))
00307                     SG_ERROR("Error obtaining uncompressed length\n")
00308 
00309                 ASSERT(uncompressed_length<=uncompressed_size)
00310                 uncompressed_size=uncompressed_length;
00311                 if (!snappy::RawUncompress((char*) compressed,
00312                             (size_t) compressed_size,
00313                             (char*) uncompressed))
00314                     SG_ERROR("Error uncompressing snappy data\n")
00315 
00316                 break;
00317             }
00318 #endif
00319         default:
00320             SG_ERROR("Unknown compression type\n")
00321     }
00322 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation