SHOGUN
v3.2.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Berlin Institute of Technology 00009 */ 00010 #include <shogun/lib/Compressor.h> 00011 #include <shogun/lib/SGVector.h> 00012 #include <shogun/mathematics/Math.h> 00013 #include <string.h> 00014 00015 #ifdef USE_LZO 00016 #include <lzo/lzoconf.h> 00017 #include <lzo/lzoutil.h> 00018 #include <lzo/lzo1x.h> 00019 #endif 00020 00021 #ifdef USE_GZIP 00022 #include <zlib.h> 00023 #endif 00024 00025 #ifdef USE_BZIP2 00026 #include <bzlib.h> 00027 #endif 00028 00029 #ifdef USE_LZMA 00030 #include <lzma.h> 00031 #endif 00032 00033 #ifdef USE_SNAPPY 00034 #include <snappy.h> 00035 #endif 00036 00037 using namespace shogun; 00038 00039 CCompressor::CCompressor() 00040 :CSGObject(), compression_type(UNCOMPRESSED) 00041 { 00042 SG_UNSTABLE("CCompressor::CCompressor()", "\n") 00043 } 00044 00045 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size, 00046 uint8_t* &compressed, uint64_t &compressed_size, int32_t level) 00047 { 00048 uint64_t initial_buffer_size=0; 00049 00050 if (uncompressed_size==0) 00051 { 00052 compressed=NULL; 00053 compressed_size=0; 00054 return; 00055 } 00056 00057 switch (compression_type) 00058 { 00059 case UNCOMPRESSED: 00060 { 00061 initial_buffer_size=uncompressed_size; 00062 compressed_size=uncompressed_size; 00063 compressed=SG_MALLOC(uint8_t, compressed_size); 00064 memcpy(compressed, uncompressed, uncompressed_size); 00065 break; 00066 } 00067 #ifdef USE_LZO 00068 case LZO: 00069 { 00070 if (lzo_init() != LZO_E_OK) 00071 SG_ERROR("Error initializing LZO Compression\n") 00072 00073 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS); 00074 if (!lzo_wrkmem) 00075 SG_ERROR("Error allocating LZO workmem\n") 00076 00077 initial_buffer_size=uncompressed_size + 00078 uncompressed_size / 16+ 64 + 3; 00079 00080 compressed_size=initial_buffer_size; 00081 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00082 00083 lzo_uint lzo_size=compressed_size; 00084 00085 int ret; 00086 if (level<9) 00087 { 00088 ret=lzo1x_1_15_compress(uncompressed, uncompressed_size, 00089 compressed, &lzo_size, lzo_wrkmem); 00090 } 00091 else 00092 { 00093 ret=lzo1x_999_compress(uncompressed, uncompressed_size, 00094 compressed, &lzo_size, lzo_wrkmem); 00095 } 00096 00097 compressed_size=lzo_size; 00098 lzo_free(lzo_wrkmem); 00099 00100 if (ret!= LZO_E_OK) 00101 SG_ERROR("Error lzo-compressing data\n") 00102 00103 break; 00104 } 00105 #endif 00106 #ifdef USE_GZIP 00107 case GZIP: 00108 { 00109 initial_buffer_size=1.001*uncompressed_size + 12; 00110 compressed_size=initial_buffer_size; 00111 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00112 uLongf gz_size=compressed_size; 00113 00114 if (compress2(compressed, &gz_size, uncompressed, 00115 uncompressed_size, level) != Z_OK) 00116 { 00117 SG_ERROR("Error gzip-compressing data\n") 00118 } 00119 compressed_size=gz_size; 00120 break; 00121 } 00122 #endif 00123 #ifdef USE_BZIP2 00124 case BZIP2: 00125 { 00126 bz_stream strm; 00127 strm.bzalloc=NULL; 00128 strm.bzfree=NULL; 00129 strm.opaque=NULL; 00130 initial_buffer_size=1.01*uncompressed_size + 600; 00131 compressed_size=initial_buffer_size; 00132 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00133 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK) 00134 SG_ERROR("Error initializing bzip2 compressor\n") 00135 00136 strm.next_in=(char*) uncompressed; 00137 strm.avail_in=(unsigned int) uncompressed_size; 00138 strm.next_out=(char*) compressed; 00139 strm.avail_out=(unsigned int) compressed_size; 00140 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK) 00141 SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n") 00142 00143 int ret=0; 00144 while (true) 00145 { 00146 ret=BZ2_bzCompress(&strm, BZ_FINISH); 00147 if (ret==BZ_FINISH_OK) 00148 continue; 00149 if (ret==BZ_STREAM_END) 00150 break; 00151 else 00152 SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n") 00153 } 00154 BZ2_bzCompressEnd(&strm); 00155 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32; 00156 break; 00157 } 00158 #endif 00159 #ifdef USE_LZMA 00160 case LZMA: 00161 { 00162 lzma_stream strm = LZMA_STREAM_INIT; 00163 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size); 00164 compressed_size=initial_buffer_size; 00165 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00166 strm.next_in=uncompressed; 00167 strm.avail_in=(size_t) uncompressed_size; 00168 strm.next_out=compressed; 00169 strm.avail_out=(size_t) compressed_size; 00170 00171 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK) 00172 SG_ERROR("Error initializing lzma compressor\n") 00173 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK) 00174 SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n") 00175 00176 lzma_ret ret; 00177 while (true) 00178 { 00179 ret=lzma_code(&strm, LZMA_FINISH); 00180 if (ret==LZMA_OK) 00181 continue; 00182 if (ret==LZMA_STREAM_END) 00183 break; 00184 else 00185 SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n") 00186 } 00187 lzma_end(&strm); 00188 compressed_size=strm.total_out; 00189 break; 00190 } 00191 #endif 00192 #ifdef USE_SNAPPY 00193 case SNAPPY: 00194 { 00195 compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size)); 00196 size_t output_length; 00197 snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length); 00198 compressed_size=(uint64_t) output_length; 00199 break; 00200 } 00201 #endif 00202 default: 00203 SG_ERROR("Unknown compression type\n") 00204 } 00205 00206 if (compressed) 00207 compressed = SG_REALLOC(uint8_t, compressed, initial_buffer_size, compressed_size); 00208 } 00209 00210 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size, 00211 uint8_t* uncompressed, uint64_t& uncompressed_size) 00212 { 00213 if (compressed_size==0) 00214 { 00215 uncompressed_size=0; 00216 return; 00217 } 00218 00219 switch (compression_type) 00220 { 00221 case UNCOMPRESSED: 00222 { 00223 ASSERT(uncompressed_size>=compressed_size) 00224 uncompressed_size=compressed_size; 00225 memcpy(uncompressed, compressed, uncompressed_size); 00226 break; 00227 } 00228 #ifdef USE_LZO 00229 case LZO: 00230 { 00231 if (lzo_init() != LZO_E_OK) 00232 SG_ERROR("Error initializing LZO Compression\n") 00233 00234 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS); 00235 if (!lzo_wrkmem) 00236 SG_ERROR("Error allocating LZO workmem\n") 00237 00238 lzo_uint lzo_size=uncompressed_size; 00239 if (lzo1x_decompress(compressed, compressed_size, uncompressed, 00240 &lzo_size, NULL) != LZO_E_OK) 00241 { 00242 SG_ERROR("Error uncompressing lzo-data\n") 00243 } 00244 uncompressed_size=lzo_size; 00245 00246 lzo_free(lzo_wrkmem); 00247 break; 00248 } 00249 #endif 00250 #ifdef USE_GZIP 00251 case GZIP: 00252 { 00253 uLongf gz_size=uncompressed_size; 00254 if (uncompress(uncompressed, &gz_size, compressed, 00255 compressed_size) != Z_OK) 00256 { 00257 SG_ERROR("Error uncompressing gzip-data\n") 00258 } 00259 uncompressed_size=gz_size; 00260 break; 00261 } 00262 #endif 00263 #ifdef USE_BZIP2 00264 case BZIP2: 00265 { 00266 bz_stream strm; 00267 strm.bzalloc=NULL; 00268 strm.bzfree=NULL; 00269 strm.opaque=NULL; 00270 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK) 00271 SG_ERROR("Error initializing bzip2 decompressor\n") 00272 strm.next_in=(char*) compressed; 00273 strm.avail_in=(unsigned int) compressed_size; 00274 strm.next_out=(char*) uncompressed; 00275 strm.avail_out=(unsigned int) uncompressed_size; 00276 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0) 00277 SG_ERROR("Error uncompressing bzip2-data\n") 00278 BZ2_bzDecompressEnd(&strm); 00279 break; 00280 } 00281 #endif 00282 #ifdef USE_LZMA 00283 case LZMA: 00284 { 00285 lzma_stream strm = LZMA_STREAM_INIT; 00286 strm.next_in=compressed; 00287 strm.avail_in=(size_t) compressed_size; 00288 strm.next_out=uncompressed; 00289 strm.avail_out=(size_t) uncompressed_size; 00290 00291 uint64_t memory_limit=lzma_easy_decoder_memusage(9); 00292 00293 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK) 00294 SG_ERROR("Error initializing lzma decompressor\n") 00295 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END) 00296 SG_ERROR("Error decompressing lzma data\n") 00297 lzma_end(&strm); 00298 break; 00299 } 00300 #endif 00301 #ifdef USE_SNAPPY 00302 case SNAPPY: 00303 { 00304 size_t uncompressed_length; 00305 if (!snappy::GetUncompressedLength( (char*) compressed, 00306 (size_t) compressed_size, &uncompressed_length)) 00307 SG_ERROR("Error obtaining uncompressed length\n") 00308 00309 ASSERT(uncompressed_length<=uncompressed_size) 00310 uncompressed_size=uncompressed_length; 00311 if (!snappy::RawUncompress((char*) compressed, 00312 (size_t) compressed_size, 00313 (char*) uncompressed)) 00314 SG_ERROR("Error uncompressing snappy data\n") 00315 00316 break; 00317 } 00318 #endif 00319 default: 00320 SG_ERROR("Unknown compression type\n") 00321 } 00322 }