pion
5.0.6
|
00001 // --------------------------------------------------------------------- 00002 // pion: a Boost C++ framework for building lightweight HTTP interfaces 00003 // --------------------------------------------------------------------- 00004 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion) 00005 // 00006 // Distributed under the Boost Software License, Version 1.0. 00007 // See http://www.boost.org/LICENSE_1_0.txt 00008 // 00009 00010 #include <cmath> 00011 #include <cstdlib> 00012 #include <cstdio> 00013 #include <cstring> 00014 #include <pion/algorithm.hpp> 00015 #include <boost/assert.hpp> 00016 00017 // macro to shift bitmask by a single bit 00018 #define SHIFT_BITMASK(ptr, mask) if (mask & 0x01) { mask = 0x80; ++ptr; } else mask >>= 1; 00019 00020 00021 namespace pion { // begin namespace pion 00022 00023 00024 bool algorithm::base64_decode(const std::string &input, std::string &output) 00025 { 00026 static const char nop = -1; 00027 static const char decoding_data[] = { 00028 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00029 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00030 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop, 62, nop,nop,nop, 63, 00031 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,nop,nop, nop,nop,nop,nop, 00032 nop, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 00033 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,nop, nop,nop,nop,nop, 00034 nop,26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 00035 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,nop, nop,nop,nop,nop, 00036 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00037 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00038 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00039 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00040 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00041 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00042 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, 00043 nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop, nop,nop,nop,nop 00044 }; 00045 00046 unsigned int input_length=input.size(); 00047 const char * input_ptr = input.data(); 00048 00049 // allocate space for output string 00050 output.clear(); 00051 output.reserve(((input_length+2)/3)*4); 00052 00053 // for each 4-bytes sequence from the input, extract 4 6-bits sequences by droping first two bits 00054 // and regenerate into 3 8-bits sequence 00055 00056 for (unsigned int i=0; i<input_length;i++) { 00057 char base64code0; 00058 char base64code1; 00059 char base64code2 = 0; // initialized to 0 to suppress warnings 00060 char base64code3; 00061 00062 base64code0 = decoding_data[static_cast<int>(input_ptr[i])]; 00063 if(base64code0==nop) // non base64 character 00064 return false; 00065 if(!(++i<input_length)) // we need at least two input bytes for first byte output 00066 return false; 00067 base64code1 = decoding_data[static_cast<int>(input_ptr[i])]; 00068 if(base64code1==nop) // non base64 character 00069 return false; 00070 00071 output += ((base64code0 << 2) | ((base64code1 >> 4) & 0x3)); 00072 00073 if(++i<input_length) { 00074 char c = input_ptr[i]; 00075 if(c =='=') { // padding , end of input 00076 BOOST_ASSERT( (base64code1 & 0x0f)==0); 00077 return true; 00078 } 00079 base64code2 = decoding_data[static_cast<int>(input_ptr[i])]; 00080 if(base64code2==nop) // non base64 character 00081 return false; 00082 00083 output += ((base64code1 << 4) & 0xf0) | ((base64code2 >> 2) & 0x0f); 00084 } 00085 00086 if(++i<input_length) { 00087 char c = input_ptr[i]; 00088 if(c =='=') { // padding , end of input 00089 BOOST_ASSERT( (base64code2 & 0x03)==0); 00090 return true; 00091 } 00092 base64code3 = decoding_data[static_cast<int>(input_ptr[i])]; 00093 if(base64code3==nop) // non base64 character 00094 return false; 00095 00096 output += (((base64code2 << 6) & 0xc0) | base64code3 ); 00097 } 00098 00099 } 00100 00101 return true; 00102 } 00103 00104 bool algorithm::base64_encode(const std::string &input, std::string &output) 00105 { 00106 static const char encoding_data[] = 00107 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 00108 00109 unsigned int input_length=input.size(); 00110 const char * input_ptr = input.data(); 00111 00112 // allocate space for output string 00113 output.clear(); 00114 output.reserve(((input_length+2)/3)*4); 00115 00116 // for each 3-bytes sequence from the input, extract 4 6-bits sequences and encode using 00117 // encoding_data lookup table. 00118 // if input do not contains enough chars to complete 3-byte sequence,use pad char '=' 00119 for (unsigned int i=0; i<input_length;i++) { 00120 int base64code0=0; 00121 int base64code1=0; 00122 int base64code2=0; 00123 int base64code3=0; 00124 00125 base64code0 = (input_ptr[i] >> 2) & 0x3f; // 1-byte 6 bits 00126 output += encoding_data[base64code0]; 00127 base64code1 = (input_ptr[i] << 4 ) & 0x3f; // 1-byte 2 bits + 00128 00129 if (++i < input_length) { 00130 base64code1 |= (input_ptr[i] >> 4) & 0x0f; // 2-byte 4 bits 00131 output += encoding_data[base64code1]; 00132 base64code2 = (input_ptr[i] << 2) & 0x3f; // 2-byte 4 bits + 00133 00134 if (++i < input_length) { 00135 base64code2 |= (input_ptr[i] >> 6) & 0x03; // 3-byte 2 bits 00136 base64code3 = input_ptr[i] & 0x3f; // 3-byte 6 bits 00137 output += encoding_data[base64code2]; 00138 output += encoding_data[base64code3]; 00139 } else { 00140 output += encoding_data[base64code2]; 00141 output += '='; 00142 } 00143 } else { 00144 output += encoding_data[base64code1]; 00145 output += '='; 00146 output += '='; 00147 } 00148 } 00149 00150 return true; 00151 } 00152 00153 std::string algorithm::url_decode(const std::string& str) 00154 { 00155 char decode_buf[3]; 00156 std::string result; 00157 result.reserve(str.size()); 00158 00159 for (std::string::size_type pos = 0; pos < str.size(); ++pos) { 00160 switch(str[pos]) { 00161 case '+': 00162 // convert to space character 00163 result += ' '; 00164 break; 00165 case '%': 00166 // decode hexadecimal value 00167 if (pos + 2 < str.size()) { 00168 decode_buf[0] = str[++pos]; 00169 decode_buf[1] = str[++pos]; 00170 decode_buf[2] = '\0'; 00171 00172 char decoded_char = static_cast<char>( strtol(decode_buf, 0, 16) ); 00173 00174 // decoded_char will be '\0' if strtol can't parse decode_buf as hex 00175 // (or if decode_buf == "00", which is also not valid). 00176 // In this case, recover from error by not decoding. 00177 if (decoded_char == '\0') { 00178 result += '%'; 00179 pos -= 2; 00180 } else 00181 result += decoded_char; 00182 } else { 00183 // recover from error by not decoding character 00184 result += '%'; 00185 } 00186 break; 00187 default: 00188 // character does not need to be escaped 00189 result += str[pos]; 00190 } 00191 }; 00192 00193 return result; 00194 } 00195 00196 std::string algorithm::url_encode(const std::string& str) 00197 { 00198 char encode_buf[4]; 00199 std::string result; 00200 encode_buf[0] = '%'; 00201 result.reserve(str.size()); 00202 00203 // character selection for this algorithm is based on the following url: 00204 // http://www.blooberry.com/indexdot/html/topics/urlencoding.htm 00205 00206 for (std::string::size_type pos = 0; pos < str.size(); ++pos) { 00207 switch(str[pos]) { 00208 default: 00209 if (str[pos] > 32 && str[pos] < 127) { 00210 // character does not need to be escaped 00211 result += str[pos]; 00212 break; 00213 } 00214 // else pass through to next case 00215 case ' ': 00216 case '$': case '&': case '+': case ',': case '/': case ':': 00217 case ';': case '=': case '?': case '@': case '"': case '<': 00218 case '>': case '#': case '%': case '{': case '}': case '|': 00219 case '\\': case '^': case '~': case '[': case ']': case '`': 00220 // the character needs to be encoded 00221 sprintf(encode_buf+1, "%.2X", (unsigned char)(str[pos])); 00222 result += encode_buf; 00223 break; 00224 } 00225 }; 00226 00227 return result; 00228 } 00229 00230 // TODO 00231 //std::string algorithm::xml_decode(const std::string& str) 00232 //{ 00233 //} 00234 00235 std::string algorithm::xml_encode(const std::string& str) 00236 { 00237 std::string result; 00238 result.reserve(str.size() + 20); // Assume ~5 characters converted (length increases) 00239 const unsigned char *ptr = reinterpret_cast<const unsigned char*>(str.c_str()); 00240 const unsigned char *end_ptr = ptr + str.size(); 00241 while (ptr < end_ptr) { 00242 // check byte ranges for valid UTF-8 00243 // see http://en.wikipedia.org/wiki/UTF-8 00244 // also, see http://www.w3.org/TR/REC-xml/#charsets 00245 // this implementation is the strictest subset of both 00246 if ((*ptr >= 0x20 && *ptr <= 0x7F) || *ptr == 0x9 || *ptr == 0xa || *ptr == 0xd) { 00247 // regular ASCII character 00248 switch(*ptr) { 00249 // Escape special XML characters. 00250 case '&': 00251 result += "&"; 00252 break; 00253 case '<': 00254 result += "<"; 00255 break; 00256 case '>': 00257 result += ">"; 00258 break; 00259 case '\"': 00260 result += """; 00261 break; 00262 case '\'': 00263 result += "'"; 00264 break; 00265 default: 00266 result += *ptr; 00267 } 00268 } else if (*ptr >= 0xC2 && *ptr <= 0xDF) { 00269 // two-byte sequence 00270 if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF) { 00271 result += *ptr; 00272 result += *(++ptr); 00273 } else { 00274 // insert replacement char 00275 result += 0xef; 00276 result += 0xbf; 00277 result += 0xbd; 00278 } 00279 } else if (*ptr >= 0xE0 && *ptr <= 0xEF) { 00280 // three-byte sequence 00281 if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF 00282 && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF) { 00283 result += *ptr; 00284 result += *(++ptr); 00285 result += *(++ptr); 00286 } else { 00287 // insert replacement char 00288 result += 0xef; 00289 result += 0xbf; 00290 result += 0xbd; 00291 } 00292 } else if (*ptr >= 0xF0 && *ptr <= 0xF4) { 00293 // four-byte sequence 00294 if (*(ptr+1) >= 0x80 && *(ptr+1) <= 0xBF 00295 && *(ptr+2) >= 0x80 && *(ptr+2) <= 0xBF 00296 && *(ptr+3) >= 0x80 && *(ptr+3) <= 0xBF) { 00297 result += *ptr; 00298 result += *(++ptr); 00299 result += *(++ptr); 00300 result += *(++ptr); 00301 } else { 00302 // insert replacement char 00303 result += 0xef; 00304 result += 0xbf; 00305 result += 0xbd; 00306 } 00307 } else { 00308 // insert replacement char 00309 result += 0xef; 00310 result += 0xbf; 00311 result += 0xbd; 00312 } 00313 ++ptr; 00314 } 00315 00316 return result; 00317 } 00318 00319 void algorithm::float_from_bytes(long double& value, const unsigned char *ptr, size_t num_exp_bits, size_t num_fraction_bits) 00320 { 00321 // get sign of the number from the first bit 00322 const int value_sign = (*ptr & 0x80) ? -1 : 1; 00323 00324 // build exponent value from bitstream 00325 unsigned char mask = 0x80; 00326 boost::int16_t exponent = 0; 00327 for (size_t n = 0; n < num_exp_bits; ++n) { 00328 SHIFT_BITMASK(ptr, mask); 00329 exponent *= 2; 00330 if (*ptr & mask) 00331 exponent += 1; 00332 } 00333 00334 // build significand from bitstream 00335 long double significand = exponent ? 1.0 : 0.0; 00336 long double significand_value = 1.0; 00337 while (num_fraction_bits) { 00338 SHIFT_BITMASK(ptr, mask); 00339 significand_value /= 2; 00340 if (*ptr & mask) 00341 significand += significand_value; 00342 --num_fraction_bits; 00343 } 00344 00345 // calculate final value 00346 exponent -= (::pow((long double)2, (int)(num_exp_bits - 1)) - 1); 00347 value = value_sign * significand * ::pow((long double)2, exponent); 00348 } 00349 00350 void algorithm::float_to_bytes(long double value, unsigned char *buf, size_t num_exp_bits, size_t num_fraction_bits) 00351 { 00352 // first initialize output buffer to zeros 00353 unsigned char *ptr = buf; 00354 memset(ptr, 0x00, ::ceil(static_cast<float>(num_exp_bits + num_fraction_bits + 1) / 8)); 00355 00356 // initialize first byte starting with sign of number 00357 if (value < 0) { 00358 *ptr = 0x80; 00359 value *= -1; 00360 } 00361 00362 // break down numbers >= 1.0 by incrementing the exponent & dividing by 2 00363 boost::int16_t exponent = 0; 00364 while (value >= 1) { 00365 value /= 2; 00366 ++exponent; 00367 } 00368 00369 // skip past exponent bits because we don't know the value yet 00370 unsigned char mask = 0x40; 00371 for (size_t n = num_exp_bits; n > 0; --n) { 00372 if (n >= 8) { 00373 ++ptr; 00374 n -= 7; 00375 } else { 00376 SHIFT_BITMASK(ptr, mask); 00377 } 00378 } 00379 00380 // serialize fractional value < 1.0 00381 bool got_exponent = false; 00382 boost::uint16_t num_bits = 0; 00383 while (value && num_bits < num_fraction_bits) { 00384 value *= 2; 00385 if (got_exponent) { 00386 if (value >= 1.0) { 00387 *ptr |= mask; 00388 value -= 1.0; 00389 } 00390 SHIFT_BITMASK(ptr, mask); 00391 ++num_bits; 00392 } else { 00393 --exponent; 00394 if (value >= 1.0) { 00395 value -= 1.0; 00396 got_exponent = true; 00397 } 00398 } 00399 } 00400 00401 // normalize exponent. 00402 // note: we should have a zero exponent if value == 0 00403 boost::int32_t high_bit = ::pow((long double)2, (int)(num_exp_bits - 1)); 00404 if (got_exponent) 00405 exponent += (high_bit - 1); 00406 else 00407 exponent = 0; 00408 00409 // serialize exponent bits 00410 ptr = buf; 00411 mask = 0x80; 00412 for (size_t n = 0; n < num_exp_bits; ++n) { 00413 SHIFT_BITMASK(ptr, mask); 00414 if (exponent >= high_bit) { 00415 *ptr |= mask; 00416 exponent -= high_bit; 00417 } 00418 high_bit /= 2; 00419 } 00420 } 00421 00422 } // end namespace pion