pion
5.0.6
|
00001 // --------------------------------------------------------------------- 00002 // pion: a Boost C++ framework for building lightweight HTTP interfaces 00003 // --------------------------------------------------------------------- 00004 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion) 00005 // 00006 // Distributed under the Boost Software License, Version 1.0. 00007 // See http://www.boost.org/LICENSE_1_0.txt 00008 // 00009 00010 #include <cstdlib> 00011 #include <cstring> 00012 #include <boost/regex.hpp> 00013 #include <boost/assert.hpp> 00014 #include <boost/logic/tribool.hpp> 00015 #include <boost/algorithm/string.hpp> 00016 #include <pion/algorithm.hpp> 00017 #include <pion/http/parser.hpp> 00018 #include <pion/http/request.hpp> 00019 #include <pion/http/response.hpp> 00020 #include <pion/http/message.hpp> 00021 00022 00023 namespace pion { // begin namespace pion 00024 namespace http { // begin namespace http 00025 00026 00027 // static members of parser 00028 00029 const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024; // 1 KB 00030 const boost::uint32_t parser::METHOD_MAX = 1024; // 1 KB 00031 const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024; // 256 KB 00032 const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB 00033 const boost::uint32_t parser::HEADER_NAME_MAX = 1024; // 1 KB 00034 const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB 00035 const boost::uint32_t parser::QUERY_NAME_MAX = 1024; // 1 KB 00036 const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024; // 1 MB 00037 const boost::uint32_t parser::COOKIE_NAME_MAX = 1024; // 1 KB 00038 const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB 00039 const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024; // 1 MB 00040 parser::error_category_t * parser::m_error_category_ptr = NULL; 00041 boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT; 00042 00043 00044 // parser member functions 00045 00046 boost::tribool parser::parse(http::message& http_msg, 00047 boost::system::error_code& ec) 00048 { 00049 BOOST_ASSERT(! eof() ); 00050 00051 boost::tribool rc = boost::indeterminate; 00052 std::size_t total_bytes_parsed = 0; 00053 00054 if(http_msg.has_missing_packets()) { 00055 http_msg.set_data_after_missing_packet(true); 00056 } 00057 00058 do { 00059 switch (m_message_parse_state) { 00060 // just started parsing the HTTP message 00061 case PARSE_START: 00062 m_message_parse_state = PARSE_HEADERS; 00063 // step through to PARSE_HEADERS 00064 00065 // parsing the HTTP headers 00066 case PARSE_HEADERS: 00067 case PARSE_FOOTERS: 00068 rc = parse_headers(http_msg, ec); 00069 total_bytes_parsed += m_bytes_last_read; 00070 // check if we have finished parsing HTTP headers 00071 if (rc == true && m_message_parse_state == PARSE_HEADERS) { 00072 // finish_header_parsing() updates m_message_parse_state 00073 // We only call this for Headers and not Footers 00074 rc = finish_header_parsing(http_msg, ec); 00075 } 00076 break; 00077 00078 // parsing chunked payload content 00079 case PARSE_CHUNKS: 00080 rc = parse_chunks(http_msg.get_chunk_cache(), ec); 00081 total_bytes_parsed += m_bytes_last_read; 00082 // check if we have finished parsing all chunks 00083 if (rc == true && !m_payload_handler) { 00084 http_msg.concatenate_chunks(); 00085 00086 // Handle footers if present 00087 rc = ((m_message_parse_state == PARSE_FOOTERS) ? 00088 boost::indeterminate : (boost::tribool)true); 00089 } 00090 break; 00091 00092 // parsing regular payload content with a known length 00093 case PARSE_CONTENT: 00094 rc = consume_content(http_msg, ec); 00095 total_bytes_parsed += m_bytes_last_read; 00096 break; 00097 00098 // parsing payload content with no length (until EOF) 00099 case PARSE_CONTENT_NO_LENGTH: 00100 consume_content_as_next_chunk(http_msg.get_chunk_cache()); 00101 total_bytes_parsed += m_bytes_last_read; 00102 break; 00103 00104 // finished parsing the HTTP message 00105 case PARSE_END: 00106 rc = true; 00107 break; 00108 } 00109 } while ( boost::indeterminate(rc) && ! eof() ); 00110 00111 // check if we've finished parsing the HTTP message 00112 if (rc == true) { 00113 m_message_parse_state = PARSE_END; 00114 finish(http_msg); 00115 } else if(rc == false) { 00116 compute_msg_status(http_msg, false); 00117 } 00118 00119 // update bytes last read (aggregate individual operations for caller) 00120 m_bytes_last_read = total_bytes_parsed; 00121 00122 return rc; 00123 } 00124 00125 boost::tribool parser::parse_missing_data(http::message& http_msg, 00126 std::size_t len, boost::system::error_code& ec) 00127 { 00128 static const char MISSING_DATA_CHAR = 'X'; 00129 boost::tribool rc = boost::indeterminate; 00130 00131 http_msg.set_missing_packets(true); 00132 00133 switch (m_message_parse_state) { 00134 00135 // cannot recover from missing data while parsing HTTP headers 00136 case PARSE_START: 00137 case PARSE_HEADERS: 00138 case PARSE_FOOTERS: 00139 set_error(ec, ERROR_MISSING_HEADER_DATA); 00140 rc = false; 00141 break; 00142 00143 // parsing chunked payload content 00144 case PARSE_CHUNKS: 00145 // parsing chunk data -> we can only recover if data fits into current chunk 00146 if (m_chunked_content_parse_state == PARSE_CHUNK 00147 && m_bytes_read_in_current_chunk < m_size_of_current_chunk 00148 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len) 00149 { 00150 // use dummy content for missing data 00151 if (m_payload_handler) { 00152 for (std::size_t n = 0; n < len; ++n) 00153 m_payload_handler(&MISSING_DATA_CHAR, 1); 00154 } else { 00155 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n) 00156 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR); 00157 } 00158 00159 m_bytes_read_in_current_chunk += len; 00160 m_bytes_last_read = len; 00161 m_bytes_total_read += len; 00162 m_bytes_content_read += len; 00163 00164 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) { 00165 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK; 00166 } 00167 } else { 00168 // cannot recover from missing data 00169 set_error(ec, ERROR_MISSING_CHUNK_DATA); 00170 rc = false; 00171 } 00172 break; 00173 00174 // parsing regular payload content with a known length 00175 case PARSE_CONTENT: 00176 // parsing content (with length) -> we can only recover if data fits into content 00177 if (m_bytes_content_remaining == 0) { 00178 // we have all of the remaining payload content 00179 rc = true; 00180 } else if (m_bytes_content_remaining < len) { 00181 // cannot recover from missing data 00182 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT); 00183 rc = false; 00184 } else { 00185 00186 // make sure content buffer is not already full 00187 if (m_payload_handler) { 00188 for (std::size_t n = 0; n < len; ++n) 00189 m_payload_handler(&MISSING_DATA_CHAR, 1); 00190 } else if ( (m_bytes_content_read+len) <= m_max_content_length) { 00191 // use dummy content for missing data 00192 for (std::size_t n = 0; n < len; ++n) 00193 http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR; 00194 } else { 00195 m_bytes_content_read += len; 00196 } 00197 00198 m_bytes_content_remaining -= len; 00199 m_bytes_total_read += len; 00200 m_bytes_last_read = len; 00201 00202 if (m_bytes_content_remaining == 0) 00203 rc = true; 00204 } 00205 break; 00206 00207 // parsing payload content with no length (until EOF) 00208 case PARSE_CONTENT_NO_LENGTH: 00209 // use dummy content for missing data 00210 if (m_payload_handler) { 00211 for (std::size_t n = 0; n < len; ++n) 00212 m_payload_handler(&MISSING_DATA_CHAR, 1); 00213 } else { 00214 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n) 00215 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR); 00216 } 00217 m_bytes_last_read = len; 00218 m_bytes_total_read += len; 00219 m_bytes_content_read += len; 00220 break; 00221 00222 // finished parsing the HTTP message 00223 case PARSE_END: 00224 rc = true; 00225 break; 00226 } 00227 00228 // check if we've finished parsing the HTTP message 00229 if (rc == true) { 00230 m_message_parse_state = PARSE_END; 00231 finish(http_msg); 00232 } else if(rc == false) { 00233 compute_msg_status(http_msg, false); 00234 } 00235 00236 return rc; 00237 } 00238 00239 boost::tribool parser::parse_headers(http::message& http_msg, 00240 boost::system::error_code& ec) 00241 { 00242 // 00243 // note that boost::tribool may have one of THREE states: 00244 // 00245 // false: encountered an error while parsing HTTP headers 00246 // true: finished successfully parsing the HTTP headers 00247 // indeterminate: parsed bytes, but the HTTP headers are not yet finished 00248 // 00249 const char *read_start_ptr = m_read_ptr; 00250 m_bytes_last_read = 0; 00251 while (m_read_ptr < m_read_end_ptr) { 00252 00253 if (m_save_raw_headers) 00254 m_raw_headers += *m_read_ptr; 00255 00256 switch (m_headers_parse_state) { 00257 case PARSE_METHOD_START: 00258 // we have not yet started parsing the HTTP method string 00259 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace 00260 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00261 set_error(ec, ERROR_METHOD_CHAR); 00262 return false; 00263 } 00264 m_headers_parse_state = PARSE_METHOD; 00265 m_method.erase(); 00266 m_method.push_back(*m_read_ptr); 00267 } 00268 break; 00269 00270 case PARSE_METHOD: 00271 // we have started parsing the HTTP method string 00272 if (*m_read_ptr == ' ') { 00273 m_resource.erase(); 00274 m_headers_parse_state = PARSE_URI_STEM; 00275 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00276 set_error(ec, ERROR_METHOD_CHAR); 00277 return false; 00278 } else if (m_method.size() >= METHOD_MAX) { 00279 set_error(ec, ERROR_METHOD_SIZE); 00280 return false; 00281 } else { 00282 m_method.push_back(*m_read_ptr); 00283 } 00284 break; 00285 00286 case PARSE_URI_STEM: 00287 // we have started parsing the URI stem (or resource name) 00288 if (*m_read_ptr == ' ') { 00289 m_headers_parse_state = PARSE_HTTP_VERSION_H; 00290 } else if (*m_read_ptr == '?') { 00291 m_query_string.erase(); 00292 m_headers_parse_state = PARSE_URI_QUERY; 00293 } else if (*m_read_ptr == '\r') { 00294 http_msg.set_version_major(0); 00295 http_msg.set_version_minor(0); 00296 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00297 } else if (*m_read_ptr == '\n') { 00298 http_msg.set_version_major(0); 00299 http_msg.set_version_minor(0); 00300 m_headers_parse_state = PARSE_EXPECTING_CR; 00301 } else if (is_control(*m_read_ptr)) { 00302 set_error(ec, ERROR_URI_CHAR); 00303 return false; 00304 } else if (m_resource.size() >= RESOURCE_MAX) { 00305 set_error(ec, ERROR_URI_SIZE); 00306 return false; 00307 } else { 00308 m_resource.push_back(*m_read_ptr); 00309 } 00310 break; 00311 00312 case PARSE_URI_QUERY: 00313 // we have started parsing the URI query string 00314 if (*m_read_ptr == ' ') { 00315 m_headers_parse_state = PARSE_HTTP_VERSION_H; 00316 } else if (*m_read_ptr == '\r') { 00317 http_msg.set_version_major(0); 00318 http_msg.set_version_minor(0); 00319 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00320 } else if (*m_read_ptr == '\n') { 00321 http_msg.set_version_major(0); 00322 http_msg.set_version_minor(0); 00323 m_headers_parse_state = PARSE_EXPECTING_CR; 00324 } else if (is_control(*m_read_ptr)) { 00325 set_error(ec, ERROR_QUERY_CHAR); 00326 return false; 00327 } else if (m_query_string.size() >= QUERY_STRING_MAX) { 00328 set_error(ec, ERROR_QUERY_SIZE); 00329 return false; 00330 } else { 00331 m_query_string.push_back(*m_read_ptr); 00332 } 00333 break; 00334 00335 case PARSE_HTTP_VERSION_H: 00336 // parsing "HTTP" 00337 if (*m_read_ptr == '\r') { 00338 // should only happen for requests (no HTTP/VERSION specified) 00339 if (! m_is_request) { 00340 set_error(ec, ERROR_VERSION_EMPTY); 00341 return false; 00342 } 00343 http_msg.set_version_major(0); 00344 http_msg.set_version_minor(0); 00345 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00346 } else if (*m_read_ptr == '\n') { 00347 // should only happen for requests (no HTTP/VERSION specified) 00348 if (! m_is_request) { 00349 set_error(ec, ERROR_VERSION_EMPTY); 00350 return false; 00351 } 00352 http_msg.set_version_major(0); 00353 http_msg.set_version_minor(0); 00354 m_headers_parse_state = PARSE_EXPECTING_CR; 00355 } else if (*m_read_ptr != 'H') { 00356 set_error(ec, ERROR_VERSION_CHAR); 00357 return false; 00358 } 00359 m_headers_parse_state = PARSE_HTTP_VERSION_T_1; 00360 break; 00361 00362 case PARSE_HTTP_VERSION_T_1: 00363 // parsing "HTTP" 00364 if (*m_read_ptr != 'T') { 00365 set_error(ec, ERROR_VERSION_CHAR); 00366 return false; 00367 } 00368 m_headers_parse_state = PARSE_HTTP_VERSION_T_2; 00369 break; 00370 00371 case PARSE_HTTP_VERSION_T_2: 00372 // parsing "HTTP" 00373 if (*m_read_ptr != 'T') { 00374 set_error(ec, ERROR_VERSION_CHAR); 00375 return false; 00376 } 00377 m_headers_parse_state = PARSE_HTTP_VERSION_P; 00378 break; 00379 00380 case PARSE_HTTP_VERSION_P: 00381 // parsing "HTTP" 00382 if (*m_read_ptr != 'P') { 00383 set_error(ec, ERROR_VERSION_CHAR); 00384 return false; 00385 } 00386 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH; 00387 break; 00388 00389 case PARSE_HTTP_VERSION_SLASH: 00390 // parsing slash after "HTTP" 00391 if (*m_read_ptr != '/') { 00392 set_error(ec, ERROR_VERSION_CHAR); 00393 return false; 00394 } 00395 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START; 00396 break; 00397 00398 case PARSE_HTTP_VERSION_MAJOR_START: 00399 // parsing the first digit of the major version number 00400 if (!is_digit(*m_read_ptr)) { 00401 set_error(ec, ERROR_VERSION_CHAR); 00402 return false; 00403 } 00404 http_msg.set_version_major(*m_read_ptr - '0'); 00405 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR; 00406 break; 00407 00408 case PARSE_HTTP_VERSION_MAJOR: 00409 // parsing the major version number (not first digit) 00410 if (*m_read_ptr == '.') { 00411 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START; 00412 } else if (is_digit(*m_read_ptr)) { 00413 http_msg.set_version_major( (http_msg.get_version_major() * 10) 00414 + (*m_read_ptr - '0') ); 00415 } else { 00416 set_error(ec, ERROR_VERSION_CHAR); 00417 return false; 00418 } 00419 break; 00420 00421 case PARSE_HTTP_VERSION_MINOR_START: 00422 // parsing the first digit of the minor version number 00423 if (!is_digit(*m_read_ptr)) { 00424 set_error(ec, ERROR_VERSION_CHAR); 00425 return false; 00426 } 00427 http_msg.set_version_minor(*m_read_ptr - '0'); 00428 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR; 00429 break; 00430 00431 case PARSE_HTTP_VERSION_MINOR: 00432 // parsing the major version number (not first digit) 00433 if (*m_read_ptr == ' ') { 00434 // ignore trailing spaces after version in request 00435 if (! m_is_request) { 00436 m_headers_parse_state = PARSE_STATUS_CODE_START; 00437 } 00438 } else if (*m_read_ptr == '\r') { 00439 // should only happen for requests 00440 if (! m_is_request) { 00441 set_error(ec, ERROR_STATUS_EMPTY); 00442 return false; 00443 } 00444 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00445 } else if (*m_read_ptr == '\n') { 00446 // should only happen for requests 00447 if (! m_is_request) { 00448 set_error(ec, ERROR_STATUS_EMPTY); 00449 return false; 00450 } 00451 m_headers_parse_state = PARSE_EXPECTING_CR; 00452 } else if (is_digit(*m_read_ptr)) { 00453 http_msg.set_version_minor( (http_msg.get_version_minor() * 10) 00454 + (*m_read_ptr - '0') ); 00455 } else { 00456 set_error(ec, ERROR_VERSION_CHAR); 00457 return false; 00458 } 00459 break; 00460 00461 case PARSE_STATUS_CODE_START: 00462 // parsing the first digit of the response status code 00463 if (!is_digit(*m_read_ptr)) { 00464 set_error(ec, ERROR_STATUS_CHAR); 00465 return false; 00466 } 00467 m_status_code = (*m_read_ptr - '0'); 00468 m_headers_parse_state = PARSE_STATUS_CODE; 00469 break; 00470 00471 case PARSE_STATUS_CODE: 00472 // parsing the response status code (not first digit) 00473 if (*m_read_ptr == ' ') { 00474 m_status_message.erase(); 00475 m_headers_parse_state = PARSE_STATUS_MESSAGE; 00476 } else if (is_digit(*m_read_ptr)) { 00477 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') ); 00478 } else if (*m_read_ptr == '\r') { 00479 // recover from status message not sent 00480 m_status_message.erase(); 00481 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00482 } else if (*m_read_ptr == '\n') { 00483 // recover from status message not sent 00484 m_status_message.erase(); 00485 m_headers_parse_state = PARSE_EXPECTING_CR; 00486 } else { 00487 set_error(ec, ERROR_STATUS_CHAR); 00488 return false; 00489 } 00490 break; 00491 00492 case PARSE_STATUS_MESSAGE: 00493 // parsing the response status message 00494 if (*m_read_ptr == '\r') { 00495 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00496 } else if (*m_read_ptr == '\n') { 00497 m_headers_parse_state = PARSE_EXPECTING_CR; 00498 } else if (is_control(*m_read_ptr)) { 00499 set_error(ec, ERROR_STATUS_CHAR); 00500 return false; 00501 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) { 00502 set_error(ec, ERROR_STATUS_CHAR); 00503 return false; 00504 } else { 00505 m_status_message.push_back(*m_read_ptr); 00506 } 00507 break; 00508 00509 case PARSE_EXPECTING_NEWLINE: 00510 // we received a CR; expecting a newline to follow 00511 if (*m_read_ptr == '\n') { 00512 // check if this is a HTTP 0.9 "Simple Request" 00513 if (m_is_request && http_msg.get_version_major() == 0) { 00514 PION_LOG_DEBUG(m_logger, "HTTP 0.9 Simple-Request found"); 00515 ++m_read_ptr; 00516 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00517 m_bytes_total_read += m_bytes_last_read; 00518 return true; 00519 } else { 00520 m_headers_parse_state = PARSE_HEADER_START; 00521 } 00522 } else if (*m_read_ptr == '\r') { 00523 // we received two CR's in a row 00524 // assume CR only is (incorrectly) being used for line termination 00525 // therefore, the message is finished 00526 ++m_read_ptr; 00527 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00528 m_bytes_total_read += m_bytes_last_read; 00529 return true; 00530 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00531 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00532 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00533 set_error(ec, ERROR_HEADER_CHAR); 00534 return false; 00535 } else { 00536 // assume it is the first character for the name of a header 00537 m_header_name.erase(); 00538 m_header_name.push_back(*m_read_ptr); 00539 m_headers_parse_state = PARSE_HEADER_NAME; 00540 } 00541 break; 00542 00543 case PARSE_EXPECTING_CR: 00544 // we received a newline without a CR 00545 if (*m_read_ptr == '\r') { 00546 m_headers_parse_state = PARSE_HEADER_START; 00547 } else if (*m_read_ptr == '\n') { 00548 // we received two newlines in a row 00549 // assume newline only is (incorrectly) being used for line termination 00550 // therefore, the message is finished 00551 ++m_read_ptr; 00552 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00553 m_bytes_total_read += m_bytes_last_read; 00554 return true; 00555 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00556 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00557 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00558 set_error(ec, ERROR_HEADER_CHAR); 00559 return false; 00560 } else { 00561 // assume it is the first character for the name of a header 00562 m_header_name.erase(); 00563 m_header_name.push_back(*m_read_ptr); 00564 m_headers_parse_state = PARSE_HEADER_NAME; 00565 } 00566 break; 00567 00568 case PARSE_HEADER_WHITESPACE: 00569 // parsing whitespace before a header name 00570 if (*m_read_ptr == '\r') { 00571 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00572 } else if (*m_read_ptr == '\n') { 00573 m_headers_parse_state = PARSE_EXPECTING_CR; 00574 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') { 00575 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00576 set_error(ec, ERROR_HEADER_CHAR); 00577 return false; 00578 } 00579 // assume it is the first character for the name of a header 00580 m_header_name.erase(); 00581 m_header_name.push_back(*m_read_ptr); 00582 m_headers_parse_state = PARSE_HEADER_NAME; 00583 } 00584 break; 00585 00586 case PARSE_HEADER_START: 00587 // parsing the start of a new header 00588 if (*m_read_ptr == '\r') { 00589 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE; 00590 } else if (*m_read_ptr == '\n') { 00591 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR; 00592 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') { 00593 m_headers_parse_state = PARSE_HEADER_WHITESPACE; 00594 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00595 set_error(ec, ERROR_HEADER_CHAR); 00596 return false; 00597 } else { 00598 // first character for the name of a header 00599 m_header_name.erase(); 00600 m_header_name.push_back(*m_read_ptr); 00601 m_headers_parse_state = PARSE_HEADER_NAME; 00602 } 00603 break; 00604 00605 case PARSE_HEADER_NAME: 00606 // parsing the name of a header 00607 if (*m_read_ptr == ':') { 00608 m_header_value.erase(); 00609 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE; 00610 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00611 set_error(ec, ERROR_HEADER_CHAR); 00612 return false; 00613 } else if (m_header_name.size() >= HEADER_NAME_MAX) { 00614 set_error(ec, ERROR_HEADER_NAME_SIZE); 00615 return false; 00616 } else { 00617 // character (not first) for the name of a header 00618 m_header_name.push_back(*m_read_ptr); 00619 } 00620 break; 00621 00622 case PARSE_SPACE_BEFORE_HEADER_VALUE: 00623 // parsing space character before a header's value 00624 if (*m_read_ptr == ' ') { 00625 m_headers_parse_state = PARSE_HEADER_VALUE; 00626 } else if (*m_read_ptr == '\r') { 00627 http_msg.add_header(m_header_name, m_header_value); 00628 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00629 } else if (*m_read_ptr == '\n') { 00630 http_msg.add_header(m_header_name, m_header_value); 00631 m_headers_parse_state = PARSE_EXPECTING_CR; 00632 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) { 00633 set_error(ec, ERROR_HEADER_CHAR); 00634 return false; 00635 } else { 00636 // assume it is the first character for the value of a header 00637 m_header_value.push_back(*m_read_ptr); 00638 m_headers_parse_state = PARSE_HEADER_VALUE; 00639 } 00640 break; 00641 00642 case PARSE_HEADER_VALUE: 00643 // parsing the value of a header 00644 if (*m_read_ptr == '\r') { 00645 http_msg.add_header(m_header_name, m_header_value); 00646 m_headers_parse_state = PARSE_EXPECTING_NEWLINE; 00647 } else if (*m_read_ptr == '\n') { 00648 http_msg.add_header(m_header_name, m_header_value); 00649 m_headers_parse_state = PARSE_EXPECTING_CR; 00650 } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) { 00651 // RFC 2616, 2.2 basic Rules. 00652 // TEXT = <any OCTET except CTLs, but including LWS> 00653 // LWS = [CRLF] 1*( SP | HT ) 00654 // 00655 // TODO: parsing of folding LWS in multiple lines headers 00656 // doesn't work properly still 00657 set_error(ec, ERROR_HEADER_CHAR); 00658 return false; 00659 } else if (m_header_value.size() >= HEADER_VALUE_MAX) { 00660 set_error(ec, ERROR_HEADER_VALUE_SIZE); 00661 return false; 00662 } else { 00663 // character (not first) for the value of a header 00664 m_header_value.push_back(*m_read_ptr); 00665 } 00666 break; 00667 00668 case PARSE_EXPECTING_FINAL_NEWLINE: 00669 if (*m_read_ptr == '\n') ++m_read_ptr; 00670 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00671 m_bytes_total_read += m_bytes_last_read; 00672 return true; 00673 00674 case PARSE_EXPECTING_FINAL_CR: 00675 if (*m_read_ptr == '\r') ++m_read_ptr; 00676 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00677 m_bytes_total_read += m_bytes_last_read; 00678 return true; 00679 } 00680 00681 ++m_read_ptr; 00682 } 00683 00684 m_bytes_last_read = (m_read_ptr - read_start_ptr); 00685 m_bytes_total_read += m_bytes_last_read; 00686 return boost::indeterminate; 00687 } 00688 00689 void parser::update_message_with_header_data(http::message& http_msg) const 00690 { 00691 if (is_parsing_request()) { 00692 00693 // finish an HTTP request message 00694 00695 http::request& http_request(dynamic_cast<http::request&>(http_msg)); 00696 http_request.set_method(m_method); 00697 http_request.set_resource(m_resource); 00698 http_request.set_query_string(m_query_string); 00699 00700 // parse query pairs from the URI query string 00701 if (! m_query_string.empty()) { 00702 if (! parse_url_encoded(http_request.get_queries(), 00703 m_query_string.c_str(), 00704 m_query_string.size())) 00705 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)"); 00706 } 00707 00708 // parse "Cookie" headers in request 00709 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator> 00710 cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE); 00711 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first; 00712 cookie_iterator != http_request.get_headers().end() 00713 && cookie_iterator != cookie_pair.second; ++cookie_iterator) 00714 { 00715 if (! parse_cookie_header(http_request.get_cookies(), 00716 cookie_iterator->second, false) ) 00717 PION_LOG_WARN(m_logger, "Cookie header parsing failed"); 00718 } 00719 00720 } else { 00721 00722 // finish an HTTP response message 00723 00724 http::response& http_response(dynamic_cast<http::response&>(http_msg)); 00725 http_response.set_status_code(m_status_code); 00726 http_response.set_status_message(m_status_message); 00727 00728 // parse "Set-Cookie" headers in response 00729 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator> 00730 cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE); 00731 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first; 00732 cookie_iterator != http_response.get_headers().end() 00733 && cookie_iterator != cookie_pair.second; ++cookie_iterator) 00734 { 00735 if (! parse_cookie_header(http_response.get_cookies(), 00736 cookie_iterator->second, true) ) 00737 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed"); 00738 } 00739 00740 } 00741 } 00742 00743 boost::tribool parser::finish_header_parsing(http::message& http_msg, 00744 boost::system::error_code& ec) 00745 { 00746 boost::tribool rc = boost::indeterminate; 00747 00748 m_bytes_content_remaining = m_bytes_content_read = 0; 00749 http_msg.set_content_length(0); 00750 http_msg.update_transfer_encoding_using_header(); 00751 update_message_with_header_data(http_msg); 00752 00753 if (http_msg.is_chunked()) { 00754 00755 // content is encoded using chunks 00756 m_message_parse_state = PARSE_CHUNKS; 00757 00758 // return true if parsing headers only 00759 if (m_parse_headers_only) 00760 rc = true; 00761 00762 } else if (http_msg.is_content_length_implied()) { 00763 00764 // content length is implied to be zero 00765 m_message_parse_state = PARSE_END; 00766 rc = true; 00767 00768 } else { 00769 // content length should be specified in the headers 00770 00771 if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) { 00772 00773 // message has a content-length header 00774 try { 00775 http_msg.update_content_length_using_header(); 00776 } catch (...) { 00777 PION_LOG_ERROR(m_logger, "Unable to update content length"); 00778 set_error(ec, ERROR_INVALID_CONTENT_LENGTH); 00779 return false; 00780 } 00781 00782 // check if content-length header == 0 00783 if (http_msg.get_content_length() == 0) { 00784 m_message_parse_state = PARSE_END; 00785 rc = true; 00786 } else { 00787 m_message_parse_state = PARSE_CONTENT; 00788 m_bytes_content_remaining = http_msg.get_content_length(); 00789 00790 // check if content-length exceeds maximum allowed 00791 if (m_bytes_content_remaining > m_max_content_length) 00792 http_msg.set_content_length(m_max_content_length); 00793 00794 if (m_parse_headers_only) { 00795 // return true if parsing headers only 00796 rc = true; 00797 } else { 00798 // allocate a buffer for payload content (may be zero-size) 00799 http_msg.create_content_buffer(); 00800 } 00801 } 00802 00803 } else { 00804 // no content-length specified, and the content length cannot 00805 // otherwise be determined 00806 00807 // only if not a request, read through the close of the connection 00808 if (! m_is_request) { 00809 // clear the chunk buffers before we start 00810 http_msg.get_chunk_cache().clear(); 00811 00812 // continue reading content until there is no more data 00813 m_message_parse_state = PARSE_CONTENT_NO_LENGTH; 00814 00815 // return true if parsing headers only 00816 if (m_parse_headers_only) 00817 rc = true; 00818 } else { 00819 m_message_parse_state = PARSE_END; 00820 rc = true; 00821 } 00822 } 00823 } 00824 00825 finished_parsing_headers(ec); 00826 00827 return rc; 00828 } 00829 00830 bool parser::parse_uri(const std::string& uri, std::string& proto, 00831 std::string& host, boost::uint16_t& port, 00832 std::string& path, std::string& query) 00833 { 00834 size_t proto_end = uri.find("://"); 00835 size_t proto_len = 0; 00836 00837 if(proto_end != std::string::npos) { 00838 proto = uri.substr(0, proto_end); 00839 proto_len = proto_end + 3; // add :// 00840 } else { 00841 proto.clear(); 00842 } 00843 00844 // find a first slash charact 00845 // that indicates the end of the <server>:<port> part 00846 size_t server_port_end = uri.find('/', proto_len); 00847 if (server_port_end == std::string::npos) { 00848 // no path -> use just / 00849 path = "/"; 00850 server_port_end = uri.size(); 00851 } 00852 00853 // copy <server>:<port> into temp string 00854 std::string t; 00855 t = uri.substr(proto_len, server_port_end - proto_len); 00856 size_t port_pos = t.find(':', 0); 00857 00858 // assign output host and port parameters 00859 00860 host = t.substr(0, port_pos); // if port_pos == npos, copy whole string 00861 if(host.length() == 0) { 00862 return false; 00863 } 00864 00865 // parse the port, if it's not empty 00866 if(port_pos != std::string::npos) { 00867 try { 00868 port = boost::lexical_cast<int>(t.substr(port_pos+1)); 00869 } catch (boost::bad_lexical_cast &) { 00870 return false; 00871 } 00872 } else if (proto == "http" || proto == "HTTP") { 00873 port = 80; 00874 } else if (proto == "https" || proto == "HTTPS") { 00875 port = 443; 00876 } else { 00877 port = 0; 00878 } 00879 00880 if (server_port_end < uri.size()) { 00881 // copy the rest of the URI into path part 00882 path = uri.substr(server_port_end); 00883 00884 // split the path and the query string parts 00885 size_t query_pos = path.find('?', 0); 00886 00887 if(query_pos != std::string::npos) { 00888 query = path.substr(query_pos + 1, path.length() - query_pos - 1); 00889 path = path.substr(0, query_pos); 00890 } else { 00891 query.clear(); 00892 } 00893 } 00894 00895 return true; 00896 } 00897 00898 bool parser::parse_url_encoded(ihash_multimap& dict, 00899 const char *ptr, const size_t len) 00900 { 00901 // sanity check 00902 if (ptr == NULL || len == 0) 00903 return true; 00904 00905 // used to track whether we are parsing the name or value 00906 enum QueryParseState { 00907 QUERY_PARSE_NAME, QUERY_PARSE_VALUE 00908 } parse_state = QUERY_PARSE_NAME; 00909 00910 // misc other variables used for parsing 00911 const char * const end = ptr + len; 00912 std::string query_name; 00913 std::string query_value; 00914 00915 // iterate through each encoded character 00916 while (ptr < end) { 00917 switch (parse_state) { 00918 00919 case QUERY_PARSE_NAME: 00920 // parsing query name 00921 if (*ptr == '=') { 00922 // end of name found (OK if empty) 00923 parse_state = QUERY_PARSE_VALUE; 00924 } else if (*ptr == '&') { 00925 // if query name is empty, just skip it (i.e. "&&") 00926 if (! query_name.empty()) { 00927 // assume that "=" is missing -- it's OK if the value is empty 00928 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) ); 00929 query_name.erase(); 00930 } 00931 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') { 00932 // ignore linefeeds, carriage return and tabs (normally within POST content) 00933 } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) { 00934 // control character detected, or max sized exceeded 00935 return false; 00936 } else { 00937 // character is part of the name 00938 query_name.push_back(*ptr); 00939 } 00940 break; 00941 00942 case QUERY_PARSE_VALUE: 00943 // parsing query value 00944 if (*ptr == '&') { 00945 // end of value found (OK if empty) 00946 if (! query_name.empty()) { 00947 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) ); 00948 query_name.erase(); 00949 } 00950 query_value.erase(); 00951 parse_state = QUERY_PARSE_NAME; 00952 } else if (*ptr == ',') { 00953 // end of value found in multi-value list (OK if empty) 00954 if (! query_name.empty()) 00955 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) ); 00956 query_value.erase(); 00957 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') { 00958 // ignore linefeeds, carriage return and tabs (normally within POST content) 00959 } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) { 00960 // control character detected, or max sized exceeded 00961 return false; 00962 } else { 00963 // character is part of the value 00964 query_value.push_back(*ptr); 00965 } 00966 break; 00967 } 00968 00969 ++ptr; 00970 } 00971 00972 // handle last pair in string 00973 if (! query_name.empty()) 00974 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) ); 00975 00976 return true; 00977 } 00978 00979 bool parser::parse_multipart_form_data(ihash_multimap& dict, 00980 const std::string& content_type, 00981 const char *ptr, const size_t len) 00982 { 00983 // sanity check 00984 if (ptr == NULL || len == 0) 00985 return true; 00986 00987 // parse field boundary 00988 std::size_t pos = content_type.find("boundary="); 00989 if (pos == std::string::npos) 00990 return false; 00991 const std::string boundary = std::string("--") + content_type.substr(pos+9); 00992 00993 // used to track what we are parsing 00994 enum MultiPartParseState { 00995 MP_PARSE_START, 00996 MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF, 00997 MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE, 00998 MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA 00999 } parse_state = MP_PARSE_START; 01000 01001 // a few variables used for parsing 01002 std::string header_name; 01003 std::string header_value; 01004 std::string field_name; 01005 std::string field_value; 01006 bool found_parameter = false; 01007 bool save_current_field = true; 01008 const char * const end_ptr = ptr + len; 01009 01010 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end()); 01011 01012 while (ptr != NULL && ptr < end_ptr) { 01013 switch (parse_state) { 01014 case MP_PARSE_START: 01015 // start parsing a new field 01016 header_name.clear(); 01017 header_value.clear(); 01018 field_name.clear(); 01019 field_value.clear(); 01020 save_current_field = true; 01021 ptr += boundary.size() - 1; 01022 parse_state = MP_PARSE_HEADER_CR; 01023 break; 01024 case MP_PARSE_HEADER_CR: 01025 // expecting CR while parsing headers 01026 if (*ptr == '\r') { 01027 // got it -> look for linefeed 01028 parse_state = MP_PARSE_HEADER_LF; 01029 } else if (*ptr == '\n') { 01030 // got a linefeed? try to ignore and start parsing header 01031 parse_state = MP_PARSE_HEADER_NAME; 01032 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') { 01033 // end of multipart content 01034 return true; 01035 } else return false; 01036 break; 01037 case MP_PARSE_HEADER_LF: 01038 // expecting LF while parsing headers 01039 if (*ptr == '\n') { 01040 // got it -> start parsing header name 01041 parse_state = MP_PARSE_HEADER_NAME; 01042 } else return false; 01043 break; 01044 case MP_PARSE_HEADER_NAME: 01045 // parsing the name of a header 01046 if (*ptr == '\r' || *ptr == '\n') { 01047 if (header_name.empty()) { 01048 // got CR or LF at beginning; skip to data 01049 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA); 01050 } else { 01051 // premature CR or LF -> just ignore and start parsing next header 01052 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME); 01053 } 01054 } else if (*ptr == ':') { 01055 // done parsing header name -> consume space next 01056 parse_state = MP_PARSE_HEADER_SPACE; 01057 } else { 01058 // one more byte for header name 01059 header_name += *ptr; 01060 } 01061 break; 01062 case MP_PARSE_HEADER_SPACE: 01063 // expecting a space before header value 01064 if (*ptr == '\r') { 01065 // premature CR -> just ignore and start parsing next header 01066 parse_state = MP_PARSE_HEADER_LF; 01067 } else if (*ptr == '\n') { 01068 // premature LF -> just ignore and start parsing next header 01069 parse_state = MP_PARSE_HEADER_NAME; 01070 } else if (*ptr != ' ') { 01071 // not a space -> assume it's a value char 01072 header_value += *ptr; 01073 parse_state = MP_PARSE_HEADER_VALUE; 01074 } 01075 // otherwise just ignore the space(s) 01076 break; 01077 case MP_PARSE_HEADER_VALUE: 01078 // parsing the value of a header 01079 if (*ptr == '\r' || *ptr == '\n') { 01080 // reached the end of the value -> check if it's important 01081 if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) { 01082 // only keep fields that have a text type or no type 01083 save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/"); 01084 } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) { 01085 // get current field from content-disposition header 01086 std::size_t name_pos = header_value.find("name=\""); 01087 if (name_pos != std::string::npos) { 01088 for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) { 01089 field_name += header_value[name_pos]; 01090 } 01091 } 01092 } 01093 // clear values and start parsing next header 01094 header_name.clear(); 01095 header_value.clear(); 01096 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME); 01097 } else { 01098 // one more byte for header value 01099 header_value += *ptr; 01100 } 01101 break; 01102 case MP_PARSE_HEADER_LAST_LF: 01103 // expecting final linefeed to terminate headers and begin field data 01104 if (*ptr == '\n') { 01105 // got it 01106 if (save_current_field && !field_name.empty()) { 01107 // parse the field if we care & know enough about it 01108 parse_state = MP_PARSE_FIELD_DATA; 01109 } else { 01110 // otherwise skip ahead to next field 01111 parse_state = MP_PARSE_START; 01112 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end()); 01113 } 01114 } else return false; 01115 break; 01116 case MP_PARSE_FIELD_DATA: 01117 // parsing the value of a field -> find the end of it 01118 const char *field_end_ptr = end_ptr; 01119 const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end()); 01120 if (next_ptr) { 01121 // don't include CRLF before next boundary 01122 const char *temp_ptr = next_ptr - 2; 01123 if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n') 01124 field_end_ptr = temp_ptr; 01125 else field_end_ptr = next_ptr; 01126 } 01127 field_value.assign(ptr, field_end_ptr - ptr); 01128 // add the field to the query dictionary 01129 dict.insert( std::make_pair(field_name, field_value) ); 01130 found_parameter = true; 01131 // skip ahead to next field 01132 parse_state = MP_PARSE_START; 01133 ptr = next_ptr; 01134 break; 01135 } 01136 // we've already bumped position if MP_PARSE_START 01137 if (parse_state != MP_PARSE_START) 01138 ++ptr; 01139 } 01140 01141 return found_parameter; 01142 } 01143 01144 bool parser::parse_cookie_header(ihash_multimap& dict, 01145 const char *ptr, const size_t len, 01146 bool set_cookie_header) 01147 { 01148 // BASED ON RFC 2109 01149 // http://www.ietf.org/rfc/rfc2109.txt 01150 // 01151 // The current implementation ignores cookie attributes which begin with '$' 01152 // (i.e. $Path=/, $Domain=, etc.) 01153 01154 // used to track what we are parsing 01155 enum CookieParseState { 01156 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE 01157 } parse_state = COOKIE_PARSE_NAME; 01158 01159 // misc other variables used for parsing 01160 const char * const end = ptr + len; 01161 std::string cookie_name; 01162 std::string cookie_value; 01163 char value_quote_character = '\0'; 01164 01165 // iterate through each character 01166 while (ptr < end) { 01167 switch (parse_state) { 01168 01169 case COOKIE_PARSE_NAME: 01170 // parsing cookie name 01171 if (*ptr == '=') { 01172 // end of name found (OK if empty) 01173 value_quote_character = '\0'; 01174 parse_state = COOKIE_PARSE_VALUE; 01175 } else if (*ptr == ';' || *ptr == ',') { 01176 // ignore empty cookie names since this may occur naturally 01177 // when quoted values are encountered 01178 if (! cookie_name.empty()) { 01179 // value is empty (OK) 01180 if (! is_cookie_attribute(cookie_name, set_cookie_header)) 01181 dict.insert( std::make_pair(cookie_name, cookie_value) ); 01182 cookie_name.erase(); 01183 } 01184 } else if (*ptr != ' ') { // ignore whitespace 01185 // check if control character detected, or max sized exceeded 01186 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX) 01187 return false; 01188 // character is part of the name 01189 cookie_name.push_back(*ptr); 01190 } 01191 break; 01192 01193 case COOKIE_PARSE_VALUE: 01194 // parsing cookie value 01195 if (value_quote_character == '\0') { 01196 // value is not (yet) quoted 01197 if (*ptr == ';' || *ptr == ',') { 01198 // end of value found (OK if empty) 01199 if (! is_cookie_attribute(cookie_name, set_cookie_header)) 01200 dict.insert( std::make_pair(cookie_name, cookie_value) ); 01201 cookie_name.erase(); 01202 cookie_value.erase(); 01203 parse_state = COOKIE_PARSE_NAME; 01204 } else if (*ptr == '\'' || *ptr == '"') { 01205 if (cookie_value.empty()) { 01206 // begin quoted value 01207 value_quote_character = *ptr; 01208 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) { 01209 // max size exceeded 01210 return false; 01211 } else { 01212 // assume character is part of the (unquoted) value 01213 cookie_value.push_back(*ptr); 01214 } 01215 } else if (*ptr != ' ' || !cookie_value.empty()) { // ignore leading unquoted whitespace 01216 // check if control character detected, or max sized exceeded 01217 if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX) 01218 return false; 01219 // character is part of the (unquoted) value 01220 cookie_value.push_back(*ptr); 01221 } 01222 } else { 01223 // value is quoted 01224 if (*ptr == value_quote_character) { 01225 // end of value found (OK if empty) 01226 if (! is_cookie_attribute(cookie_name, set_cookie_header)) 01227 dict.insert( std::make_pair(cookie_name, cookie_value) ); 01228 cookie_name.erase(); 01229 cookie_value.erase(); 01230 parse_state = COOKIE_PARSE_IGNORE; 01231 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) { 01232 // max size exceeded 01233 return false; 01234 } else { 01235 // character is part of the (quoted) value 01236 cookie_value.push_back(*ptr); 01237 } 01238 } 01239 break; 01240 01241 case COOKIE_PARSE_IGNORE: 01242 // ignore everything until we reach a comma "," or semicolon ";" 01243 if (*ptr == ';' || *ptr == ',') 01244 parse_state = COOKIE_PARSE_NAME; 01245 break; 01246 } 01247 01248 ++ptr; 01249 } 01250 01251 // handle last cookie in string 01252 if (! is_cookie_attribute(cookie_name, set_cookie_header)) 01253 dict.insert( std::make_pair(cookie_name, cookie_value) ); 01254 01255 return true; 01256 } 01257 01258 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks, 01259 boost::system::error_code& ec) 01260 { 01261 // 01262 // note that boost::tribool may have one of THREE states: 01263 // 01264 // false: encountered an error while parsing message 01265 // true: finished successfully parsing the message 01266 // indeterminate: parsed bytes, but the message is not yet finished 01267 // 01268 const char *read_start_ptr = m_read_ptr; 01269 m_bytes_last_read = 0; 01270 while (m_read_ptr < m_read_end_ptr) { 01271 01272 switch (m_chunked_content_parse_state) { 01273 case PARSE_CHUNK_SIZE_START: 01274 // we have not yet started parsing the next chunk size 01275 if (is_hex_digit(*m_read_ptr)) { 01276 m_chunk_size_str.erase(); 01277 m_chunk_size_str.push_back(*m_read_ptr); 01278 m_chunked_content_parse_state = PARSE_CHUNK_SIZE; 01279 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') { 01280 // Ignore leading whitespace. Technically, the standard probably doesn't allow white space here, 01281 // but we'll be flexible, since there's no ambiguity. 01282 break; 01283 } else { 01284 set_error(ec, ERROR_CHUNK_CHAR); 01285 return false; 01286 } 01287 break; 01288 01289 case PARSE_CHUNK_SIZE: 01290 if (is_hex_digit(*m_read_ptr)) { 01291 m_chunk_size_str.push_back(*m_read_ptr); 01292 } else if (*m_read_ptr == '\x0D') { 01293 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE; 01294 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') { 01295 // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this, 01296 // but we'll be flexible, since there's no ambiguity. 01297 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE; 01298 } else if (*m_read_ptr == ';') { 01299 // Following the semicolon we have text which will be ignored till we encounter 01300 // a CRLF 01301 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE; 01302 } else { 01303 set_error(ec, ERROR_CHUNK_CHAR); 01304 return false; 01305 } 01306 break; 01307 01308 case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE: 01309 if (*m_read_ptr == '\x0D') { 01310 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE; 01311 } 01312 break; 01313 01314 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE: 01315 if (*m_read_ptr == '\x0D') { 01316 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE; 01317 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') { 01318 // Ignore trailing tabs or spaces. Technically, the standard probably doesn't allow this, 01319 // but we'll be flexible, since there's no ambiguity. 01320 break; 01321 } else { 01322 set_error(ec, ERROR_CHUNK_CHAR); 01323 return false; 01324 } 01325 break; 01326 01327 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE: 01328 // We received a CR; expecting LF to follow. We can't be flexible here because 01329 // if we see anything other than LF, we can't be certain where the chunk starts. 01330 if (*m_read_ptr == '\x0A') { 01331 m_bytes_read_in_current_chunk = 0; 01332 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16); 01333 if (m_size_of_current_chunk == 0) { 01334 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK; 01335 } else { 01336 m_chunked_content_parse_state = PARSE_CHUNK; 01337 } 01338 } else { 01339 set_error(ec, ERROR_CHUNK_CHAR); 01340 return false; 01341 } 01342 break; 01343 01344 case PARSE_CHUNK: 01345 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) { 01346 if (m_payload_handler) { 01347 const std::size_t bytes_avail = bytes_available(); 01348 const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk; 01349 const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk; 01350 m_payload_handler(m_read_ptr, len); 01351 m_bytes_read_in_current_chunk += len; 01352 if (len > 1) m_read_ptr += (len - 1); 01353 } else if (chunks.size() < m_max_content_length) { 01354 chunks.push_back(*m_read_ptr); 01355 m_bytes_read_in_current_chunk++; 01356 } 01357 } 01358 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) { 01359 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK; 01360 } 01361 break; 01362 01363 case PARSE_EXPECTING_CR_AFTER_CHUNK: 01364 // we've read exactly m_size_of_current_chunk bytes since starting the current chunk 01365 if (*m_read_ptr == '\x0D') { 01366 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK; 01367 } else { 01368 set_error(ec, ERROR_CHUNK_CHAR); 01369 return false; 01370 } 01371 break; 01372 01373 case PARSE_EXPECTING_LF_AFTER_CHUNK: 01374 // we received a CR; expecting LF to follow 01375 if (*m_read_ptr == '\x0A') { 01376 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START; 01377 } else { 01378 set_error(ec, ERROR_CHUNK_CHAR); 01379 return false; 01380 } 01381 break; 01382 01383 case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK: 01384 // we've read the final chunk; expecting final CRLF 01385 if (*m_read_ptr == '\x0D') { 01386 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK; 01387 } else { 01388 // Packet contains footers; Chunk parsing is commplete 01389 // Footer data contains name value pairs to be added to HTTP Message 01390 m_message_parse_state = PARSE_FOOTERS; 01391 m_headers_parse_state = PARSE_HEADER_START; 01392 m_bytes_last_read = (m_read_ptr - read_start_ptr); 01393 m_bytes_total_read += m_bytes_last_read; 01394 m_bytes_content_read += m_bytes_last_read; 01395 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete."); 01396 return true; 01397 } 01398 break; 01399 01400 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK: 01401 // we received the final CR; expecting LF to follow 01402 if (*m_read_ptr == '\x0A') { 01403 ++m_read_ptr; 01404 m_bytes_last_read = (m_read_ptr - read_start_ptr); 01405 m_bytes_total_read += m_bytes_last_read; 01406 m_bytes_content_read += m_bytes_last_read; 01407 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete."); 01408 return true; 01409 } else { 01410 set_error(ec, ERROR_CHUNK_CHAR); 01411 return false; 01412 } 01413 } 01414 01415 ++m_read_ptr; 01416 } 01417 01418 m_bytes_last_read = (m_read_ptr - read_start_ptr); 01419 m_bytes_total_read += m_bytes_last_read; 01420 m_bytes_content_read += m_bytes_last_read; 01421 return boost::indeterminate; 01422 } 01423 01424 boost::tribool parser::consume_content(http::message& http_msg, 01425 boost::system::error_code& ec) 01426 { 01427 size_t content_bytes_to_read; 01428 size_t content_bytes_available = bytes_available(); 01429 boost::tribool rc = boost::indeterminate; 01430 01431 if (m_bytes_content_remaining == 0) { 01432 // we have all of the remaining payload content 01433 return true; 01434 } else { 01435 if (content_bytes_available >= m_bytes_content_remaining) { 01436 // we have all of the remaining payload content 01437 rc = true; 01438 content_bytes_to_read = m_bytes_content_remaining; 01439 } else { 01440 // only some of the payload content is available 01441 content_bytes_to_read = content_bytes_available; 01442 } 01443 m_bytes_content_remaining -= content_bytes_to_read; 01444 } 01445 01446 // make sure content buffer is not already full 01447 if (m_payload_handler) { 01448 m_payload_handler(m_read_ptr, content_bytes_to_read); 01449 } else if (m_bytes_content_read < m_max_content_length) { 01450 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) { 01451 // read would exceed maximum size for content buffer 01452 // copy only enough bytes to fill up the content buffer 01453 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, 01454 m_max_content_length - m_bytes_content_read); 01455 } else { 01456 // copy all bytes available 01457 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read); 01458 } 01459 } 01460 01461 m_read_ptr += content_bytes_to_read; 01462 m_bytes_content_read += content_bytes_to_read; 01463 m_bytes_total_read += content_bytes_to_read; 01464 m_bytes_last_read = content_bytes_to_read; 01465 01466 return rc; 01467 } 01468 01469 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks) 01470 { 01471 if (bytes_available() == 0) { 01472 m_bytes_last_read = 0; 01473 } else { 01474 // note: m_bytes_last_read must be > 0 because of bytes_available() check 01475 m_bytes_last_read = (m_read_end_ptr - m_read_ptr); 01476 if (m_payload_handler) { 01477 m_payload_handler(m_read_ptr, m_bytes_last_read); 01478 m_read_ptr += m_bytes_last_read; 01479 } else { 01480 while (m_read_ptr < m_read_end_ptr) { 01481 if (chunks.size() < m_max_content_length) 01482 chunks.push_back(*m_read_ptr); 01483 ++m_read_ptr; 01484 } 01485 } 01486 m_bytes_total_read += m_bytes_last_read; 01487 m_bytes_content_read += m_bytes_last_read; 01488 } 01489 return m_bytes_last_read; 01490 } 01491 01492 void parser::finish(http::message& http_msg) const 01493 { 01494 switch (m_message_parse_state) { 01495 case PARSE_START: 01496 http_msg.set_is_valid(false); 01497 http_msg.set_content_length(0); 01498 http_msg.create_content_buffer(); 01499 return; 01500 case PARSE_END: 01501 http_msg.set_is_valid(true); 01502 break; 01503 case PARSE_HEADERS: 01504 case PARSE_FOOTERS: 01505 http_msg.set_is_valid(false); 01506 update_message_with_header_data(http_msg); 01507 http_msg.set_content_length(0); 01508 http_msg.create_content_buffer(); 01509 break; 01510 case PARSE_CONTENT: 01511 http_msg.set_is_valid(false); 01512 if (get_content_bytes_read() < m_max_content_length) // NOTE: we can read more than we have allocated/stored 01513 http_msg.set_content_length(get_content_bytes_read()); 01514 break; 01515 case PARSE_CHUNKS: 01516 http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START); 01517 if (!m_payload_handler) 01518 http_msg.concatenate_chunks(); 01519 break; 01520 case PARSE_CONTENT_NO_LENGTH: 01521 http_msg.set_is_valid(true); 01522 if (!m_payload_handler) 01523 http_msg.concatenate_chunks(); 01524 break; 01525 } 01526 01527 compute_msg_status(http_msg, http_msg.is_valid()); 01528 01529 if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) { 01530 // Parse query pairs from post content if content type is x-www-form-urlencoded. 01531 // Type could be followed by parameters (as defined in section 3.6 of RFC 2616) 01532 // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8 01533 http::request& http_request(dynamic_cast<http::request&>(http_msg)); 01534 const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE); 01535 if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(), 01536 http::types::CONTENT_TYPE_URLENCODED) == 0) 01537 { 01538 if (! parse_url_encoded(http_request.get_queries(), 01539 http_request.get_content(), 01540 http_request.get_content_length())) 01541 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)"); 01542 } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(), 01543 http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0) 01544 { 01545 if (! parse_multipart_form_data(http_request.get_queries(), 01546 content_type_header, 01547 http_request.get_content(), 01548 http_request.get_content_length())) 01549 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)"); 01550 } 01551 } 01552 } 01553 01554 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok ) 01555 { 01556 http::message::data_status_t st = http::message::STATUS_NONE; 01557 01558 if(http_msg.has_missing_packets()) { 01559 st = http_msg.has_data_after_missing_packets() ? 01560 http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED; 01561 } else { 01562 st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED; 01563 } 01564 01565 http_msg.set_status(st); 01566 } 01567 01568 void parser::create_error_category(void) 01569 { 01570 static error_category_t UNIQUE_ERROR_CATEGORY; 01571 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY; 01572 } 01573 01574 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip) 01575 { 01576 // static regex's used to check for ipv4 address 01577 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}"); 01578 01584 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}"); 01585 01586 // sanity check 01587 if (header.empty()) 01588 return false; 01589 01590 // local variables re-used by while loop 01591 boost::match_results<std::string::const_iterator> m; 01592 std::string::const_iterator start_it = header.begin(); 01593 01594 // search for next ip address within the header 01595 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) { 01596 // get ip that matched 01597 std::string ip_str(m[0].first, m[0].second); 01598 // check if public network ip address 01599 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) { 01600 // match found! 01601 public_ip = ip_str; 01602 return true; 01603 } 01604 // update search starting position 01605 start_it = m[0].second; 01606 } 01607 01608 // no matches found 01609 return false; 01610 } 01611 01612 } // end namespace http 01613 } // end namespace pion