pion
5.0.6
|
00001 // --------------------------------------------------------------------- 00002 // pion: a Boost C++ framework for building lightweight HTTP interfaces 00003 // --------------------------------------------------------------------- 00004 // Copyright (C) 2007-2014 Splunk Inc. (https://github.com/splunk/pion) 00005 // 00006 // Distributed under the Boost Software License, Version 1.0. 00007 // See http://www.boost.org/LICENSE_1_0.txt 00008 // 00009 00010 #ifndef __PION_HTTP_PARSER_HEADER__ 00011 #define __PION_HTTP_PARSER_HEADER__ 00012 00013 #include <string> 00014 #include <boost/noncopyable.hpp> 00015 #include <boost/function/function2.hpp> 00016 #include <boost/logic/tribool.hpp> 00017 #include <boost/system/error_code.hpp> 00018 #include <boost/thread/once.hpp> 00019 #include <pion/config.hpp> 00020 #include <pion/logger.hpp> 00021 #include <pion/http/message.hpp> 00022 00023 #ifndef BOOST_SYSTEM_NOEXCEPT 00024 #define BOOST_SYSTEM_NOEXCEPT BOOST_NOEXCEPT 00025 #endif 00026 00027 00028 namespace pion { // begin namespace pion 00029 namespace http { // begin namespace http 00030 00031 00032 // forward declarations used for finishing HTTP messages 00033 class request; 00034 class response; 00035 00039 class PION_API parser : 00040 private boost::noncopyable 00041 { 00042 00043 public: 00044 00046 static const std::size_t DEFAULT_CONTENT_MAX; 00047 00049 typedef boost::function2<void, const char *, std::size_t> payload_handler_t; 00050 00052 enum error_value_t { 00053 ERROR_METHOD_CHAR = 1, 00054 ERROR_METHOD_SIZE, 00055 ERROR_URI_CHAR, 00056 ERROR_URI_SIZE, 00057 ERROR_QUERY_CHAR, 00058 ERROR_QUERY_SIZE, 00059 ERROR_VERSION_EMPTY, 00060 ERROR_VERSION_CHAR, 00061 ERROR_STATUS_EMPTY, 00062 ERROR_STATUS_CHAR, 00063 ERROR_HEADER_CHAR, 00064 ERROR_HEADER_NAME_SIZE, 00065 ERROR_HEADER_VALUE_SIZE, 00066 ERROR_INVALID_CONTENT_LENGTH, 00067 ERROR_CHUNK_CHAR, 00068 ERROR_MISSING_CHUNK_DATA, 00069 ERROR_MISSING_HEADER_DATA, 00070 ERROR_MISSING_TOO_MUCH_CONTENT, 00071 }; 00072 00074 class error_category_t 00075 : public boost::system::error_category 00076 { 00077 public: 00078 const char *name() const BOOST_SYSTEM_NOEXCEPT { return "parser"; } 00079 std::string message(int ev) const { 00080 switch (ev) { 00081 case ERROR_METHOD_CHAR: 00082 return "invalid method character"; 00083 case ERROR_METHOD_SIZE: 00084 return "method exceeds maximum size"; 00085 case ERROR_URI_CHAR: 00086 return "invalid URI character"; 00087 case ERROR_URI_SIZE: 00088 return "method exceeds maximum size"; 00089 case ERROR_QUERY_CHAR: 00090 return "invalid query string character"; 00091 case ERROR_QUERY_SIZE: 00092 return "query string exceeds maximum size"; 00093 case ERROR_VERSION_EMPTY: 00094 return "HTTP version undefined"; 00095 case ERROR_VERSION_CHAR: 00096 return "invalid version character"; 00097 case ERROR_STATUS_EMPTY: 00098 return "HTTP status undefined"; 00099 case ERROR_STATUS_CHAR: 00100 return "invalid status character"; 00101 case ERROR_HEADER_CHAR: 00102 return "invalid header character"; 00103 case ERROR_HEADER_NAME_SIZE: 00104 return "header name exceeds maximum size"; 00105 case ERROR_HEADER_VALUE_SIZE: 00106 return "header value exceeds maximum size"; 00107 case ERROR_INVALID_CONTENT_LENGTH: 00108 return "invalid Content-Length header"; 00109 case ERROR_CHUNK_CHAR: 00110 return "invalid chunk character"; 00111 case ERROR_MISSING_HEADER_DATA: 00112 return "missing header data"; 00113 case ERROR_MISSING_CHUNK_DATA: 00114 return "missing chunk data"; 00115 case ERROR_MISSING_TOO_MUCH_CONTENT: 00116 return "missing too much content"; 00117 } 00118 return "parser error"; 00119 } 00120 }; 00121 00129 parser(const bool is_request, std::size_t max_content_length = DEFAULT_CONTENT_MAX) 00130 : m_logger(PION_GET_LOGGER("pion.http.parser")), m_is_request(is_request), 00131 m_read_ptr(NULL), m_read_end_ptr(NULL), m_message_parse_state(PARSE_START), 00132 m_headers_parse_state(is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H), 00133 m_chunked_content_parse_state(PARSE_CHUNK_SIZE_START), m_status_code(0), 00134 m_bytes_content_remaining(0), m_bytes_content_read(0), 00135 m_bytes_last_read(0), m_bytes_total_read(0), 00136 m_max_content_length(max_content_length), 00137 m_parse_headers_only(false), m_save_raw_headers(false) 00138 {} 00139 00141 virtual ~parser() {} 00142 00154 boost::tribool parse(http::message& http_msg, boost::system::error_code& ec); 00155 00168 boost::tribool parse_missing_data(http::message& http_msg, std::size_t len, 00169 boost::system::error_code& ec); 00170 00176 void finish(http::message& http_msg) const; 00177 00184 inline void set_read_buffer(const char *ptr, size_t len) { 00185 m_read_ptr = ptr; 00186 m_read_end_ptr = ptr + len; 00187 } 00188 00195 inline void load_read_pos(const char *&read_ptr, const char *&read_end_ptr) const { 00196 read_ptr = m_read_ptr; 00197 read_end_ptr = m_read_end_ptr; 00198 } 00199 00208 inline bool check_premature_eof(http::message& http_msg) { 00209 if (m_message_parse_state != PARSE_CONTENT_NO_LENGTH) 00210 return true; 00211 m_message_parse_state = PARSE_END; 00212 http_msg.concatenate_chunks(); 00213 finish(http_msg); 00214 return false; 00215 } 00216 00222 inline void parse_headers_only(bool b = true) { m_parse_headers_only = b; } 00223 00229 inline void skip_header_parsing(http::message& http_msg) { 00230 boost::system::error_code ec; 00231 finish_header_parsing(http_msg, ec); 00232 } 00233 00235 inline void reset(void) { 00236 m_message_parse_state = PARSE_START; 00237 m_headers_parse_state = (m_is_request ? PARSE_METHOD_START : PARSE_HTTP_VERSION_H); 00238 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START; 00239 m_status_code = 0; 00240 m_status_message.erase(); 00241 m_method.erase(); 00242 m_resource.erase(); 00243 m_query_string.erase(); 00244 m_raw_headers.erase(); 00245 m_bytes_content_read = m_bytes_last_read = m_bytes_total_read = 0; 00246 } 00247 00249 inline bool eof(void) const { return m_read_ptr == NULL || m_read_ptr >= m_read_end_ptr; } 00250 00252 inline std::size_t bytes_available(void) const { return (eof() ? 0 : (std::size_t)(m_read_end_ptr - m_read_ptr)); } 00253 00255 inline std::size_t gcount(void) const { return m_bytes_last_read; } 00256 00258 inline std::size_t get_total_bytes_read(void) const { return m_bytes_total_read; } 00259 00261 inline std::size_t get_content_bytes_read(void) const { return m_bytes_content_read; } 00262 00264 inline std::size_t get_max_content_length(void) const { return m_max_content_length; } 00265 00267 inline const std::string& get_raw_headers(void) const { return m_raw_headers; } 00268 00270 inline bool get_save_raw_headers(void) const { return m_save_raw_headers; } 00271 00273 inline bool get_parse_headers_only(void) { return m_parse_headers_only; } 00274 00276 inline bool is_parsing_request(void) const { return m_is_request; } 00277 00279 inline bool is_parsing_response(void) const { return ! m_is_request; } 00280 00282 inline void set_payload_handler(payload_handler_t& h) { m_payload_handler = h; } 00283 00285 inline void set_max_content_length(std::size_t n) { m_max_content_length = n; } 00286 00288 inline void reset_max_content_length(void) { m_max_content_length = DEFAULT_CONTENT_MAX; } 00289 00291 inline void set_save_raw_headers(bool b) { m_save_raw_headers = b; } 00292 00294 inline void set_logger(logger log_ptr) { m_logger = log_ptr; } 00295 00297 inline logger get_logger(void) { return m_logger; } 00298 00299 00312 static bool parse_uri(const std::string& uri, std::string& proto, 00313 std::string& host, boost::uint16_t& port, std::string& path, 00314 std::string& query); 00315 00326 static bool parse_url_encoded(ihash_multimap& dict, 00327 const char *ptr, const std::size_t len); 00328 00340 static bool parse_multipart_form_data(ihash_multimap& dict, 00341 const std::string& content_type, 00342 const char *ptr, const std::size_t len); 00343 00355 static bool parse_cookie_header(ihash_multimap& dict, 00356 const char *ptr, const std::size_t len, 00357 bool set_cookie_header); 00358 00369 static inline bool parse_cookie_header(ihash_multimap& dict, 00370 const std::string& cookie_header, bool set_cookie_header) 00371 { 00372 return parse_cookie_header(dict, cookie_header.c_str(), cookie_header.size(), set_cookie_header); 00373 } 00374 00384 static inline bool parse_url_encoded(ihash_multimap& dict, 00385 const std::string& query) 00386 { 00387 return parse_url_encoded(dict, query.c_str(), query.size()); 00388 } 00389 00400 static inline bool parse_multipart_form_data(ihash_multimap& dict, 00401 const std::string& content_type, 00402 const std::string& form_data) 00403 { 00404 return parse_multipart_form_data(dict, content_type, form_data.c_str(), form_data.size()); 00405 } 00406 00419 boost::tribool finish_header_parsing(http::message& http_msg, 00420 boost::system::error_code& ec); 00421 00431 static bool parse_forwarded_for(const std::string& header, std::string& public_ip); 00432 00434 static inline error_category_t& get_error_category(void) { 00435 boost::call_once(parser::create_error_category, m_instance_flag); 00436 return *m_error_category_ptr; 00437 } 00438 00439 00440 protected: 00441 00443 virtual void finished_parsing_headers(const boost::system::error_code& ec) {} 00444 00457 boost::tribool parse_headers(http::message& http_msg, boost::system::error_code& ec); 00458 00464 void update_message_with_header_data(http::message& http_msg) const; 00465 00477 boost::tribool parse_chunks(http::message::chunk_cache_t& chunk_buffers, 00478 boost::system::error_code& ec); 00479 00491 boost::tribool consume_content(http::message& http_msg, 00492 boost::system::error_code& ec); 00493 00501 std::size_t consume_content_as_next_chunk(http::message::chunk_cache_t& chunk_buffers); 00502 00508 static void compute_msg_status(http::message& http_msg, bool msg_parsed_ok); 00509 00516 static inline void set_error(boost::system::error_code& ec, error_value_t ev) { 00517 ec = boost::system::error_code(static_cast<int>(ev), get_error_category()); 00518 } 00519 00521 static void create_error_category(void); 00522 00523 00524 // misc functions used by the parsing functions 00525 inline static bool is_char(int c); 00526 inline static bool is_control(int c); 00527 inline static bool is_special(int c); 00528 inline static bool is_digit(int c); 00529 inline static bool is_hex_digit(int c); 00530 inline static bool is_cookie_attribute(const std::string& name, bool set_cookie_header); 00531 00532 00534 static const boost::uint32_t STATUS_MESSAGE_MAX; 00535 00537 static const boost::uint32_t METHOD_MAX; 00538 00540 static const boost::uint32_t RESOURCE_MAX; 00541 00543 static const boost::uint32_t QUERY_STRING_MAX; 00544 00546 static const boost::uint32_t HEADER_NAME_MAX; 00547 00549 static const boost::uint32_t HEADER_VALUE_MAX; 00550 00552 static const boost::uint32_t QUERY_NAME_MAX; 00553 00555 static const boost::uint32_t QUERY_VALUE_MAX; 00556 00558 static const boost::uint32_t COOKIE_NAME_MAX; 00559 00561 static const boost::uint32_t COOKIE_VALUE_MAX; 00562 00563 00565 mutable logger m_logger; 00566 00568 const bool m_is_request; 00569 00571 const char * m_read_ptr; 00572 00574 const char * m_read_end_ptr; 00575 00576 00577 private: 00578 00580 enum message_parse_state_t { 00581 PARSE_START, PARSE_HEADERS, PARSE_FOOTERS, PARSE_CONTENT, 00582 PARSE_CONTENT_NO_LENGTH, PARSE_CHUNKS, PARSE_END 00583 }; 00584 00587 enum header_parse_state_t { 00588 PARSE_METHOD_START, PARSE_METHOD, PARSE_URI_STEM, PARSE_URI_QUERY, 00589 PARSE_HTTP_VERSION_H, PARSE_HTTP_VERSION_T_1, PARSE_HTTP_VERSION_T_2, 00590 PARSE_HTTP_VERSION_P, PARSE_HTTP_VERSION_SLASH, 00591 PARSE_HTTP_VERSION_MAJOR_START, PARSE_HTTP_VERSION_MAJOR, 00592 PARSE_HTTP_VERSION_MINOR_START, PARSE_HTTP_VERSION_MINOR, 00593 PARSE_STATUS_CODE_START, PARSE_STATUS_CODE, PARSE_STATUS_MESSAGE, 00594 PARSE_EXPECTING_NEWLINE, PARSE_EXPECTING_CR, 00595 PARSE_HEADER_WHITESPACE, PARSE_HEADER_START, PARSE_HEADER_NAME, 00596 PARSE_SPACE_BEFORE_HEADER_VALUE, PARSE_HEADER_VALUE, 00597 PARSE_EXPECTING_FINAL_NEWLINE, PARSE_EXPECTING_FINAL_CR 00598 }; 00599 00602 enum chunk_parse_state_t { 00603 PARSE_CHUNK_SIZE_START, PARSE_CHUNK_SIZE, 00604 PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE, 00605 PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE, 00606 PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE, PARSE_CHUNK, 00607 PARSE_EXPECTING_CR_AFTER_CHUNK, PARSE_EXPECTING_LF_AFTER_CHUNK, 00608 PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK, 00609 PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK 00610 }; 00611 00612 00614 message_parse_state_t m_message_parse_state; 00615 00617 header_parse_state_t m_headers_parse_state; 00618 00620 chunk_parse_state_t m_chunked_content_parse_state; 00621 00623 payload_handler_t m_payload_handler; 00624 00626 boost::uint16_t m_status_code; 00627 00629 std::string m_status_message; 00630 00632 std::string m_method; 00633 00635 std::string m_resource; 00636 00638 std::string m_query_string; 00639 00641 std::string m_raw_headers; 00642 00644 std::string m_header_name; 00645 00647 std::string m_header_value; 00648 00650 std::string m_chunk_size_str; 00651 00653 std::size_t m_size_of_current_chunk; 00654 00656 std::size_t m_bytes_read_in_current_chunk; 00657 00659 std::size_t m_bytes_content_remaining; 00660 00662 std::size_t m_bytes_content_read; 00663 00665 std::size_t m_bytes_last_read; 00666 00668 std::size_t m_bytes_total_read; 00669 00671 std::size_t m_max_content_length; 00672 00674 bool m_parse_headers_only; 00675 00677 bool m_save_raw_headers; 00678 00680 static error_category_t * m_error_category_ptr; 00681 00683 static boost::once_flag m_instance_flag; 00684 }; 00685 00686 00687 // inline functions for parser 00688 00689 inline bool parser::is_char(int c) 00690 { 00691 return(c >= 0 && c <= 127); 00692 } 00693 00694 inline bool parser::is_control(int c) 00695 { 00696 return( (c >= 0 && c <= 31) || c == 127); 00697 } 00698 00699 inline bool parser::is_special(int c) 00700 { 00701 switch (c) { 00702 case '(': case ')': case '<': case '>': case '@': 00703 case ',': case ';': case ':': case '\\': case '"': 00704 case '/': case '[': case ']': case '?': case '=': 00705 case '{': case '}': case ' ': case '\t': 00706 return true; 00707 default: 00708 return false; 00709 } 00710 } 00711 00712 inline bool parser::is_digit(int c) 00713 { 00714 return(c >= '0' && c <= '9'); 00715 } 00716 00717 inline bool parser::is_hex_digit(int c) 00718 { 00719 return((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); 00720 } 00721 00722 inline bool parser::is_cookie_attribute(const std::string& name, bool set_cookie_header) 00723 { 00724 return (name.empty() || name[0] == '$' || (set_cookie_header && 00725 ( 00726 // This is needed because of a very lenient determination in parse_cookie_header() of what 00727 // qualifies as a cookie-pair in a Set-Cookie header. 00728 // According to RFC 6265, everything after the first semicolon is a cookie attribute, but RFC 2109, 00729 // which is obsolete, allowed multiple comma separated cookies. 00730 // parse_cookie_header() is very conservatively assuming that any <name>=<value> pair in a 00731 // Set-Cookie header is a cookie-pair unless <name> is a known cookie attribute. 00732 boost::algorithm::iequals(name, "Comment") 00733 || boost::algorithm::iequals(name, "Domain") 00734 || boost::algorithm::iequals(name, "Max-Age") 00735 || boost::algorithm::iequals(name, "Path") 00736 || boost::algorithm::iequals(name, "Secure") 00737 || boost::algorithm::iequals(name, "Version") 00738 || boost::algorithm::iequals(name, "Expires") 00739 || boost::algorithm::iequals(name, "HttpOnly") 00740 ) 00741 )); 00742 } 00743 00744 } // end namespace http 00745 } // end namespace pion 00746 00747 #endif