pion  5.0.6
src/http_parser.cpp
00001 // ---------------------------------------------------------------------
00002 // pion:  a Boost C++ framework for building lightweight HTTP interfaces
00003 // ---------------------------------------------------------------------
00004 // Copyright (C) 2007-2014 Splunk Inc.  (https://github.com/splunk/pion)
00005 //
00006 // Distributed under the Boost Software License, Version 1.0.
00007 // See http://www.boost.org/LICENSE_1_0.txt
00008 //
00009 
00010 #include <cstdlib>
00011 #include <cstring>
00012 #include <boost/regex.hpp>
00013 #include <boost/assert.hpp>
00014 #include <boost/logic/tribool.hpp>
00015 #include <boost/algorithm/string.hpp>
00016 #include <pion/algorithm.hpp>
00017 #include <pion/http/parser.hpp>
00018 #include <pion/http/request.hpp>
00019 #include <pion/http/response.hpp>
00020 #include <pion/http/message.hpp>
00021 
00022 
00023 namespace pion {    // begin namespace pion
00024 namespace http {    // begin namespace http
00025 
00026 
00027 // static members of parser
00028 
00029 const boost::uint32_t   parser::STATUS_MESSAGE_MAX = 1024;  // 1 KB
00030 const boost::uint32_t   parser::METHOD_MAX = 1024;  // 1 KB
00031 const boost::uint32_t   parser::RESOURCE_MAX = 256 * 1024;  // 256 KB
00032 const boost::uint32_t   parser::QUERY_STRING_MAX = 1024 * 1024; // 1 MB
00033 const boost::uint32_t   parser::HEADER_NAME_MAX = 1024; // 1 KB
00034 const boost::uint32_t   parser::HEADER_VALUE_MAX = 1024 * 1024; // 1 MB
00035 const boost::uint32_t   parser::QUERY_NAME_MAX = 1024;  // 1 KB
00036 const boost::uint32_t   parser::QUERY_VALUE_MAX = 1024 * 1024;  // 1 MB
00037 const boost::uint32_t   parser::COOKIE_NAME_MAX = 1024; // 1 KB
00038 const boost::uint32_t   parser::COOKIE_VALUE_MAX = 1024 * 1024; // 1 MB
00039 const std::size_t       parser::DEFAULT_CONTENT_MAX = 1024 * 1024;  // 1 MB
00040 parser::error_category_t * parser::m_error_category_ptr = NULL;
00041 boost::once_flag            parser::m_instance_flag = BOOST_ONCE_INIT;
00042 
00043 
00044 // parser member functions
00045 
00046 boost::tribool parser::parse(http::message& http_msg,
00047     boost::system::error_code& ec)
00048 {
00049     BOOST_ASSERT(! eof() );
00050 
00051     boost::tribool rc = boost::indeterminate;
00052     std::size_t total_bytes_parsed = 0;
00053 
00054     if(http_msg.has_missing_packets()) {
00055         http_msg.set_data_after_missing_packet(true);
00056     }
00057 
00058     do {
00059         switch (m_message_parse_state) {
00060             // just started parsing the HTTP message
00061             case PARSE_START:
00062                 m_message_parse_state = PARSE_HEADERS;
00063                 // step through to PARSE_HEADERS
00064 
00065             // parsing the HTTP headers
00066             case PARSE_HEADERS:
00067             case PARSE_FOOTERS:
00068                 rc = parse_headers(http_msg, ec);
00069                 total_bytes_parsed += m_bytes_last_read;
00070                 // check if we have finished parsing HTTP headers
00071                 if (rc == true && m_message_parse_state == PARSE_HEADERS) {
00072                     // finish_header_parsing() updates m_message_parse_state
00073                     // We only call this for Headers and not Footers
00074                     rc = finish_header_parsing(http_msg, ec);
00075                 }
00076                 break;
00077 
00078             // parsing chunked payload content
00079             case PARSE_CHUNKS:
00080                 rc = parse_chunks(http_msg.get_chunk_cache(), ec);
00081                 total_bytes_parsed += m_bytes_last_read;
00082                 // check if we have finished parsing all chunks
00083                 if (rc == true && !m_payload_handler) {
00084                     http_msg.concatenate_chunks();
00085                     
00086                     // Handle footers if present
00087                     rc = ((m_message_parse_state == PARSE_FOOTERS) ?
00088                           boost::indeterminate : (boost::tribool)true);
00089                 }
00090                 break;
00091 
00092             // parsing regular payload content with a known length
00093             case PARSE_CONTENT:
00094                 rc = consume_content(http_msg, ec);
00095                 total_bytes_parsed += m_bytes_last_read;
00096                 break;
00097 
00098             // parsing payload content with no length (until EOF)
00099             case PARSE_CONTENT_NO_LENGTH:
00100                 consume_content_as_next_chunk(http_msg.get_chunk_cache());
00101                 total_bytes_parsed += m_bytes_last_read;
00102                 break;
00103 
00104             // finished parsing the HTTP message
00105             case PARSE_END:
00106                 rc = true;
00107                 break;
00108         }
00109     } while ( boost::indeterminate(rc) && ! eof() );
00110 
00111     // check if we've finished parsing the HTTP message
00112     if (rc == true) {
00113         m_message_parse_state = PARSE_END;
00114         finish(http_msg);
00115     } else if(rc == false) {
00116         compute_msg_status(http_msg, false);
00117     }
00118 
00119     // update bytes last read (aggregate individual operations for caller)
00120     m_bytes_last_read = total_bytes_parsed;
00121 
00122     return rc;
00123 }
00124 
00125 boost::tribool parser::parse_missing_data(http::message& http_msg,
00126     std::size_t len, boost::system::error_code& ec)
00127 {
00128     static const char MISSING_DATA_CHAR = 'X';
00129     boost::tribool rc = boost::indeterminate;
00130 
00131     http_msg.set_missing_packets(true);
00132 
00133     switch (m_message_parse_state) {
00134 
00135         // cannot recover from missing data while parsing HTTP headers
00136         case PARSE_START:
00137         case PARSE_HEADERS:
00138         case PARSE_FOOTERS:
00139             set_error(ec, ERROR_MISSING_HEADER_DATA);
00140             rc = false;
00141             break;
00142 
00143         // parsing chunked payload content
00144         case PARSE_CHUNKS:
00145             // parsing chunk data -> we can only recover if data fits into current chunk
00146             if (m_chunked_content_parse_state == PARSE_CHUNK
00147                 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00148                 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00149             {
00150                 // use dummy content for missing data
00151                 if (m_payload_handler) {
00152                     for (std::size_t n = 0; n < len; ++n)
00153                         m_payload_handler(&MISSING_DATA_CHAR, 1);
00154                 } else {
00155                     for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n) 
00156                         http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00157                 }
00158 
00159                 m_bytes_read_in_current_chunk += len;
00160                 m_bytes_last_read = len;
00161                 m_bytes_total_read += len;
00162                 m_bytes_content_read += len;
00163 
00164                 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00165                     m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00166                 }
00167             } else {
00168                 // cannot recover from missing data
00169                 set_error(ec, ERROR_MISSING_CHUNK_DATA);
00170                 rc = false;
00171             }
00172             break;
00173 
00174         // parsing regular payload content with a known length
00175         case PARSE_CONTENT:
00176             // parsing content (with length) -> we can only recover if data fits into content
00177             if (m_bytes_content_remaining == 0) {
00178                 // we have all of the remaining payload content
00179                 rc = true;
00180             } else if (m_bytes_content_remaining < len) {
00181                 // cannot recover from missing data
00182                 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00183                 rc = false;
00184             } else {
00185 
00186                 // make sure content buffer is not already full
00187                 if (m_payload_handler) {
00188                     for (std::size_t n = 0; n < len; ++n)
00189                         m_payload_handler(&MISSING_DATA_CHAR, 1);
00190                 } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
00191                     // use dummy content for missing data
00192                     for (std::size_t n = 0; n < len; ++n)
00193                         http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00194                 } else {
00195                     m_bytes_content_read += len;
00196                 }
00197 
00198                 m_bytes_content_remaining -= len;
00199                 m_bytes_total_read += len;
00200                 m_bytes_last_read = len;
00201 
00202                 if (m_bytes_content_remaining == 0)
00203                     rc = true;
00204             }
00205             break;
00206 
00207         // parsing payload content with no length (until EOF)
00208         case PARSE_CONTENT_NO_LENGTH:
00209             // use dummy content for missing data
00210             if (m_payload_handler) {
00211                 for (std::size_t n = 0; n < len; ++n)
00212                     m_payload_handler(&MISSING_DATA_CHAR, 1);
00213             } else {
00214                 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n) 
00215                     http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00216             }
00217             m_bytes_last_read = len;
00218             m_bytes_total_read += len;
00219             m_bytes_content_read += len;
00220             break;
00221 
00222         // finished parsing the HTTP message
00223         case PARSE_END:
00224             rc = true;
00225             break;
00226     }
00227 
00228     // check if we've finished parsing the HTTP message
00229     if (rc == true) {
00230         m_message_parse_state = PARSE_END;
00231         finish(http_msg);
00232     } else if(rc == false) {
00233         compute_msg_status(http_msg, false);
00234     }
00235 
00236     return rc;
00237 }
00238 
00239 boost::tribool parser::parse_headers(http::message& http_msg,
00240     boost::system::error_code& ec)
00241 {
00242     //
00243     // note that boost::tribool may have one of THREE states:
00244     //
00245     // false: encountered an error while parsing HTTP headers
00246     // true: finished successfully parsing the HTTP headers
00247     // indeterminate: parsed bytes, but the HTTP headers are not yet finished
00248     //
00249     const char *read_start_ptr = m_read_ptr;
00250     m_bytes_last_read = 0;
00251     while (m_read_ptr < m_read_end_ptr) {
00252 
00253         if (m_save_raw_headers)
00254             m_raw_headers += *m_read_ptr;
00255         
00256         switch (m_headers_parse_state) {
00257         case PARSE_METHOD_START:
00258             // we have not yet started parsing the HTTP method string
00259             if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') { // ignore leading whitespace
00260                 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00261                     set_error(ec, ERROR_METHOD_CHAR);
00262                     return false;
00263                 }
00264                 m_headers_parse_state = PARSE_METHOD;
00265                 m_method.erase();
00266                 m_method.push_back(*m_read_ptr);
00267             }
00268             break;
00269 
00270         case PARSE_METHOD:
00271             // we have started parsing the HTTP method string
00272             if (*m_read_ptr == ' ') {
00273                 m_resource.erase();
00274                 m_headers_parse_state = PARSE_URI_STEM;
00275             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00276                 set_error(ec, ERROR_METHOD_CHAR);
00277                 return false;
00278             } else if (m_method.size() >= METHOD_MAX) {
00279                 set_error(ec, ERROR_METHOD_SIZE);
00280                 return false;
00281             } else {
00282                 m_method.push_back(*m_read_ptr);
00283             }
00284             break;
00285 
00286         case PARSE_URI_STEM:
00287             // we have started parsing the URI stem (or resource name)
00288             if (*m_read_ptr == ' ') {
00289                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00290             } else if (*m_read_ptr == '?') {
00291                 m_query_string.erase();
00292                 m_headers_parse_state = PARSE_URI_QUERY;
00293             } else if (*m_read_ptr == '\r') {
00294                 http_msg.set_version_major(0);
00295                 http_msg.set_version_minor(0);
00296                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00297             } else if (*m_read_ptr == '\n') {
00298                 http_msg.set_version_major(0);
00299                 http_msg.set_version_minor(0);
00300                 m_headers_parse_state = PARSE_EXPECTING_CR;
00301             } else if (is_control(*m_read_ptr)) {
00302                 set_error(ec, ERROR_URI_CHAR);
00303                 return false;
00304             } else if (m_resource.size() >= RESOURCE_MAX) {
00305                 set_error(ec, ERROR_URI_SIZE);
00306                 return false;
00307             } else {
00308                 m_resource.push_back(*m_read_ptr);
00309             }
00310             break;
00311 
00312         case PARSE_URI_QUERY:
00313             // we have started parsing the URI query string
00314             if (*m_read_ptr == ' ') {
00315                 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00316             } else if (*m_read_ptr == '\r') {
00317                 http_msg.set_version_major(0);
00318                 http_msg.set_version_minor(0);
00319                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00320             } else if (*m_read_ptr == '\n') {
00321                 http_msg.set_version_major(0);
00322                 http_msg.set_version_minor(0);
00323                 m_headers_parse_state = PARSE_EXPECTING_CR;
00324             } else if (is_control(*m_read_ptr)) {
00325                 set_error(ec, ERROR_QUERY_CHAR);
00326                 return false;
00327             } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00328                 set_error(ec, ERROR_QUERY_SIZE);
00329                 return false;
00330             } else {
00331                 m_query_string.push_back(*m_read_ptr);
00332             }
00333             break;
00334 
00335         case PARSE_HTTP_VERSION_H:
00336             // parsing "HTTP"
00337             if (*m_read_ptr == '\r') {
00338                 // should only happen for requests (no HTTP/VERSION specified)
00339                 if (! m_is_request) {
00340                     set_error(ec, ERROR_VERSION_EMPTY);
00341                     return false;
00342                 }
00343                 http_msg.set_version_major(0);
00344                 http_msg.set_version_minor(0);
00345                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00346             } else if (*m_read_ptr == '\n') {
00347                 // should only happen for requests (no HTTP/VERSION specified)
00348                 if (! m_is_request) {
00349                     set_error(ec, ERROR_VERSION_EMPTY);
00350                     return false;
00351                 }
00352                 http_msg.set_version_major(0);
00353                 http_msg.set_version_minor(0);
00354                 m_headers_parse_state = PARSE_EXPECTING_CR;
00355             } else if (*m_read_ptr != 'H') {
00356                 set_error(ec, ERROR_VERSION_CHAR);
00357                 return false;
00358             }
00359             m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00360             break;
00361 
00362         case PARSE_HTTP_VERSION_T_1:
00363             // parsing "HTTP"
00364             if (*m_read_ptr != 'T') {
00365                 set_error(ec, ERROR_VERSION_CHAR);
00366                 return false;
00367             }
00368             m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00369             break;
00370 
00371         case PARSE_HTTP_VERSION_T_2:
00372             // parsing "HTTP"
00373             if (*m_read_ptr != 'T') {
00374                 set_error(ec, ERROR_VERSION_CHAR);
00375                 return false;
00376             }
00377             m_headers_parse_state = PARSE_HTTP_VERSION_P;
00378             break;
00379 
00380         case PARSE_HTTP_VERSION_P:
00381             // parsing "HTTP"
00382             if (*m_read_ptr != 'P') {
00383                 set_error(ec, ERROR_VERSION_CHAR);
00384                 return false;
00385             }
00386             m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00387             break;
00388 
00389         case PARSE_HTTP_VERSION_SLASH:
00390             // parsing slash after "HTTP"
00391             if (*m_read_ptr != '/') {
00392                 set_error(ec, ERROR_VERSION_CHAR);
00393                 return false;
00394             }
00395             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00396             break;
00397 
00398         case PARSE_HTTP_VERSION_MAJOR_START:
00399             // parsing the first digit of the major version number
00400             if (!is_digit(*m_read_ptr)) {
00401                 set_error(ec, ERROR_VERSION_CHAR);
00402                 return false;
00403             }
00404             http_msg.set_version_major(*m_read_ptr - '0');
00405             m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00406             break;
00407 
00408         case PARSE_HTTP_VERSION_MAJOR:
00409             // parsing the major version number (not first digit)
00410             if (*m_read_ptr == '.') {
00411                 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00412             } else if (is_digit(*m_read_ptr)) {
00413                 http_msg.set_version_major( (http_msg.get_version_major() * 10)
00414                                           + (*m_read_ptr - '0') );
00415             } else {
00416                 set_error(ec, ERROR_VERSION_CHAR);
00417                 return false;
00418             }
00419             break;
00420 
00421         case PARSE_HTTP_VERSION_MINOR_START:
00422             // parsing the first digit of the minor version number
00423             if (!is_digit(*m_read_ptr)) {
00424                 set_error(ec, ERROR_VERSION_CHAR);
00425                 return false;
00426             }
00427             http_msg.set_version_minor(*m_read_ptr - '0');
00428             m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00429             break;
00430 
00431         case PARSE_HTTP_VERSION_MINOR:
00432             // parsing the major version number (not first digit)
00433             if (*m_read_ptr == ' ') {
00434                 // ignore trailing spaces after version in request
00435                 if (! m_is_request) {
00436                     m_headers_parse_state = PARSE_STATUS_CODE_START;
00437                 }
00438             } else if (*m_read_ptr == '\r') {
00439                 // should only happen for requests
00440                 if (! m_is_request) {
00441                     set_error(ec, ERROR_STATUS_EMPTY);
00442                     return false;
00443                 }
00444                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00445             } else if (*m_read_ptr == '\n') {
00446                 // should only happen for requests
00447                 if (! m_is_request) {
00448                     set_error(ec, ERROR_STATUS_EMPTY);
00449                     return false;
00450                 }
00451                 m_headers_parse_state = PARSE_EXPECTING_CR;
00452             } else if (is_digit(*m_read_ptr)) {
00453                 http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
00454                                           + (*m_read_ptr - '0') );
00455             } else {
00456                 set_error(ec, ERROR_VERSION_CHAR);
00457                 return false;
00458             }
00459             break;
00460 
00461         case PARSE_STATUS_CODE_START:
00462             // parsing the first digit of the response status code
00463             if (!is_digit(*m_read_ptr)) {
00464                 set_error(ec, ERROR_STATUS_CHAR);
00465                 return false;
00466             }
00467             m_status_code = (*m_read_ptr - '0');
00468             m_headers_parse_state = PARSE_STATUS_CODE;
00469             break;
00470 
00471         case PARSE_STATUS_CODE:
00472             // parsing the response status code (not first digit)
00473             if (*m_read_ptr == ' ') {
00474                 m_status_message.erase();
00475                 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00476             } else if (is_digit(*m_read_ptr)) {
00477                 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00478             } else if (*m_read_ptr == '\r') {
00479                 // recover from status message not sent
00480                 m_status_message.erase();
00481                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00482             } else if (*m_read_ptr == '\n') {
00483                 // recover from status message not sent
00484                 m_status_message.erase();
00485                 m_headers_parse_state = PARSE_EXPECTING_CR;
00486             } else {
00487                 set_error(ec, ERROR_STATUS_CHAR);
00488                 return false;
00489             }
00490             break;
00491 
00492         case PARSE_STATUS_MESSAGE:
00493             // parsing the response status message
00494             if (*m_read_ptr == '\r') {
00495                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00496             } else if (*m_read_ptr == '\n') {
00497                 m_headers_parse_state = PARSE_EXPECTING_CR;
00498             } else if (is_control(*m_read_ptr)) {
00499                 set_error(ec, ERROR_STATUS_CHAR);
00500                 return false;
00501             } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00502                 set_error(ec, ERROR_STATUS_CHAR);
00503                 return false;
00504             } else {
00505                 m_status_message.push_back(*m_read_ptr);
00506             }
00507             break;
00508 
00509         case PARSE_EXPECTING_NEWLINE:
00510             // we received a CR; expecting a newline to follow
00511             if (*m_read_ptr == '\n') {
00512                 // check if this is a HTTP 0.9 "Simple Request"
00513                 if (m_is_request && http_msg.get_version_major() == 0) {
00514                     PION_LOG_DEBUG(m_logger, "HTTP 0.9 Simple-Request found");
00515                     ++m_read_ptr;
00516                     m_bytes_last_read = (m_read_ptr - read_start_ptr);
00517                     m_bytes_total_read += m_bytes_last_read;
00518                     return true;
00519                 } else {
00520                     m_headers_parse_state = PARSE_HEADER_START;
00521                 }
00522             } else if (*m_read_ptr == '\r') {
00523                 // we received two CR's in a row
00524                 // assume CR only is (incorrectly) being used for line termination
00525                 // therefore, the message is finished
00526                 ++m_read_ptr;
00527                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00528                 m_bytes_total_read += m_bytes_last_read;
00529                 return true;
00530             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00531                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00532             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00533                 set_error(ec, ERROR_HEADER_CHAR);
00534                 return false;
00535             } else {
00536                 // assume it is the first character for the name of a header
00537                 m_header_name.erase();
00538                 m_header_name.push_back(*m_read_ptr);
00539                 m_headers_parse_state = PARSE_HEADER_NAME;
00540             }
00541             break;
00542 
00543         case PARSE_EXPECTING_CR:
00544             // we received a newline without a CR
00545             if (*m_read_ptr == '\r') {
00546                 m_headers_parse_state = PARSE_HEADER_START;
00547             } else if (*m_read_ptr == '\n') {
00548                 // we received two newlines in a row
00549                 // assume newline only is (incorrectly) being used for line termination
00550                 // therefore, the message is finished
00551                 ++m_read_ptr;
00552                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00553                 m_bytes_total_read += m_bytes_last_read;
00554                 return true;
00555             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00556                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00557             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00558                 set_error(ec, ERROR_HEADER_CHAR);
00559                 return false;
00560             } else {
00561                 // assume it is the first character for the name of a header
00562                 m_header_name.erase();
00563                 m_header_name.push_back(*m_read_ptr);
00564                 m_headers_parse_state = PARSE_HEADER_NAME;
00565             }
00566             break;
00567 
00568         case PARSE_HEADER_WHITESPACE:
00569             // parsing whitespace before a header name
00570             if (*m_read_ptr == '\r') {
00571                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00572             } else if (*m_read_ptr == '\n') {
00573                 m_headers_parse_state = PARSE_EXPECTING_CR;
00574             } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00575                 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00576                     set_error(ec, ERROR_HEADER_CHAR);
00577                     return false;
00578                 }
00579                 // assume it is the first character for the name of a header
00580                 m_header_name.erase();
00581                 m_header_name.push_back(*m_read_ptr);
00582                 m_headers_parse_state = PARSE_HEADER_NAME;
00583             }
00584             break;
00585 
00586         case PARSE_HEADER_START:
00587             // parsing the start of a new header
00588             if (*m_read_ptr == '\r') {
00589                 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00590             } else if (*m_read_ptr == '\n') {
00591                 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00592             } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00593                 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00594             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00595                 set_error(ec, ERROR_HEADER_CHAR);
00596                 return false;
00597             } else {
00598                 // first character for the name of a header
00599                 m_header_name.erase();
00600                 m_header_name.push_back(*m_read_ptr);
00601                 m_headers_parse_state = PARSE_HEADER_NAME;
00602             }
00603             break;
00604 
00605         case PARSE_HEADER_NAME:
00606             // parsing the name of a header
00607             if (*m_read_ptr == ':') {
00608                 m_header_value.erase();
00609                 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00610             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00611                 set_error(ec, ERROR_HEADER_CHAR);
00612                 return false;
00613             } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00614                 set_error(ec, ERROR_HEADER_NAME_SIZE);
00615                 return false;
00616             } else {
00617                 // character (not first) for the name of a header
00618                 m_header_name.push_back(*m_read_ptr);
00619             }
00620             break;
00621 
00622         case PARSE_SPACE_BEFORE_HEADER_VALUE:
00623             // parsing space character before a header's value
00624             if (*m_read_ptr == ' ') {
00625                 m_headers_parse_state = PARSE_HEADER_VALUE;
00626             } else if (*m_read_ptr == '\r') {
00627                 http_msg.add_header(m_header_name, m_header_value);
00628                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00629             } else if (*m_read_ptr == '\n') {
00630                 http_msg.add_header(m_header_name, m_header_value);
00631                 m_headers_parse_state = PARSE_EXPECTING_CR;
00632             } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00633                 set_error(ec, ERROR_HEADER_CHAR);
00634                 return false;
00635             } else {
00636                 // assume it is the first character for the value of a header
00637                 m_header_value.push_back(*m_read_ptr);
00638                 m_headers_parse_state = PARSE_HEADER_VALUE;
00639             }
00640             break;
00641 
00642         case PARSE_HEADER_VALUE:
00643             // parsing the value of a header
00644             if (*m_read_ptr == '\r') {
00645                 http_msg.add_header(m_header_name, m_header_value);
00646                 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00647             } else if (*m_read_ptr == '\n') {
00648                 http_msg.add_header(m_header_name, m_header_value);
00649                 m_headers_parse_state = PARSE_EXPECTING_CR;
00650             } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
00651                 // RFC 2616, 2.2 basic Rules.
00652                 // TEXT = <any OCTET except CTLs, but including LWS>
00653                 // LWS  = [CRLF] 1*( SP | HT )
00654                 //
00655                 // TODO: parsing of folding LWS in multiple lines headers
00656                 //       doesn't work properly still
00657                 set_error(ec, ERROR_HEADER_CHAR);
00658                 return false;
00659             } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00660                 set_error(ec, ERROR_HEADER_VALUE_SIZE);
00661                 return false;
00662             } else {
00663                 // character (not first) for the value of a header
00664                 m_header_value.push_back(*m_read_ptr);
00665             }
00666             break;
00667 
00668         case PARSE_EXPECTING_FINAL_NEWLINE:
00669             if (*m_read_ptr == '\n') ++m_read_ptr;
00670             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00671             m_bytes_total_read += m_bytes_last_read;
00672             return true;
00673 
00674         case PARSE_EXPECTING_FINAL_CR:
00675             if (*m_read_ptr == '\r') ++m_read_ptr;
00676             m_bytes_last_read = (m_read_ptr - read_start_ptr);
00677             m_bytes_total_read += m_bytes_last_read;
00678             return true;
00679         }
00680         
00681         ++m_read_ptr;
00682     }
00683 
00684     m_bytes_last_read = (m_read_ptr - read_start_ptr);
00685     m_bytes_total_read += m_bytes_last_read;
00686     return boost::indeterminate;
00687 }
00688 
00689 void parser::update_message_with_header_data(http::message& http_msg) const
00690 {
00691     if (is_parsing_request()) {
00692 
00693         // finish an HTTP request message
00694 
00695         http::request& http_request(dynamic_cast<http::request&>(http_msg));
00696         http_request.set_method(m_method);
00697         http_request.set_resource(m_resource);
00698         http_request.set_query_string(m_query_string);
00699 
00700         // parse query pairs from the URI query string
00701         if (! m_query_string.empty()) {
00702             if (! parse_url_encoded(http_request.get_queries(),
00703                                   m_query_string.c_str(),
00704                                   m_query_string.size())) 
00705                 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00706         }
00707 
00708         // parse "Cookie" headers in request
00709         std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00710         cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
00711         for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00712              cookie_iterator != http_request.get_headers().end()
00713              && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00714         {
00715             if (! parse_cookie_header(http_request.get_cookies(),
00716                                     cookie_iterator->second, false) )
00717                 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00718         }
00719 
00720     } else {
00721 
00722         // finish an HTTP response message
00723 
00724         http::response& http_response(dynamic_cast<http::response&>(http_msg));
00725         http_response.set_status_code(m_status_code);
00726         http_response.set_status_message(m_status_message);
00727 
00728         // parse "Set-Cookie" headers in response
00729         std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00730         cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
00731         for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00732              cookie_iterator != http_response.get_headers().end()
00733              && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00734         {
00735             if (! parse_cookie_header(http_response.get_cookies(),
00736                                     cookie_iterator->second, true) )
00737                 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00738         }
00739 
00740     }
00741 }
00742 
00743 boost::tribool parser::finish_header_parsing(http::message& http_msg,
00744     boost::system::error_code& ec)
00745 {
00746     boost::tribool rc = boost::indeterminate;
00747 
00748     m_bytes_content_remaining = m_bytes_content_read = 0;
00749     http_msg.set_content_length(0);
00750     http_msg.update_transfer_encoding_using_header();
00751     update_message_with_header_data(http_msg);
00752 
00753     if (http_msg.is_chunked()) {
00754 
00755         // content is encoded using chunks
00756         m_message_parse_state = PARSE_CHUNKS;
00757         
00758         // return true if parsing headers only
00759         if (m_parse_headers_only)
00760             rc = true;
00761 
00762     } else if (http_msg.is_content_length_implied()) {
00763 
00764         // content length is implied to be zero
00765         m_message_parse_state = PARSE_END;
00766         rc = true;
00767 
00768     } else {
00769         // content length should be specified in the headers
00770 
00771         if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
00772 
00773             // message has a content-length header
00774             try {
00775                 http_msg.update_content_length_using_header();
00776             } catch (...) {
00777                 PION_LOG_ERROR(m_logger, "Unable to update content length");
00778                 set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
00779                 return false;
00780             }
00781 
00782             // check if content-length header == 0
00783             if (http_msg.get_content_length() == 0) {
00784                 m_message_parse_state = PARSE_END;
00785                 rc = true;
00786             } else {
00787                 m_message_parse_state = PARSE_CONTENT;
00788                 m_bytes_content_remaining = http_msg.get_content_length();
00789 
00790                 // check if content-length exceeds maximum allowed
00791                 if (m_bytes_content_remaining > m_max_content_length)
00792                     http_msg.set_content_length(m_max_content_length);
00793 
00794                 if (m_parse_headers_only) {
00795                     // return true if parsing headers only
00796                     rc = true;
00797                 } else {
00798                     // allocate a buffer for payload content (may be zero-size)
00799                     http_msg.create_content_buffer();
00800                 }
00801             }
00802 
00803         } else {
00804             // no content-length specified, and the content length cannot 
00805             // otherwise be determined
00806 
00807             // only if not a request, read through the close of the connection
00808             if (! m_is_request) {
00809                 // clear the chunk buffers before we start
00810                 http_msg.get_chunk_cache().clear();
00811 
00812                 // continue reading content until there is no more data
00813                 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00814 
00815                 // return true if parsing headers only
00816                 if (m_parse_headers_only)
00817                     rc = true;
00818             } else {
00819                 m_message_parse_state = PARSE_END;
00820                 rc = true;
00821             }
00822         }
00823     }
00824 
00825     finished_parsing_headers(ec);
00826     
00827     return rc;
00828 }
00829     
00830 bool parser::parse_uri(const std::string& uri, std::string& proto, 
00831                       std::string& host, boost::uint16_t& port,
00832                       std::string& path, std::string& query)
00833 {
00834     size_t proto_end = uri.find("://");
00835     size_t proto_len = 0;
00836     
00837     if(proto_end != std::string::npos) {
00838         proto = uri.substr(0, proto_end);
00839         proto_len = proto_end + 3; // add ://
00840     } else {
00841         proto.clear();
00842     }
00843     
00844     // find a first slash charact
00845     // that indicates the end of the <server>:<port> part
00846     size_t server_port_end = uri.find('/', proto_len);
00847     if (server_port_end == std::string::npos) {
00848         // no path -> use just /
00849         path = "/";
00850         server_port_end = uri.size();
00851     }
00852     
00853     // copy <server>:<port> into temp string
00854     std::string t; 
00855     t = uri.substr(proto_len, server_port_end - proto_len);
00856     size_t port_pos = t.find(':', 0);
00857     
00858     // assign output host and port parameters
00859     
00860     host = t.substr(0, port_pos); // if port_pos == npos, copy whole string
00861     if(host.length() == 0) {
00862         return false;
00863     }
00864     
00865     // parse the port, if it's not empty
00866     if(port_pos != std::string::npos) {
00867         try {
00868             port = boost::lexical_cast<int>(t.substr(port_pos+1));
00869         } catch (boost::bad_lexical_cast &) {
00870             return false;
00871         }
00872     } else if (proto == "http" || proto == "HTTP") {
00873         port = 80;
00874     } else if (proto == "https" || proto == "HTTPS") {
00875         port = 443;
00876     } else {
00877         port = 0;
00878     }
00879     
00880     if (server_port_end < uri.size()) {
00881         // copy the rest of the URI into path part
00882         path = uri.substr(server_port_end);
00883         
00884         // split the path and the query string parts
00885         size_t query_pos = path.find('?', 0);
00886         
00887         if(query_pos != std::string::npos) {
00888             query = path.substr(query_pos + 1, path.length() - query_pos - 1);
00889             path = path.substr(0, query_pos);
00890         } else {
00891             query.clear();
00892         }
00893     }
00894     
00895     return true;
00896 }
00897 
00898 bool parser::parse_url_encoded(ihash_multimap& dict,
00899                                const char *ptr, const size_t len)
00900 {
00901     // sanity check
00902     if (ptr == NULL || len == 0)
00903         return true;
00904 
00905     // used to track whether we are parsing the name or value
00906     enum QueryParseState {
00907         QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00908     } parse_state = QUERY_PARSE_NAME;
00909 
00910     // misc other variables used for parsing
00911     const char * const end = ptr + len;
00912     std::string query_name;
00913     std::string query_value;
00914 
00915     // iterate through each encoded character
00916     while (ptr < end) {
00917         switch (parse_state) {
00918 
00919         case QUERY_PARSE_NAME:
00920             // parsing query name
00921             if (*ptr == '=') {
00922                 // end of name found (OK if empty)
00923                 parse_state = QUERY_PARSE_VALUE;
00924             } else if (*ptr == '&') {
00925                 // if query name is empty, just skip it (i.e. "&&")
00926                 if (! query_name.empty()) {
00927                     // assume that "=" is missing -- it's OK if the value is empty
00928                     dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00929                     query_name.erase();
00930                 }
00931             } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00932                 // ignore linefeeds, carriage return and tabs (normally within POST content)
00933             } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00934                 // control character detected, or max sized exceeded
00935                 return false;
00936             } else {
00937                 // character is part of the name
00938                 query_name.push_back(*ptr);
00939             }
00940             break;
00941 
00942         case QUERY_PARSE_VALUE:
00943             // parsing query value
00944             if (*ptr == '&') {
00945                 // end of value found (OK if empty)
00946                 if (! query_name.empty()) {
00947                     dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00948                     query_name.erase();
00949                 }
00950                 query_value.erase();
00951                 parse_state = QUERY_PARSE_NAME;
00952             } else if (*ptr == ',') {
00953                 // end of value found in multi-value list (OK if empty)
00954                 if (! query_name.empty())
00955                     dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00956                 query_value.erase();
00957             } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00958                 // ignore linefeeds, carriage return and tabs (normally within POST content)
00959             } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00960                 // control character detected, or max sized exceeded
00961                 return false;
00962             } else {
00963                 // character is part of the value
00964                 query_value.push_back(*ptr);
00965             }
00966             break;
00967         }
00968 
00969         ++ptr;
00970     }
00971 
00972     // handle last pair in string
00973     if (! query_name.empty())
00974         dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00975 
00976     return true;
00977 }
00978 
00979 bool parser::parse_multipart_form_data(ihash_multimap& dict,
00980                                        const std::string& content_type,
00981                                        const char *ptr, const size_t len)
00982 {
00983     // sanity check
00984     if (ptr == NULL || len == 0)
00985         return true;
00986     
00987     // parse field boundary
00988     std::size_t pos = content_type.find("boundary=");
00989     if (pos == std::string::npos)
00990         return false;
00991     const std::string boundary = std::string("--") + content_type.substr(pos+9);
00992     
00993     // used to track what we are parsing
00994     enum MultiPartParseState {
00995         MP_PARSE_START,
00996         MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
00997         MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
00998         MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
00999     } parse_state = MP_PARSE_START;
01000 
01001     // a few variables used for parsing
01002     std::string header_name;
01003     std::string header_value;
01004     std::string field_name;
01005     std::string field_value;
01006     bool found_parameter = false;
01007     bool save_current_field = true;
01008     const char * const end_ptr = ptr + len;
01009 
01010     ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01011 
01012     while (ptr != NULL && ptr < end_ptr) {
01013         switch (parse_state) {
01014             case MP_PARSE_START:
01015                 // start parsing a new field
01016                 header_name.clear();
01017                 header_value.clear();
01018                 field_name.clear();
01019                 field_value.clear();
01020                 save_current_field = true;
01021                 ptr += boundary.size() - 1;
01022                 parse_state = MP_PARSE_HEADER_CR;
01023                 break;
01024             case MP_PARSE_HEADER_CR:
01025                 // expecting CR while parsing headers
01026                 if (*ptr == '\r') {
01027                     // got it -> look for linefeed
01028                     parse_state = MP_PARSE_HEADER_LF;
01029                 } else if (*ptr == '\n') {
01030                     // got a linefeed? try to ignore and start parsing header
01031                     parse_state = MP_PARSE_HEADER_NAME;
01032                 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
01033                     // end of multipart content
01034                     return true;
01035                 } else return false;
01036                 break;
01037             case MP_PARSE_HEADER_LF:
01038                 // expecting LF while parsing headers
01039                 if (*ptr == '\n') {
01040                     // got it -> start parsing header name
01041                     parse_state = MP_PARSE_HEADER_NAME;
01042                 } else return false;
01043                 break;
01044             case MP_PARSE_HEADER_NAME:
01045                 // parsing the name of a header
01046                 if (*ptr == '\r' || *ptr == '\n') {
01047                     if (header_name.empty()) {
01048                         // got CR or LF at beginning; skip to data
01049                         parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
01050                     } else {
01051                         // premature CR or LF -> just ignore and start parsing next header
01052                         parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01053                     }
01054                 } else if (*ptr == ':') {
01055                     // done parsing header name -> consume space next
01056                     parse_state = MP_PARSE_HEADER_SPACE;
01057                 } else {
01058                     // one more byte for header name
01059                     header_name += *ptr;
01060                 }
01061                 break;
01062             case MP_PARSE_HEADER_SPACE:
01063                 // expecting a space before header value
01064                 if (*ptr == '\r') {
01065                     // premature CR -> just ignore and start parsing next header
01066                     parse_state = MP_PARSE_HEADER_LF;
01067                 } else if (*ptr == '\n') {
01068                     // premature LF -> just ignore and start parsing next header
01069                     parse_state = MP_PARSE_HEADER_NAME;
01070                 } else if (*ptr != ' ') {
01071                     // not a space -> assume it's a value char
01072                     header_value += *ptr;
01073                     parse_state = MP_PARSE_HEADER_VALUE;
01074                 }
01075                 // otherwise just ignore the space(s)
01076                 break;
01077             case MP_PARSE_HEADER_VALUE:
01078                 // parsing the value of a header
01079                 if (*ptr == '\r' || *ptr == '\n') {
01080                     // reached the end of the value -> check if it's important
01081                     if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
01082                         // only keep fields that have a text type or no type
01083                         save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
01084                     } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
01085                         // get current field from content-disposition header
01086                         std::size_t name_pos = header_value.find("name=\"");
01087                         if (name_pos != std::string::npos) {
01088                             for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
01089                                 field_name += header_value[name_pos];
01090                             }
01091                         }
01092                     }
01093                     // clear values and start parsing next header
01094                     header_name.clear();
01095                     header_value.clear();
01096                     parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01097                 } else {
01098                     // one more byte for header value
01099                     header_value += *ptr;
01100                 }
01101                 break;
01102             case MP_PARSE_HEADER_LAST_LF:
01103                 // expecting final linefeed to terminate headers and begin field data
01104                 if (*ptr == '\n') {
01105                     // got it
01106                     if (save_current_field && !field_name.empty()) {
01107                         // parse the field if we care & know enough about it
01108                         parse_state = MP_PARSE_FIELD_DATA;
01109                     } else {
01110                         // otherwise skip ahead to next field
01111                         parse_state = MP_PARSE_START;
01112                         ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01113                     }
01114                 } else return false;
01115                 break;
01116             case MP_PARSE_FIELD_DATA:
01117                 // parsing the value of a field -> find the end of it
01118                 const char *field_end_ptr = end_ptr;
01119                 const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01120                 if (next_ptr) {
01121                     // don't include CRLF before next boundary
01122                     const char *temp_ptr = next_ptr - 2;
01123                     if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
01124                         field_end_ptr = temp_ptr;
01125                     else field_end_ptr = next_ptr;
01126                 }
01127                 field_value.assign(ptr, field_end_ptr - ptr);
01128                 // add the field to the query dictionary
01129                 dict.insert( std::make_pair(field_name, field_value) );
01130                 found_parameter = true;
01131                 // skip ahead to next field
01132                 parse_state = MP_PARSE_START;
01133                 ptr = next_ptr;
01134                 break;
01135         }
01136         // we've already bumped position if MP_PARSE_START
01137         if (parse_state != MP_PARSE_START)
01138             ++ptr;
01139     }
01140     
01141     return found_parameter;
01142 }
01143 
01144 bool parser::parse_cookie_header(ihash_multimap& dict,
01145                                    const char *ptr, const size_t len,
01146                                    bool set_cookie_header)
01147 {
01148     // BASED ON RFC 2109
01149     // http://www.ietf.org/rfc/rfc2109.txt
01150     // 
01151     // The current implementation ignores cookie attributes which begin with '$'
01152     // (i.e. $Path=/, $Domain=, etc.)
01153 
01154     // used to track what we are parsing
01155     enum CookieParseState {
01156         COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
01157     } parse_state = COOKIE_PARSE_NAME;
01158 
01159     // misc other variables used for parsing
01160     const char * const end = ptr + len;
01161     std::string cookie_name;
01162     std::string cookie_value;
01163     char value_quote_character = '\0';
01164 
01165     // iterate through each character
01166     while (ptr < end) {
01167         switch (parse_state) {
01168 
01169         case COOKIE_PARSE_NAME:
01170             // parsing cookie name
01171             if (*ptr == '=') {
01172                 // end of name found (OK if empty)
01173                 value_quote_character = '\0';
01174                 parse_state = COOKIE_PARSE_VALUE;
01175             } else if (*ptr == ';' || *ptr == ',') {
01176                 // ignore empty cookie names since this may occur naturally
01177                 // when quoted values are encountered
01178                 if (! cookie_name.empty()) {
01179                     // value is empty (OK)
01180                     if (! is_cookie_attribute(cookie_name, set_cookie_header))
01181                         dict.insert( std::make_pair(cookie_name, cookie_value) );
01182                     cookie_name.erase();
01183                 }
01184             } else if (*ptr != ' ') {   // ignore whitespace
01185                 // check if control character detected, or max sized exceeded
01186                 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
01187                     return false;
01188                 // character is part of the name
01189                 cookie_name.push_back(*ptr);
01190             }
01191             break;
01192 
01193         case COOKIE_PARSE_VALUE:
01194             // parsing cookie value
01195             if (value_quote_character == '\0') {
01196                 // value is not (yet) quoted
01197                 if (*ptr == ';' || *ptr == ',') {
01198                     // end of value found (OK if empty)
01199                     if (! is_cookie_attribute(cookie_name, set_cookie_header))
01200                         dict.insert( std::make_pair(cookie_name, cookie_value) );
01201                     cookie_name.erase();
01202                     cookie_value.erase();
01203                     parse_state = COOKIE_PARSE_NAME;
01204                 } else if (*ptr == '\'' || *ptr == '"') {
01205                     if (cookie_value.empty()) {
01206                         // begin quoted value
01207                         value_quote_character = *ptr;
01208                     } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01209                         // max size exceeded
01210                         return false;
01211                     } else {
01212                         // assume character is part of the (unquoted) value
01213                         cookie_value.push_back(*ptr);
01214                     }
01215                 } else if (*ptr != ' ' || !cookie_value.empty()) {  // ignore leading unquoted whitespace
01216                     // check if control character detected, or max sized exceeded
01217                     if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
01218                         return false;
01219                     // character is part of the (unquoted) value
01220                     cookie_value.push_back(*ptr);
01221                 }
01222             } else {
01223                 // value is quoted
01224                 if (*ptr == value_quote_character) {
01225                     // end of value found (OK if empty)
01226                     if (! is_cookie_attribute(cookie_name, set_cookie_header))
01227                         dict.insert( std::make_pair(cookie_name, cookie_value) );
01228                     cookie_name.erase();
01229                     cookie_value.erase();
01230                     parse_state = COOKIE_PARSE_IGNORE;
01231                 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01232                     // max size exceeded
01233                     return false;
01234                 } else {
01235                     // character is part of the (quoted) value
01236                     cookie_value.push_back(*ptr);
01237                 }
01238             }
01239             break;
01240 
01241         case COOKIE_PARSE_IGNORE:
01242             // ignore everything until we reach a comma "," or semicolon ";"
01243             if (*ptr == ';' || *ptr == ',')
01244                 parse_state = COOKIE_PARSE_NAME;
01245             break;
01246         }
01247 
01248         ++ptr;
01249     }
01250 
01251     // handle last cookie in string
01252     if (! is_cookie_attribute(cookie_name, set_cookie_header))
01253         dict.insert( std::make_pair(cookie_name, cookie_value) );
01254 
01255     return true;
01256 }
01257 
01258 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks,
01259     boost::system::error_code& ec)
01260 {
01261     //
01262     // note that boost::tribool may have one of THREE states:
01263     //
01264     // false: encountered an error while parsing message
01265     // true: finished successfully parsing the message
01266     // indeterminate: parsed bytes, but the message is not yet finished
01267     //
01268     const char *read_start_ptr = m_read_ptr;
01269     m_bytes_last_read = 0;
01270     while (m_read_ptr < m_read_end_ptr) {
01271 
01272         switch (m_chunked_content_parse_state) {
01273         case PARSE_CHUNK_SIZE_START:
01274             // we have not yet started parsing the next chunk size
01275             if (is_hex_digit(*m_read_ptr)) {
01276                 m_chunk_size_str.erase();
01277                 m_chunk_size_str.push_back(*m_read_ptr);
01278                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
01279             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
01280                 // Ignore leading whitespace.  Technically, the standard probably doesn't allow white space here, 
01281                 // but we'll be flexible, since there's no ambiguity.
01282                 break;
01283             } else {
01284                 set_error(ec, ERROR_CHUNK_CHAR);
01285                 return false;
01286             }
01287             break;
01288 
01289         case PARSE_CHUNK_SIZE:
01290             if (is_hex_digit(*m_read_ptr)) {
01291                 m_chunk_size_str.push_back(*m_read_ptr);
01292             } else if (*m_read_ptr == '\x0D') {
01293                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01294             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01295                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
01296                 // but we'll be flexible, since there's no ambiguity.
01297                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01298             } else if (*m_read_ptr == ';') {
01299                 // Following the semicolon we have text which will be ignored till we encounter
01300                 //  a CRLF
01301                 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
01302             } else {
01303                 set_error(ec, ERROR_CHUNK_CHAR);
01304                 return false;
01305             }
01306             break;
01307                 
01308         case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
01309             if (*m_read_ptr == '\x0D') {
01310                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01311             } 
01312             break;
01313 
01314         case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01315             if (*m_read_ptr == '\x0D') {
01316                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01317             } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01318                 // Ignore trailing tabs or spaces.  Technically, the standard probably doesn't allow this, 
01319                 // but we'll be flexible, since there's no ambiguity.
01320                 break;
01321             } else {
01322                 set_error(ec, ERROR_CHUNK_CHAR);
01323                 return false;
01324             }
01325             break;
01326 
01327         case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01328             // We received a CR; expecting LF to follow.  We can't be flexible here because 
01329             // if we see anything other than LF, we can't be certain where the chunk starts.
01330             if (*m_read_ptr == '\x0A') {
01331                 m_bytes_read_in_current_chunk = 0;
01332                 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01333                 if (m_size_of_current_chunk == 0) {
01334                     m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
01335                 } else {
01336                     m_chunked_content_parse_state = PARSE_CHUNK;
01337                 }
01338             } else {
01339                 set_error(ec, ERROR_CHUNK_CHAR);
01340                 return false;
01341             }
01342             break;
01343 
01344         case PARSE_CHUNK:
01345             if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01346                 if (m_payload_handler) {
01347                     const std::size_t bytes_avail = bytes_available();
01348                     const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
01349                     const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
01350                     m_payload_handler(m_read_ptr, len);
01351                     m_bytes_read_in_current_chunk += len;
01352                     if (len > 1) m_read_ptr += (len - 1);
01353                 } else if (chunks.size() < m_max_content_length) {
01354                     chunks.push_back(*m_read_ptr);
01355                     m_bytes_read_in_current_chunk++;
01356                 }
01357             }
01358             if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01359                 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01360             }
01361             break;
01362 
01363         case PARSE_EXPECTING_CR_AFTER_CHUNK:
01364             // we've read exactly m_size_of_current_chunk bytes since starting the current chunk
01365             if (*m_read_ptr == '\x0D') {
01366                 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01367             } else {
01368                 set_error(ec, ERROR_CHUNK_CHAR);
01369                 return false;
01370             }
01371             break;
01372 
01373         case PARSE_EXPECTING_LF_AFTER_CHUNK:
01374             // we received a CR; expecting LF to follow
01375             if (*m_read_ptr == '\x0A') {
01376                 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01377             } else {
01378                 set_error(ec, ERROR_CHUNK_CHAR);
01379                 return false;
01380             }
01381             break;
01382 
01383         case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
01384             // we've read the final chunk; expecting final CRLF
01385             if (*m_read_ptr == '\x0D') {
01386                 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01387             } else {
01388                 // Packet contains footers; chunk parsing is complete
01389                 // Footer data contains name value pairs to be added to HTTP Message
01390                 m_message_parse_state = PARSE_FOOTERS;
01391                 m_headers_parse_state = PARSE_HEADER_START;
01392                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01393                 m_bytes_total_read += m_bytes_last_read;
01394                 m_bytes_content_read += m_bytes_last_read;
01395                 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01396                 return true;
01397             }
01398             break;
01399 
01400         case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01401             // we received the final CR; expecting LF to follow
01402             if (*m_read_ptr == '\x0A') {
01403                 ++m_read_ptr;
01404                 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01405                 m_bytes_total_read += m_bytes_last_read;
01406                 m_bytes_content_read += m_bytes_last_read;
01407                 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01408                 return true;
01409             } else {
01410                 set_error(ec, ERROR_CHUNK_CHAR);
01411                 return false;
01412             }
01413         }
01414 
01415         ++m_read_ptr;
01416     }
01417 
01418     m_bytes_last_read = (m_read_ptr - read_start_ptr);
01419     m_bytes_total_read += m_bytes_last_read;
01420     m_bytes_content_read += m_bytes_last_read;
01421     return boost::indeterminate;
01422 }
01423 
01424 boost::tribool parser::consume_content(http::message& http_msg,
01425     boost::system::error_code& ec)
01426 {
01427     size_t content_bytes_to_read;
01428     size_t content_bytes_available = bytes_available();
01429     boost::tribool rc = boost::indeterminate;
01430 
01431     if (m_bytes_content_remaining == 0) {
01432         // we have all of the remaining payload content
01433         return true;
01434     } else {
01435         if (content_bytes_available >= m_bytes_content_remaining) {
01436             // we have all of the remaining payload content
01437             rc = true;
01438             content_bytes_to_read = m_bytes_content_remaining;
01439         } else {
01440             // only some of the payload content is available
01441             content_bytes_to_read = content_bytes_available;
01442         }
01443         m_bytes_content_remaining -= content_bytes_to_read;
01444     }
01445 
01446     // make sure content buffer is not already full
01447     if (m_payload_handler) {
01448         m_payload_handler(m_read_ptr, content_bytes_to_read);
01449     } else if (m_bytes_content_read < m_max_content_length) {
01450         if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01451             // read would exceed maximum size for content buffer
01452             // copy only enough bytes to fill up the content buffer
01453             memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, 
01454                 m_max_content_length - m_bytes_content_read);
01455         } else {
01456             // copy all bytes available
01457             memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01458         }
01459     }
01460 
01461     m_read_ptr += content_bytes_to_read;
01462     m_bytes_content_read += content_bytes_to_read;
01463     m_bytes_total_read += content_bytes_to_read;
01464     m_bytes_last_read = content_bytes_to_read;
01465 
01466     return rc;
01467 }
01468 
01469 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks)
01470 {
01471     if (bytes_available() == 0) {
01472         m_bytes_last_read = 0;
01473     } else {
01474         // note: m_bytes_last_read must be > 0 because of bytes_available() check
01475         m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01476         if (m_payload_handler) {
01477             m_payload_handler(m_read_ptr, m_bytes_last_read);
01478             m_read_ptr += m_bytes_last_read;
01479         } else {
01480             while (m_read_ptr < m_read_end_ptr) {
01481                 if (chunks.size() < m_max_content_length)
01482                     chunks.push_back(*m_read_ptr);
01483                 ++m_read_ptr;
01484             }
01485         }
01486         m_bytes_total_read += m_bytes_last_read;
01487         m_bytes_content_read += m_bytes_last_read;
01488     }
01489     return m_bytes_last_read;
01490 }
01491 
01492 void parser::finish(http::message& http_msg) const
01493 {
01494     switch (m_message_parse_state) {
01495     case PARSE_START:
01496         http_msg.set_is_valid(false);
01497         http_msg.set_content_length(0);
01498         http_msg.create_content_buffer();
01499         return;
01500     case PARSE_END:
01501         http_msg.set_is_valid(true);
01502         break;
01503     case PARSE_HEADERS:
01504     case PARSE_FOOTERS:
01505         http_msg.set_is_valid(false);
01506         update_message_with_header_data(http_msg);
01507         http_msg.set_content_length(0);
01508         http_msg.create_content_buffer();
01509         break;
01510     case PARSE_CONTENT:
01511         http_msg.set_is_valid(false);
01512         if (get_content_bytes_read() < m_max_content_length)   // NOTE: we can read more than we have allocated/stored
01513             http_msg.set_content_length(get_content_bytes_read());
01514         break;
01515     case PARSE_CHUNKS:
01516         http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01517         if (!m_payload_handler)
01518             http_msg.concatenate_chunks();
01519         break;
01520     case PARSE_CONTENT_NO_LENGTH:
01521         http_msg.set_is_valid(true);
01522         if (!m_payload_handler)
01523             http_msg.concatenate_chunks();
01524         break;
01525     }
01526 
01527     compute_msg_status(http_msg, http_msg.is_valid());
01528 
01529     if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) {
01530         // Parse query pairs from post content if content type is x-www-form-urlencoded.
01531         // Type could be followed by parameters (as defined in section 3.6 of RFC 2616)
01532         // e.g. Content-Type: application/x-www-form-urlencoded; charset=UTF-8
01533         http::request& http_request(dynamic_cast<http::request&>(http_msg));
01534         const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
01535         if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
01536                                         http::types::CONTENT_TYPE_URLENCODED) == 0)
01537         {
01538             if (! parse_url_encoded(http_request.get_queries(),
01539                                   http_request.get_content(),
01540                                   http_request.get_content_length()))
01541                 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
01542         } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
01543                                                http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
01544         {
01545             if (! parse_multipart_form_data(http_request.get_queries(),
01546                                             content_type_header,
01547                                             http_request.get_content(),
01548                                             http_request.get_content_length()))
01549                 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
01550         }
01551     }
01552 }
01553 
01554 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
01555 {
01556     http::message::data_status_t st = http::message::STATUS_NONE;
01557 
01558     if(http_msg.has_missing_packets()) {
01559         st = http_msg.has_data_after_missing_packets() ?
01560                         http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
01561     } else {
01562         st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
01563     }
01564 
01565     http_msg.set_status(st);
01566 }
01567 
01568 void parser::create_error_category(void)
01569 {
01570     static error_category_t UNIQUE_ERROR_CATEGORY;
01571     m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01572 }
01573 
01574 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
01575 {
01576     // static regex's used to check for ipv4 address
01577     static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01578 
01584     static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01585 
01586     // sanity check
01587     if (header.empty())
01588         return false;
01589 
01590     // local variables re-used by while loop
01591     boost::match_results<std::string::const_iterator> m;
01592     std::string::const_iterator start_it = header.begin();
01593 
01594     // search for next ip address within the header
01595     while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01596         // get ip that matched
01597         std::string ip_str(m[0].first, m[0].second);
01598         // check if public network ip address
01599         if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01600             // match found!
01601             public_ip = ip_str;
01602             return true;
01603         }
01604         // update search starting position
01605         start_it = m[0].second;
01606     }
01607 
01608     // no matches found
01609     return false;
01610 }
01611 
01612 }   // end namespace http
01613 }   // end namespace pion