libdap  Updated for version 3.17.0
chunked_istream.cc
00001 // -*- mode: c++; c-basic-offset:4 -*-
00002 
00003 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00004 // Access Protocol.
00005 
00006 // Copyright (c) 2009 OPeNDAP, Inc.
00007 // Author: James Gallagher <jgallagher@opendap.org>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 //
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00024 //
00025 // Portions of this code were taken verbatim from  Josuttis,
00026 // "The C++ Standard Library," p.672
00027 
00028 #include "config.h"
00029 
00030 #include <stdint.h>
00031 #include <byteswap.h>
00032 #include <arpa/inet.h>
00033 
00034 #include <cstring>
00035 #include <vector>
00036 
00037 #include "chunked_stream.h"
00038 #include "chunked_istream.h"
00039 
00040 #include "Error.h"
00041 
00042 //#define DODS_DEBUG
00043 //#define DODS_DEBUG2
00044 #ifdef DODS_DEBUG
00045 #include <iostream>
00046 #endif
00047 
00048 #include "util.h"
00049 #include "debug.h"
00050 
00051 namespace libdap {
00052 
00053 /*
00054   This code does not use a 'put back' buffer, but here's a picture of the
00055   d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
00056   the I/O Stream library's streambuf class works. For the case with no
00057   putback, just imagine it as zero and eliminate the leftmost extension. This
00058   might also come in useful if the code was extended to support put back. I
00059   removed that feature because I don't see it being used with our chunked
00060   transmission protocol and it requires an extra call to memcpy() when data
00061   are added to the internal buffer.
00062 
00063   d_buffer  d_buffer + putBack
00064   |         |
00065   v         v
00066   |---------|--------------------------------------------|....
00067   |         |                                            |   .
00068   |---------|--------------------------------------------|....
00069             ^                         ^                   ^
00070             |                         |                   |
00071             eback()                   gptr()              egptr()
00072 
00073  */
00074 
00084 std::streambuf::int_type
00085 chunked_inbuf::underflow()
00086 {
00087     DBG(cerr << "underflow..." << endl);
00088     DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
00089 
00090         // return the next character; uflow() increments the puffer pointer.
00091         if (gptr() < egptr())
00092                 return traits_type::to_int_type(*gptr());
00093 
00094         // gptr() == egptr() so read more data from the underlying input source.
00095 
00096         // To read data from the chunked stream, first read the header
00097         uint32_t header;
00098         d_is.read((char *) &header, 4);
00099 #if !BYTE_ORDER_PREFIX
00100         // When the endian nature of the server is encoded in the chunk header, the header is
00101         // sent using network byte order
00102         ntohl(header);
00103 #endif
00104 
00105         // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
00106         // it holds data. In the latter case, bytes those will be read and moved into the
00107         // buffer. Once those data are consumed, we'll be back here again and this read()
00108         // will return EOF. See below for the other case...
00109         if (d_is.eof()) return traits_type::eof();
00110 #if BYTE_ORDER_PREFIX
00111         if (d_twiddle_bytes) header = bswap_32(header);
00112 #else
00113         // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
00114         if (!d_set_twiddle) {
00115             d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
00116             d_set_twiddle = true;
00117         }
00118 #endif
00119         uint32_t chunk_size = header & CHUNK_SIZE_MASK;
00120 
00121         DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
00122         DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
00123         DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
00124 
00125         // Handle the case where the buffer is not big enough to hold the incoming chunk
00126         if (chunk_size > d_buf_size) {
00127                 d_buf_size = chunk_size;
00128                 m_buffer_alloc();
00129         }
00130 
00131         // If the END chunk has zero bytes, return EOF. See above for more information
00132         if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
00133 
00134         // Read the chunk's data
00135         d_is.read(d_buffer, chunk_size);
00136         DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
00137         if (d_is.bad()) return traits_type::eof();
00138 
00139         DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
00140         setg(d_buffer,                                          // beginning of put back area
00141                         d_buffer,                       // read position (gptr() == eback())
00142                         d_buffer + chunk_size);         // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
00143 
00144         DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
00145 
00146         switch (header & CHUNK_TYPE_MASK) {
00147         case CHUNK_END:
00148                 DBG2(cerr << "Found end chunk" << endl);
00149                 return traits_type::to_int_type(*gptr());
00150         case CHUNK_DATA:
00151                 return traits_type::to_int_type(*gptr());
00152 
00153         case CHUNK_ERR:
00154                 // this is pretty much the end of the show... Assume the buffer/chunk holds
00155                 // the error message text.
00156                 d_error = true;
00157                 d_error_message = string(d_buffer, chunk_size);
00158                 return traits_type::eof();
00159         default:
00160                 d_error = true;
00161                 d_error_message = "Failed to read known chunk header type.";
00162                 return traits_type::eof();
00163         }
00164 
00165         return traits_type::eof();      // Can never get here; this quiets g++
00166 }
00167 
00184 std::streamsize
00185 chunked_inbuf::xsgetn(char* s, std::streamsize num)
00186 {
00187         DBG(cerr << "xsgetn... num: " << num << endl);
00188 
00189         // if num is <= the chars currently in the buffer
00190         if (num <= (egptr() - gptr())) {
00191                 memcpy(s, gptr(), num);
00192                 gbump(num);
00193 
00194                 return traits_type::not_eof(num);
00195         }
00196 
00197         // else they asked for more
00198         uint32_t bytes_left_to_read = num;
00199 
00200         // are there any bytes in the buffer? if so grab them first
00201         if (gptr() < egptr()) {
00202                 int bytes_to_transfer = egptr() - gptr();
00203                 memcpy(s, gptr(), bytes_to_transfer);
00204                 gbump(bytes_to_transfer);
00205                 s += bytes_to_transfer;
00206                 bytes_left_to_read -= bytes_to_transfer;
00207         }
00208 
00209         // We need to get more bytes from the underlying stream; at this
00210         // point the internal buffer is empty.
00211 
00212         // read the remaining bytes to transfer, a chunk at a time,
00213         // and put any leftover stuff in the buffer.
00214 
00215         // note that when the code is here, gptr() == egptr(), so the
00216         // next call to read() will fall through the previous tests and
00217         // read at least one chunk here.
00218         bool done = false;
00219     while (!done) {
00220         // Get a chunk header
00221         uint32_t header;
00222         d_is.read((char *) &header, 4);
00223 #if !BYTE_ORDER_PREFIX
00224         ntohl(header);
00225 #endif
00226 
00227         // There are two EOF cases: One where the END chunk is zero bytes and one where
00228         // it holds data. In the latter case, those will be read and moved into the
00229         // buffer. Once those data are consumed, we'll be back here again and this read()
00230         // will return EOF. See below for the other case...
00231         if (d_is.eof()) return traits_type::eof();
00232 #if BYTE_ORDER_PREFIX
00233         if (d_twiddle_bytes) header = bswap_32(header);
00234 #else
00235         // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
00236         if (!d_set_twiddle) {
00237             d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
00238             d_set_twiddle = true;
00239         }
00240 #endif
00241 
00242             uint32_t chunk_size = header & CHUNK_SIZE_MASK;
00243                 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
00244                 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
00245                 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
00246 
00247                 // handle error chunks here
00248             if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
00249                         d_error = true;
00250                         // Note that d_buffer is not used to avoid calling resize if it is too
00251                         // small to hold the error message. At this point, there's not much reason
00252                         // to optimize transport efficiency, however.
00253                         std::vector<char> message(chunk_size);
00254                         d_is.read(&message[0], chunk_size);
00255                         d_error_message = string(&message[0], chunk_size);
00256                         // leave the buffer and gptr(), ..., in a consistent state (empty)
00257                         setg(d_buffer, d_buffer, d_buffer);
00258             }
00259             // And zero-length END chunks here.
00260             else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
00261                 return traits_type::not_eof(num-bytes_left_to_read);
00262             }
00263             // The next case is complicated because we read some data from the current
00264             // chunk into 's' an some into the internal buffer.
00265             else if (chunk_size > bytes_left_to_read) {
00266                         d_is.read(s, bytes_left_to_read);
00267                         if (d_is.bad()) return traits_type::eof();
00268 
00269                         // Now slurp up the remain part of the chunk and store it in the buffer
00270                         uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
00271                         // expand the internal buffer if needed
00272                     if (bytes_leftover > d_buf_size) {
00273                         d_buf_size = chunk_size;
00274                         m_buffer_alloc();
00275                     }
00276                     // read the remain stuff in to d_buffer
00277                         d_is.read(d_buffer, bytes_leftover);
00278                         if (d_is.bad()) return traits_type::eof();
00279 
00280                         setg(d_buffer,                                                                          // beginning of put back area
00281                                  d_buffer,                                                              // read position (gptr() == eback())
00282                                  d_buffer + bytes_leftover /*d_is.gcount()*/);  // end of buffer (egptr())
00283 
00284                         bytes_left_to_read = 0 /* -= d_is.gcount()*/;
00285                 }
00286                 else {
00287                         // expand the internal buffer if needed
00288                     if (chunk_size > d_buf_size) {
00289                         d_buf_size = chunk_size;
00290                         m_buffer_alloc();
00291                     }
00292                     // If we get a chunk that's zero bytes, Don't call read()
00293                     // to save the kernel context switch overhead.
00294                         if (chunk_size > 0) {
00295                                 d_is.read(s, chunk_size);
00296                                 if (d_is.bad()) return traits_type::eof();
00297                                 bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
00298                                 s += chunk_size;
00299                         }
00300                 }
00301 
00302             switch (header & CHUNK_TYPE_MASK) {
00303             case CHUNK_END:
00304                         DBG(cerr << "Found end chunk" << endl);
00305                 // in this case bytes_left_to_read can be > 0 because we ran out of data
00306                 // before reading all the requested bytes. The next read() call will return
00307                 // eof; this call returns the number of bytes read and transferred to 's'.
00308                 done = true;
00309                 break;
00310             case CHUNK_DATA:
00311                 done = bytes_left_to_read == 0;
00312                 break;
00313             case CHUNK_ERR:
00314                         // this is pretty much the end of the show... The error message has
00315                 // already been read above
00316                         return traits_type::eof();
00317                 break;
00318                 default:
00319                         d_error = true;
00320                         d_error_message = "Failed to read known chunk header type.";
00321                         return traits_type::eof();
00322             }
00323         }
00324 
00325         return traits_type::not_eof(num-bytes_left_to_read);
00326 }
00327 
00340 std::streambuf::int_type
00341 chunked_inbuf::read_next_chunk()
00342 {
00343         // To read data from the chunked stream, first read the header
00344         uint32_t header;
00345         d_is.read((char *) &header, 4);
00346 #if !BYTE_ORDER_PREFIX
00347     ntohl(header);
00348 #endif
00349 
00350         // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
00351         // it holds data. In the latter case, bytes those will be read and moved into the
00352         // buffer. Once those data are consumed, we'll be back here again and this read()
00353         // will return EOF. See below for the other case...
00354         if (d_is.eof()) return traits_type::eof();
00355 #if BYTE_ORDER_PREFIX
00356     if (d_twiddle_bytes) header = bswap_32(header);
00357 #else
00358     // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
00359     if (!d_set_twiddle) {
00360         d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
00361         d_set_twiddle = true;
00362     }
00363 #endif
00364 
00365         uint32_t chunk_size = header & CHUNK_SIZE_MASK;
00366 
00367         DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
00368         DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
00369         DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
00370 
00371         // Handle the case where the buffer is not big enough to hold the incoming chunk
00372         if (chunk_size > d_buf_size) {
00373                 d_buf_size = chunk_size;
00374                 m_buffer_alloc();
00375         }
00376 
00377         // If the END chunk has zero bytes, return EOF. See above for more information
00378         if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
00379 
00380         // Read the chunk's data
00381         d_is.read(d_buffer, chunk_size);
00382         DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
00383         if (d_is.bad()) return traits_type::eof();
00384 
00385         DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
00386         setg(d_buffer,                                          // beginning of put back area
00387                         d_buffer,                       // read position (gptr() == eback())
00388                         d_buffer + chunk_size);         // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
00389 
00390         DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
00391 
00392         switch (header & CHUNK_TYPE_MASK) {
00393         case CHUNK_END:
00394                 DBG(cerr << "Found end chunk" << endl);
00395                 return traits_type::not_eof(chunk_size);
00396         case CHUNK_DATA:
00397                 return traits_type::not_eof(chunk_size);
00398 
00399         case CHUNK_ERR:
00400                 // this is pretty much the end of the show... Assume the buffer/chunk holds
00401                 // the error message text.
00402                 d_error = true;
00403                 d_error_message = string(d_buffer, chunk_size);
00404                 return traits_type::eof();
00405         default:
00406                 d_error = true;
00407                 d_error_message = "Failed to read known chunk header type.";
00408                 return traits_type::eof();
00409         }
00410 
00411         return traits_type::eof();      // Can never get here; this quiets g++
00412 }
00413 
00414 }