libdap
Updated for version 3.17.0
|
00001 // -*- mode: c++; c-basic-offset:4 -*- 00002 00003 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00004 // Access Protocol. 00005 00006 // Copyright (c) 2009 OPeNDAP, Inc. 00007 // Author: James Gallagher <jgallagher@opendap.org> 00008 // 00009 // This library is free software; you can redistribute it and/or 00010 // modify it under the terms of the GNU Lesser General Public 00011 // License as published by the Free Software Foundation; either 00012 // version 2.1 of the License, or (at your option) any later version. 00013 // 00014 // This library is distributed in the hope that it will be useful, 00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 // Lesser General Public License for more details. 00018 // 00019 // You should have received a copy of the GNU Lesser General Public 00020 // License along with this library; if not, write to the Free Software 00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00022 // 00023 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00024 // 00025 // Portions of this code were taken verbatim from Josuttis, 00026 // "The C++ Standard Library," p.672 00027 00028 #include "config.h" 00029 00030 #include <stdint.h> 00031 #include <byteswap.h> 00032 #include <arpa/inet.h> 00033 00034 #include <cstring> 00035 #include <vector> 00036 00037 #include "chunked_stream.h" 00038 #include "chunked_istream.h" 00039 00040 #include "Error.h" 00041 00042 //#define DODS_DEBUG 00043 //#define DODS_DEBUG2 00044 #ifdef DODS_DEBUG 00045 #include <iostream> 00046 #endif 00047 00048 #include "util.h" 00049 #include "debug.h" 00050 00051 namespace libdap { 00052 00053 /* 00054 This code does not use a 'put back' buffer, but here's a picture of the 00055 d_buffer pointer, eback(), gptr() and egptr() that can be used to see how 00056 the I/O Stream library's streambuf class works. For the case with no 00057 putback, just imagine it as zero and eliminate the leftmost extension. This 00058 might also come in useful if the code was extended to support put back. I 00059 removed that feature because I don't see it being used with our chunked 00060 transmission protocol and it requires an extra call to memcopy() when data 00061 are added to the internal buffer. 00062 00063 d_buffer d_buffer + putBack 00064 | | 00065 v v 00066 |---------|--------------------------------------------|.... 00067 | | | . 00068 |---------|--------------------------------------------|.... 00069 ^ ^ ^ 00070 | | | 00071 eback() gptr() egptr() 00072 00073 */ 00074 00084 std::streambuf::int_type 00085 chunked_inbuf::underflow() 00086 { 00087 DBG(cerr << "underflow..." << endl); 00088 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl); 00089 00090 // return the next character; uflow() increments the puffer pointer. 00091 if (gptr() < egptr()) 00092 return traits_type::to_int_type(*gptr()); 00093 00094 // gptr() == egptr() so read more data from the underlying input source. 00095 00096 // To read data from the chunked stream, first read the header 00097 uint32_t header; 00098 d_is.read((char *) &header, 4); 00099 #if !BYTE_ORDER_PREFIX 00100 // When the endian nature of the server is encoded in the chunk header, the header is 00101 // sent using network byte order 00102 ntohl(header); 00103 #endif 00104 00105 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where 00106 // it holds data. In the latter case, bytes those will be read and moved into the 00107 // buffer. Once those data are consumed, we'll be back here again and this read() 00108 // will return EOF. See below for the other case... 00109 if (d_is.eof()) return traits_type::eof(); 00110 #if BYTE_ORDER_PREFIX 00111 if (d_twiddle_bytes) header = bswap_32(header); 00112 #else 00113 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian 00114 if (!d_set_twiddle) { 00115 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN)); 00116 d_set_twiddle = true; 00117 } 00118 #endif 00119 uint32_t chunk_size = header & CHUNK_SIZE_MASK; 00120 00121 DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl); 00122 DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl); 00123 DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl); 00124 00125 // Handle the case where the buffer is not big enough to hold the incoming chunk 00126 if (chunk_size > d_buf_size) { 00127 d_buf_size = chunk_size; 00128 m_buffer_alloc(); 00129 } 00130 00131 // If the END chunk has zero bytes, return EOF. See above for more information 00132 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof(); 00133 00134 // Read the chunk's data 00135 d_is.read(d_buffer, chunk_size); 00136 DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl); 00137 if (d_is.bad()) return traits_type::eof(); 00138 00139 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl); 00140 setg(d_buffer, // beginning of put back area 00141 d_buffer, // read position (gptr() == eback()) 00142 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error 00143 00144 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl); 00145 00146 switch (header & CHUNK_TYPE_MASK) { 00147 case CHUNK_END: 00148 DBG2(cerr << "Found end chunk" << endl); 00149 return traits_type::to_int_type(*gptr()); 00150 case CHUNK_DATA: 00151 return traits_type::to_int_type(*gptr()); 00152 00153 case CHUNK_ERR: 00154 // this is pretty much the end of the show... Assume the buffer/chunk holds 00155 // the error message text. 00156 d_error = true; 00157 d_error_message = string(d_buffer, chunk_size); 00158 return traits_type::eof(); 00159 default: 00160 d_error = true; 00161 d_error_message = "Failed to read known chunk header type."; 00162 return traits_type::eof(); 00163 } 00164 00165 return traits_type::eof(); // Can never get here; this quiets g++ 00166 } 00167 00184 std::streamsize 00185 chunked_inbuf::xsgetn(char* s, std::streamsize num) 00186 { 00187 DBG(cerr << "xsgetn... num: " << num << endl); 00188 00189 // if num is <= the chars currently in the buffer 00190 if (num <= (egptr() - gptr())) { 00191 memcpy(s, gptr(), num); 00192 gbump(num); 00193 00194 return traits_type::not_eof(num); 00195 } 00196 00197 // else they asked for more 00198 uint32_t bytes_left_to_read = num; 00199 00200 // are there any bytes in the buffer? if so grab them first 00201 if (gptr() < egptr()) { 00202 int bytes_to_transfer = egptr() - gptr(); 00203 memcpy(s, gptr(), bytes_to_transfer); 00204 gbump(bytes_to_transfer); 00205 s += bytes_to_transfer; 00206 bytes_left_to_read -= bytes_to_transfer; 00207 } 00208 00209 // We need to get more bytes from the underlying stream; at this 00210 // point the internal buffer is empty. 00211 00212 // read the remaining bytes to transfer, a chunk at a time, 00213 // and put any leftover stuff in the buffer. 00214 00215 // note that when the code is here, gptr() == egptr(), so the 00216 // next call to read() will fall through the previous tests and 00217 // read at least one chunk here. 00218 bool done = false; 00219 while (!done) { 00220 // Get a chunk header 00221 uint32_t header; 00222 d_is.read((char *) &header, 4); 00223 #if !BYTE_ORDER_PREFIX 00224 ntohl(header); 00225 #endif 00226 00227 // There are two EOF cases: One where the END chunk is zero bytes and one where 00228 // it holds data. In the latter case, those will be read and moved into the 00229 // buffer. Once those data are consumed, we'll be back here again and this read() 00230 // will return EOF. See below for the other case... 00231 if (d_is.eof()) return traits_type::eof(); 00232 #if BYTE_ORDER_PREFIX 00233 if (d_twiddle_bytes) header = bswap_32(header); 00234 #else 00235 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian 00236 if (!d_set_twiddle) { 00237 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN)); 00238 d_set_twiddle = true; 00239 } 00240 #endif 00241 00242 uint32_t chunk_size = header & CHUNK_SIZE_MASK; 00243 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl); 00244 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl); 00245 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl); 00246 00247 // handle error chunks here 00248 if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) { 00249 d_error = true; 00250 // Note that d_buffer is not used to avoid calling resize if it is too 00251 // small to hold the error message. At this point, there's not much reason 00252 // to optimize transport efficiency, however. 00253 std::vector<char> message(chunk_size); 00254 d_is.read(&message[0], chunk_size); 00255 d_error_message = string(&message[0], chunk_size); 00256 // leave the buffer and gptr(), ..., in a consistent state (empty) 00257 setg(d_buffer, d_buffer, d_buffer); 00258 } 00259 // And zero-length END chunks here. 00260 else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) { 00261 return traits_type::not_eof(num-bytes_left_to_read); 00262 } 00263 // The next case is complicated because we read some data from the current 00264 // chunk into 's' an some into the internal buffer. 00265 else if (chunk_size > bytes_left_to_read) { 00266 d_is.read(s, bytes_left_to_read); 00267 if (d_is.bad()) return traits_type::eof(); 00268 00269 // Now slurp up the remain part of the chunk and store it in the buffer 00270 uint32_t bytes_leftover = chunk_size - bytes_left_to_read; 00271 // expand the internal buffer if needed 00272 if (bytes_leftover > d_buf_size) { 00273 d_buf_size = chunk_size; 00274 m_buffer_alloc(); 00275 } 00276 // read the remain stuff in to d_buffer 00277 d_is.read(d_buffer, bytes_leftover); 00278 if (d_is.bad()) return traits_type::eof(); 00279 00280 setg(d_buffer, // beginning of put back area 00281 d_buffer, // read position (gptr() == eback()) 00282 d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr()) 00283 00284 bytes_left_to_read = 0 /* -= d_is.gcount()*/; 00285 } 00286 else { 00287 // expand the internal buffer if needed 00288 if (chunk_size > d_buf_size) { 00289 d_buf_size = chunk_size; 00290 m_buffer_alloc(); 00291 } 00292 // If we get a chunk that's zero bytes, Don't call read() 00293 // to save the kernel context switch overhead. 00294 if (chunk_size > 0) { 00295 d_is.read(s, chunk_size); 00296 if (d_is.bad()) return traits_type::eof(); 00297 bytes_left_to_read -= chunk_size /*d_is.gcount()*/; 00298 s += chunk_size; 00299 } 00300 } 00301 00302 switch (header & CHUNK_TYPE_MASK) { 00303 case CHUNK_END: 00304 DBG(cerr << "Found end chunk" << endl); 00305 // in this case bytes_left_to_read can be > 0 because we ran out of data 00306 // before reading all the requested bytes. The next read() call will return 00307 // eof; this call returns the number of bytes read and transferred to 's'. 00308 done = true; 00309 break; 00310 case CHUNK_DATA: 00311 done = bytes_left_to_read == 0; 00312 break; 00313 case CHUNK_ERR: 00314 // this is pretty much the end of the show... The error message has 00315 // already been read above 00316 return traits_type::eof(); 00317 break; 00318 default: 00319 d_error = true; 00320 d_error_message = "Failed to read known chunk header type."; 00321 return traits_type::eof(); 00322 } 00323 } 00324 00325 return traits_type::not_eof(num-bytes_left_to_read); 00326 } 00327 00340 std::streambuf::int_type 00341 chunked_inbuf::read_next_chunk() 00342 { 00343 // To read data from the chunked stream, first read the header 00344 uint32_t header; 00345 d_is.read((char *) &header, 4); 00346 #if !BYTE_ORDER_PREFIX 00347 ntohl(header); 00348 #endif 00349 00350 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where 00351 // it holds data. In the latter case, bytes those will be read and moved into the 00352 // buffer. Once those data are consumed, we'll be back here again and this read() 00353 // will return EOF. See below for the other case... 00354 if (d_is.eof()) return traits_type::eof(); 00355 #if BYTE_ORDER_PREFIX 00356 if (d_twiddle_bytes) header = bswap_32(header); 00357 #else 00358 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian 00359 if (!d_set_twiddle) { 00360 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN)); 00361 d_set_twiddle = true; 00362 } 00363 #endif 00364 00365 uint32_t chunk_size = header & CHUNK_SIZE_MASK; 00366 00367 DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl); 00368 DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl); 00369 DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl); 00370 00371 // Handle the case where the buffer is not big enough to hold the incoming chunk 00372 if (chunk_size > d_buf_size) { 00373 d_buf_size = chunk_size; 00374 m_buffer_alloc(); 00375 } 00376 00377 // If the END chunk has zero bytes, return EOF. See above for more information 00378 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof(); 00379 00380 // Read the chunk's data 00381 d_is.read(d_buffer, chunk_size); 00382 DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl); 00383 if (d_is.bad()) return traits_type::eof(); 00384 00385 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl); 00386 setg(d_buffer, // beginning of put back area 00387 d_buffer, // read position (gptr() == eback()) 00388 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error 00389 00390 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl); 00391 00392 switch (header & CHUNK_TYPE_MASK) { 00393 case CHUNK_END: 00394 DBG(cerr << "Found end chunk" << endl); 00395 return traits_type::not_eof(chunk_size); 00396 case CHUNK_DATA: 00397 return traits_type::not_eof(chunk_size); 00398 00399 case CHUNK_ERR: 00400 // this is pretty much the end of the show... Assume the buffer/chunk holds 00401 // the error message text. 00402 d_error = true; 00403 d_error_message = string(d_buffer, chunk_size); 00404 return traits_type::eof(); 00405 default: 00406 d_error = true; 00407 d_error_message = "Failed to read known chunk header type."; 00408 return traits_type::eof(); 00409 } 00410 00411 return traits_type::eof(); // Can never get here; this quiets g++ 00412 } 00413 00414 }