libdap
Updated for version 3.17.0
|

// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 00026 00027 #include "config.h" 00028 00029 #ifdef HAVE_UNISTD_H 00030 #include <unistd.h> 00031 #endif 00032 00033 #include <sys/stat.h> 00034 00035 #ifdef WIN32 00036 #include <io.h> 00037 #endif 00038 00039 #include <string> 00040 #include <vector> 00041 #include <functional> 00042 #include <algorithm> 00043 #include <sstream> 00044 #include <fstream> 00045 #include <iterator> 00046 #include <cstdlib> 00047 #include <cstring> 00048 #include <cerrno> 00049 00050 //#define DODS_DEBUG2 00051 //#define HTTP_TRACE 00052 //#define DODS_DEBUG 00053 00054 #undef USE_GETENV 00055 00056 00057 #include "debug.h" 00058 #include "mime_util.h" 00059 #include "media_types.h" 00060 #include "GNURegex.h" 00061 #include "HTTPCache.h" 00062 #include "HTTPConnect.h" 00063 #include "RCReader.h" 00064 #include "HTTPResponse.h" 00065 #include "HTTPCacheResponse.h" 00066 00067 using namespace std; 00068 00069 namespace libdap { 00070 00071 // These global variables are not MT-Safe, but I'm leaving them as is because 00072 // they are used only for debugging (set them in a debugger like gdb or ddd). 00073 // They are not static because I think that many debuggers cannot access 00074 // static variables. 08/07/02 jhrg 00075 00076 // Set this to 1 to turn on libcurl's verbose mode (for debugging). 00077 int www_trace = 0; 00078 00079 // Keep the temporary files; useful for debugging. 
00080 int dods_keep_temps = 0; 00081 00082 #define CLIENT_ERR_MIN 400 00083 #define CLIENT_ERR_MAX 417 00084 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] = 00085 { 00086 "Bad Request:", 00087 "Unauthorized: Contact the server administrator.", 00088 "Payment Required.", 00089 "Forbidden: Contact the server administrator.", 00090 "Not Found: The data source or server could not be found.\n\ 00091 Often this means that the OPeNDAP server is missing or needs attention;\n\ 00092 Please contact the server administrator.", 00093 "Method Not Allowed.", 00094 "Not Acceptable.", 00095 "Proxy Authentication Required.", 00096 "Request Time-out.", 00097 "Conflict.", 00098 "Gone:.", 00099 "Length Required.", 00100 "Precondition Failed.", 00101 "Request Entity Too Large.", 00102 "Request URI Too Large.", 00103 "Unsupported Media Type.", 00104 "Requested Range Not Satisfiable.", 00105 "Expectation Failed." 00106 }; 00107 00108 #define SERVER_ERR_MIN 500 00109 #define SERVER_ERR_MAX 505 00110 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = 00111 { 00112 "Internal Server Error.", 00113 "Not Implemented.", 00114 "Bad Gateway.", 00115 "Service Unavailable.", 00116 "Gateway Time-out.", 00117 "HTTP Version Not Supported." 
00118 }; 00119 00122 static string 00123 http_status_to_string(int status) 00124 { 00125 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX) 00126 return string(http_client_errors[status - CLIENT_ERR_MIN]); 00127 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX) 00128 return string(http_server_errors[status - SERVER_ERR_MIN]); 00129 else 00130 return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."); 00131 } 00132 00133 static ObjectType 00134 determine_object_type(const string &header_value) 00135 { 00136 // DAP4 Data: application/vnd.opendap.dap4.data 00137 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml 00138 00139 string::size_type plus = header_value.find('+'); 00140 string base_type; 00141 string type_extension = ""; 00142 if (plus != string::npos) { 00143 base_type= header_value.substr(0, plus); 00144 type_extension = header_value.substr(plus+1); 00145 } 00146 else 00147 base_type = header_value; 00148 00149 if (base_type == DMR_Content_Type 00150 || (base_type.find("application/") != string::npos 00151 && base_type.find("dap4.dataset-metadata") != string::npos)) { 00152 if (type_extension == "xml") 00153 return dap4_dmr; 00154 else 00155 return unknown_type; 00156 } 00157 else if (base_type == DAP4_DATA_Content_Type 00158 || (base_type.find("application/") != string::npos 00159 && base_type.find("dap4.data") != string::npos)) { 00160 return dap4_data; 00161 } 00162 else if (header_value.find("text/html") != string::npos) { 00163 return web_error; 00164 } 00165 else 00166 return unknown_type; 00167 } 00168 00173 class ParseHeader : public unary_function<const string &, void> 00174 { 00175 ObjectType type; // What type of object is in the stream? 00176 string server; // Server's version string. 00177 string protocol; // Server's protocol version. 
00178 string location; // Url returned by server 00179 00180 public: 00181 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") 00182 { } 00183 00184 void operator()(const string &line) 00185 { 00186 string name, value; 00187 parse_mime_header(line, name, value); 00188 00189 DBG2(cerr << name << ": " << value << endl); 00190 00191 // Content-Type is used to determine the content of DAP4 responses, but allow the 00192 // Content-Description header to override CT o preserve operation with DAP2 servers. 00193 // jhrg 11/12/13 00194 if (type == unknown_type && name == "content-type") { 00195 type = determine_object_type(value); // see above 00196 } 00197 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) { 00198 type = get_description_type(value); // defined in mime_util.cc 00199 } 00200 // The second test (== "dods/0.0") tests if xopendap-server has already 00201 // been seen. If so, use that header in preference to the old 00202 // XDODS-Server header. jhrg 2/7/06 00203 else if (name == "xdods-server" && server == "dods/0.0") { 00204 server = value; 00205 } 00206 else if (name == "xopendap-server") { 00207 server = value; 00208 } 00209 else if (name == "xdap") { 00210 protocol = value; 00211 } 00212 else if (server == "dods/0.0" && name == "server") { 00213 server = value; 00214 } 00215 else if (name == "location") { 00216 location = value; 00217 } 00218 } 00219 00220 ObjectType get_object_type() 00221 { 00222 return type; 00223 } 00224 00225 string get_server() 00226 { 00227 return server; 00228 } 00229 00230 string get_protocol() 00231 { 00232 return protocol; 00233 } 00234 00235 string get_location() { 00236 return location; 00237 } 00238 }; 00239 00255 static size_t 00256 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) 00257 { 00258 DBG2(cerr << "Inside the header parser." 
<< endl); 00259 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs); 00260 00261 // Grab the header, minus the trailing newline. Or \r\n pair. 00262 string complete_line; 00263 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r') 00264 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2)); 00265 else 00266 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1)); 00267 00268 // Store all non-empty headers that are not HTTP status codes 00269 if (complete_line != "" && complete_line.find("HTTP") == string::npos) { 00270 DBG(cerr << "Header line: " << complete_line << endl); 00271 hdrs->push_back(complete_line); 00272 } 00273 00274 return size * nmemb; 00275 } 00276 00278 static int 00279 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) 00280 { 00281 string message(msg, size); 00282 00283 switch (info) { 00284 case CURLINFO_TEXT: 00285 cerr << "Text: " << message; break; 00286 case CURLINFO_HEADER_IN: 00287 cerr << "Header in: " << message; break; 00288 case CURLINFO_HEADER_OUT: 00289 cerr << "Header out: " << message; break; 00290 case CURLINFO_DATA_IN: 00291 cerr << "Data in: " << message; break; 00292 case CURLINFO_DATA_OUT: 00293 cerr << "Data out: " << message; break; 00294 case CURLINFO_END: 00295 cerr << "End: " << message; break; 00296 #ifdef CURLINFO_SSL_DATA_IN 00297 case CURLINFO_SSL_DATA_IN: 00298 cerr << "SSL Data in: " << message; break; 00299 #endif 00300 #ifdef CURLINFO_SSL_DATA_OUT 00301 case CURLINFO_SSL_DATA_OUT: 00302 cerr << "SSL Data out: " << message; break; 00303 #endif 00304 default: 00305 cerr << "Curl info: " << message; break; 00306 } 00307 return 0; 00308 } 00309 00313 void 00314 HTTPConnect::www_lib_init() 00315 { 00316 d_curl = curl_easy_init(); 00317 if (!d_curl) 00318 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl."); 00319 00320 // Now set options that will remain constant for the duration of this 00321 // CURL object. 
00322 00323 // Set the proxy host. 00324 if (!d_rcr->get_proxy_server_host().empty()) { 00325 DBG(cerr << "Setting up a proxy server." << endl); 00326 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() 00327 << endl); 00328 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() 00329 << endl); 00330 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() 00331 << endl); 00332 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00333 d_rcr->get_proxy_server_host().c_str()); 00334 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, 00335 d_rcr->get_proxy_server_port()); 00336 00337 // As of 4/21/08 only NTLM, Digest and Basic work. 00338 #ifdef CURLOPT_PROXYAUTH 00339 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY); 00340 #endif 00341 00342 // Password might not be required. 06/21/04 jhrg 00343 if (!d_rcr->get_proxy_server_userpw().empty()) 00344 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, 00345 d_rcr->get_proxy_server_userpw().c_str()); 00346 } 00347 00348 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer); 00349 // We have to set FailOnError to false for any of the non-Basic 00350 // authentication schemes to work. 07/28/03 jhrg 00351 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0); 00352 00353 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM, 00354 // choosing the the 'safest' one supported by the server. 00355 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg 00356 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00357 00358 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1); 00359 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1); 00360 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers); 00361 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth 00362 // param of save_raw_http_headers to a vector<string> object. 
00363 00364 // Follow 302 (redirect) responses 00365 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1); 00366 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5); 00367 00368 // If the user turns off SSL validation... 00369 if (d_rcr->get_validate_ssl() == 0) { 00370 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0); 00371 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0); 00372 } 00373 00374 // Look to see if cookies are turned on in the .dodsrc file. If so, 00375 // activate here. We honor 'session cookies' (cookies without an 00376 // expiration date) here so that session-based SSO systems will work as 00377 // expected. 00378 if (!d_cookie_jar.empty()) { 00379 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl); 00380 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str()); 00381 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1); 00382 } 00383 00384 if (www_trace) { 00385 cerr << "Curl version: " << curl_version() << endl; 00386 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1); 00387 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug); 00388 } 00389 } 00390 00394 class BuildHeaders : public unary_function<const string &, void> 00395 { 00396 struct curl_slist *d_cl; 00397 00398 public: 00399 BuildHeaders() : d_cl(0) 00400 {} 00401 00402 void operator()(const string &header) 00403 { 00404 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." 00405 << endl); 00406 d_cl = curl_slist_append(d_cl, header.c_str()); 00407 } 00408 00409 struct curl_slist *get_headers() 00410 { 00411 return d_cl; 00412 } 00413 }; 00414 00429 long 00430 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers) 00431 { 00432 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str()); 00433 00434 #ifdef WIN32 00435 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA) 00436 // and the CURLOPT_WRITEFUNCTION option. 
Quote: "If you are using libcurl as 00437 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the 00438 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of 00439 // this issue is that one should not pass a FILE * to a windows DLL. Close 00440 // inspection of libcurl yields that their default write function when using 00441 // the CURLOPT_WRITEDATA is just "fwrite". 00442 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream); 00443 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite); 00444 #else 00445 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream); 00446 #endif 00447 00448 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00449 ostream_iterator<string>(cerr, "\n"))); 00450 00451 BuildHeaders req_hdrs; 00452 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), 00453 req_hdrs); 00454 if (headers) 00455 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs); 00456 00457 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers()); 00458 00459 // Turn off the proxy for this URL? 00460 bool temporary_proxy = false; 00461 if ((temporary_proxy = url_uses_no_proxy_for(url))) { 00462 DBG(cerr << "Suppress proxy for url: " << url << endl); 00463 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0); 00464 } 00465 00466 string::size_type at_sign = url.find('@'); 00467 // Assume username:password present *and* assume it's an HTTP URL; it *is* 00468 // HTTPConnect, after all. 7 is position after "http://"; the second arg 00469 // to substr() is the sub string length. 00470 if (at_sign != url.npos) 00471 d_upstring = url.substr(7, at_sign - 7); 00472 00473 if (!d_upstring.empty()) 00474 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str()); 00475 00476 // Pass save_raw_http_headers() a pointer to the vector<string> where the 00477 // response headers may be stored. Callers can use the resp_hdrs 00478 // value/result parameter to get the raw response header information . 
00479 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs); 00480 00481 // This is the call that causes curl to go and get the remote resource and "write it down" 00482 // utilizing the configuration state that has been previously conditioned by various perturbations 00483 // of calls to curl_easy_setopt(). 00484 CURLcode res = curl_easy_perform(d_curl); 00485 00486 // Free the header list and null the value in d_curl. 00487 curl_slist_free_all(req_hdrs.get_headers()); 00488 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0); 00489 00490 // Reset the proxy? 00491 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty()) 00492 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00493 d_rcr->get_proxy_server_host().c_str()); 00494 00495 if (res != 0) 00496 throw Error(d_error_buffer); 00497 00498 long status; 00499 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status); 00500 if (res != 0) 00501 throw Error(d_error_buffer); 00502 00503 char *ct_ptr = 0; 00504 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr); 00505 if (res == CURLE_OK && ct_ptr) 00506 d_content_type = ct_ptr; 00507 else 00508 d_content_type = ""; 00509 00510 return status; 00511 } 00512 00516 bool 00517 HTTPConnect::url_uses_proxy_for(const string &url) 00518 { 00519 if (d_rcr->is_proxy_for_used()) { 00520 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str()); 00521 int index = 0, matchlen; 00522 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1; 00523 } 00524 00525 return false; 00526 } 00527 00531 bool 00532 HTTPConnect::url_uses_no_proxy_for(const string &url) throw() 00533 { 00534 return d_rcr->is_no_proxy_for_used() 00535 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos; 00536 } 00537 00538 // Public methods. Mostly... 
00539 00546 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""), 00547 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp) 00548 00549 { 00550 d_accept_deflate = rcr->get_deflate(); 00551 d_rcr = rcr; 00552 00553 // Load in the default headers to send with a request. The empty Pragma 00554 // headers overrides libcurl's default Pragma: no-cache header (which 00555 // will disable caching by Squid, et c.). The User-Agent header helps 00556 // make server logs more readable. 05/05/03 jhrg 00557 d_request_headers.push_back(string("Pragma:")); 00558 string user_agent = string("User-Agent: ") + string(CNAME) 00559 + string("/") + string(CVER); 00560 d_request_headers.push_back(user_agent); 00561 if (d_accept_deflate) 00562 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00563 00564 // HTTPCache::instance returns a valid ptr or 0. 00565 if (d_rcr->get_use_cache()) 00566 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true); 00567 else 00568 d_http_cache = 0; 00569 00570 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec 00571 << ")" << endl); 00572 00573 if (d_http_cache) { 00574 d_http_cache->set_cache_enabled(d_rcr->get_use_cache()); 00575 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0); 00576 d_http_cache->set_max_size(d_rcr->get_max_cache_size()); 00577 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj()); 00578 d_http_cache->set_default_expiration(d_rcr->get_default_expires()); 00579 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0); 00580 } 00581 00582 d_cookie_jar = rcr->get_cookie_jar(); 00583 00584 www_lib_init(); // This may throw either Error or InternalErr 00585 } 00586 00587 HTTPConnect::~HTTPConnect() 00588 { 00589 DBG2(cerr << "Entering the HTTPConnect dtor" << endl); 00590 00591 curl_easy_cleanup(d_curl); 00592 00593 DBG2(cerr << "Leaving the HTTPConnect 
dtor" << endl); 00594 } 00595 00597 class HeaderMatch : public unary_function<const string &, bool> { 00598 const string &d_header; 00599 public: 00600 HeaderMatch(const string &header) : d_header(header) {} 00601 bool operator()(const string &arg) { return arg.find(d_header) == 0; } 00602 }; 00603 00616 HTTPResponse * 00617 HTTPConnect::fetch_url(const string &url) 00618 { 00619 #ifdef HTTP_TRACE 00620 cout << "GET " << url << " HTTP/1.0" << endl; 00621 #endif 00622 00623 HTTPResponse *stream; 00624 00625 if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) { 00626 stream = caching_fetch_url(url); 00627 } 00628 else { 00629 stream = plain_fetch_url(url); 00630 } 00631 00632 #ifdef HTTP_TRACE 00633 stringstream ss; 00634 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl; 00635 for (size_t i = 0; i < stream->get_headers()->size(); i++) { 00636 ss << stream->get_headers()->at(i) << endl; 00637 } 00638 cout << ss.str(); 00639 #endif 00640 00641 ParseHeader parser; 00642 00643 // An apparent quirk of libcurl is that it does not pass the Content-type 00644 // header to the callback used to save them, but check and add it from the 00645 // saved state variable only if it's not there (without this a test failed 00646 // in HTTPCacheTest). 
jhrg 11/12/13 00647 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(), 00648 HeaderMatch("Content-Type:")) == stream->get_headers()->end()) 00649 stream->get_headers()->push_back("Content-Type: " + d_content_type); 00650 00651 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader()); 00652 00653 #ifdef HTTP_TRACE 00654 cout << endl << endl; 00655 #endif 00656 00657 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu) 00658 if (parser.get_location() != "" && 00659 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) { 00660 delete stream; 00661 return fetch_url(parser.get_location()); 00662 } 00663 00664 stream->set_type(parser.get_object_type()); // uses the value of content-description 00665 00666 stream->set_version(parser.get_server()); 00667 stream->set_protocol(parser.get_protocol()); 00668 00669 if (d_use_cpp_streams) { 00670 stream->transform_to_cpp(); 00671 } 00672 00673 return stream; 00674 } 00675 00676 // Look around for a reasonable place to put a temporary file. Check first 00677 // the value of the TMPDIR env var. If that does not yeild a path that's 00678 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00679 // defined in stdio.h. If both come up empty, then use `./'. 00680 00681 // Change this to a version that either returns a string or an open file 00682 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00683 // (see open()) to make it more secure. Ideal solution: get deserialize() 00684 // methods to read from a stream returned by libcurl, not from a temporary 00685 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 
3/22/11 00686 static string 00687 get_tempfile_template(const string &file_template) 00688 { 00689 string c; 00690 00691 // Windows has one idea of the standard name(s) for a temporary files dir 00692 #ifdef WIN32 00693 // white list for a WIN32 directory 00694 Regex directory("[-a-zA-Z0-9_:\\]*"); 00695 00696 // If we're OK to use getenv(), try it. 00697 #ifdef USE_GETENV 00698 c = getenv("TEMP"); 00699 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00700 goto valid_temp_directory; 00701 00702 c= getenv("TMP"); 00703 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00704 goto valid_temp_directory; 00705 #endif // USE_GETENV 00706 00707 // The windows default 00708 c = "c:\tmp"; 00709 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00710 goto valid_temp_directory; 00711 00712 #else // Unix/Linux/OSX has another... 00713 // white list for a directory 00714 Regex directory("[-a-zA-Z0-9_/]*"); 00715 #ifdef USE_GETENV 00716 c = getenv("TMPDIR"); 00717 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00718 goto valid_temp_directory; 00719 #endif // USE_GETENV 00720 00721 // Unix defines this sometimes - if present, use it. 
00722 #ifdef P_tmpdir 00723 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00724 c = P_tmpdir; 00725 goto valid_temp_directory; 00726 } 00727 #endif 00728 00729 // The Unix default 00730 c = "/tmp"; 00731 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00732 goto valid_temp_directory; 00733 00734 #endif // WIN32 00735 00736 // If we found nothing useful, use the current directory 00737 c = "."; 00738 00739 valid_temp_directory: 00740 00741 #ifdef WIN32 00742 c += "\\" + file_template; 00743 #else 00744 c += "/" + file_template; 00745 #endif 00746 00747 return c; 00748 } 00749 00768 string 00769 get_temp_file(FILE *&stream) throw(Error) 00770 { 00771 string dods_temp = get_tempfile_template((string)"dodsXXXXXX"); 00772 00773 vector<char> pathname(dods_temp.length() + 1); 00774 00775 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length()); 00776 00777 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl); 00778 00779 // Open truncated for update. NB: mkstemp() returns a file descriptor. 00780 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 00781 stream = fopen(_mktemp(&pathname[0]), "w+b"); 00782 #else 00783 // Make sure that temp files are accessible only by the owner. 00784 int mask = umask(077); 00785 if (mask < 0) 00786 throw Error("Could not set the file creation mask: " + string(strerror(errno))); 00787 int fd = mkstemp(&pathname[0]); 00788 if (fd < 0) 00789 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno))); 00790 00791 stream = fdopen(fd, "w+"); 00792 umask(mask); 00793 #endif 00794 00795 if (!stream) 00796 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")"); 00797 00798 dods_temp = &pathname[0]; 00799 return dods_temp; 00800 } 00801 00802 00808 void 00809 close_temp(FILE *s, const string &name) 00810 { 00811 int res = fclose(s); 00812 if (res) 00813 throw InternalErr(__FILE__, __LINE__, "!FAIL! 
" + long_to_string(res)); 00814 00815 res = unlink(name.c_str()); 00816 if (res != 0) 00817 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res)); 00818 } 00819 00841 HTTPResponse * 00842 HTTPConnect::caching_fetch_url(const string &url) 00843 { 00844 DBG(cerr << "Is this URL (" << url << ") in the cache?... "); 00845 00846 vector<string> *headers = new vector<string>; 00847 string file_name; 00848 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name); 00849 if (!s) { 00850 // url not in cache; get it and cache it 00851 DBGN(cerr << "no; getting response and caching." << endl); 00852 delete headers; headers = 0; 00853 time_t now = time(0); 00854 HTTPResponse *rs = plain_fetch_url(url); 00855 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream()); 00856 00857 return rs; 00858 } 00859 else { // url in cache 00860 DBGN(cerr << "yes... "); 00861 00862 if (d_http_cache->is_url_valid(url)) { // url in cache and valid 00863 DBGN(cerr << "and it's valid; using cached response." << endl); 00864 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache); 00865 return crs; 00866 } 00867 else { // url in cache but not valid; validate 00868 DBGN(cerr << "but it's not valid; validating... "); 00869 00870 d_http_cache->release_cached_response(s); // This closes 's' 00871 headers->clear(); 00872 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url); 00873 FILE *body = 0; 00874 string dods_temp = get_temp_file(body); 00875 time_t now = time(0); // When was the request made (now). 00876 long http_status; 00877 00878 try { 00879 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs); 00880 rewind(body); 00881 } 00882 catch (Error &e) { 00883 close_temp(body, dods_temp); 00884 delete headers; 00885 throw ; 00886 } 00887 00888 switch (http_status) { 00889 case 200: { // New headers and new body 00890 DBGN(cerr << "read a new response; caching." 
<< endl); 00891 00892 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body); 00893 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp); 00894 00895 return rs; 00896 } 00897 00898 case 304: { // Just new headers, use cached body 00899 DBGN(cerr << "cached response valid; updating." << endl); 00900 00901 close_temp(body, dods_temp); 00902 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers); 00903 string file_name; 00904 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name); 00905 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache); 00906 return crs; 00907 } 00908 00909 default: { // Oops. 00910 close_temp(body, dods_temp); 00911 if (http_status >= 400) { 00912 delete headers; headers = 0; 00913 string msg = "Error while reading the URL: "; 00914 msg += url; 00915 msg 00916 += ".\nThe OPeNDAP server returned the following message:\n"; 00917 msg += http_status_to_string(http_status); 00918 throw Error(msg); 00919 } 00920 else { 00921 delete headers; headers = 0; 00922 throw InternalErr(__FILE__, __LINE__, 00923 "Bad response from the HTTP server: " + long_to_string(http_status)); 00924 } 00925 } 00926 } 00927 } 00928 } 00929 00930 throw InternalErr(__FILE__, __LINE__, "Should never get here"); 00931 } 00932 00944 HTTPResponse * 00945 HTTPConnect::plain_fetch_url(const string &url) 00946 { 00947 DBG(cerr << "Getting URL: " << url << endl); 00948 FILE *stream = 0; 00949 string dods_temp = get_temp_file(stream); 00950 vector<string> *resp_hdrs = new vector<string>; 00951 00952 int status = -1; 00953 try { 00954 status = read_url(url, stream, resp_hdrs); // Throws Error. 
00955 if (status >= 400) { 00956 // delete resp_hdrs; resp_hdrs = 0; 00957 string msg = "Error while reading the URL: "; 00958 msg += url; 00959 msg += ".\nThe OPeNDAP server returned the following message:\n"; 00960 msg += http_status_to_string(status); 00961 throw Error(msg); 00962 } 00963 } 00964 00965 catch (Error &e) { 00966 delete resp_hdrs; 00967 close_temp(stream, dods_temp); 00968 throw; 00969 } 00970 00971 #if 0 00972 if (d_use_cpp_streams) { 00973 fclose(stream); 00974 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary); 00975 return new HTTPResponse(in, status, resp_hdrs, dods_temp); 00976 } 00977 else { 00978 #endif 00979 rewind(stream); 00980 return new HTTPResponse(stream, status, resp_hdrs, dods_temp); 00981 #if 0 00982 } 00983 #endif 00984 } 00985 00997 void 00998 HTTPConnect::set_accept_deflate(bool deflate) 00999 { 01000 d_accept_deflate = deflate; 01001 01002 if (d_accept_deflate) { 01003 if (find(d_request_headers.begin(), d_request_headers.end(), 01004 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end()) 01005 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 01006 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 01007 ostream_iterator<string>(cerr, "\n"))); 01008 } 01009 else { 01010 vector<string>::iterator i; 01011 i = remove_if(d_request_headers.begin(), d_request_headers.end(), 01012 bind2nd(equal_to<string>(), 01013 string("Accept-Encoding: deflate, gzip, compress"))); 01014 d_request_headers.erase(i, d_request_headers.end()); 01015 } 01016 } 01017 01026 void 01027 HTTPConnect::set_xdap_protocol(int major, int minor) 01028 { 01029 // Look for, and remove if one exists, an XDAP-Accept header 01030 vector<string>::iterator i; 01031 i = find_if(d_request_headers.begin(), d_request_headers.end(), 01032 HeaderMatch("XDAP-Accept:")); 01033 if (i != d_request_headers.end()) 01034 d_request_headers.erase(i); 01035 01036 // Record and add the new header value 01037 
d_dap_client_protocol_major = major; 01038 d_dap_client_protocol_minor = minor; 01039 ostringstream xdap_accept; 01040 xdap_accept << "XDAP-Accept: " << major << "." << minor; 01041 01042 d_request_headers.push_back(xdap_accept.str()); 01043 01044 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 01045 ostream_iterator<string>(cerr, "\n"))); 01046 } 01047 01063 void 01064 HTTPConnect::set_credentials(const string &u, const string &p) 01065 { 01066 if (u.empty()) 01067 return; 01068 01069 // Store the credentials locally. 01070 d_username = u; 01071 d_password = p; 01072 01073 d_upstring = u + ":" + p; 01074 } 01075 01076 } // namespace libdap