libdap
Updated for version 3.17.0
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 
#include "config.h"

// #define DODS_DEBUG
// #define DODS_DEBUG2
#undef USE_GETENV

#include <pthread.h>
#include <limits.h>
#include <unistd.h>    // for stat
#include <sys/types.h> // for stat and mkdir
#include <sys/stat.h>

#include <cstring>
#include <cerrno>

#include <iostream>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <set>

#include "Error.h"
#include "InternalErr.h"
#include "ResponseTooBigErr.h"
#ifndef WIN32
#include "SignalHandler.h"
#endif
#include "HTTPCacheInterruptHandler.h"
#include "HTTPCacheTable.h"
#include "HTTPCache.h"
#include "HTTPCacheMacros.h"
#include "SignalHandlerRegisteredErr.h"

#include "util_mit.h"
#include "debug.h"

using namespace std;

namespace libdap {

// The single cache instance managed by HTTPCache::instance()/delete_instance().
HTTPCache *HTTPCache::_instance = 0;

// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine()
// and the call to that takes place using pthread_once_init() where the mutex
// once_block is used to protect that call. All of this ensures that no matter
// how many threads call the instance() method, only one instance is ever
// made.
static pthread_mutex_t instance_mutex;
static pthread_once_t once_block = PTHREAD_ONCE_INIT;

#define NO_LM_EXPIRATION 24*3600 // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
00086 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 00087 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 00088 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 00089 00090 static void 00091 once_init_routine() 00092 { 00093 int status; 00094 status = INIT(&instance_mutex); 00095 00096 if (status != 0) 00097 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00098 } 00099 00128 HTTPCache * 00129 HTTPCache::instance(const string &cache_root, bool force) 00130 { 00131 int status = pthread_once(&once_block, once_init_routine); 00132 if (status != 0) 00133 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00134 00135 LOCK(&instance_mutex); 00136 00137 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... "); 00138 00139 try { 00140 if (!_instance) { 00141 _instance = new HTTPCache(cache_root, force); 00142 00143 DBG(cerr << "New instance: " << _instance << ", cache root: " 00144 << _instance->d_cache_root << endl); 00145 00146 atexit(delete_instance); 00147 00148 #ifndef WIN32 00149 // Register the interrupt handler. If we've already registered 00150 // one, barf. If this becomes a problem, hack SignalHandler so 00151 // that we can chain these handlers... 02/10/04 jhrg 00152 // 00153 // Technically we're leaking memory here. However, since this 00154 // class is a singleton, we know that only three objects will 00155 // ever be created and they will all exist until the process 00156 // exits. We can let this slide... 
02/12/04 jhrg 00157 EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true); 00158 if (old_eh) { 00159 SignalHandler::instance()->register_handler(SIGINT, old_eh); 00160 throw SignalHandlerRegisteredErr( 00161 "Could not register event handler for SIGINT without superseding an existing one."); 00162 } 00163 00164 old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true); 00165 if (old_eh) { 00166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh); 00167 throw SignalHandlerRegisteredErr( 00168 "Could not register event handler for SIGPIPE without superseding an existing one."); 00169 } 00170 00171 old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true); 00172 if (old_eh) { 00173 SignalHandler::instance()->register_handler(SIGTERM, old_eh); 00174 throw SignalHandlerRegisteredErr( 00175 "Could not register event handler for SIGTERM without superseding an existing one."); 00176 } 00177 #endif 00178 } 00179 } 00180 catch (...) { 00181 DBG2(cerr << "The constructor threw an Error!" << endl); 00182 UNLOCK(&instance_mutex); 00183 throw; 00184 } 00185 00186 UNLOCK(&instance_mutex); 00187 DBGN(cerr << "returning " << hex << _instance << dec << endl); 00188 00189 return _instance; 00190 } 00191 00195 void 00196 HTTPCache::delete_instance() 00197 { 00198 DBG(cerr << "Entering delete_instance()..." 
<< endl); 00199 00200 if (HTTPCache::_instance) { 00201 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl); 00202 delete HTTPCache::_instance; 00203 HTTPCache::_instance = 0; 00204 00205 //Now remove the signal handlers 00206 delete SignalHandler::instance()->remove_handler(SIGINT); 00207 delete SignalHandler::instance()->remove_handler(SIGPIPE); 00208 delete SignalHandler::instance()->remove_handler(SIGTERM); 00209 } 00210 00211 DBG(cerr << "Exiting delete_instance()" << endl); 00212 } 00213 00228 HTTPCache::HTTPCache(string cache_root, bool force) : 00229 d_locked_open_file(0), 00230 d_cache_enabled(false), 00231 d_cache_protected(false), 00232 00233 d_cache_disconnected(DISCONNECT_NONE), 00234 00235 d_expire_ignored(false), 00236 d_always_validate(false), 00237 d_total_size(CACHE_TOTAL_SIZE * MEGA), 00238 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), 00239 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT), 00240 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), 00241 d_default_expiration(NO_LM_EXPIRATION), 00242 d_max_age(-1), 00243 d_max_stale(-1), 00244 d_min_fresh(-1), 00245 d_http_cache_table(0) 00246 { 00247 DBG(cerr << "Entering the constructor for " << this << "... "); 00248 #if 0 00249 int status = pthread_once(&once_block, once_init_routine); 00250 if (status != 0) 00251 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00252 #endif 00253 INIT(&d_cache_mutex); 00254 00255 // This used to throw an Error object if we could not get the 00256 // single user lock. However, that results in an invalid object. It's 00257 // better to have an instance that has default values. If we cannot get 00258 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg 00259 // 00260 // I fixed this block so that the cache root is set before we try to get 00261 // the single user lock. That was the fix for bug #661. 
To make that 00262 // work, I had to move the call to create_cache_root out of 00263 // set_cache_root(). 09/08/03 jhrg 00264 00265 set_cache_root(cache_root); 00266 int block_size; 00267 00268 if (!get_single_user_lock(force)) 00269 throw Error(internal_error, "Could not get single user lock for the cache"); 00270 00271 #ifdef WIN32 00272 // Windows is unable to provide us this information. 4096 appears 00273 // a best guess. It is likely to be in the range [2048, 8192] on 00274 // windows, but will the level of truth of that statement vary over 00275 // time ? 00276 block_size = 4096; 00277 #else 00278 struct stat s; 00279 if (stat(cache_root.c_str(), &s) == 0) 00280 block_size = s.st_blksize; 00281 else 00282 throw Error(internal_error, "Could not set file system block size."); 00283 #endif 00284 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size); 00285 d_cache_enabled = true; 00286 00287 DBGN(cerr << "exiting" << endl); 00288 } 00289 00302 HTTPCache::~HTTPCache() 00303 { 00304 DBG(cerr << "Entering the destructor for " << this << "... "); 00305 00306 try { 00307 if (startGC()) 00308 perform_garbage_collection(); 00309 00310 d_http_cache_table->cache_index_write(); 00311 } 00312 catch (Error &e) { 00313 // If the cache index cannot be written, we've got problems. However, 00314 // unless we're debugging, still free up the cache table in memory. 00315 // How should we let users know they cache index is not being 00316 // written?? 10/03/02 jhrg 00317 DBG(cerr << e.get_error_message() << endl); 00318 } 00319 00320 delete d_http_cache_table; 00321 00322 release_single_user_lock(); 00323 00324 DBGN(cerr << "exiting destructor." 
<< endl); 00325 DESTROY(&d_cache_mutex); 00326 } 00327 00328 00332 00336 bool 00337 HTTPCache::stopGC() const 00338 { 00339 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer); 00340 } 00341 00348 bool 00349 HTTPCache::startGC() const 00350 { 00351 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl); 00352 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size); 00353 } 00354 00369 void 00370 HTTPCache::perform_garbage_collection() 00371 { 00372 DBG(cerr << "Performing garbage collection" << endl); 00373 00374 // Remove all the expired responses. 00375 expired_gc(); 00376 00377 // Remove entries larger than max_entry_size. 00378 too_big_gc(); 00379 00380 // Remove entries starting with zero hits, 1, ..., until stopGC() 00381 // returns true. 00382 hits_gc(); 00383 } 00384 00390 void 00391 HTTPCache::expired_gc() 00392 { 00393 if (!d_expire_ignored) { 00394 d_http_cache_table->delete_expired_entries(); 00395 } 00396 } 00397 00414 void 00415 HTTPCache::hits_gc() 00416 { 00417 int hits = 0; 00418 00419 if (startGC()) { 00420 while (!stopGC()) { 00421 d_http_cache_table->delete_by_hits(hits); 00422 hits++; 00423 } 00424 } 00425 } 00426 00431 void HTTPCache::too_big_gc() { 00432 if (startGC()) 00433 d_http_cache_table->delete_by_size(d_max_entry_size); 00434 } 00435 00437 00448 bool HTTPCache::get_single_user_lock(bool force) 00449 { 00450 if (!d_locked_open_file) { 00451 FILE * fp = NULL; 00452 00453 try { 00454 // It's OK to call create_cache_root if the directory already 00455 // exists. 00456 create_cache_root(d_cache_root); 00457 } 00458 catch (Error &e) { 00459 // We need to catch and return false because this method is 00460 // called from a ctor and throwing at this point will result in a 00461 // partially constructed object. 
01/22/04 jhrg 00462 DBG(cerr << "Failure to create the cache root" << endl); 00463 return false; 00464 } 00465 00466 // Try to read the lock file. If we can open for reading, it exists. 00467 string lock = d_cache_root + CACHE_LOCK; 00468 if ((fp = fopen(lock.c_str(), "r")) != NULL) { 00469 int res = fclose(fp); 00470 if (res) { 00471 DBG(cerr << "Failed to close " << (void *)fp << endl); 00472 } 00473 if (force) 00474 REMOVE(lock.c_str()); 00475 else 00476 return false; 00477 } 00478 00479 if ((fp = fopen(lock.c_str(), "w")) == NULL) { 00480 DBG(cerr << "Could not open for write access" << endl); 00481 return false; 00482 } 00483 00484 d_locked_open_file = fp; 00485 return true; 00486 } 00487 00488 DBG(cerr << "locked_open_file is true" << endl); 00489 return false; 00490 } 00491 00494 void 00495 HTTPCache::release_single_user_lock() 00496 { 00497 if (d_locked_open_file) { 00498 int res = fclose(d_locked_open_file); 00499 if (res) { 00500 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ; 00501 } 00502 d_locked_open_file = 0; 00503 } 00504 00505 string lock = d_cache_root + CACHE_LOCK; 00506 REMOVE(lock.c_str()); 00507 } 00508 00511 00515 string 00516 HTTPCache::get_cache_root() const 00517 { 00518 return d_cache_root; 00519 } 00520 00521 00530 void 00531 HTTPCache::create_cache_root(const string &cache_root) 00532 { 00533 #ifdef WIN32 00534 string::size_type cur = cache_root[1] == ':' ? 3 : 1; 00535 typedef int mode_t; 00536 00537 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) { 00538 string dir = cache_root.substr(0, cur); 00539 struct stat stat_info; 00540 if (stat(dir.c_str(), &stat_info) == -1) { 00541 DBG2(cerr << "Cache....... Creating " << dir << endl); 00542 mode_t mask = UMASK(0); 00543 if (MKDIR(dir.c_str(), 0777) < 0) { 00544 DBG2(cerr << "Error: can't create." << endl); 00545 UMASK(mask); 00546 throw Error(string("Could not create the directory for the cache. 
Failed when building path at ") + dir + string(".")); 00547 } 00548 UMASK(mask); 00549 } 00550 else { 00551 DBG2(cerr << "Cache....... Found " << dir << endl); 00552 } 00553 cur++; 00554 } 00555 #else 00556 // OSX and Linux 00557 00558 // Save the mask 00559 mode_t mask = umask(0); 00560 00561 // Ignore the error if the directory exists 00562 errno = 0; 00563 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) { 00564 umask(mask); 00565 throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ")."); 00566 } 00567 00568 // Restore themask 00569 umask(mask); 00570 00571 #endif 00572 } 00573 00588 void 00589 HTTPCache::set_cache_root(const string &root) 00590 { 00591 if (root != "") { 00592 d_cache_root = root; 00593 // cache root should end in /. 00594 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00595 d_cache_root += DIR_SEPARATOR_CHAR; 00596 } 00597 else { 00598 // If no cache root has been indicated then look for a suitable 00599 // location. 00600 #ifdef USE_GETENV 00601 char * cr = (char *) getenv("DODS_CACHE"); 00602 if (!cr) cr = (char *) getenv("TMP"); 00603 if (!cr) cr = (char *) getenv("TEMP"); 00604 if (!cr) cr = (char*)CACHE_LOCATION; 00605 d_cache_root = cr; 00606 #else 00607 d_cache_root = CACHE_LOCATION; 00608 #endif 00609 00610 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00611 d_cache_root += DIR_SEPARATOR_CHAR; 00612 00613 d_cache_root += CACHE_ROOT; 00614 } 00615 00616 // Test d_hhtp_cache_table because this method can be called before that 00617 // instance is created and also can be called later to change the cache 00618 // root. 
jhrg 05.14.08 00619 if (d_http_cache_table) 00620 d_http_cache_table->set_cache_root(d_cache_root); 00621 } 00622 00634 void 00635 HTTPCache::set_cache_enabled(bool mode) 00636 { 00637 lock_cache_interface(); 00638 00639 d_cache_enabled = mode; 00640 00641 unlock_cache_interface(); 00642 } 00643 00646 bool 00647 HTTPCache::is_cache_enabled() const 00648 { 00649 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" 00650 << endl); 00651 return d_cache_enabled; 00652 } 00653 00663 void 00664 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) 00665 { 00666 lock_cache_interface(); 00667 00668 d_cache_disconnected = mode; 00669 00670 unlock_cache_interface(); 00671 } 00672 00675 CacheDisconnectedMode 00676 HTTPCache::get_cache_disconnected() const 00677 { 00678 return d_cache_disconnected; 00679 } 00680 00689 void 00690 HTTPCache::set_expire_ignored(bool mode) 00691 { 00692 lock_cache_interface(); 00693 00694 d_expire_ignored = mode; 00695 00696 unlock_cache_interface(); 00697 } 00698 00699 /* Is the cache ignoring Expires headers returned with responses that have 00700 been cached? */ 00701 00702 bool 00703 HTTPCache::is_expire_ignored() const 00704 { 00705 return d_expire_ignored; 00706 } 00707 00723 void 00724 HTTPCache::set_max_size(unsigned long size) 00725 { 00726 lock_cache_interface(); 00727 00728 try { 00729 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? 00730 MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA; 00731 unsigned long old_size = d_total_size; 00732 d_total_size = new_size; 00733 d_folder_size = d_total_size / CACHE_FOLDER_PCT; 00734 d_gc_buffer = d_total_size / CACHE_GC_PCT; 00735 00736 if (new_size < old_size && startGC()) { 00737 perform_garbage_collection(); 00738 d_http_cache_table->cache_index_write(); 00739 } 00740 } 00741 catch (...) { 00742 unlock_cache_interface(); 00743 DBGN(cerr << "Unlocking interface." << endl); 00744 throw; 00745 } 00746 00747 DBG2(cerr << "Cache....... 
Total cache size: " << d_total_size 00748 << " with " << d_folder_size 00749 << " bytes for meta information and folders and at least " 00750 << d_gc_buffer << " bytes free after every gc" << endl); 00751 00752 unlock_cache_interface(); 00753 } 00754 00757 unsigned long 00758 HTTPCache::get_max_size() const 00759 { 00760 return d_total_size / MEGA; 00761 } 00762 00771 void 00772 HTTPCache::set_max_entry_size(unsigned long size) 00773 { 00774 lock_cache_interface(); 00775 00776 try { 00777 unsigned long new_size = size * MEGA; 00778 if (new_size > 0 && new_size < d_total_size - d_folder_size) { 00779 unsigned long old_size = d_max_entry_size; 00780 d_max_entry_size = new_size; 00781 if (new_size < old_size && startGC()) { 00782 perform_garbage_collection(); 00783 d_http_cache_table->cache_index_write(); 00784 } 00785 } 00786 } 00787 catch (...) { 00788 unlock_cache_interface(); 00789 throw; 00790 } 00791 00792 DBG2(cerr << "Cache...... Max entry cache size is " 00793 << d_max_entry_size << endl); 00794 00795 unlock_cache_interface(); 00796 } 00797 00802 unsigned long 00803 HTTPCache::get_max_entry_size() const 00804 { 00805 return d_max_entry_size / MEGA; 00806 } 00807 00818 void 00819 HTTPCache::set_default_expiration(const int exp_time) 00820 { 00821 lock_cache_interface(); 00822 00823 d_default_expiration = exp_time; 00824 00825 unlock_cache_interface(); 00826 } 00827 00830 int 00831 HTTPCache::get_default_expiration() const 00832 { 00833 return d_default_expiration; 00834 } 00835 00840 void 00841 HTTPCache::set_always_validate(bool validate) 00842 { 00843 d_always_validate = validate; 00844 } 00845 00849 bool 00850 HTTPCache::get_always_validate() const 00851 { 00852 return d_always_validate; 00853 } 00854 00871 void 00872 HTTPCache::set_cache_control(const vector<string> &cc) 00873 { 00874 lock_cache_interface(); 00875 00876 try { 00877 d_cache_control = cc; 00878 00879 vector<string>::const_iterator i; 00880 for (i = cc.begin(); i != cc.end(); ++i) { 00881 
string header = (*i).substr(0, (*i).find(':')); 00882 string value = (*i).substr((*i).find(": ") + 2); 00883 if (header != "Cache-Control") { 00884 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found."); 00885 } 00886 else { 00887 if (value == "no-cache" || value == "no-store") 00888 d_cache_enabled = false; 00889 else if (value.find("max-age") != string::npos) { 00890 string max_age = value.substr(value.find("=" + 1)); 00891 d_max_age = parse_time(max_age.c_str()); 00892 } 00893 else if (value == "max-stale") 00894 d_max_stale = 0; // indicates will take anything; 00895 else if (value.find("max-stale") != string::npos) { 00896 string max_stale = value.substr(value.find("=" + 1)); 00897 d_max_stale = parse_time(max_stale.c_str()); 00898 } 00899 else if (value.find("min-fresh") != string::npos) { 00900 string min_fresh = value.substr(value.find("=" + 1)); 00901 d_min_fresh = parse_time(min_fresh.c_str()); 00902 } 00903 } 00904 } 00905 } 00906 catch (...) { 00907 unlock_cache_interface(); 00908 throw; 00909 } 00910 00911 unlock_cache_interface(); 00912 } 00913 00914 00919 vector<string> 00920 HTTPCache::get_cache_control() 00921 { 00922 return d_cache_control; 00923 } 00924 00926 00935 bool 00936 HTTPCache::is_url_in_cache(const string &url) 00937 { 00938 DBG(cerr << "Is this url in the cache? 
(" << url << ")" << endl); 00939 00940 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 00941 bool status = entry != 0; 00942 if (entry) { 00943 entry->unlock_read_response(); 00944 } 00945 return status; 00946 } 00947 00953 bool 00954 is_hop_by_hop_header(const string &header) 00955 { 00956 return header.find("Connection") != string::npos 00957 || header.find("Keep-Alive") != string::npos 00958 || header.find("Proxy-Authenticate") != string::npos 00959 || header.find("Proxy-Authorization") != string::npos 00960 || header.find("Transfer-Encoding") != string::npos 00961 || header.find("Upgrade") != string::npos; 00962 } 00963 00975 void 00976 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) 00977 { 00978 string fname = cachename + CACHE_META; 00979 d_open_files.push_back(fname); 00980 00981 FILE *dest = fopen(fname.c_str(), "w"); 00982 if (!dest) { 00983 throw InternalErr(__FILE__, __LINE__, 00984 "Could not open named cache entry file."); 00985 } 00986 00987 vector<string>::const_iterator i; 00988 for (i = headers.begin(); i != headers.end(); ++i) { 00989 if (!is_hop_by_hop_header(*i)) { 00990 int s = fwrite((*i).c_str(), (*i).size(), 1, dest); 00991 if (s != 1) { 00992 fclose(dest); 00993 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s)); 00994 } 00995 s = fwrite("\n", 1, 1, dest); 00996 if (s != 1) { 00997 fclose(dest); 00998 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s)); 00999 } 01000 } 01001 } 01002 01003 int res = fclose(dest); 01004 if (res) { 01005 DBG(cerr << "HTTPCache::write_metadata - Failed to close " 01006 << dest << endl); 01007 } 01008 01009 d_open_files.pop_back(); 01010 } 01011 01022 void 01023 HTTPCache::read_metadata(const string &cachename, vector<string> &headers) 01024 { 01025 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r"); 01026 if (!md) { 01027 throw 
InternalErr(__FILE__, __LINE__, 01028 "Could not open named cache entry meta data file."); 01029 } 01030 01031 char line[1024]; 01032 while (!feof(md) && fgets(line, 1024, md)) { 01033 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline 01034 headers.push_back(string(line)); 01035 } 01036 01037 int res = fclose(md); 01038 if (res) { 01039 DBG(cerr << "HTTPCache::read_metadata - Failed to close " 01040 << md << endl); 01041 } 01042 } 01043 01065 int 01066 HTTPCache::write_body(const string &cachename, const FILE *src) 01067 { 01068 d_open_files.push_back(cachename); 01069 01070 FILE *dest = fopen(cachename.c_str(), "wb"); 01071 if (!dest) { 01072 throw InternalErr(__FILE__, __LINE__, 01073 "Could not open named cache entry file."); 01074 } 01075 01076 // Read and write in 1k blocks; an attempt at doing this efficiently. 01077 // 09/30/02 jhrg 01078 char line[1024]; 01079 size_t n; 01080 int total = 0; 01081 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) { 01082 total += fwrite(line, 1, n, dest); 01083 DBG2(sleep(3)); 01084 } 01085 01086 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) { 01087 int res = fclose(dest); 01088 res = res & unlink(cachename.c_str()); 01089 if (res) { 01090 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " 01091 << dest << endl); 01092 } 01093 throw InternalErr(__FILE__, __LINE__, 01094 "I/O error transferring data to the cache."); 01095 } 01096 01097 rewind(const_cast<FILE *>(src)); 01098 01099 int res = fclose(dest); 01100 if (res) { 01101 DBG(cerr << "HTTPCache::write_body - Failed to close " 01102 << dest << endl); 01103 } 01104 01105 d_open_files.pop_back(); 01106 01107 return total; 01108 } 01109 01118 FILE * 01119 HTTPCache::open_body(const string &cachename) 01120 { 01121 DBG(cerr << "cachename: " << cachename << endl); 01122 01123 FILE *src = fopen(cachename.c_str(), "rb"); // Read only 01124 if (!src) 01125 throw InternalErr(__FILE__, __LINE__, "Could not open cache file."); 
01126 01127 return src; 01128 } 01129 01155 bool 01156 HTTPCache::cache_response(const string &url, time_t request_time, 01157 const vector<string> &headers, const FILE *body) 01158 { 01159 lock_cache_interface(); 01160 01161 DBG(cerr << "Caching url: " << url << "." << endl); 01162 01163 try { 01164 // If this is not an http or https URL, don't cache. 01165 if (url.find("http:") == string::npos && 01166 url.find("https:") == string::npos) { 01167 unlock_cache_interface(); 01168 return false; 01169 } 01170 01171 // This does nothing if url is not already in the cache. It's 01172 // more efficient to do this than to first check and see if the entry 01173 // exists. 10/10/02 jhrg 01174 d_http_cache_table->remove_entry_from_cache_table(url); 01175 01176 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url); 01177 entry->lock_write_response(); 01178 01179 try { 01180 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age. 01181 if (entry->is_no_cache()) { 01182 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry 01183 << "(" << url << ")" << endl); 01184 entry->unlock_write_response(); 01185 delete entry; entry = 0; 01186 unlock_cache_interface(); 01187 return false; 01188 } 01189 01190 // corrected_initial_age, freshness_lifetime, response_time. 01191 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01192 01193 d_http_cache_table->create_location(entry); // cachename, cache_body_fd 01194 // move these write function to cache table 01195 entry->set_size(write_body(entry->get_cachename(), body)); 01196 write_metadata(entry->get_cachename(), headers); 01197 d_http_cache_table->add_entry_to_cache_table(entry); 01198 entry->unlock_write_response(); 01199 } 01200 catch (ResponseTooBigErr &e) { 01201 // Oops. Bummer. Clean up and exit. 
01202 DBG(cerr << e.get_error_message() << endl); 01203 REMOVE(entry->get_cachename().c_str()); 01204 REMOVE(string(entry->get_cachename() + CACHE_META).c_str()); 01205 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url 01206 << ")" << endl); 01207 entry->unlock_write_response(); 01208 delete entry; entry = 0; 01209 unlock_cache_interface(); 01210 return false; 01211 } 01212 01213 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) { 01214 if (startGC()) 01215 perform_garbage_collection(); 01216 01217 d_http_cache_table->cache_index_write(); // resets new_entries 01218 } 01219 } 01220 catch (...) { 01221 unlock_cache_interface(); 01222 throw; 01223 } 01224 01225 unlock_cache_interface(); 01226 01227 return true; 01228 } 01229 01248 vector<string> 01249 HTTPCache::get_conditional_request_headers(const string &url) 01250 { 01251 lock_cache_interface(); 01252 01253 HTTPCacheTable::CacheEntry *entry = 0; 01254 vector<string> headers; 01255 01256 DBG(cerr << "Getting conditional request headers for " << url << endl); 01257 01258 try { 01259 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01260 if (!entry) 01261 throw Error(internal_error, "There is no cache entry for the URL: " + url); 01262 01263 if (entry->get_etag() != "") 01264 headers.push_back(string("If-None-Match: ") + entry->get_etag()); 01265 01266 if (entry->get_lm() > 0) { 01267 time_t lm = entry->get_lm(); 01268 headers.push_back(string("If-Modified-Since: ") 01269 + date_time_str(&lm)); 01270 } 01271 else if (entry->get_max_age() > 0) { 01272 time_t max_age = entry->get_max_age(); 01273 headers.push_back(string("If-Modified-Since: ") 01274 + date_time_str(&max_age)); 01275 } 01276 else if (entry->get_expires() > 0) { 01277 time_t expires = entry->get_expires(); 01278 headers.push_back(string("If-Modified-Since: ") 01279 + date_time_str(&expires)); 01280 } 01281 entry->unlock_read_response(); 01282 unlock_cache_interface(); 01283 } 01284 catch 
(...) { 01285 unlock_cache_interface(); 01286 if (entry) { 01287 entry->unlock_read_response(); 01288 } 01289 throw; 01290 } 01291 01292 return headers; 01293 } 01294 01298 struct HeaderLess: binary_function<const string&, const string&, bool> 01299 { 01300 bool operator()(const string &s1, const string &s2) const { 01301 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':')); 01302 } 01303 }; 01304 01318 void 01319 HTTPCache::update_response(const string &url, time_t request_time, 01320 const vector<string> &headers) 01321 { 01322 lock_cache_interface(); 01323 01324 HTTPCacheTable::CacheEntry *entry = 0; 01325 DBG(cerr << "Updating the response headers for: " << url << endl); 01326 01327 try { 01328 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url); 01329 if (!entry) 01330 throw Error(internal_error, "There is no cache entry for the URL: " + url); 01331 01332 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object. 01333 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); 01334 01335 // Update corrected_initial_age, freshness_lifetime, response_time. 01336 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01337 01338 // Merge the new headers with those in the persistent store. How: 01339 // Load the new headers into a set, then merge the old headers. Since 01340 // set<> ignores duplicates, old headers with the same name as a new 01341 // header will got into the bit bucket. Define a special compare 01342 // functor to make sure that headers are compared using only their 01343 // name and not their value too. 01344 set<string, HeaderLess> merged_headers; 01345 01346 // Load in the new headers 01347 copy(headers.begin(), headers.end(), 01348 inserter(merged_headers, merged_headers.begin())); 01349 01350 // Get the old headers and load them in. 
01351 vector<string> old_headers; 01352 read_metadata(entry->get_cachename(), old_headers); 01353 copy(old_headers.begin(), old_headers.end(), 01354 inserter(merged_headers, merged_headers.begin())); 01355 01356 // Read the values back out. Use reverse iterators with back_inserter 01357 // to preserve header order. NB: vector<> does not support push_front 01358 // so we can't use front_inserter(). 01/09/03 jhrg 01359 vector<string> result; 01360 copy(merged_headers.rbegin(), merged_headers.rend(), 01361 back_inserter(result)); 01362 01363 write_metadata(entry->get_cachename(), result); 01364 entry->unlock_write_response(); 01365 unlock_cache_interface(); 01366 } 01367 catch (...) { 01368 if (entry) { 01369 entry->unlock_read_response(); 01370 } 01371 unlock_cache_interface(); 01372 throw; 01373 } 01374 } 01375 01387 bool 01388 HTTPCache::is_url_valid(const string &url) 01389 { 01390 lock_cache_interface(); 01391 01392 bool freshness; 01393 HTTPCacheTable::CacheEntry *entry = 0; 01394 01395 DBG(cerr << "Is this URL valid? (" << url << ")" << endl); 01396 01397 try { 01398 if (d_always_validate) { 01399 unlock_cache_interface(); 01400 return false; // force re-validation. 01401 } 01402 01403 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01404 if (!entry) 01405 throw Error(internal_error, "There is no cache entry for the URL: " + url); 01406 01407 // If we supported range requests, we'd need code here to check if 01408 // there was only a partial response in the cache. 10/02/02 jhrg 01409 01410 // In case this entry is of type "must-revalidate" then we consider it 01411 // invalid. 
01412 if (entry->get_must_revalidate()) { 01413 entry->unlock_read_response(); 01414 unlock_cache_interface(); 01415 return false; 01416 } 01417 01418 time_t resident_time = time(NULL) - entry->get_response_time(); 01419 time_t current_age = entry->get_corrected_initial_age() + resident_time; 01420 01421 // Check that the max-age, max-stale, and min-fresh directives 01422 // given in the request cache control header is followed. 01423 if (d_max_age >= 0 && current_age > d_max_age) { 01424 DBG(cerr << "Cache....... Max-age validation" << endl); 01425 entry->unlock_read_response(); 01426 unlock_cache_interface(); 01427 return false; 01428 } 01429 if (d_min_fresh >= 0 01430 && entry->get_freshness_lifetime() < current_age + d_min_fresh) { 01431 DBG(cerr << "Cache....... Min-fresh validation" << endl); 01432 entry->unlock_read_response(); 01433 unlock_cache_interface(); 01434 return false; 01435 } 01436 01437 freshness = (entry->get_freshness_lifetime() 01438 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age); 01439 entry->unlock_read_response(); 01440 unlock_cache_interface(); 01441 } 01442 catch (...) 
{ 01443 if (entry) { 01444 entry->unlock_read_response(); 01445 } 01446 unlock_cache_interface(); 01447 throw; 01448 } 01449 01450 return freshness; 01451 } 01452 01480 FILE * HTTPCache::get_cached_response(const string &url, 01481 vector<string> &headers, string &cacheName) { 01482 lock_cache_interface(); 01483 01484 FILE *body = 0; 01485 HTTPCacheTable::CacheEntry *entry = 0; 01486 01487 DBG(cerr << "Getting the cached response for " << url << endl); 01488 01489 try { 01490 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01491 if (!entry) { 01492 unlock_cache_interface(); 01493 return 0; 01494 } 01495 01496 cacheName = entry->get_cachename(); 01497 read_metadata(entry->get_cachename(), headers); 01498 01499 DBG(cerr << "Headers just read from cache: " << endl); 01500 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n"))); 01501 01502 body = open_body(entry->get_cachename()); 01503 01504 DBG(cerr << "Returning: " << url << " from the cache." << endl); 01505 01506 d_http_cache_table->bind_entry_to_data(entry, body); 01507 } 01508 catch (...) { 01509 // Why make this unlock operation conditional on entry? 
01510 if (entry) 01511 unlock_cache_interface(); 01512 if (body != 0) 01513 fclose(body); 01514 throw; 01515 } 01516 01517 unlock_cache_interface(); 01518 01519 return body; 01520 } 01521 01533 FILE * 01534 HTTPCache::get_cached_response(const string &url, vector<string> &headers) 01535 { 01536 string discard_name; 01537 return get_cached_response(url, headers, discard_name); 01538 } 01539 01550 FILE * 01551 HTTPCache::get_cached_response(const string &url) 01552 { 01553 string discard_name; 01554 vector<string> discard_headers; 01555 return get_cached_response(url, discard_headers, discard_name); 01556 } 01557 01570 void 01571 HTTPCache::release_cached_response(FILE *body) 01572 { 01573 lock_cache_interface(); 01574 01575 try { 01576 // fclose(body); This results in a seg fault on linux jhrg 8/27/13 01577 d_http_cache_table->uncouple_entry_from_data(body); 01578 } 01579 catch (...) { 01580 unlock_cache_interface(); 01581 throw; 01582 } 01583 01584 unlock_cache_interface(); 01585 } 01586 01599 void 01600 HTTPCache::purge_cache() 01601 { 01602 lock_cache_interface(); 01603 01604 try { 01605 if (d_http_cache_table->is_locked_read_responses()) 01606 throw Error(internal_error, "Attempt to purge the cache with entries in use."); 01607 01608 d_http_cache_table->delete_all_entries(); 01609 } 01610 catch (...) { 01611 unlock_cache_interface(); 01612 throw; 01613 } 01614 01615 unlock_cache_interface(); 01616 } 01617 01618 } // namespace libdap