libdap  Updated for version 3.17.0
HTTPCache.cc
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 // #define DODS_DEBUG
00029 // #define DODS_DEBUG2
00030 #undef USE_GETENV
00031 
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>   // for stat
00035 #include <sys/types.h>  // for stat and mkdir
00036 #include <sys/stat.h>
00037 
00038 #include <cstring>
00039 #include <cerrno>
00040 
00041 #include <iostream>
00042 #include <sstream>
00043 #include <algorithm>
00044 #include <iterator>
00045 #include <set>
00046 
00047 #include "Error.h"
00048 #include "InternalErr.h"
00049 #include "ResponseTooBigErr.h"
00050 #ifndef WIN32
00051 #include "SignalHandler.h"
00052 #endif
00053 #include "HTTPCacheInterruptHandler.h"
00054 #include "HTTPCacheTable.h"
00055 #include "HTTPCache.h"
00056 #include "HTTPCacheMacros.h"
00057 #include "SignalHandlerRegisteredErr.h"
00058 
00059 #include "util_mit.h"
00060 #include "debug.h"
00061 
00062 using namespace std;
00063 
00064 namespace libdap {
00065 
00066 HTTPCache *HTTPCache::_instance = 0;
00067 
00068 // instance_mutex is used to ensure that only one instance is created.
00069 // That is, it protects the body of the HTTPCache::instance() method. This
00070 // mutex is initialized from within the static function once_init_routine(),
00071 // and that function is run via pthread_once() using the once-control
00072 // variable once_block, so the initialization happens exactly once. All of
00073 // this ensures that no matter how many threads call the instance() method,
00074 // only one instance is ever made.
00075 static pthread_mutex_t instance_mutex;
00076 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
00077 
00078 
00079 #define NO_LM_EXPIRATION 24*3600 // 24 hours
00080 
00081 #define DUMP_FREQUENCY 10 // Dump index every x loads
00082 
00083 #define MEGA 0x100000L
00084 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
00085 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
00086 #define CACHE_GC_PCT 10  // 10% of cache size free after GC
00087 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
00088 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
00089 
00090 static void
00091 once_init_routine()
00092 {
00093     int status;
00094     status = INIT(&instance_mutex);
00095 
00096     if (status != 0)
00097         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00098 }
00099 
00128 HTTPCache *
00129 HTTPCache::instance(const string &cache_root, bool force)
00130 {
00131     int status = pthread_once(&once_block, once_init_routine);
00132     if (status != 0)
00133         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00134 
00135     LOCK(&instance_mutex);
00136 
00137     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
00138 
00139     try {
00140         if (!_instance) {
00141             _instance = new HTTPCache(cache_root, force);
00142 
00143             DBG(cerr << "New instance: " << _instance << ", cache root: "
00144                 << _instance->d_cache_root << endl);
00145 
00146             atexit(delete_instance);
00147 
00148 #ifndef WIN32
00149             // Register the interrupt handler. If we've already registered
00150             // one, barf. If this becomes a problem, hack SignalHandler so
00151             // that we can chain these handlers... 02/10/04 jhrg
00152             //
00153             // Technically we're leaking memory here. However, since this
00154             // class is a singleton, we know that only three objects will
00155             // ever be created and they will all exist until the process
00156             // exits. We can let this slide... 02/12/04 jhrg
00157             EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
00158             if (old_eh) {
00159                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00160                 throw SignalHandlerRegisteredErr(
00161                     "Could not register event handler for SIGINT without superseding an existing one.");
00162             }
00163 
00164             old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
00165             if (old_eh) {
00166                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00167                 throw SignalHandlerRegisteredErr(
00168                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00169             }
00170 
00171             old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
00172             if (old_eh) {
00173                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00174                 throw SignalHandlerRegisteredErr(
00175                     "Could not register event handler for SIGTERM without superseding an existing one.");
00176             }
00177 #endif
00178         }
00179     }
00180     catch (...) {
00181         DBG2(cerr << "The constructor threw an Error!" << endl);
00182         UNLOCK(&instance_mutex);
00183         throw;
00184     }
00185 
00186     UNLOCK(&instance_mutex);
00187     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00188 
00189     return _instance;
00190 }
00191 
00195 void
00196 HTTPCache::delete_instance()
00197 {
00198     DBG(cerr << "Entering delete_instance()..." << endl);
00199 
00200     if (HTTPCache::_instance) {
00201         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00202         delete HTTPCache::_instance;
00203         HTTPCache::_instance = 0;
00204 
00205         //Now remove the signal handlers
00206         delete SignalHandler::instance()->remove_handler(SIGINT);
00207         delete SignalHandler::instance()->remove_handler(SIGPIPE);
00208         delete SignalHandler::instance()->remove_handler(SIGTERM);
00209     }
00210 
00211     DBG(cerr << "Exiting delete_instance()" << endl);
00212 }
00213 
00228 HTTPCache::HTTPCache(string cache_root, bool force) :
00229         d_locked_open_file(0),
00230         d_cache_enabled(false),
00231         d_cache_protected(false),
00232 
00233         d_cache_disconnected(DISCONNECT_NONE),
00234 
00235         d_expire_ignored(false),
00236         d_always_validate(false),
00237         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00238         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
00239         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
00240         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00241         d_default_expiration(NO_LM_EXPIRATION),
00242         d_max_age(-1),
00243         d_max_stale(-1),
00244         d_min_fresh(-1),
00245         d_http_cache_table(0)
00246 {
00247     DBG(cerr << "Entering the constructor for " << this << "... ");
00248 #if 0
00249         int status = pthread_once(&once_block, once_init_routine);
00250         if (status != 0)
00251                 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00252 #endif
00253         INIT(&d_cache_mutex);
00254 
00255         // This used to throw an Error object if we could not get the
00256         // single user lock. However, that results in an invalid object. It's
00257         // better to have an instance that has default values. If we cannot get
00258         // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00259         //
00260         // I fixed this block so that the cache root is set before we try to get
00261         // the single user lock. That was the fix for bug #661. To make that
00262         // work, I had to move the call to create_cache_root out of
00263         // set_cache_root(). 09/08/03 jhrg
00264 
00265         set_cache_root(cache_root);
00266         int block_size;
00267 
00268         if (!get_single_user_lock(force))
00269             throw Error(internal_error, "Could not get single user lock for the cache");
00270 
00271 #ifdef WIN32
00272         //  Windows is unable to provide us this information.  4096 appears
00273         //  a best guess.  It is likely to be in the range [2048, 8192] on
00274         //  windows, but will the level of truth of that statement vary over
00275         //  time ?
00276         block_size = 4096;
00277 #else
00278         struct stat s;
00279         if (stat(cache_root.c_str(), &s) == 0)
00280                 block_size = s.st_blksize;
00281         else
00282                 throw Error(internal_error, "Could not set file system block size.");
00283 #endif
00284         d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
00285         d_cache_enabled = true;
00286 
00287         DBGN(cerr << "exiting" << endl);
00288 }
00289 
00302 HTTPCache::~HTTPCache()
00303 {
00304     DBG(cerr << "Entering the destructor for " << this << "... ");
00305 
00306     try {
00307         if (startGC())
00308             perform_garbage_collection();
00309 
00310         d_http_cache_table->cache_index_write();
00311     }
00312     catch (Error &e) {
00313         // If the cache index cannot be written, we've got problems. However,
00314         // unless we're debugging, still free up the cache table in memory.
00315         // How should we let users know they cache index is not being
00316         // written?? 10/03/02 jhrg
00317         DBG(cerr << e.get_error_message() << endl);
00318     }
00319 
00320     delete d_http_cache_table;
00321 
00322     release_single_user_lock();
00323 
00324     DBGN(cerr << "exiting destructor." << endl);
00325     DESTROY(&d_cache_mutex);
00326 }
00327 
00328 
00332 
00336 bool
00337 HTTPCache::stopGC() const
00338 {
00339     return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
00340 }
00341 
00348 bool
00349 HTTPCache::startGC() const
00350 {
00351     DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
00352     return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
00353 }
00354 
00369 void
00370 HTTPCache::perform_garbage_collection()
00371 {
00372     DBG(cerr << "Performing garbage collection" << endl);
00373 
00374     // Remove all the expired responses.
00375     expired_gc();
00376 
00377     // Remove entries larger than max_entry_size.
00378     too_big_gc();
00379 
00380     // Remove entries starting with zero hits, 1, ..., until stopGC()
00381     // returns true.
00382     hits_gc();
00383 }
00384 
00390 void
00391 HTTPCache::expired_gc()
00392 {
00393     if (!d_expire_ignored) {
00394         d_http_cache_table->delete_expired_entries();
00395     }
00396 }
00397 
00414 void
00415 HTTPCache::hits_gc()
00416 {
00417     int hits = 0;
00418 
00419     if (startGC()) {
00420                 while (!stopGC()) {
00421                         d_http_cache_table->delete_by_hits(hits);
00422                         hits++;
00423                 }
00424         }
00425 }
00426 
00431 void HTTPCache::too_big_gc() {
00432         if (startGC())
00433                 d_http_cache_table->delete_by_size(d_max_entry_size);
00434 }
00435 
00437 
00448 bool HTTPCache::get_single_user_lock(bool force) 
00449 {
00450     if (!d_locked_open_file) {
00451         FILE * fp = NULL;
00452 
00453         try {
00454             // It's OK to call create_cache_root if the directory already
00455             // exists.
00456             create_cache_root(d_cache_root);
00457         }
00458         catch (Error &e) {
00459             // We need to catch and return false because this method is
00460             // called from a ctor and throwing at this point will result in a
00461             // partially constructed object. 01/22/04 jhrg
00462             DBG(cerr << "Failure to create the cache root" << endl);
00463             return false;
00464         }
00465 
00466         // Try to read the lock file. If we can open for reading, it exists.
00467         string lock = d_cache_root + CACHE_LOCK;
00468         if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00469             int res = fclose(fp);
00470             if (res) {
00471                 DBG(cerr << "Failed to close " << (void *)fp << endl);
00472             }
00473             if (force)
00474                 REMOVE(lock.c_str());
00475             else
00476                 return false;
00477         }
00478 
00479         if ((fp = fopen(lock.c_str(), "w")) == NULL) {
00480             DBG(cerr << "Could not open for write access" << endl);
00481             return false;
00482         }
00483 
00484         d_locked_open_file = fp;
00485         return true;
00486     }
00487 
00488     DBG(cerr << "locked_open_file is true" << endl);
00489     return false;
00490 }
00491 
00494 void
00495 HTTPCache::release_single_user_lock()
00496 {
00497     if (d_locked_open_file) {
00498         int res = fclose(d_locked_open_file);
00499         if (res) {
00500             DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
00501         }
00502         d_locked_open_file = 0;
00503     }
00504 
00505     string lock = d_cache_root + CACHE_LOCK;
00506     REMOVE(lock.c_str());
00507 }
00508 
00511 
00515 string
00516 HTTPCache::get_cache_root() const
00517 {
00518     return d_cache_root;
00519 }
00520 
00521 
00530 void
00531 HTTPCache::create_cache_root(const string &cache_root)
00532 {
00533 #ifdef WIN32
00534     string::size_type cur = cache_root[1] == ':' ? 3 : 1;
00535     typedef int mode_t;
00536 
00537     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00538         string dir = cache_root.substr(0, cur);
00539         struct stat stat_info;
00540         if (stat(dir.c_str(), &stat_info) == -1) {
00541             DBG2(cerr << "Cache....... Creating " << dir << endl);
00542             mode_t mask = UMASK(0);
00543             if (MKDIR(dir.c_str(), 0777) < 0) {
00544                 DBG2(cerr << "Error: can't create." << endl);
00545                 UMASK(mask);
00546                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00547             }
00548             UMASK(mask);
00549         }
00550         else {
00551             DBG2(cerr << "Cache....... Found " << dir << endl);
00552         }
00553         cur++;
00554     }
00555 #else
00556     // OSX and Linux
00557 
00558     // Save the mask
00559     mode_t mask = umask(0);
00560 
00561     // Ignore the error if the directory exists
00562     errno = 0;
00563     if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
00564         umask(mask);
00565         throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
00566     }
00567 
00568     // Restore themask
00569     umask(mask);
00570 
00571 #endif
00572 }
00573 
00588 void
00589 HTTPCache::set_cache_root(const string &root)
00590 {
00591     if (root != "") {
00592         d_cache_root = root;
00593         // cache root should end in /.
00594         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00595             d_cache_root += DIR_SEPARATOR_CHAR;
00596     }
00597     else {
00598         // If no cache root has been indicated then look for a suitable
00599         // location.
00600 #ifdef USE_GETENV
00601         char * cr = (char *) getenv("DODS_CACHE");
00602         if (!cr) cr = (char *) getenv("TMP");
00603         if (!cr) cr = (char *) getenv("TEMP");
00604         if (!cr) cr = (char*)CACHE_LOCATION;
00605         d_cache_root = cr;
00606 #else
00607         d_cache_root = CACHE_LOCATION;
00608 #endif
00609 
00610         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00611             d_cache_root += DIR_SEPARATOR_CHAR;
00612 
00613         d_cache_root += CACHE_ROOT;
00614     }
00615 
00616     // Test d_hhtp_cache_table because this method can be called before that
00617     // instance is created and also can be called later to change the cache
00618     // root. jhrg 05.14.08
00619     if (d_http_cache_table)
00620         d_http_cache_table->set_cache_root(d_cache_root);
00621 }
00622 
00634 void
00635 HTTPCache::set_cache_enabled(bool mode)
00636 {
00637     lock_cache_interface();
00638 
00639     d_cache_enabled = mode;
00640 
00641     unlock_cache_interface();
00642 }
00643 
00646 bool
00647 HTTPCache::is_cache_enabled() const
00648 {
00649     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
00650          << endl);
00651     return d_cache_enabled;
00652 }
00653 
00663 void
00664 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
00665 {
00666     lock_cache_interface();
00667 
00668     d_cache_disconnected = mode;
00669 
00670     unlock_cache_interface();
00671 }
00672 
00675 CacheDisconnectedMode
00676 HTTPCache::get_cache_disconnected() const
00677 {
00678     return d_cache_disconnected;
00679 }
00680 
00689 void
00690 HTTPCache::set_expire_ignored(bool mode)
00691 {
00692     lock_cache_interface();
00693 
00694     d_expire_ignored = mode;
00695 
00696     unlock_cache_interface();
00697 }
00698 
00699 /* Is the cache ignoring Expires headers returned with responses that have
00700    been cached? */
00701 
00702 bool
00703 HTTPCache::is_expire_ignored() const
00704 {
00705     return d_expire_ignored;
00706 }
00707 
00723 void
00724 HTTPCache::set_max_size(unsigned long size)
00725 {
00726     lock_cache_interface();
00727 
00728     try {
00729         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
00730                                  MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
00731         unsigned long old_size = d_total_size;
00732         d_total_size = new_size;
00733         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
00734         d_gc_buffer = d_total_size / CACHE_GC_PCT;
00735 
00736         if (new_size < old_size && startGC()) {
00737             perform_garbage_collection();
00738             d_http_cache_table->cache_index_write();
00739         }
00740     }
00741     catch (...) {
00742         unlock_cache_interface();
00743         DBGN(cerr << "Unlocking interface." << endl);
00744         throw;
00745     }
00746 
00747     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
00748          << " with " << d_folder_size
00749          << " bytes for meta information and folders and at least "
00750          << d_gc_buffer << " bytes free after every gc" << endl);
00751 
00752     unlock_cache_interface();
00753 }
00754 
00757 unsigned long
00758 HTTPCache::get_max_size() const
00759 {
00760     return d_total_size / MEGA;
00761 }
00762 
00771 void
00772 HTTPCache::set_max_entry_size(unsigned long size)
00773 {
00774     lock_cache_interface();
00775 
00776     try {
00777         unsigned long new_size = size * MEGA;
00778         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
00779             unsigned long old_size = d_max_entry_size;
00780             d_max_entry_size = new_size;
00781             if (new_size < old_size && startGC()) {
00782                 perform_garbage_collection();
00783                 d_http_cache_table->cache_index_write();
00784             }
00785         }
00786     }
00787     catch (...) {
00788         unlock_cache_interface();
00789         throw;
00790     }
00791 
00792     DBG2(cerr << "Cache...... Max entry cache size is "
00793          << d_max_entry_size << endl);
00794 
00795     unlock_cache_interface();
00796 }
00797 
00802 unsigned long
00803 HTTPCache::get_max_entry_size() const
00804 {
00805     return d_max_entry_size / MEGA;
00806 }
00807 
00818 void
00819 HTTPCache::set_default_expiration(const int exp_time)
00820 {
00821     lock_cache_interface();
00822 
00823     d_default_expiration = exp_time;
00824 
00825     unlock_cache_interface();
00826 }
00827 
00830 int
00831 HTTPCache::get_default_expiration() const
00832 {
00833     return d_default_expiration;
00834 }
00835 
00840 void
00841 HTTPCache::set_always_validate(bool validate)
00842 {
00843     d_always_validate = validate;
00844 }
00845 
00849 bool
00850 HTTPCache::get_always_validate() const
00851 {
00852     return d_always_validate;
00853 }
00854 
00871 void
00872 HTTPCache::set_cache_control(const vector<string> &cc)
00873 {
00874     lock_cache_interface();
00875 
00876     try {
00877         d_cache_control = cc;
00878 
00879         vector<string>::const_iterator i;
00880         for (i = cc.begin(); i != cc.end(); ++i) {
00881             string header = (*i).substr(0, (*i).find(':'));
00882             string value = (*i).substr((*i).find(": ") + 2);
00883             if (header != "Cache-Control") {
00884                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
00885             }
00886             else {
00887                 if (value == "no-cache" || value == "no-store")
00888                     d_cache_enabled = false;
00889                 else if (value.find("max-age") != string::npos) {
00890                     string max_age = value.substr(value.find("=" + 1));
00891                     d_max_age = parse_time(max_age.c_str());
00892                 }
00893                 else if (value == "max-stale")
00894                     d_max_stale = 0; // indicates will take anything;
00895                 else if (value.find("max-stale") != string::npos) {
00896                     string max_stale = value.substr(value.find("=" + 1));
00897                     d_max_stale = parse_time(max_stale.c_str());
00898                 }
00899                 else if (value.find("min-fresh") != string::npos) {
00900                     string min_fresh = value.substr(value.find("=" + 1));
00901                     d_min_fresh = parse_time(min_fresh.c_str());
00902                 }
00903             }
00904         }
00905     }
00906     catch (...) {
00907         unlock_cache_interface();
00908         throw;
00909     }
00910 
00911     unlock_cache_interface();
00912 }
00913 
00914 
00919 vector<string>
00920 HTTPCache::get_cache_control()
00921 {
00922     return d_cache_control;
00923 }
00924 
00926 
00935 bool
00936 HTTPCache::is_url_in_cache(const string &url)
00937 {
00938     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
00939 
00940     HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
00941     bool status = entry != 0;
00942     if (entry) {
00943         entry->unlock_read_response();
00944     }
00945     return  status;
00946 }
00947 
/** Does a header line mention one of the hop-by-hop header names?

    The test is a case-sensitive substring search over the whole line, so a
    name that appears anywhere in the line (including in the value) counts.

    @param header A header line.
    @return True if the line contains "Connection", "Keep-Alive",
    "Proxy-Authenticate", "Proxy-Authorization", "Transfer-Encoding" or
    "Upgrade". */
bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != std::string::npos)
            return true;
    }

    return false;
}
00963 
00975 void
00976 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
00977 {
00978     string fname = cachename + CACHE_META;
00979     d_open_files.push_back(fname);
00980 
00981     FILE *dest = fopen(fname.c_str(), "w");
00982     if (!dest) {
00983         throw InternalErr(__FILE__, __LINE__,
00984                           "Could not open named cache entry file.");
00985     }
00986 
00987     vector<string>::const_iterator i;
00988     for (i = headers.begin(); i != headers.end(); ++i) {
00989         if (!is_hop_by_hop_header(*i)) {
00990             int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
00991             if (s != 1) {
00992                 fclose(dest);
00993                 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
00994             }
00995             s = fwrite("\n", 1, 1, dest);
00996             if (s != 1) {
00997                 fclose(dest);
00998                 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
00999             }
01000         }
01001     }
01002 
01003     int res = fclose(dest);
01004     if (res) {
01005         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
01006             << dest << endl);
01007     }
01008 
01009     d_open_files.pop_back();
01010 }
01011 
01022 void
01023 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01024 {
01025     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01026     if (!md) {
01027         throw InternalErr(__FILE__, __LINE__,
01028                           "Could not open named cache entry meta data file.");
01029     }
01030 
01031     char line[1024];
01032     while (!feof(md) && fgets(line, 1024, md)) {
01033         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
01034         headers.push_back(string(line));
01035     }
01036 
01037     int res = fclose(md);
01038     if (res) {
01039         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01040             << md << endl);
01041     }
01042 }
01043 
01065 int
01066 HTTPCache::write_body(const string &cachename, const FILE *src)
01067 {
01068     d_open_files.push_back(cachename);
01069 
01070     FILE *dest = fopen(cachename.c_str(), "wb");
01071     if (!dest) {
01072         throw InternalErr(__FILE__, __LINE__,
01073                           "Could not open named cache entry file.");
01074     }
01075 
01076     // Read and write in 1k blocks; an attempt at doing this efficiently.
01077     // 09/30/02 jhrg
01078     char line[1024];
01079     size_t n;
01080     int total = 0;
01081     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01082         total += fwrite(line, 1, n, dest);
01083         DBG2(sleep(3));
01084     }
01085 
01086     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01087         int res = fclose(dest);
01088         res = res & unlink(cachename.c_str());
01089         if (res) {
01090             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01091                 << dest << endl);
01092         }
01093         throw InternalErr(__FILE__, __LINE__,
01094                           "I/O error transferring data to the cache.");
01095     }
01096 
01097     rewind(const_cast<FILE *>(src));
01098 
01099     int res = fclose(dest);
01100     if (res) {
01101         DBG(cerr << "HTTPCache::write_body - Failed to close "
01102             << dest << endl);
01103     }
01104 
01105     d_open_files.pop_back();
01106 
01107     return total;
01108 }
01109 
01118 FILE *
01119 HTTPCache::open_body(const string &cachename)
01120 {
01121     DBG(cerr << "cachename: " << cachename << endl);
01122 
01123     FILE *src = fopen(cachename.c_str(), "rb"); // Read only
01124     if (!src)
01125         throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
01126 
01127     return src;
01128 }
01129 
01155 bool
01156 HTTPCache::cache_response(const string &url, time_t request_time,
01157                           const vector<string> &headers, const FILE *body)
01158 {
01159     lock_cache_interface();
01160 
01161     DBG(cerr << "Caching url: " << url << "." << endl);
01162 
01163     try {
01164         // If this is not an http or https URL, don't cache.
01165         if (url.find("http:") == string::npos &&
01166             url.find("https:") == string::npos) {
01167             unlock_cache_interface();
01168             return false;
01169         }
01170 
01171         // This does nothing if url is not already in the cache. It's
01172         // more efficient to do this than to first check and see if the entry
01173         // exists. 10/10/02 jhrg
01174         d_http_cache_table->remove_entry_from_cache_table(url);
01175 
01176         HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
01177         entry->lock_write_response();
01178 
01179         try {
01180             d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
01181             if (entry->is_no_cache()) {
01182                 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
01183                     << "(" << url << ")" << endl);
01184                 entry->unlock_write_response();
01185                 delete entry; entry = 0;
01186                 unlock_cache_interface();
01187                 return false;
01188             }
01189 
01190             // corrected_initial_age, freshness_lifetime, response_time.
01191             d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01192 
01193             d_http_cache_table->create_location(entry); // cachename, cache_body_fd
01194             // move these write function to cache table
01195             entry->set_size(write_body(entry->get_cachename(), body));
01196             write_metadata(entry->get_cachename(), headers);
01197             d_http_cache_table->add_entry_to_cache_table(entry);
01198             entry->unlock_write_response();
01199         }
01200         catch (ResponseTooBigErr &e) {
01201             // Oops. Bummer. Clean up and exit.
01202             DBG(cerr << e.get_error_message() << endl);
01203             REMOVE(entry->get_cachename().c_str());
01204             REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
01205             DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
01206                 << ")" << endl);
01207             entry->unlock_write_response();
01208             delete entry; entry = 0;
01209             unlock_cache_interface();
01210             return false;
01211         }
01212 
01213         if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
01214             if (startGC())
01215                 perform_garbage_collection();
01216 
01217             d_http_cache_table->cache_index_write(); // resets new_entries
01218         }
01219     }
01220     catch (...) {
01221         unlock_cache_interface();
01222         throw;
01223     }
01224 
01225     unlock_cache_interface();
01226 
01227     return true;
01228 }
01229 
01248 vector<string>
01249 HTTPCache::get_conditional_request_headers(const string &url)
01250 {
01251     lock_cache_interface();
01252 
01253     HTTPCacheTable::CacheEntry *entry = 0;
01254     vector<string> headers;
01255 
01256     DBG(cerr << "Getting conditional request headers for " << url << endl);
01257 
01258     try {
01259         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01260         if (!entry)
01261             throw Error(internal_error, "There is no cache entry for the URL: " + url);
01262 
01263         if (entry->get_etag() != "")
01264             headers.push_back(string("If-None-Match: ") + entry->get_etag());
01265 
01266         if (entry->get_lm() > 0) {
01267                 time_t lm = entry->get_lm();
01268             headers.push_back(string("If-Modified-Since: ")
01269                               + date_time_str(&lm));
01270         }
01271         else if (entry->get_max_age() > 0) {
01272                 time_t max_age = entry->get_max_age();
01273             headers.push_back(string("If-Modified-Since: ")
01274                               + date_time_str(&max_age));
01275         }
01276         else if (entry->get_expires() > 0) {
01277                 time_t expires = entry->get_expires();
01278             headers.push_back(string("If-Modified-Since: ")
01279                               + date_time_str(&expires));
01280         }
01281         entry->unlock_read_response();
01282         unlock_cache_interface();
01283     }
01284     catch (...) {
01285         unlock_cache_interface();
01286         if (entry) {
01287             entry->unlock_read_response();
01288         }
01289         throw;
01290     }
01291 
01292     return headers;
01293 }
01294 
/**
 * Ordering functor for HTTP header lines that compares only the header name,
 * i.e. the text before the first ':' (the whole line when there is no ':').
 * Two lines with the same name but different values are therefore equivalent,
 * which lets a set<> keyed on this functor keep one line per header name.
 *
 * The functor no longer derives from std::binary_function (deprecated in
 * C++11, removed in C++17); the nested typedefs that base class supplied are
 * declared directly so any code relying on them keeps working.
 */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // find() yields npos when there is no ':'; compare() clamps that
        // count to the end of the string, so the whole line is used. The
        // name prefixes are compared in place, without temporary substrings.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
01304 
01318 void
01319 HTTPCache::update_response(const string &url, time_t request_time,
01320                            const vector<string> &headers)
01321 {
01322     lock_cache_interface();
01323 
01324     HTTPCacheTable::CacheEntry *entry = 0;
01325     DBG(cerr << "Updating the response headers for: " << url << endl);
01326 
01327     try {
01328         entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
01329         if (!entry)
01330             throw Error(internal_error, "There is no cache entry for the URL: " + url);
01331 
01332         // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
01333         d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
01334 
01335         // Update corrected_initial_age, freshness_lifetime, response_time.
01336         d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01337 
01338         // Merge the new headers with those in the persistent store. How:
01339         // Load the new headers into a set, then merge the old headers. Since
01340         // set<> ignores duplicates, old headers with the same name as a new
01341         // header will got into the bit bucket. Define a special compare
01342         // functor to make sure that headers are compared using only their
01343         // name and not their value too.
01344         set<string, HeaderLess> merged_headers;
01345 
01346         // Load in the new headers
01347         copy(headers.begin(), headers.end(),
01348              inserter(merged_headers, merged_headers.begin()));
01349 
01350         // Get the old headers and load them in.
01351         vector<string> old_headers;
01352         read_metadata(entry->get_cachename(), old_headers);
01353         copy(old_headers.begin(), old_headers.end(),
01354              inserter(merged_headers, merged_headers.begin()));
01355 
01356         // Read the values back out. Use reverse iterators with back_inserter
01357         // to preserve header order. NB: vector<> does not support push_front
01358         // so we can't use front_inserter(). 01/09/03 jhrg
01359         vector<string> result;
01360         copy(merged_headers.rbegin(), merged_headers.rend(),
01361              back_inserter(result));
01362 
01363         write_metadata(entry->get_cachename(), result);
01364         entry->unlock_write_response();
01365         unlock_cache_interface();
01366     }
01367     catch (...) {
01368         if (entry) {
01369             entry->unlock_read_response();
01370         }
01371         unlock_cache_interface();
01372         throw;
01373     }
01374 }
01375 
01387 bool
01388 HTTPCache::is_url_valid(const string &url)
01389 {
01390     lock_cache_interface();
01391 
01392     bool freshness;
01393     HTTPCacheTable::CacheEntry *entry = 0;
01394 
01395     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
01396 
01397     try {
01398         if (d_always_validate) {
01399             unlock_cache_interface();
01400             return false;  // force re-validation.
01401         }
01402 
01403         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01404         if (!entry)
01405             throw Error(internal_error, "There is no cache entry for the URL: " + url);
01406 
01407         // If we supported range requests, we'd need code here to check if
01408         // there was only a partial response in the cache. 10/02/02 jhrg
01409 
01410         // In case this entry is of type "must-revalidate" then we consider it
01411         // invalid.
01412         if (entry->get_must_revalidate()) {
01413             entry->unlock_read_response();
01414             unlock_cache_interface();
01415             return false;
01416         }
01417 
01418         time_t resident_time = time(NULL) - entry->get_response_time();
01419         time_t current_age = entry->get_corrected_initial_age() + resident_time;
01420 
01421         // Check that the max-age, max-stale, and min-fresh directives
01422         // given in the request cache control header is followed.
01423         if (d_max_age >= 0 && current_age > d_max_age) {
01424             DBG(cerr << "Cache....... Max-age validation" << endl);
01425             entry->unlock_read_response();
01426             unlock_cache_interface();
01427             return false;
01428         }
01429         if (d_min_fresh >= 0
01430             && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
01431             DBG(cerr << "Cache....... Min-fresh validation" << endl);
01432             entry->unlock_read_response();
01433             unlock_cache_interface();
01434             return false;
01435         }
01436 
01437         freshness = (entry->get_freshness_lifetime()
01438                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
01439         entry->unlock_read_response();
01440         unlock_cache_interface();
01441     }
01442     catch (...) {
01443         if (entry) {
01444             entry->unlock_read_response();
01445         }
01446         unlock_cache_interface();
01447         throw;
01448     }
01449 
01450     return freshness;
01451 }
01452 
01480 FILE * HTTPCache::get_cached_response(const string &url,
01481                 vector<string> &headers, string &cacheName) {
01482     lock_cache_interface();
01483 
01484     FILE *body = 0;
01485     HTTPCacheTable::CacheEntry *entry = 0;
01486 
01487     DBG(cerr << "Getting the cached response for " << url << endl);
01488 
01489     try {
01490         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01491         if (!entry) {
01492                 unlock_cache_interface();
01493                 return 0;
01494         }
01495 
01496         cacheName = entry->get_cachename();
01497         read_metadata(entry->get_cachename(), headers);
01498 
01499         DBG(cerr << "Headers just read from cache: " << endl);
01500         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
01501 
01502         body = open_body(entry->get_cachename());
01503 
01504         DBG(cerr << "Returning: " << url << " from the cache." << endl);
01505 
01506         d_http_cache_table->bind_entry_to_data(entry, body);
01507     }
01508     catch (...) {
01509         // Why make this unlock operation conditional on entry?
01510         if (entry)
01511                 unlock_cache_interface();
01512         if (body != 0)
01513             fclose(body);
01514         throw;
01515     }
01516 
01517     unlock_cache_interface();
01518 
01519     return body;
01520 }
01521 
01533 FILE *
01534 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
01535 {
01536         string discard_name;
01537         return get_cached_response(url, headers, discard_name);
01538 }
01539 
01550 FILE *
01551 HTTPCache::get_cached_response(const string &url)
01552 {
01553         string discard_name;
01554         vector<string> discard_headers;
01555         return get_cached_response(url, discard_headers, discard_name);
01556 }
01557 
01570 void
01571 HTTPCache::release_cached_response(FILE *body)
01572 {
01573     lock_cache_interface();
01574 
01575     try {
01576         // fclose(body); This results in a seg fault on linux jhrg 8/27/13
01577         d_http_cache_table->uncouple_entry_from_data(body);
01578     }
01579     catch (...) {
01580         unlock_cache_interface();
01581         throw;
01582     }
01583 
01584     unlock_cache_interface();
01585 }
01586 
01599 void
01600 HTTPCache::purge_cache()
01601 {
01602     lock_cache_interface();
01603 
01604     try {
01605         if (d_http_cache_table->is_locked_read_responses())
01606             throw Error(internal_error, "Attempt to purge the cache with entries in use.");
01607 
01608         d_http_cache_table->delete_all_entries();
01609     }
01610     catch (...) {
01611         unlock_cache_interface();
01612         throw;
01613     }
01614 
01615     unlock_cache_interface();
01616 }
01617 
01618 } // namespace libdap