Package ndg :: Package httpsclient :: Module utils
[hide private]

Source Code for Module ndg.httpsclient.utils

  1  """Utilities using NDG HTTPS Client, including a main module that can be used to 
  2  fetch from a URL. 
  3  """ 
  4  __author__ = "R B Wilkinson" 
  5  __date__ = "09/12/11" 
  6  __copyright__ = "(C) 2011 Science and Technology Facilities Council" 
  7  __license__ = "BSD - see LICENSE file in top-level directory" 
  8  __contact__ = "Philip.Kershaw@stfc.ac.uk" 
  9  __revision__ = '$Id$' 
 10   
 11  import logging 
 12  from optparse import OptionParser 
 13  import os 
 14  import sys 
 15   
 16  if sys.version_info[0] > 2: 
 17      import http.cookiejar as cookiejar_ 
 18      import http.client as http_client_ 
 19      from urllib.request import Request as Request_ 
 20      from urllib.request import HTTPHandler as HTTPHandler_ 
 21      from urllib.request import HTTPCookieProcessor as HTTPCookieProcessor_ 
 22      from urllib.request import HTTPBasicAuthHandler as HTTPBasicAuthHandler_ 
 23      from urllib.request import HTTPPasswordMgrWithDefaultRealm as \ 
 24                                              HTTPPasswordMgrWithDefaultRealm_ 
 25      from urllib.request import ProxyHandler as ProxyHandler_ 
 26      from urllib.error import HTTPError as HTTPError_ 
 27      import urllib.parse as urlparse_ 
 28  else: 
 29      import cookielib as cookiejar_ 
 30      import httplib as http_client_ 
 31      from urllib2 import Request as Request_ 
 32      from urllib2 import HTTPHandler as HTTPHandler_ 
 33      from urllib2 import HTTPCookieProcessor as HTTPCookieProcessor_ 
 34      from urllib2 import HTTPBasicAuthHandler as HTTPBasicAuthHandler_ 
 35      from urllib2 import HTTPPasswordMgrWithDefaultRealm as \ 
 36                                              HTTPPasswordMgrWithDefaultRealm_ 
 37      from urllib2 import ProxyHandler as ProxyHandler_ 
 38      from urllib2 import HTTPError as HTTPError_ 
 39      import urlparse as urlparse_ 
 40   
 41  from ndg.httpsclient.urllib2_build_opener import build_opener 
 42  from ndg.httpsclient.https import HTTPSContextHandler 
 43  from ndg.httpsclient import ssl_context_util 
 44   
 45  log = logging.getLogger(__name__) 
 46   
class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor_):
    """Cookie processor that combines the cookies held in the cookie jar with
    any Cookie header already set on the request, so that both sets are kept
    (the stock HTTPCookieProcessor does not merge the two).
    """

    def http_request(self, request):
        """Adds the cookie jar's cookies to a HTTP request.
        @param request: request to process
        @type request: urllib2.Request
        @return: the same request object, with its Cookie header updated
        @rtype: urllib2.Request
        """
        header_name = "Cookie"

        # Let the cookie jar work out its Cookie header on a scratch request
        # that mirrors the real one but carries no headers of its own.
        scratch = Request_(request.get_full_url(), request.data, {},
                           request.origin_req_host,
                           request.unverifiable)
        self.cookiejar.add_cookie_header(scratch)
        jar_cookies = scratch.get_header(header_name)

        # Nothing to contribute from the jar: leave the request untouched.
        if not jar_cookies:
            return request

        if request.has_header(header_name):
            # Keep the cookies already on the request and append the jar's.
            combined = '; '.join([request.get_header(header_name),
                                  jar_cookies])
        else:
            combined = jar_cookies

        request.add_unredirected_header(header_name, combined)
        return request

    # Process cookies for HTTPS in the same way.
    https_request = http_request
79 80
class URLFetchError(Exception):
    """Raised when content could not be fetched from a URL."""
83 84
def fetch_from_url(url, config, data=None, handlers=None):
    """Returns data retrieved from a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: data retrieved from URL
    @raise URLFetchError: if the returned status code is not 200 (OK)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != http_client_.OK:
        # The response is not handed to the caller, so release it here
        # instead of leaking the connection.
        if response is not None:
            response.close()
        raise URLFetchError(return_message)

    try:
        return response.read()
    finally:
        # Close even if read() fails part-way through.
        response.close()
101
def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
    """Writes data retrieved from a URL to a file.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    success = return_code == http_client_.OK
    if success:
        try:
            return_data = response.read()
        finally:
            response.close()

        # Binary mode: the response body is bytes, which a text-mode file
        # rejects on Python 3 (and which text mode may mangle on Windows).
        with open(output_file, "wb") as outfile:
            outfile.write(return_data)

    return return_code, return_message, success
125 126
def fetch_stream_from_url(url, config, data=None, handlers=None):
    """Opens a URL and hands back the open response for the caller to read.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: response object for the URL
    @rtype: file derived type
    @raise URLFetchError: if the returned status code is not 200 (OK)
    """
    status, message, stream = open_url(url, config, data=data,
                                       handlers=handlers)
    # Anything other than a definite 200 is reported as a failure.
    if not status or status != http_client_.OK:
        raise URLFetchError(message)
    return stream
146 147
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.
    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the iterable is copied, the caller's object is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object, or None if the URL could not be opened)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for URL opener.
    # Compare against None rather than testing truthiness: an empty CookieJar
    # has length 0 and is therefore falsy, and swapping it for a fresh jar
    # would mean the caller's jar never receives the cookies.
    if config.cookie is not None:
        cj = config.cookie
    else:
        cj = cookiejar_.CookieJar()

    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.,
    # urllib2.HTTPCookieProcessor which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    # Work on a private copy so that repeated calls with the same list do not
    # keep growing the caller's list, and so that any iterable is accepted.
    handlers = list(handlers) if handlers else []
    handlers.append(cookie_handler)

    if config.debug:
        http_handler = HTTPHandler_(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # currently only supports http basic auth
        auth_handler = HTTPBasicAuthHandler_(HTTPPasswordMgrWithDefaultRealm_())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.http_basicauth[0],
                                  passwd=config.http_basicauth[1])
        handlers.append(auth_handler)

    # Explicitly remove proxy handling if the host is one listed in the value of
    # the no_proxy environment variable because urllib2 does use proxy settings
    # set via http_proxy and https_proxy, but does not take the no_proxy value
    # into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(ProxyHandler_({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(ProxyHandler_(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = Request_(url, data, headers)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None

    # The request is issued exactly once, inside the try block, so that every
    # failure is converted into a return code and message for the caller.
    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)

    except HTTPError_ as exc:
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)

    except Exception as exc:
        # Deliberate catch-all: callers rely on a return code of 0 plus a
        # message instead of an exception for connection-level failures.
        return_message = "Error: %s" % (exc,)
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())

    return (return_code, return_message, response)
244 245
def _should_use_proxy(url, no_proxy=None):
    """Decides whether a proxy should be used when connecting to a URL.

    The URL's host name is compared with each entry of a comma-separated list
    of hosts; the list is taken from the no_proxy argument or, when that is
    None, from the no_proxy environment variable.
    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma-separated hosts for which no proxy should be used
    @type no_proxy: basestring
    @return: False if the URL's host is listed, otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy = os.environ.get('no_proxy', '')

    hostname = urlparse_.urlparse(_url_as_string(url)).hostname
    excluded_hosts = [entry.strip() for entry in no_proxy.split(',')]

    # Only an exact host name match disables the proxy.
    return not any(hostname == entry for entry in excluded_hosts)
263
def _url_as_string(url):
    """Extracts the URL string from a value that is either already a string
    or a urllib2.Request.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a Request
    """
    if isinstance(url, Request_):
        return url.get_full_url()
    if isinstance(url, str):
        return url
    raise TypeError("Expected type %r or %r" %
                    (str, Request_))
279 280
class Configuration(object):
    """Connection configuration.
    """

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for the connection
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar (python 3 - http.cookiejar)
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        self.ssl_context = ssl_context
        self.debug = debug
        self.proxies = proxies
        self.no_proxy = no_proxy
        self.cookie = cookie
        self.http_basicauth = http_basicauth
        self.headers = headers

    @property
    def httpauth(self):
        """Alias for http_basicauth, for code that looks the credentials up
        under the name httpauth.
        @return: the value of http_basicauth
        """
        return self.http_basicauth

    @httpauth.setter
    def httpauth(self, value):
        # Keep a single source of truth: assigning through the alias updates
        # http_basicauth itself.
        self.http_basicauth = value
309 310
def main():
    '''Utility to fetch data using HTTP or HTTPS from a specified URL.

    The URL is fetched with GET, or with POST when a POST data file is given.
    The content is written to the output file if one is specified, otherwise
    it is printed.  When writing to a file, the process exits with an error
    message and non-zero status if the fetch did not succeed.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="basicauth",
                      metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    def existing_path(path):
        # A path that is unset or does not exist is treated as not given.
        return path if path and os.path.exists(path) else None

    key_file = existing_path(options.key_file)
    cert_file = existing_path(options.cert_file)
    ca_dir = existing_path(options.ca_dir)

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        # Read as bytes: on Python 3 urllib only accepts bytes as POST data.
        with open(options.data_file, "rb") as data_file:
            data = data_file.read()
    else:
        data = None

    if options.basicauth:
        if ':' not in options.basicauth:
            parser.error("--basicauth expects USER:PASSWD")
        http_basicauth = options.basicauth.split(':', 1)
    else:
        http_basicauth = None

    headers = {}
    for header in options.headers or []:
        if ':' not in header:
            parser.error("--header expects 'HEADER: VALUE'")
        key, val = header.split(':', 1)
        headers[key.strip()] = val.lstrip()

    # If a private key file is not specified, the key is assumed to be stored in
    # the certificate file.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message, success = fetch_from_url_to_file(
                                                      url,
                                                      config,
                                                      options.output_file,
                                                      data)
        if not success:
            # A string argument makes the interpreter print the message to
            # stderr and exit with status 1; success falls through and exits
            # with status 0.
            raise SystemExit("%s %s" % (return_code, return_message))
    else:
        # Pass the POST data through here as well, so that --post-data-file
        # is honoured whether or not an output file is given.
        content = fetch_from_url(url, config, data)
        print(content)
# Script entry point: set up default logging, then run the command line tool.
if __name__ == '__main__':
    logging.basicConfig()
    main()