1 """Utilities using NDG HTTPS Client, including a main module that can be used to
2 fetch from a URL.
3 """
4 __author__ = "R B Wilkinson"
5 __date__ = "09/12/11"
6 __copyright__ = "(C) 2011 Science and Technology Facilities Council"
7 __license__ = "BSD - see LICENSE file in top-level directory"
8 __contact__ = "Philip.Kershaw@stfc.ac.uk"
9 __revision__ = '$Id$'
10
11 import logging
12 from optparse import OptionParser
13 import os
14 import sys
15
16 if sys.version_info[0] > 2:
17 import http.cookiejar as cookiejar_
18 import http.client as http_client_
19 from urllib.request import Request as Request_
20 from urllib.request import HTTPHandler as HTTPHandler_
21 from urllib.request import HTTPCookieProcessor as HTTPCookieProcessor_
22 from urllib.request import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
23 from urllib.request import HTTPPasswordMgrWithDefaultRealm as \
24 HTTPPasswordMgrWithDefaultRealm_
25 from urllib.request import ProxyHandler as ProxyHandler_
26 from urllib.error import HTTPError as HTTPError_
27 import urllib.parse as urlparse_
28 else:
29 import cookielib as cookiejar_
30 import httplib as http_client_
31 from urllib2 import Request as Request_
32 from urllib2 import HTTPHandler as HTTPHandler_
33 from urllib2 import HTTPCookieProcessor as HTTPCookieProcessor_
34 from urllib2 import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
35 from urllib2 import HTTPPasswordMgrWithDefaultRealm as \
36 HTTPPasswordMgrWithDefaultRealm_
37 from urllib2 import ProxyHandler as ProxyHandler_
38 from urllib2 import HTTPError as HTTPError_
39 import urlparse as urlparse_
40
41 from ndg.httpsclient.urllib2_build_opener import build_opener
42 from ndg.httpsclient.https import HTTPSContextHandler
43 from ndg.httpsclient import ssl_context_util
44
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
46
class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor_):
    """Cookie processor that adds new cookies (instead of replacing the existing
    ones as HTTPCookieProcessor does)
    """

    def http_request(self, request):
        """Processes cookies for a HTTP request.

        The cookie jar's "Cookie" header is computed on a scratch request that
        carries no headers, so it is independent of any "Cookie" header already
        set on ``request``. If ``request`` already has such a header the jar's
        cookies are appended to it, otherwise they are set as the header.

        @param request: request to process
        @type request: urllib2.Request
        @return: request
        @rtype: urllib2.Request
        """
        COOKIE_HEADER_NAME = "Cookie"

        # Scratch copy of the request (same URL, data, origin host and
        # verifiability) with an empty header dict.
        tmp_request = Request_(request.get_full_url(), request.data, {},
                               request.origin_req_host,
                               request.unverifiable)
        self.cookiejar.add_cookie_header(tmp_request)

        new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
        if new_cookies:
            if request.has_header(COOKIE_HEADER_NAME):
                # Merge the jar's cookies with the ones already on the request.
                old_cookies = request.get_header(COOKIE_HEADER_NAME)
                merged_cookies = '; '.join([old_cookies, new_cookies])
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                merged_cookies)
            else:
                # No cookie header yet: use the jar's cookies as they are.
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                new_cookies)
        return request

    # HTTPS requests are processed in the same way as HTTP requests.
    https_request = http_request
79
80
class URLFetchError(Exception):
    """Error fetching content from URL"""
83
84
def fetch_from_url(url, config, data=None, handlers=None):
    """Returns data retrieved from a URL.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data, passed through to open_url
    @type data: str
    @param handlers: custom urllib2 handlers, passed through to open_url
    @type handlers: iterable
    @return: data retrieved from URL
    @raise URLFetchError: if the returned status code is not 200 (OK); the
    exception carries the message returned by open_url
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != http_client_.OK:
        raise URLFetchError(return_message)

    # Close the response even if reading its body fails.
    try:
        return response.read()
    finally:
        response.close()
101
def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
    """Writes data retrieved from a URL to a file.

    The output file is only created or overwritten when the returned status
    code is 200 (OK).

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @param data: HTTP POST data, passed through to open_url
    @type data: str
    @param handlers: custom urllib2 handlers, passed through to open_url
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    success = return_code == http_client_.OK
    if success:
        # Close the response even if reading its body fails.
        try:
            return_data = response.read()
        finally:
            response.close()

        # The response body is raw bytes, so write it in binary mode; the
        # context manager closes the file even if the write fails.
        with open(output_file, "wb") as outfile:
            outfile.write(return_data)

    return return_code, return_message, success
125
126
128 """Returns data retrieved from a URL.
129 @param url: URL to attempt to open
130 @type url: basestring
131 @param config: SSL context configuration
132 @type config: Configuration
133 @param data: HTTP POST data
134 @type data: str
135 @param handlers: list of custom urllib2 handlers to add to the request
136 @type handlers: iterable
137 @return: data retrieved from URL or None
138 @rtype: file derived type
139 """
140 return_code, return_message, response = open_url(url, config, data=data,
141 handlers=handlers)
142 if return_code and return_code == http_client_.OK:
143 return response
144 else:
145 raise URLFetchError(return_message)
146
147
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.

    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the passed collection itself is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object, or None if the request failed)
    """
    debuglevel = 1 if config.debug else 0

    # Test against None rather than truthiness: an empty CookieJar is falsy
    # (it has a length of zero) but a jar supplied by the caller must still be
    # the one that collects the cookies.
    cj = config.cookie
    if cj is None:
        cj = cookiejar_.CookieJar()

    # Cookie processor that appends the jar's cookies to any cookie header
    # already present on a request instead of replacing it.
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    # Work on a copy so that the caller's handler collection is not modified
    # (repeated calls with the same list would otherwise keep growing it).
    handlers = list(handlers) if handlers else []
    handlers.append(cookie_handler)

    if config.debug:
        # Explicit HTTP/HTTPS handlers are only added to switch on debug
        # output for the connections.
        http_handler = HTTPHandler_(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # http_basicauth is a (username, password) pair.
        auth_handler = HTTPBasicAuthHandler_(HTTPPasswordMgrWithDefaultRealm_())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.http_basicauth[0],
                                  passwd=config.http_basicauth[1])
        handlers.append(auth_handler)

    # An empty proxy mapping overrides any proxy settings for hosts that must
    # not go through a proxy; otherwise explicitly configured proxies, if any,
    # are used.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(ProxyHandler_({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(ProxyHandler_(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = Request_(url, data, headers)

    # Defaults reported when the request fails before a response is obtained.
    return_code = 0
    return_message = ''
    response = None

    # The request is opened exactly once, inside the try block, so that every
    # failure is translated into the returned code and message.
    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)

    except HTTPError_ as exc:
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)

    except Exception as exc:
        # Any other failure (e.g. connection or SSL errors) is reported with
        # status code 0 and the exception text as the message.
        return_message = "Error: %s" % exc.__str__()
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())

    return (return_code, return_message, response)
244
245
def _should_use_proxy(url, no_proxy=None):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on the value of the no_proxy environment variable.

    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma-separated host names for which no proxy should be
    used; if None, the value of the no_proxy environment variable is used
    @type no_proxy: basestring
    @return: False if the URL's host name is one of the listed hosts, else True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    hostname = urlparse_.urlparse(_url_as_string(url)).hostname

    # Entries are compared for exact equality with the host name after
    # stripping surrounding whitespace.
    excluded_hosts = [h.strip() for h in no_proxy_effective.split(',')]
    return hostname not in excluded_hosts
263
def _url_as_string(url):
    """Returns the URL string from a URL value that is either a string or
    urllib2.Request.

    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a request object
    """
    if isinstance(url, Request_):
        return url.get_full_url()
    if isinstance(url, str):
        return url
    raise TypeError("Expected type %r or %r" %
                    (str, Request_))
279
280
class Configuration(object):
    """Connection configuration.
    """

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for requests
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar (python 3 - http.cookiejar)
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        # All values are stored unchanged as attributes of the same name.
        self.ssl_context = ssl_context
        self.debug = debug
        self.proxies = proxies
        self.no_proxy = no_proxy
        self.cookie = cookie
        self.http_basicauth = http_basicauth
        self.headers = headers
309
310
def main():
    '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.

    Command line options are parsed from sys.argv. If a POST data file is
    given, its content is sent as the request data. The result is written to
    the output file if one is given, otherwise it is printed.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="basicauth",
                      metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Paths that are not given or do not exist are treated as not set.
    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        # Read the POST data as bytes: Python 3's urllib only accepts bytes as
        # request data. The context manager closes the file on any outcome.
        with open(options.data_file, "rb") as data_file:
            data = data_file.read()
    else:
        data = None

    if options.basicauth:
        # Reject malformed credentials here instead of failing later with an
        # IndexError when the password is looked up.
        if ':' not in options.basicauth:
            parser.error("Basic auth credentials must be of the form "
                         "USER:PASSWD")
        http_basicauth = options.basicauth.split(':', 1)
    else:
        http_basicauth = None

    headers = {}
    for header in options.headers or []:
        # Reject a header without a separator instead of failing with a
        # ValueError traceback when unpacking.
        if ':' not in header:
            parser.error("Header must be of the form 'HEADER: VALUE'")
        key, val = header.split(':', 1)
        headers[key.strip()] = val.lstrip()

    # If a private key file is not specified, the key is assumed to be stored
    # in the certificate file (see the --private-key option help).
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message = fetch_from_url_to_file(
                                                      url,
                                                      config,
                                                      options.output_file,
                                                      data)[:2]
        # NOTE(review): with two arguments SystemExit.code is the
        # (code, message) tuple, so the process exit status is presumably 1
        # even after a successful fetch - confirm the intended exit status.
        raise SystemExit(return_code, return_message)
    else:
        # Send the POST data in this branch too; it used to be dropped when
        # no output file was given.
        data = fetch_from_url(url, config, data=data)
        print(data)
410
411
# Run the command line utility when executed as a script.
if __name__ == '__main__':
    logging.basicConfig()
    main()
415