Package grizzled :: Package file :: Module includer
[hide private]
[frames | no frames]

Source Code for Module grizzled.file.includer

  1  #!/usr/bin/env python 
  2   
  3  # NOTE: Documentation is intended to be processed by epydoc and contains 
  4  # epydoc markup. 
  5   
  6  ''' 
  7  Introduction 
  8  ============ 
  9   
 10  The ``grizzled.file.includer`` module contains a class that can be used to 
 11  process includes within a text file, returning a file-like object. It also 
 12  contains some utility functions that permit using include-enabled files in 
 13  other contexts. 
 14   
 15  Include Syntax 
 16  ============== 
 17   
 18  The *include* syntax is defined by a regular expression; any line that matches 
 19  the regular expression is treated as an *include* directive. The default 
 20  regular expression matches include directives like this:: 
 21   
 22      %include "/absolute/path/to/file" 
 23      %include "../relative/path/to/file" 
 24      %include "local_reference" 
 25      %include "http://localhost/path/to/my.config" 
 26   
 27  Relative and local file references are relative to the including file or URL. 
 28  That is, if an ``Includer`` is processing file "/home/bmc/foo.txt" and encounters 
 29  an attempt to include file "bar.txt", it will assume "bar.txt" is to be found 
 30  in "/home/bmc". 
 31   
 32  Similarly, if an ``Includer`` is processing URL "http://localhost/bmc/foo.txt" 
 33  and encounters an attempt to include file "bar.txt", it will assume "bar.txt" 
 34  is to be found at "http://localhost/bmc/bar.txt". 
 35   
 36  Nested includes are permitted; that is, an included file may, itself, include 
 37  other files. The maximum recursion level is configurable and defaults to 100. 
 38   
 39  The include syntax can be changed by passing a different regular expression to 
 40  the ``Includer`` class constructor. 
 41   
 42  Usage 
 43  ===== 
 44   
 45  This module provides an ``Includer`` class, which processes include directives 
 46  in a file and behaves like a file-like object. See the class documentation for 
 47  more details. 
 48   
 49  The module also provides a ``preprocess()`` convenience function that can be 
 50  used to preprocess a file; it returns the path to the resulting preprocessed 
 51  file. 
 52   
 53  Examples 
 54  ======== 
 55   
 56  Preprocess a file containing include directives, then read the result: 
 57   
 58  .. python:: 
 59   
 60      import includer 
 61      import sys 
 62   
 63      inc = includer.Includer(path) 
 64      for line in inc: 
 65          sys.stdout.write(line) 
 66   
 67   
 68  Use an include-enabled file with the standard Python logging module: 
 69   
 70  .. python:: 
 71   
 72      import logging 
 73      import includer 
 74   
 75      logging.fileConfig(includer.preprocess("mylog.cfg")) 
 76   
 77  ''' 
 78   
 79  __docformat__ = "restructuredtext en" 
 80  __all__ = ['Includer', 'IncludeError', 'preprocess'] 
 81   
 82  # --------------------------------------------------------------------------- 
 83  # Imports 
 84  # --------------------------------------------------------------------------- 
 85   
 86  import logging 
 87  import os 
 88  import sys 
 89  import re 
 90  import tempfile 
 91  import atexit 
 92  import urllib2 
 93  import urlparse 
 94   
 95  import grizzled.exception 
 96  from grizzled.file import unlink_quietly 
 97   
 98  # --------------------------------------------------------------------------- 
 99  # Exports 
100  # --------------------------------------------------------------------------- 
101   
102  __all__ = ['IncludeError', 'Includer', 'preprocess'] 
103   
104  # --------------------------------------------------------------------------- 
105  # Logging 
106  # --------------------------------------------------------------------------- 
107   
108  log = logging.getLogger('includer') 
# ---------------------------------------------------------------------------
# Public classes
# ---------------------------------------------------------------------------

class IncludeError(grizzled.exception.ExceptionWithMessage):
    """
    Raised by ``Includer`` (and the helpers in this module) when include
    processing fails. The exception carries one string: a human-readable
    message describing what went wrong.
    """
121
class Includer(object):
    '''
    An ``Includer`` object preprocesses a path or file-like object,
    expanding include references. The resulting ``Includer`` object is a
    read-only file-like object positioned at the start of the expanded text.

    By default, ``Includer`` supports this include syntax::

        %include "path"
        %include "url"

    However, the include directive syntax is controlled by a regular
    expression, so it can be configured.

    All include expansion happens eagerly, inside the constructor; the
    reading methods simply delegate to the buffer (or ``output`` object)
    that received the expanded text.
    '''

    def __init__(self,
                 source,
                 include_regex=r'^%include\s"([^"]+)"',
                 max_nest_level=100,
                 output=None):
        """
        Create a new ``Includer`` object.

        :Parameters:
            source : file or str
                The source to be read and expanded. May be an open file-like
                object, a path name, or a URL string.
            include_regex : str
                Regular expression defining the include syntax. Must contain a
                single parenthetical group that can be used to extract the
                included file or URL.
            max_nest_level : int
                Maximum include nesting level. Exceeding this level will cause
                ``Includer`` to throw an ``IncludeError``.
            output : str or file
                A string (path name) or file-like object to which to save the
                expanded output. A path name is opened for writing and
                reading. If omitted, an in-memory buffer is used.

        :raise IncludeError: On error
        """
        self.closed = False
        self.mode = None
        self.__include_pattern = re.compile(include_regex)
        self.__maxnest = max_nest_level
        self.__nested = 0

        output_opened_here = False
        if output is None:
            from cStringIO import StringIO
            output = StringIO()
        elif isinstance(output, str):
            # The parameter is documented as accepting a path name; 'w+'
            # lets the expanded text be read back through this object.
            output = open(output, 'w+')
            output_opened_here = True

        try:
            source_opened_here = isinstance(source, str)
            if source_opened_here:
                f, is_url, name = self.__open(source, None, False)
            else:
                # Assume file-like object; it may or may not have a name.
                f = source
                is_url = False
                name = getattr(source, 'name', None)

            self.__name = name
            try:
                self.__process_includes(f, name, is_url, output)
            finally:
                # Only close what this constructor opened; a caller-supplied
                # file object remains the caller's responsibility.
                if source_opened_here:
                    f.close()
        except Exception:
            if output_opened_here:
                output.close()
            raise

        self.__f = output
        self.__f.seek(0)

    @property
    def name(self):
        """
        Get the name of the file being processed.
        """
        return self.__name

    def __iter__(self):
        return self

    def next(self):
        """A file object is its own iterator.

        :rtype: string
        :return: the next line from the file

        :raise StopIteration: end of file
        :raise IncludeError: on error
        """
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def close(self):
        """Close the includer, preventing any further I/O operations."""
        if not self.closed:
            self.closed = True
            self.__f.close()
            del self.__f

    def fileno(self):
        """
        Get the file descriptor. Delegates to the ``fileno()`` method of the
        object holding the expanded output.

        :rtype: int
        :return: the file descriptor reported by the underlying object

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.fileno()

    def isatty(self):
        """
        Determine whether the underlying output object is a TTY or not.

        :return: ``True`` or ``False``

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.isatty()

    def seek(self, pos, mode=0):
        """
        Seek to the specified file offset in the include-processed file.

        :Parameters:
            pos : int
                file offset
            mode : int
                the seek mode, as specified to a Python file's ``seek()``
                method

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        self.__f.seek(pos, mode)

    def tell(self):
        """
        Get the current file offset.

        :rtype: int
        :return: current file offset

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.tell()

    def read(self, n=-1):
        """
        Read *n* bytes from the open file.

        :Parameters:
            n : int
                Number of bytes to read. A negative number instructs
                the method to read all remaining bytes.

        :return: the bytes read

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.read(n)

    def readline(self, length=-1):
        """
        Read the next line from the file.

        :Parameters:
            length : int
                a length hint, or negative if you don't care

        :rtype: str
        :return: the line read

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.readline(length)

    def readlines(self, sizehint=0):
        """
        Read all remaining lines in the file.

        :Parameters:
            sizehint : int
                passed through to the underlying object's ``readlines()``

        :rtype: array
        :return: array of lines

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.readlines(sizehint)

    def truncate(self, size=None):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def write(self, s):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def writelines(self, iterable):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def flush(self):
        """No-op."""
        pass

    def getvalue(self):
        """
        Retrieve the entire contents of the file, with includes expanded,
        at any time before the ``close()`` method is called. The current
        read position is left where it was.

        :rtype: string
        :return: a single string containing the contents of the file

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        # Rewind so the whole text is returned no matter how much has
        # already been read, then put the offset back.
        saved_pos = self.__f.tell()
        self.__f.seek(0)
        try:
            return self.__f.read()
        finally:
            self.__f.seek(saved_pos)

    def __process_includes(self, file_in, filename, is_url, file_out):
        """
        Copy ``file_in`` to ``file_out`` line by line, replacing each line
        that matches the include pattern with the (recursively expanded)
        contents of the file or URL it names.

        :Parameters:
            file_in : file
                object being read; iterated line by line
            filename : str
                path or URL of ``file_in`` (may be ``None``); relative
                includes are resolved against it
            is_url : bool
                whether ``filename`` is a URL
            file_out : file
                object receiving the expanded text

        :raise IncludeError: if the nesting limit is exceeded or an included
                             file or URL cannot be opened
        """
        log.debug('Processing includes in "%s", is_url=%s' % (filename, is_url))

        for line in file_in:
            match = self.__include_pattern.search(line)
            if not match:
                file_out.write(line)
                continue

            if self.__nested >= self.__maxnest:
                raise IncludeError('Exceeded maximum include recursion '
                                   'depth of %d' % self.__maxnest)

            inc_name = match.group(1)
            log.debug('Found include directive: %s', line.rstrip())
            f, included_is_url, included_name = self.__open(inc_name,
                                                            filename,
                                                            is_url)
            self.__nested += 1
            try:
                # Recurse with the *included* file's own name and URL flag so
                # that its relative includes resolve against its location,
                # not against the file that included it.
                self.__process_includes(f, included_name, included_is_url,
                                        file_out)
            finally:
                self.__nested -= 1
                f.close()

    def __open(self, name_to_open, enclosing_file, enclosing_file_is_url):
        """
        Open a path or URL, resolving a relative reference against the
        including file.

        :Parameters:
            name_to_open : str
                path or URL taken from the include directive (or the
                top-level source)
            enclosing_file : str
                path or URL of the including file, or ``None``
            enclosing_file_is_url : bool
                whether ``enclosing_file`` is a URL

        :rtype: tuple
        :return: ``(file_object, is_url, resolved_name)``

        :raise IncludeError: if the file or URL cannot be opened
        """
        parsed_url = urlparse.urlparse(name_to_open)

        # A one-character scheme is a Windows drive letter, not a URL scheme.
        if len(parsed_url.scheme) > 1:
            open_func = urllib2.urlopen
            is_url = True

        elif enclosing_file_is_url:
            # Not a URL itself: use the parent URL as the base URL.
            name_to_open = urlparse.urljoin(enclosing_file, name_to_open)
            open_func = urllib2.urlopen
            is_url = True

        else:
            open_func = open
            is_url = False
            if not os.path.isabs(name_to_open):
                # Not an absolute path. Base it on the parent's directory,
                # or on the current directory when there is no parent.
                if enclosing_file is None:
                    enclosing_dir = os.getcwd()
                else:
                    enclosing_dir = os.path.dirname(enclosing_file)
                name_to_open = os.path.join(enclosing_dir, name_to_open)

        try:
            log.debug('Opening "%s"' % name_to_open)
            f = open_func(name_to_open)
        except Exception:
            # Any failure to open is reported uniformly; unlike a bare
            # "except:", this lets KeyboardInterrupt/SystemExit propagate.
            raise IncludeError('Unable to open "%s" as a file or a URL' %
                               name_to_open)
        return (f, is_url, name_to_open)
397
# ---------------------------------------------------------------------------
# Public functions
# ---------------------------------------------------------------------------

def preprocess(file_or_url, output=None, temp_suffix='.txt', temp_prefix='inc'):
    """
    Process all include directives in the specified file. When no ``output``
    is given, the expanded text is written to a temporary file and the path
    to that file is returned. The temporary file is automatically removed
    when the program exits, though the caller is free to remove it whenever
    it is no longer needed.

    :Parameters:
        file_or_url : file or str
            URL or path to file to be expanded; or, a file-like object
        output : file
            A file or file-like object to receive the output.
        temp_suffix : str
            suffix to use with temporary file that holds preprocessed output
        temp_prefix : str
            prefix to use with temporary file that holds preprocessed output

    :rtype: string
    :return: ``output``, if ``output`` is not ``None``; otherwise, the path to
             temporary file containing expanded content

    :raise IncludeError: if include processing fails
    """
    if output is not None:
        # Caller-supplied destination: expand into it and hand it back.
        Includer(file_or_url, output=output)
        return output

    fd, path = tempfile.mkstemp(suffix=temp_suffix, prefix=temp_prefix)
    atexit.register(unlink_quietly, path)

    # Wrap the descriptor mkstemp() already opened instead of opening the
    # path a second time.
    temp_out = os.fdopen(fd, 'w')
    try:
        Includer(file_or_url, output=temp_out)
    finally:
        # Close explicitly so the expanded text is flushed to disk before
        # the path is returned to the caller.
        temp_out.close()
    return path
436
437 438 # --------------------------------------------------------------------------- 439 # Private functions 440 # --------------------------------------------------------------------------- 441 442 -def _complain_if_closed(closed):
443 if closed: 444 raise IncludeError, "I/O operation on closed file"

# ---------------------------------------------------------------------------
# Main program (for testing)
# ---------------------------------------------------------------------------

if __name__ == '__main__':
    import cStringIO

    log_format = '%(asctime)s %(name)s %(levelname)s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)

    def _show(title, lines):
        # Write the lines to stdout framed by a banner as wide as the title.
        sep = '-' * len(title)
        sys.stdout.write('\n%s\n%s\n%s\n\n' % (sep, title, sep))
        for line in lines:
            sys.stdout.write(line)
        sys.stdout.write('%s\n' % sep)

    # Expand every file named on the command line twice: once through
    # preprocess() into an in-memory buffer, once by iterating an Includer.
    for path in sys.argv[1:]:
        out = cStringIO.StringIO()
        preprocess(path, output=out)
        _show('File: %s, via preprocess()' % path, out.readlines())

        _show('File: %s, via Includer' % path, Includer(path))