1
2
3
4
5
6 '''
7 Introduction
8 ============
9
10 The ``grizzled.file.includer`` module contains a class that can be used to
11 process includes within a text file, returning a file-like object. It also
12 contains some utility functions that permit using include-enabled files in
13 other contexts.
14
15 Include Syntax
16 ==============
17
18 The *include* syntax is defined by a regular expression; any line that matches
19 the regular expression is treated as an *include* directive. The default
20 regular expression matches include directives like this::
21
    %include "/absolute/path/to/file"
    %include "../relative/path/to/file"
    %include "local_reference"
    %include "http://localhost/path/to/my.config"
26
Relative and local file references are relative to the including file or URL.
That is, if an ``Includer`` is processing file "/home/bmc/foo.txt" and
encounters an attempt to include file "bar.txt", it will assume "bar.txt" is
to be found in "/home/bmc".
31
32 Similarly, if an ``Includer`` is processing URL "http://localhost/bmc/foo.txt"
33 and encounters an attempt to include file "bar.txt", it will assume "bar.txt"
34 is to be found at "http://localhost/bmc/bar.txt".
35
36 Nested includes are permitted; that is, an included file may, itself, include
37 other files. The maximum recursion level is configurable and defaults to 100.
38
39 The include syntax can be changed by passing a different regular expression to
40 the ``Includer`` class constructor.
41
42 Usage
43 =====
44
45 This module provides an ``Includer`` class, which processes include directives
46 in a file and behaves like a file-like object. See the class documentation for
47 more details.
48
49 The module also provides a ``preprocess()`` convenience function that can be
50 used to preprocess a file; it returns the path to the resulting preprocessed
51 file.
52
53 Examples
54 ========
55
56 Preprocess a file containing include directives, then read the result:
57
58 .. python::
59
    import includer
    import sys

    inc = includer.Includer(path)
    for line in inc:
        sys.stdout.write(line)
66
67
68 Use an include-enabled file with the standard Python logging module:
69
70 .. python::
71
    import logging
    import includer

    logging.fileConfig(includer.preprocess("mylog.cfg"))
76
77 '''
78
# Markup language used by the docstrings in this module.
__docformat__ = "restructuredtext en"
# Public names exported by this module.
__all__ = ['Includer', 'IncludeError', 'preprocess']
81
82
83
84
85
86 import logging
87 import os
88 import sys
89 import re
90 import tempfile
91 import atexit
92 import urllib2
93 import urlparse
94
95 import grizzled.exception
96 from grizzled.file import unlink_quietly
97
98
99
100
101
# Public names exported by this module.
# NOTE(review): ``__all__`` is also assigned near the top of the file with the
# same three names; this later assignment is the one that takes effect.
__all__ = ['IncludeError', 'Includer', 'preprocess']
103
104
105
106
107
# Module-level logger, registered under the name 'includer'.
log = logging.getLogger('includer')
109
110
111
112
113
class IncludeError(grizzled.exception.ExceptionWithMessage):
    """
    Raised by ``Includer`` whenever include processing fails.

    Every ``IncludeError`` carries exactly one string value: a message
    describing what went wrong.
    """
121
class Includer(object):
    '''
    An ``Includer`` object preprocesses a path, URL or file-like object,
    expanding include references. The expansion happens once, in the
    constructor; afterwards the ``Includer`` behaves like a read-only
    file-like object positioned at the start of the expanded text.

    By default, ``Includer`` supports this include syntax::

        %include "path"
        %include "url"

    However, the include directive syntax is controlled by a regular
    expression, so it can be configured.

    See the module documentation for details.
    '''

    def __init__(self,
                 source,
                 include_regex=r'^%include\s"([^"]+)"',
                 max_nest_level=100,
                 output=None):
        """
        Create a new ``Includer`` object and expand all includes in
        ``source`` immediately.

        :Parameters:
            source : file or str
                The source to be read and expanded. May be an open file-like
                object, a path name, or a URL string. A source opened here
                (path or URL) is closed again once expansion finishes; a
                file-like object passed in by the caller is left open.
            include_regex : str
                Regular expression defining the include syntax. Must contain a
                single parenthetical group that can be used to extract the
                included file or URL.
            max_nest_level : int
                Maximum include nesting level. Exceeding this level will cause
                ``Includer`` to throw an ``IncludeError``.
            output : str or file
                A string (path name) or file-like object to which to save the
                expanded output. If omitted, an in-memory buffer is used.

        :raise IncludeError: On error
        """
        if isinstance(source, str):
            f, is_url, name = self.__open(source, None, False)
            close_source = True
        else:
            # Assume a file-like object; it may or may not carry a name.
            f = source
            is_url = False
            name = getattr(source, 'name', None)
            close_source = False

        owns_output = False
        try:
            self.closed = False
            self.mode = None
            self.__include_pattern = re.compile(include_regex)
            self.__name = name
            self.__maxnest = max_nest_level
            self.__nested = 0

            if output is None:
                from cStringIO import StringIO
                output = StringIO()
            elif isinstance(output, str):
                # The documented contract allows a path name here; open it
                # read/write so the expanded text can be read back.
                output = open(output, 'w+')
                owns_output = True

            self.__process_includes(f, name, is_url, output)
        except Exception:
            # Do not leak an output file this constructor opened itself.
            if owns_output:
                output.close()
            raise
        finally:
            if close_source:
                f.close()

        self.__f = output
        self.__f.seek(0)

    @property
    def name(self):
        """
        Get the name of the file being processed.
        """
        return self.__name

    def __iter__(self):
        """A file object is its own iterator."""
        return self

    def next(self):
        """A file object is its own iterator.

        :rtype: string
        :return: the next line from the file

        :raise StopIteration: end of file
        :raise IncludeError: on error
        """
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Alias so iteration also works under the Python 3 iterator protocol.
    __next__ = next

    def close(self):
        """Close the includer, preventing any further I/O operations."""
        if not self.closed:
            self.closed = True
            self.__f.close()
            del self.__f

    def fileno(self):
        """
        Get the file descriptor. Returns the descriptor of the file being
        read.

        :rtype: int
        :return: the file descriptor of the file being read

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.fileno()

    def isatty(self):
        """
        Determine whether the file being processed is a TTY or not.

        :return: ``True`` or ``False``

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.isatty()

    def seek(self, pos, mode=0):
        """
        Seek to the specified file offset in the include-processed file.

        :Parameters:
            pos : int
                file offset
            mode : int
                the seek mode, as specified to a Python file's ``seek()``
                method

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        self.__f.seek(pos, mode)

    def tell(self):
        """
        Get the current file offset.

        :rtype: int
        :return: current file offset

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.tell()

    def read(self, n=-1):
        """
        Read *n* bytes from the open file.

        :Parameters:
            n : int
                Number of bytes to read. A negative number instructs
                the method to read all remaining bytes.

        :return: the bytes read

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.read(n)

    def readline(self, length=-1):
        """
        Read the next line from the file.

        :Parameters:
            length : int
                a length hint, or negative if you don't care

        :rtype: str
        :return: the line read

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.readline(length)

    def readlines(self, sizehint=0):
        """
        Read all remaining lines in the file.

        :Parameters:
            sizehint : int
                size hint, passed through to the underlying file object

        :rtype: array
        :return: array of lines

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        return self.__f.readlines(sizehint)

    def truncate(self, size=None):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def write(self, s):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def writelines(self, iterable):
        """Not supported, since ``Includer`` objects are read-only."""
        raise IncludeError('Includers are read-only file objects.')

    def flush(self):
        """No-op."""
        pass

    def getvalue(self):
        """
        Retrieve the entire contents of the file, with all includes
        expanded, at any time before the ``close()`` method is called.
        The current file offset is left unchanged.

        :rtype: string
        :return: a single string containing the contents of the file

        :raise IncludeError: if the includer has been closed
        """
        _complain_if_closed(self.closed)
        # Read from the start, not the current offset, so earlier reads do
        # not truncate the result; then restore the caller's position.
        pos = self.__f.tell()
        self.__f.seek(0)
        try:
            return self.__f.read()
        finally:
            self.__f.seek(pos)

    def __process_includes(self, file_in, filename, is_url, file_out):
        """
        Copy ``file_in`` to ``file_out`` line by line, replacing each line
        that matches the include pattern with the recursively expanded
        contents of the file or URL it names.

        :Parameters:
            file_in : file
                open file-like object to read
            filename : str
                path or URL of ``file_in`` (may be ``None``); used as the
                base for resolving relative includes
            is_url : bool
                whether ``filename`` is a URL
            file_out : file
                file-like object receiving the expanded text

        :raise IncludeError: if an include cannot be opened or the maximum
                             nesting level is exceeded
        """
        log.debug('Processing includes in "%s", is_url=%s', filename, is_url)

        for line in file_in:
            match = self.__include_pattern.search(line)
            if not match:
                file_out.write(line)
                continue

            if self.__nested >= self.__maxnest:
                raise IncludeError('Exceeded maximum include recursion '
                                   'depth of %d' % self.__maxnest)

            inc_name = match.group(1)
            log.debug('Found include directive: %s', line.rstrip())
            f, included_is_url, included_name = self.__open(inc_name,
                                                            filename,
                                                            is_url)
            self.__nested += 1
            try:
                # Recurse with the *included* file's own name and URL flag so
                # that its relative includes resolve against its location,
                # not against the file that included it.
                self.__process_includes(f, included_name, included_is_url,
                                        file_out)
            finally:
                self.__nested -= 1
                f.close()

    def __open(self, name_to_open, enclosing_file, enclosing_file_is_url):
        """
        Open a path or URL, resolving relative references against the
        enclosing file or URL.

        :Parameters:
            name_to_open : str
                path or URL to open
            enclosing_file : str
                path or URL of the including file, or ``None`` for a
                top-level source (relative paths then resolve against the
                current working directory)
            enclosing_file_is_url : bool
                whether ``enclosing_file`` is a URL

        :rtype: tuple
        :return: ``(file_object, is_url, resolved_name)``

        :raise IncludeError: if the file or URL cannot be opened
        """
        is_url = False
        open_func = open

        parsed_url = urlparse.urlparse(name_to_open)

        # A one-character "scheme" is taken to be a Windows drive letter
        # (e.g. "C:\\path"), not a URL scheme.
        if len(parsed_url.scheme) > 1:
            open_func = urllib2.urlopen
            is_url = True

        elif enclosing_file_is_url:
            # Relative (or host-absolute) reference inside a document that
            # was itself fetched from a URL.
            name_to_open = urlparse.urljoin(enclosing_file, name_to_open)
            open_func = urllib2.urlopen
            is_url = True

        elif not os.path.isabs(name_to_open):
            # Relative path: resolve against the including file's directory.
            if enclosing_file is None:
                enclosing_dir = os.getcwd()
            else:
                enclosing_dir = os.path.dirname(enclosing_file)
            name_to_open = os.path.join(enclosing_dir, name_to_open)

        try:
            log.debug('Opening "%s"', name_to_open)
            f = open_func(name_to_open)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as an include failure.
            raise IncludeError('Unable to open "%s" as a file or a URL' %
                               name_to_open)
        return (f, is_url, name_to_open)
397
398
399
400
401
def preprocess(file_or_url, output=None, temp_suffix='.txt', temp_prefix='inc'):
    """
    Process all include directives in the specified file. If ``output`` is
    given, the expanded text is written to it. Otherwise it is written to a
    temporary file whose path is returned. The temporary file is
    automatically removed when the program exits, though the caller is free
    to remove it whenever it is no longer needed.

    :Parameters:
        file_or_url : file or str
            URL or path to file to be expanded; or, a file-like object
        output : file
            A file or file-like object to receive the output.
        temp_suffix : str
            suffix to use with temporary file that holds preprocessed output
        temp_prefix : str
            prefix to use with temporary file that holds preprocessed output

    :rtype: string
    :return: ``output``, if ``output`` is not ``None``; otherwise, the path to
             temporary file containing expanded content

    :raise IncludeError: if include processing fails
    """
    if output is not None:
        Includer(file_or_url, output=output)
        return output

    fd, path = tempfile.mkstemp(suffix=temp_suffix, prefix=temp_prefix)
    atexit.register(unlink_quietly, path)

    # Wrap the descriptor mkstemp() already opened instead of opening the
    # path a second time, and close it explicitly so the expanded text is
    # flushed to disk before the path is handed back to the caller.
    temp_out = os.fdopen(fd, 'w')
    try:
        Includer(file_or_url, output=temp_out)
    finally:
        temp_out.close()
    return path
436
443 if closed:
444 raise IncludeError, "I/O operation on closed file"
445
446
447
448
449
if __name__ == '__main__':
    # Manual smoke test: expand every file named on the command line, once
    # through preprocess() and once through Includer directly, and print
    # both results.
    from cStringIO import StringIO

    log_format = '%(asctime)s %(name)s %(levelname)s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)

    for path in sys.argv[1:]:
        out = StringIO()
        preprocess(path, output=out)

        # Interpolate the file name; the banner previously printed a
        # literal "%s".
        header = 'File: %s, via preprocess()' % path
        sep = '-' * len(header)
        print('\n%s\n%s\n%s\n' % (sep, header, sep))
        for line in out.readlines():
            sys.stdout.write(line)
        print(sep)

        inc = Includer(path)
        header = 'File: %s, via Includer' % path
        sep = '-' * len(header)
        print('\n%s\n%s\n%s\n' % (sep, header, sep))
        for line in inc:
            sys.stdout.write(line)
        print('%s' % sep)
        inc.close()
474