Package python-module-logilab-mtconverter-0 :: Package 8 :: Package 4 :: Package transforms :: Module odt2text
[frames | no frames]

Source Code for Module python-module-logilab-mtconverter-0.8.4.transforms.odt2text

 1  # copyright 2006-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
 2  # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr 
 3  # 
 4  # This file is part of logilab-mtconverter. 
 5  # 
 6  # logilab-mtconverter is free software: you can redistribute it and/or modify it 
 7  # under the terms of the GNU Lesser General Public License as published by the 
 8  # Free Software Foundation, either version 2.1 of the License, or (at your 
 9  # option) any later version. 
10  # 
11  # logilab-mtconverter is distributed in the hope that it will be useful, but 
12  # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
13  # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License 
14  # for more details. 
15  # 
16  # You should have received a copy of the GNU Lesser General Public License along 
17  # with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>. 
18  """odt2text: Turn odt file into equivalent plain text file. 
19  Copyright (C) 2009 Logilab S.A. 
20  """ 
21  from zipfile import ZipFile 
22  from lxml import etree 
23  from tempfile import TemporaryFile as tmpfile 
24   
25  from logilab.mtconverter.transform import Transform 
26   
class odt_to_unformatted_text(Transform):
    """Transform odt (OpenDocument Text) content into unformatted plain text.

    The text is gathered from the ``content.xml`` and ``meta.xml`` members
    of the odt zip archive; all markup is discarded.
    """

    name = "odt_to_text"
    inputs = ("application/vnd.oasis.opendocument.text",)
    output = "text/plain"

    def _convert(self, trdata):
        """Return the text found in the odt document held by `trdata`.

        :param trdata: object whose ``data`` attribute holds the odt
            content, either as a string or as something `ZipFile` can open
            directly (NOTE(review): presumably a file-like object or a
            path -- confirm against callers).
        :return: unicode string made of every non-blank ``text`` / ``tail``
            value of every XML node, stripped and joined with single spaces,
            ``content.xml`` first, then ``meta.xml``.
        """
        data = trdata.data
        tmp = None
        try:
            # XXX ZipFile should also accept a string
            #     however, there is some bug within
            #     so we feed it a file
            if isinstance(data, str):
                tmp = tmpfile(mode='w+b')
                tmp.write(data)
                tmp.seek(0)
                data = tmp
            # /XXX
            archive = ZipFile(data, 'r')
            try:
                alltext = []
                for subelt in ('content.xml', 'meta.xml'):
                    root = etree.fromstring(archive.read(subelt))
                    for node in root.iter():
                        for attr in ('text', 'tail'):
                            text = getattr(node, attr)
                            if not text:
                                continue
                            text = text.strip()
                            if text:
                                alltext.append(text)
            finally:
                # release the archive even when a member is missing or the
                # XML cannot be parsed
                archive.close()
        finally:
            # the temporary file only exists when the input was a string;
            # close it explicitly instead of waiting for garbage collection
            if tmp is not None:
                tmp.close()
        return u' '.join(alltext)
57