Package python-module-logilab-mtconverter-0 :: Package 8 :: Package 4 :: Package test :: Module unittest_transforms
[frames | no frames]

Source Code for Module python-module-logilab-mtconverter-0.8.4.test.unittest_transforms

  1  # -*- coding: iso-8859-1 -*- 
  2  # copyright 2006-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. 
  3  # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr 
  4  # 
  5  # This file is part of logilab-mtconverter. 
  6  # 
  7  # logilab-mtconverter is free software: you can redistribute it and/or modify it 
  8  # under the terms of the GNU Lesser General Public License as published by the 
  9  # Free Software Foundation, either version 2.1 of the License, or (at your 
 10  # option) any later version. 
 11  # 
 12  # logilab-mtconverter is distributed in the hope that it will be useful, but 
 13  # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
 14  # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License 
 15  # for more details. 
 16  # 
 17  # You should have received a copy of the GNU Lesser General Public License along 
 18  # with logilab-mtconverter. If not, see <http://www.gnu.org/licenses/>. 
 19  from logilab.common.testlib import TestCase, unittest_main 
 20   
 21  from logilab.mtconverter.engine import TransformEngine 
 22  from logilab.mtconverter import TransformData, TransformError, \ 
 23       register_base_transforms, register_pil_transforms 
 24   
 25  ENGINE = TransformEngine() 
 26  register_base_transforms(ENGINE) 
 27  register_pil_transforms(ENGINE) 
 28   
 29  import logilab.mtconverter as mtc 
 30  import os 
 31  import os.path as osp 
 32  import subprocess 
 33  DATAPATH = osp.dirname(__file__) 
 34   
class MiscTransformsTC(TestCase):
    """Conversion tests for assorted MIME types to ``text/plain``.

    Every test wraps some input in a ``TransformData``, converts it with the
    module-level ``ENGINE`` and compares the decoded result with the expected
    plain text.
    """

    def test_html_to_text(self):
        """HTML given with an explicit utf8 encoding is converted to text."""
        data = TransformData(u'<b>yo (zou éà ;)</b>', 'text/html', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'**yo (zou éà ;)**')

        data = TransformData(u'<p>yo <br/>zogzog </p>', 'text/html', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'yo\n\nzogzog')

    def test_html_to_text_noenc(self):
        """HTML given as latin1 bytes without a declared encoding.

        Currently skipped unconditionally; everything after ``skipTest`` is
        kept so the test can be re-enabled once encoding detection works.
        """
        self.skipTest('Encoding detection with chardet does not work')
        # will trigger guess_encoding, check non-utf8 encoding
        data = TransformData(u"<b>yo (l'état à l'oeuf)</b>".encode('latin1'), 'text/html')
        self.assertIn(data.encoding, ('latin1', 'windows-1252'))
        data.check_encoding()

        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        # The expected text must mirror the input above; it used to be the
        # string copy-pasted from test_html_to_text, which could never match.
        self.assertEqual(converted, u"**yo (l'état à l'oeuf)**")

    def test_xml_to_text(self):
        """XML markup is dropped, leaving only the text content."""
        data = TransformData(u'<root><b>yo (zou éà ;)</b>a<tag/>b<root>', 'application/xml', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'yo (zou éà ;) a b')

    # NOTE(review): the ``def`` line of this test was missing from the
    # extracted listing; the method name below is reconstructed -- confirm
    # against the upstream file.
    def test_pgpsignature_to_text(self):
        """A PGP signature block comes out of the conversion unchanged."""
        _data = u"""-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)

iEYEARECAAYFAkxX5p8ACgkQkjcInxztrI64QQCggKA+PmbLYnGNtBB3Lb3pO3P8
r2MAoIO1DSsuM23SzgmqubGJEZuSRWhR
=GDDk
-----END PGP SIGNATURE-----
"""
        data = TransformData(_data, 'application/pgp-signature')
        converted = ENGINE.convert(data, 'text/plain').decode()
        self.assertMultiLineEqual(converted, _data)

    def test_odt_to_text(self):
        """An ODT document converts to text from a file object or its content."""
        expected = u'Hello ! OpenOffice.org/2.4$Unix OpenOffice.org_project/680m17$Build-9310 Hello quoi de neuf doc ? bonjour 2008-07-08T16:19:35 2009-01-09T14:44:54 mot-clef 1 PT37S'
        odt_path = osp.join(DATAPATH, 'hello.odt')

        # First form: hand the open file object to TransformData.  The
        # conversion runs inside the ``with`` block because the data object
        # only wraps the stream.  The fixture is binary, hence 'rb'.
        with open(odt_path, 'rb') as stream:
            data = TransformData(stream,
                                 'application/vnd.oasis.opendocument.text', 'utf8')
            converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, expected)

        # ZipFile will complain that
        # TypeError: file() argument 1 must be (encoded string without NULL bytes), not str
        # if given a plain str ... we shielded us from that.
        with open(odt_path, 'rb') as stream:
            content = stream.read()
        data = TransformData(content,
                             'application/vnd.oasis.opendocument.text', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, expected)

    def test_pdf_to_text(self):
        """A PDF built with pdflatex from the 'hello' source converts to text."""
        # Build hello.pdf in the data directory, discarding pdflatex's output;
        # the devnull handle is closed as soon as the command returns.
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(['pdflatex', 'hello'], cwd=osp.abspath(DATAPATH),
                                  stdout=devnull)
        with open(osp.join(DATAPATH, 'hello.pdf'), 'rb') as stream:
            content = stream.read()
        data = TransformData(content, 'application/pdf', 'utf8')
        converted = ENGINE.convert(data, 'text/plain').decode().strip()
        self.assertEqual(converted, u'hello')

    def tearDown(self):
        """Delete the files pdflatex may have left in the data directory."""
        for ext in ('pdf', 'aux', 'log'):
            try:
                os.unlink(osp.join(DATAPATH, 'hello.' + ext))
            except OSError:
                # Best effort: most tests never create these files.
                pass


if __name__ == '__main__':
    # Run the test cases when this file is executed as a script.
    unittest_main()