|
- import io
- import unittest
- import defusedxml.ElementTree as ET
- from toolchain.parsers.parsing_error import ParsingError
- from toolchain.parsers.tmx_parser import TmxParser
class TestTmxParser(unittest.TestCase):
    """Tests for ``TmxParser.parse_content`` using in-memory TMX documents.

    Every test wraps a few ``<tu>`` elements in a minimal TMX skeleton, runs
    the parser configured for the ``en`` -> ``ga`` language pair, and checks
    the text written to the source and target output streams.
    """

    # Minimal TMX document; ``{0}`` is replaced by the body content under test.
    ROOT_TEMPLATE = (
        '<tmx version="1.4">'
        "<header/>"
        "<body>"
        "{0}"
        "</body>"
        "</tmx>"
    )

    LANGUAGE_CODE_SRC = "en"
    LANGUAGE_CODE_TGT = "ga"

    def setUp(self):
        """Create fresh output streams and a parser for each test."""
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()
        self.parser = TmxParser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)

    def tearDown(self):
        """Release the in-memory output streams."""
        for stream in (self.output_src, self.output_tgt):
            stream.close()

    def make_document(self, content):
        """Return the parsed XML root of a TMX document whose body is *content*."""
        xml_text = TestTmxParser.ROOT_TEMPLATE.format(content)
        return ET.fromstring(xml_text)

    def _parse(self, content):
        """Build a document around *content* and feed it to the parser."""
        document = self.make_document(content)
        self.parser.parse_content(document, self.output_src, self.output_tgt)

    def _assert_outputs(self, expected_src, expected_tgt):
        """Check the full text written to the source and target streams."""
        self.assertEqual(self.output_src.getvalue(), expected_src)
        self.assertEqual(self.output_tgt.getvalue(), expected_tgt)

    def test_empty_body(self):
        """A body without translation units produces no output."""
        self._parse("")
        self._assert_outputs("", "")

    def test_absent_tgt(self):
        """A unit that only has a source-language variant is skipped."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("", "")

    def test_absent_src(self):
        """A unit that only has a target-language variant is skipped."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("", "")

    def test_empty_tgt(self):
        """A unit whose target segment is empty is skipped on both sides."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg></seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("", "")

    def test_single_simple(self):
        """One complete unit yields one line in each output."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "buí\n")

    def test_language_variants(self):
        """Regional tags such as ``en-GB`` / ``ga-IE`` match the base codes."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en-GB">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "buí\n")

    def test_language_missing(self):
        """A variant without ``xml:lang`` raises and leaves the outputs empty."""
        document = self.make_document(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            "<tuv>"
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        with self.assertRaises(ParsingError):
            self.parser.parse_content(document, self.output_src, self.output_tgt)
        # Checked after the ``with`` block so the assertions actually execute.
        self._assert_outputs("", "")

    def test_inner_node_empty(self):
        """An empty inline element is dropped; the text after it is kept."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg><inner/>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "buí\n")

    def test_inner_node_nonempty_preceding(self):
        """Text inside a leading inline element is joined with the text after it."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg><inner>ye</inner>llow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "buí\n")

    def test_inner_node_nonempty_following(self):
        """Text inside a trailing inline element is joined with the text before it."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yell<inner>ow</inner></seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "buí\n")

    def test_extra_whitespace_leading_trailing(self):
        """Leading and trailing spaces/tabs in a segment are written unchanged."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow </seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg> \tbuí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow \n", " \tbuí\n")

    def test_extra_whitespace_contained(self):
        """Spaces/tabs in the middle of a segment are written unchanged."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>cáca \tmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("cake\n", "cáca \tmilis\n")

    def test_newline_contained(self):
        """A newline inside a segment is removed from the written line."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>cáca\nmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("cake\n", "cácamilis\n")

    def test_only_whitespace(self):
        """A segment holding a single space is still written, as a space-only line."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg> </seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", " \n")

    def test_only_newline(self):
        """A segment holding only a newline is written as an empty line."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>\n</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("yellow\n", "\n")

    def test_multiple(self):
        """Several units are written in document order, one line per unit."""
        self._parse(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>horse</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>capall</seg>"
            "</tuv>"
            "</tu>"
            "<tu>"
            '<tuv xml:lang="en-IE">'
            "<seg>eat</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>ith</seg>"
            "</tuv>"
            "</tu>"
            "<tu>"
            '<tuv xml:lang="en-GB">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>cáca\nmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._assert_outputs("horse\neat\ncake\n", "capall\nith\ncácamilis\n")
# Run the test suite only when this file is executed directly, not on import.
if __name__ == "__main__":
    unittest.main()
|