123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327 |
- import io
- import unittest
- import defusedxml.ElementTree as ET
- from toolchain.parsers.parsing_error import ParsingError
- from toolchain.parsers.tmx_parser import TmxParser
class TestTmxParser(unittest.TestCase):
    """Tests for ``TmxParser.parse_content`` using small in-memory TMX documents.

    Every test wraps a handful of ``<tu>`` elements in ``ROOT_TEMPLATE``,
    runs the parser configured for the ``en`` -> ``ga`` language pair, and
    inspects what was written to the source and target ``io.StringIO``
    buffers created in ``setUp``.
    """

    # Skeleton document; ``{0}`` is substituted with the <tu> elements under
    # test. The fragments are joined without whitespace between elements.
    ROOT_TEMPLATE = (
        '<tmx version="1.4">'
        "<header/>"
        "<body>"
        "{0}"
        "</body>"
        "</tmx>"
    )
    LANGUAGE_CODE_SRC = "en"
    LANGUAGE_CODE_TGT = "ga"

    def setUp(self):
        # Fresh buffers and a fresh parser per test so nothing leaks between cases.
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()
        self.parser = TmxParser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)

    def tearDown(self):
        for stream in (self.output_src, self.output_tgt):
            stream.close()

    def make_document(self, content):
        """Return the parsed XML root of ``ROOT_TEMPLATE`` with *content* as its body."""
        xml_text = TestTmxParser.ROOT_TEMPLATE.format(content)
        return ET.fromstring(xml_text)

    def _run_parser(self, content):
        """Build a document around *content* and feed it to the parser under test."""
        document = self.make_document(content)
        self.parser.parse_content(document, self.output_src, self.output_tgt)

    def _expect_outputs(self, expected_src, expected_tgt):
        """Assert the full text written to the source and target buffers."""
        self.assertEqual(self.output_src.getvalue(), expected_src)
        self.assertEqual(self.output_tgt.getvalue(), expected_tgt)

    def test_empty_body(self):
        # A body without translation units is expected to produce no output.
        self._run_parser("")
        self._expect_outputs("", "")

    def test_absent_tgt(self):
        # Only the source-language variant is present: nothing is expected.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("", "")

    def test_absent_src(self):
        # Only the target-language variant is present: nothing is expected.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("", "")

    def test_empty_tgt(self):
        # An empty target segment is expected to suppress the whole pair.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg></seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("", "")

    def test_single_simple(self):
        # One complete pair: one line expected in each output.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "buí\n")

    def test_language_variants(self):
        # Regional variants (en-GB, ga-IE) are expected to match the
        # configured "en" / "ga" codes.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en-GB">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "buí\n")

    def test_language_missing(self):
        # A <tuv> without xml:lang is expected to raise ParsingError and
        # leave both outputs empty.
        document = self.make_document(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            "<tuv>"
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        with self.assertRaises(ParsingError):
            self.parser.parse_content(document, self.output_src, self.output_tgt)
        self._expect_outputs("", "")

    def test_inner_node_empty(self):
        # An empty inline element ahead of the text should not affect the output.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg><inner/>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "buí\n")

    def test_inner_node_nonempty_preceding(self):
        # Text inside a leading inline element is expected to be kept and
        # joined with the text that follows it.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg><inner>ye</inner>llow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "buí\n")

    def test_inner_node_nonempty_following(self):
        # Text inside a trailing inline element is expected to be kept and
        # joined with the text that precedes it.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yell<inner>ow</inner></seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>buí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "buí\n")

    def test_extra_whitespace_leading_trailing(self):
        # Leading/trailing spaces and tabs in a segment are expected verbatim.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow </seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg> \tbuí</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow \n", " \tbuí\n")

    def test_extra_whitespace_contained(self):
        # Spaces and tabs inside a segment are expected verbatim.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>cáca \tmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("cake\n", "cáca \tmilis\n")

    def test_newline_contained(self):
        # A newline inside a segment is expected to be dropped so the pair
        # still occupies exactly one output line.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>cáca\nmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("cake\n", "cácamilis\n")

    def test_only_whitespace(self):
        # A whitespace-only target segment is still expected to be emitted.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg> </seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", " \n")

    def test_only_newline(self):
        # A newline-only target segment is expected to yield an empty target line.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>yellow</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>\n</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("yellow\n", "\n")

    def test_multiple(self):
        # Several units with mixed language variants: outputs are expected
        # in document order, one line per unit.
        self._run_parser(
            "<tu>"
            '<tuv xml:lang="en">'
            "<seg>horse</seg>"
            "</tuv>"
            '<tuv xml:lang="ga">'
            "<seg>capall</seg>"
            "</tuv>"
            "</tu>"
            "<tu>"
            '<tuv xml:lang="en-IE">'
            "<seg>eat</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>ith</seg>"
            "</tuv>"
            "</tu>"
            "<tu>"
            '<tuv xml:lang="en-GB">'
            "<seg>cake</seg>"
            "</tuv>"
            '<tuv xml:lang="ga-IE">'
            "<seg>cáca\nmilis</seg>"
            "</tuv>"
            "</tu>"
        )
        self._expect_outputs("horse\neat\ncake\n", "capall\nith\ncácamilis\n")
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|