# Unit tests for toolchain.parsers.sdltm_parser.SdltmParser.
- import io
- import unittest
- import defusedxml.ElementTree as ET
- from toolchain.parsers.sdltm_parser import SdltmParser
class TestSdltmParser(unittest.TestCase):
    """Tests for ``SdltmParser.parse_segment``.

    Every test builds two ``<Segment>`` XML roots, hands them to the parser
    together with two in-memory streams, and then checks what was written
    to the source-language and target-language streams.
    """

    # The templates are assembled from adjacent string literals rather than
    # backslash continuations inside one literal, so the markup does not
    # pick up whatever indentation the source file happens to use.
    # {0} is the content of <Elements>, {1} is the culture (language) name.
    ROOT_TEMPLATE_VALID = (
        "<Segment>"
        "<Elements>{0}</Elements>"
        "<CultureName>{1}</CultureName>"
        "</Segment>"
    )
    # Same as above but without <CultureName>; the second argument passed
    # to str.format() is simply ignored for this template.
    ROOT_TEMPLATE_MISSING_LANGUAGE = (
        "<Segment>"
        "<Elements>{0}</Elements>"
        "</Segment>"
    )
    LANGUAGE_CODE_SRC = "en"
    LANGUAGE_CODE_TGT = "ga"

    def setUp(self):
        """Create fresh output streams and a parser for each test."""
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()
        self.parser = SdltmParser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)

    def tearDown(self):
        """Release the in-memory output streams."""
        self.output_src.close()
        self.output_tgt.close()

    def make_root(self, template, content, lang_code):
        """Return the XML root parsed from ``template`` filled with the arguments.

        ``content`` is substituted for ``{0}`` and ``lang_code`` for ``{1}``.
        """
        return ET.fromstring(template.format(content, lang_code))

    # -- private helpers ---------------------------------------------------

    def _valid_root(self, texts, lang_code):
        """Build a segment root from ``ROOT_TEMPLATE_VALID``."""
        return self.make_root(self.ROOT_TEMPLATE_VALID, texts, lang_code)

    def _parse(self, root_first, root_second):
        """Feed both segment roots to the parser, writing to the test streams."""
        self.parser.parse_segment(
            root_first, root_second, self.output_src, self.output_tgt
        )

    def _assert_outputs(self, expected_src, expected_tgt):
        """Check the full contents of the source and target streams."""
        self.assertEqual(self.output_src.getvalue(), expected_src)
        self.assertEqual(self.output_tgt.getvalue(), expected_tgt)

    # -- segments that must produce no output ------------------------------

    def test_missing_language(self):
        """Nothing is written when a segment has no <CultureName> element."""
        self._parse(
            self.make_root(
                self.ROOT_TEMPLATE_MISSING_LANGUAGE,
                "<Text><Value>horse</Value></Text>",
                "en",
            ),
            self._valid_root("<Text><Value>capall</Value></Text>", "ga"),
        )
        self._assert_outputs("", "")

    def test_missing_value(self):
        """Nothing is written when a <Text> element has no <Value> child."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
            self._valid_root("<Text></Text>", "ga"),
        )
        self._assert_outputs("", "")

    def test_empty_language(self):
        """Nothing is written when a segment's <CultureName> is empty."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
            self._valid_root("<Text><Value>capall</Value></Text>", ""),
        )
        self._assert_outputs("", "")

    def test_unknown_language(self):
        """Nothing is written when a language matches neither configured code."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
            self._valid_root("<Text><Value>Pferd</Value></Text>", "de"),
        )
        self._assert_outputs("", "")

    def test_no_texts(self):
        """Nothing is written when a segment's <Elements> is empty."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
            self._valid_root("", "ga"),
        )
        self._assert_outputs("", "")

    def test_empty_value(self):
        """Nothing is written when a <Value> element is empty."""
        self._parse(
            self._valid_root("<Text><Value></Value></Text>", "en"),
            self._valid_root("<Text><Value>capall</Value></Text>", "ga"),
        )
        self._assert_outputs("", "")

    # -- segments that must produce output ---------------------------------

    def test_in_order(self):
        """Source segment first, target segment second."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
            self._valid_root("<Text><Value>capall</Value></Text>", "ga"),
        )
        self._assert_outputs("horse\n", "capall\n")

    def test_reverse_order(self):
        """Target segment first: each text must still reach the right stream."""
        self._parse(
            self._valid_root("<Text><Value>capall</Value></Text>", "ga"),
            self._valid_root("<Text><Value>horse</Value></Text>", "en"),
        )
        self._assert_outputs("horse\n", "capall\n")

    def test_language_variants(self):
        """Region-qualified codes (``en-GB``, ``ga-IE``) are accepted."""
        self._parse(
            self._valid_root("<Text><Value>horse</Value></Text>", "en-GB"),
            self._valid_root("<Text><Value>capall</Value></Text>", "ga-IE"),
        )
        self._assert_outputs("horse\n", "capall\n")

    def test_extra_whitespace_leading_trailing(self):
        """Leading and trailing spaces/tabs in a value are kept."""
        self._parse(
            self._valid_root("<Text><Value>yellow </Value></Text>", "en"),
            self._valid_root("<Text><Value> \tbuí</Value></Text>", "ga"),
        )
        self._assert_outputs("yellow \n", " \tbuí\n")

    def test_extra_whitespace_contained(self):
        """Spaces/tabs inside a value are kept."""
        self._parse(
            self._valid_root("<Text><Value>cake</Value></Text>", "en"),
            self._valid_root("<Text><Value>cáca \tmilis</Value></Text>", "ga"),
        )
        self._assert_outputs("cake\n", "cáca \tmilis\n")

    def test_newline_contained(self):
        """A newline inside a value is expected to be dropped from the output."""
        self._parse(
            self._valid_root("<Text><Value>cake</Value></Text>", "en"),
            self._valid_root("<Text><Value>cáca\nmilis</Value></Text>", "ga"),
        )
        self._assert_outputs("cake\n", "cácamilis\n")

    def test_only_whitespace(self):
        """A value consisting of a single space is written as-is."""
        self._parse(
            self._valid_root("<Text><Value>yellow</Value></Text>", "en"),
            self._valid_root("<Text><Value> </Value></Text>", "ga"),
        )
        self._assert_outputs("yellow\n", " \n")

    def test_only_newline(self):
        """A value consisting of a newline is expected to yield an empty line."""
        self._parse(
            self._valid_root("<Text><Value>yellow</Value></Text>", "en"),
            self._valid_root("<Text><Value>\n</Value></Text>", "ga"),
        )
        self._assert_outputs("yellow\n", "\n")

    def test_multiple_texts(self):
        """Values of several <Text> elements are concatenated in order."""
        first_texts = (
            "<Text><Value>bread </Value></Text>"
            "<Text><Value>and</Value></Text>"
            "<Text><Value> jam</Value></Text>"
        )
        second_texts = (
            "<Text><Value>arán</Value></Text>"
            "<Text><Value> agus </Value></Text>"
            "<Text><Value>subh</Value></Text>"
        )
        self._parse(
            self._valid_root(first_texts, "en"),
            self._valid_root(second_texts, "ga"),
        )
        self._assert_outputs("bread and jam\n", "arán agus subh\n")
# Run the test suite only when this file is executed directly as a script,
# not when it is imported by a test runner.
if __name__ == "__main__":
    unittest.main()
|