|
@@ -0,0 +1,750 @@
|
|
|
+import io
|
|
|
+import unittest
|
|
|
+import xml.etree.ElementTree as ET
|
|
|
+
|
|
|
+from toolchain.parsers.parsing_error import ParsingError
|
|
|
+from toolchain.parsers.xliff_parser import Xliff12Parser, Xliff20Parser
|
|
|
+
|
|
|
class TestXliff12Parser(unittest.TestCase):
    """Tests for ``Xliff12Parser.parse_content`` on in-memory XLIFF 1.2 documents.

    Every test builds an ElementTree document, feeds it to a parser that is
    configured for ``LANGUAGE_CODE_SRC`` -> ``LANGUAGE_CODE_TGT`` and checks
    what was written to the source and target output streams.

    Test methods are described with ``#`` comments rather than docstrings so
    that unittest's verbose report keeps showing the method names.
    """

    LANGUAGE_CODE_SRC = "en"
    LANGUAGE_CODE_TGT = "ga"

    # XLIFF 1.2 skeleton: {0} = source language, {1} = target language,
    # {2} = content placed inside <body>.
    ROOT_TEMPLATE = (
        '<xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">'
        ' <file original="/path/to/original" source-language="{0}"'
        ' target-language="{1}" datatype="datatype">'
        ' <header/>'
        ' <body>'
        ' {2}'
        ' </body>'
        ' </file>'
        ' </xliff>'
    )

    def setUp(self):
        # Fresh output streams and parser for every test.
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()
        self.parser = Xliff12Parser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)

    def tearDown(self):
        self.output_src.close()
        self.output_tgt.close()

    def make_document(self, source_language_code, target_language_code, content):
        """Return the parsed root element of an XLIFF 1.2 document.

        The language codes fill the ``<file>`` attributes of ``ROOT_TEMPLATE``
        and ``content`` is inserted verbatim inside ``<body>``.
        """
        return ET.fromstring(
            self.ROOT_TEMPLATE.format(source_language_code, target_language_code, content)
        )

    def _assert_parsed(self, document, expected_src, expected_tgt):
        """Parse ``document`` and compare both output streams with the expectations."""
        self.parser.parse_content(document, self.output_src, self.output_tgt)
        self.assertEqual(self.output_src.getvalue(), expected_src)
        self.assertEqual(self.output_tgt.getvalue(), expected_tgt)

    def test_empty_body(self):
        # A body without any unit produces no output.
        document = self.make_document("en", "ga", "")
        self._assert_parsed(document, "", "")

    def test_absent_src(self):
        # A unit without <source> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <target>capall</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "", "")

    def test_absent_tgt(self):
        # A unit without <target> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>horse</source>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "", "")

    def test_empty_src(self):
        # An empty <source/> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source/>"
            " <target>capall</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "", "")

    def test_empty_tgt(self):
        # An empty <target/> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>horse</source>"
            " <target/>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "", "")

    def test_single_valid(self):
        # One complete unit yields one line per output stream.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>horse</source>"
            " <target>capall</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_language_variants(self):
        # Regional variants (en-GB / ga-IE) are expected to match en / ga.
        document = self.make_document(
            "en-GB", "ga-IE",
            " <trans-unit>"
            " <source>horse</source>"
            " <target>capall</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_languages_reversed(self):
        # The document is ga -> en while the parser is en -> ga: the English
        # text is still expected on the source stream.
        document = self.make_document(
            "ga", "en",
            " <trans-unit>"
            " <source>capall</source>"
            " <target>horse</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_inner_tags_all(self):
        # The whole source text is wrapped in an inline element.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source><inner>yellow</inner></source>"
            " <target><g>buí</g></target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_inner_tags_start(self):
        # The inline element covers the beginning of the source text.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source><inner>ye</inner>llow</source>"
            " <target><g>buí</g></target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_inner_tags_end(self):
        # The inline element covers the end of the source text.
        # (Previously this method reused the name of the "_all" test and
        # thereby replaced it, so the "_all" case was never executed.)
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>yell<inner>ow</inner></source>"
            " <target><g>buí</g></target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_with_group(self):
        # A unit nested in one <group> is still found.
        document = self.make_document(
            "en", "ga",
            " <group>"
            " <trans-unit>"
            " <source>horse</source>"
            " <target>capall</target>"
            " </trans-unit>"
            " </group> "
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_multiple_group(self):
        # A unit nested in two <group> levels is still found.
        document = self.make_document(
            "en", "ga",
            " <group>"
            " <group>"
            " <trans-unit>"
            " <source>horse</source>"
            " <target>capall</target>"
            " </trans-unit>"
            " </group>"
            " </group> "
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_extra_whitespace_leading_trailing(self):
        # Leading/trailing blanks and tabs are expected to be kept as-is.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>yellow </source>"
            " <target> \tbuí</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow \n", " \tbuí\n")

    def test_extra_whitespace_contained(self):
        # Blanks and tabs inside the text are expected to be kept as-is.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>cake</source>"
            " <target>cáca \tmilis</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "cake\n", "cáca \tmilis\n")

    def test_newline_contained(self):
        # A newline inside the text is expected to be dropped from the output.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>cake</source>"
            " <target>cáca\nmilis</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "cake\n", "cácamilis\n")

    def test_only_whitespace(self):
        # A target consisting of a single blank is still written out.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>yellow</source>"
            " <target> </target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow\n", " \n")

    def test_only_newline(self):
        # A target consisting only of a newline yields an empty output line.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>yellow</source>"
            " <target>\n</target>"
            " </trans-unit> "
        )
        self._assert_parsed(document, "yellow\n", "\n")

    def test_multiple_valid(self):
        # Three units at different nesting depths; the one without <target>
        # ("eat") is expected to be left out of both streams.
        document = self.make_document(
            "en", "ga",
            " <trans-unit>"
            " <source>horse</source>"
            " <target>capall</target>"
            " </trans-unit>"
            " <group>"
            " <trans-unit>"
            " <source>eat</source>"
            " </trans-unit>"
            " </group>"
            " <group>"
            " <group>"
            " <trans-unit>"
            " <source>yell<inner>ow</inner></source>"
            " <target><g>buí</g></target>"
            " </trans-unit>"
            " </group>"
            " </group> "
        )
        self._assert_parsed(document, "horse\nyellow\n", "capall\nbuí\n")

    def test_no_target_language(self):
        # <file> lacks target-language: nothing is written and nothing raised.
        document = ET.fromstring(
            ' <xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">'
            ' <file original="/path/to/original" source-language="en" datatype="datatype">'
            ' <header/>'
            ' <body>'
            ' <trans-unit>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </trans-unit>'
            ' </body>'
            ' </file>'
            ' </xliff> '
        )
        self._assert_parsed(document, "", "")

    def test_no_source_language(self):
        # <file> lacks source-language: ParsingError is expected and nothing
        # may have been written to either stream.
        document = ET.fromstring(
            ' <xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">'
            ' <file original="/path/to/original" target-language="ga" datatype="datatype">'
            ' <header/>'
            ' <body>'
            ' <trans-unit>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </trans-unit>'
            ' </body>'
            ' </file>'
            ' </xliff> '
        )

        with self.assertRaises(ParsingError):
            self.parser.parse_content(document, self.output_src, self.output_tgt)

        self.assertEqual(self.output_src.getvalue(), "")
        self.assertEqual(self.output_tgt.getvalue(), "")
|
|
|
+
|
|
|
+
|
|
|
class TestXliff20Parser(unittest.TestCase):
    """Tests for ``Xliff20Parser.parse_content`` on in-memory XLIFF 2.0 documents.

    Every test builds an ElementTree document, feeds it to a parser that is
    configured for ``LANGUAGE_CODE_SRC`` -> ``LANGUAGE_CODE_TGT`` and checks
    what was written to the source and target output streams.

    Test methods are described with ``#`` comments rather than docstrings so
    that unittest's verbose report keeps showing the method names.
    """

    LANGUAGE_CODE_SRC = "en"
    LANGUAGE_CODE_TGT = "ga"

    # XLIFF 2.0 skeleton: {0} = srcLang, {1} = trgLang,
    # {2} = content placed inside <file>.
    ROOT_TEMPLATE = (
        '<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0"'
        ' srcLang="{0}" trgLang="{1}">'
        ' <file>'
        ' {2}'
        ' </file>'
        ' </xliff>'
    )

    def setUp(self):
        # Fresh output streams and parser for every test.
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()
        self.parser = Xliff20Parser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)

    def tearDown(self):
        self.output_src.close()
        self.output_tgt.close()

    def make_document(self, source_language_code, target_language_code, content):
        """Return the parsed root element of an XLIFF 2.0 document.

        The language codes fill the ``srcLang`` / ``trgLang`` attributes of
        ``ROOT_TEMPLATE`` and ``content`` is inserted verbatim inside ``<file>``.
        """
        return ET.fromstring(
            self.ROOT_TEMPLATE.format(source_language_code, target_language_code, content)
        )

    def _assert_parsed(self, document, expected_src, expected_tgt):
        """Parse ``document`` and compare both output streams with the expectations."""
        self.parser.parse_content(document, self.output_src, self.output_tgt)
        self.assertEqual(self.output_src.getvalue(), expected_src)
        self.assertEqual(self.output_tgt.getvalue(), expected_tgt)

    def test_empty_body(self):
        # A file without any unit produces no output.
        document = self.make_document("en", "ga", "")
        self._assert_parsed(document, "", "")

    def test_absent_src(self):
        # A segment without <source> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "", "")

    def test_absent_tgt(self):
        # A segment without <target> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "", "")

    def test_empty_src(self):
        # An empty <source/> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source/>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "", "")

    def test_empty_tgt(self):
        # An empty <target/> is expected to be skipped.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target/>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "", "")

    def test_single_valid(self):
        # One complete segment yields one line per output stream.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_language_variants(self):
        # Regional variants (en-GB / ga-IE) are expected to match en / ga.
        document = self.make_document(
            "en-GB", "ga-IE",
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_languages_reversed(self):
        # The document is ga -> en while the parser is en -> ga: the English
        # text is still expected on the source stream.
        document = self.make_document(
            "ga", "en",
            ' <unit id="7">'
            ' <segment>'
            ' <source>capall</source>'
            ' <target>horse</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_inner_tags_all(self):
        # The whole source text is wrapped in an inline element.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source><inner>yellow</inner></source>'
            ' <target><g>buí</g></target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_inner_tags_start(self):
        # The inline element covers the beginning of the source text.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source><inner>ye</inner>llow</source>'
            ' <target><g>buí</g></target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_inner_tags_end(self):
        # The inline element covers the end of the source text.
        # (Previously this method reused the name of the "_all" test and
        # thereby replaced it, so the "_all" case was never executed.)
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>yell<inner>ow</inner></source>'
            ' <target><g>buí</g></target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow\n", "buí\n")

    def test_single_valid_with_group(self):
        # A unit nested in one <group> is still found.
        document = self.make_document(
            "en", "ga",
            ' <group>'
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit>'
            ' </group> '
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_single_valid_multiple_group(self):
        # A unit nested in three <group> levels is still found.
        document = self.make_document(
            "en", "ga",
            ' <group>'
            ' <group>'
            ' <group>'
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit>'
            ' </group>'
            ' </group>'
            ' </group> '
        )
        self._assert_parsed(document, "horse\n", "capall\n")

    def test_extra_whitespace_leading_trailing(self):
        # Leading/trailing blanks and tabs are expected to be kept as-is.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>yellow </source>'
            ' <target> \tbuí</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow \n", " \tbuí\n")

    def test_extra_whitespace_contained(self):
        # Blanks and tabs inside the text are expected to be kept as-is.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>cake</source>'
            ' <target>cáca \tmilis</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "cake\n", "cáca \tmilis\n")

    def test_newline_contained(self):
        # A newline inside the text is expected to be dropped from the output.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>cake</source>'
            ' <target>cáca\nmilis</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "cake\n", "cácamilis\n")

    def test_only_whitespace(self):
        # A target consisting of a single blank is still written out.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>yellow</source>'
            ' <target> </target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow\n", " \n")

    def test_only_newline(self):
        # A target consisting only of a newline yields an empty output line.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>yellow</source>'
            ' <target>\n</target>'
            ' </segment>'
            ' </unit> '
        )
        self._assert_parsed(document, "yellow\n", "\n")

    def test_multiple_valid(self):
        # Three units at different nesting depths; the one without <target>
        # ("eat") is expected to be left out of both streams.
        document = self.make_document(
            "en", "ga",
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit>'
            ' <group>'
            ' <unit id="13">'
            ' <segment>'
            ' <source>eat</source>'
            ' </segment>'
            ' </unit>'
            ' </group>'
            ' <group>'
            ' <group>'
            ' <group>'
            ' <unit id="49">'
            ' <segment>'
            ' <source>yell<inner>ow</inner></source>'
            ' <target><g>buí</g></target>'
            ' </segment>'
            ' </unit>'
            ' </group>'
            ' </group>'
            ' </group> '
        )
        self._assert_parsed(document, "horse\nyellow\n", "capall\nbuí\n")

    def test_no_target_language(self):
        # <xliff> lacks trgLang: nothing is written and nothing raised.
        document = ET.fromstring(
            ' <xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en">'
            ' <file>'
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit>'
            ' </file>'
            ' </xliff> '
        )
        self._assert_parsed(document, "", "")

    def test_no_source_language(self):
        # <xliff> lacks srcLang: ParsingError is expected and nothing may
        # have been written to either stream.
        document = ET.fromstring(
            ' <xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" trgLang="ga">'
            ' <file>'
            ' <unit id="7">'
            ' <segment>'
            ' <source>horse</source>'
            ' <target>capall</target>'
            ' </segment>'
            ' </unit>'
            ' </file>'
            ' </xliff> '
        )

        with self.assertRaises(ParsingError):
            self.parser.parse_content(document, self.output_src, self.output_tgt)

        self.assertEqual(self.output_src.getvalue(), "")
        self.assertEqual(self.output_tgt.getvalue(), "")
|
|
|
+
|
|
|
+
|
|
|
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|