test_sdltm_parser.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. import io
  2. import unittest
  3. import defusedxml.ElementTree as ET
  4. from toolchain.parsers.sdltm_parser import SdltmParser
  5. class TestSdltmParser(unittest.TestCase):
  6. ROOT_TEMPLATE_VALID = \
  7. "<Segment>\
  8. <Elements>{0}</Elements>\
  9. <CultureName>{1}</CultureName>\
  10. </Segment>"
  11. ROOT_TEMPLATE_MISSING_LANGUAGE = \
  12. "<Segment>\
  13. <Elements>{0}</Elements>\
  14. </Segment>"
  15. LANGUAGE_CODE_SRC = "en"
  16. LANGUAGE_CODE_TGT = "ga"
  17. def setUp(self):
  18. self.output_src = io.StringIO()
  19. self.output_tgt = io.StringIO()
  20. self.parser = SdltmParser(self.LANGUAGE_CODE_SRC, self.LANGUAGE_CODE_TGT)
  21. def tearDown(self):
  22. self.output_src.close()
  23. self.output_tgt.close()
  24. def make_root(self, template, content, lang_code):
  25. return ET.fromstring(template.format(content, lang_code))
  26. def test_missing_language(self):
  27. first_texts = "<Text><Value>horse</Value></Text>"
  28. second_texts = "<Text><Value>capall</Value></Text>"
  29. root_first = self.make_root(self.ROOT_TEMPLATE_MISSING_LANGUAGE, first_texts, "en")
  30. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  31. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  32. self.assertEqual(self.output_src.getvalue(), "")
  33. self.assertEqual(self.output_tgt.getvalue(), "")
  34. def test_missing_value(self):
  35. first_texts = "<Text><Value>horse</Value></Text>"
  36. second_texts = "<Text></Text>"
  37. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  38. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  39. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  40. self.assertEqual(self.output_src.getvalue(), "")
  41. self.assertEqual(self.output_tgt.getvalue(), "")
  42. def test_empty_language(self):
  43. first_texts = "<Text><Value>horse</Value></Text>"
  44. second_texts = "<Text><Value>capall</Value></Text>"
  45. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  46. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "")
  47. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  48. self.assertEqual(self.output_src.getvalue(), "")
  49. self.assertEqual(self.output_tgt.getvalue(), "")
  50. def test_unknown_language(self):
  51. first_texts = "<Text><Value>horse</Value></Text>"
  52. second_texts = "<Text><Value>Pferd</Value></Text>"
  53. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  54. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "de")
  55. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  56. self.assertEqual(self.output_src.getvalue(), "")
  57. self.assertEqual(self.output_tgt.getvalue(), "")
  58. def test_no_texts(self):
  59. first_texts = "<Text><Value>horse</Value></Text>"
  60. second_texts = ""
  61. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  62. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  63. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  64. self.assertEqual(self.output_src.getvalue(), "")
  65. self.assertEqual(self.output_tgt.getvalue(), "")
  66. def test_empty_value(self):
  67. first_texts = "<Text><Value></Value></Text>"
  68. second_texts = "<Text><Value>capall</Value></Text>"
  69. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  70. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  71. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  72. self.assertEqual(self.output_src.getvalue(), "")
  73. self.assertEqual(self.output_tgt.getvalue(), "")
  74. def test_in_order(self):
  75. first_texts = "<Text><Value>horse</Value></Text>"
  76. second_texts = "<Text><Value>capall</Value></Text>"
  77. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  78. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  79. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  80. self.assertEqual(self.output_src.getvalue(), "horse\n")
  81. self.assertEqual(self.output_tgt.getvalue(), "capall\n")
  82. def test_reverse_order(self):
  83. first_texts = "<Text><Value>capall</Value></Text>"
  84. second_texts = "<Text><Value>horse</Value></Text>"
  85. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "ga")
  86. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "en")
  87. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  88. self.assertEqual(self.output_src.getvalue(), "horse\n")
  89. self.assertEqual(self.output_tgt.getvalue(), "capall\n")
  90. def test_language_variants(self):
  91. first_texts = "<Text><Value>horse</Value></Text>"
  92. second_texts = "<Text><Value>capall</Value></Text>"
  93. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en-GB")
  94. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga-IE")
  95. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  96. self.assertEqual(self.output_src.getvalue(), "horse\n")
  97. self.assertEqual(self.output_tgt.getvalue(), "capall\n")
  98. def test_extra_whitespace_leading_trailing(self):
  99. first_texts = "<Text><Value>yellow </Value></Text>"
  100. second_texts = "<Text><Value> \tbuí</Value></Text>"
  101. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  102. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  103. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  104. self.assertEqual(self.output_src.getvalue(), "yellow \n")
  105. self.assertEqual(self.output_tgt.getvalue(), " \tbuí\n")
  106. def test_extra_whitespace_contained(self):
  107. first_texts = "<Text><Value>cake</Value></Text>"
  108. second_texts = "<Text><Value>cáca \tmilis</Value></Text>"
  109. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  110. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  111. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  112. self.assertEqual(self.output_src.getvalue(), "cake\n")
  113. self.assertEqual(self.output_tgt.getvalue(), "cáca \tmilis\n")
  114. def test_newline_contained(self):
  115. first_texts = "<Text><Value>cake</Value></Text>"
  116. second_texts = "<Text><Value>cáca\nmilis</Value></Text>"
  117. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  118. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  119. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  120. self.assertEqual(self.output_src.getvalue(), "cake\n")
  121. self.assertEqual(self.output_tgt.getvalue(), "cácamilis\n")
  122. def test_only_whitespace(self):
  123. first_texts = "<Text><Value>yellow</Value></Text>"
  124. second_texts = "<Text><Value> </Value></Text>"
  125. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  126. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  127. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  128. self.assertEqual(self.output_src.getvalue(), "yellow\n")
  129. self.assertEqual(self.output_tgt.getvalue(), " \n")
  130. def test_only_newline(self):
  131. first_texts = "<Text><Value>yellow</Value></Text>"
  132. second_texts = "<Text><Value>\n</Value></Text>"
  133. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  134. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  135. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  136. self.assertEqual(self.output_src.getvalue(), "yellow\n")
  137. self.assertEqual(self.output_tgt.getvalue(), "\n")
  138. def test_multiple_texts(self):
  139. first_texts = "<Text><Value>bread </Value></Text><Text><Value>and</Value></Text><Text><Value> jam</Value></Text>"
  140. second_texts = "<Text><Value>arán</Value></Text><Text><Value> agus </Value></Text><Text><Value>subh</Value></Text>"
  141. root_first = self.make_root(self.ROOT_TEMPLATE_VALID, first_texts, "en")
  142. root_second = self.make_root(self.ROOT_TEMPLATE_VALID, second_texts, "ga")
  143. self.parser.parse_segment(root_first, root_second, self.output_src, self.output_tgt)
  144. self.assertEqual(self.output_src.getvalue(), "bread and jam\n")
  145. self.assertEqual(self.output_tgt.getvalue(), "arán agus subh\n")
  146. if __name__ == "__main__":
  147. unittest.main()