1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import io
- import os
- import pathlib
- import subprocess
- import tempfile
- import unittest
- from toolchain.sentalign.sentalign_error import SentalignError
- from toolchain.sentalign.sentence_aligner import SentenceAligner
class TestSentenceAligner(unittest.TestCase):
    """Exercise ``SentenceAligner.align_files`` against the fixture files.

    Every test feeds a source/target pair of files from ``INPUT_BASE_PATH``
    to the aligner and inspects what was written into two in-memory text
    buffers. The aligner executable location is read from the
    ``HUNALIGNPATH`` environment variable, so that variable must be set.
    """

    # Fixture directory, resolved against the current working directory.
    INPUT_BASE_PATH = pathlib.Path("test-res/sentalign").resolve()

    def setUp(self):
        """Create a fresh config, aligner and pair of output buffers."""
        # NOTE(review): the timeout is a string here while test_timeout
        # assigns an int; presumably the aligner accepts both -- confirm.
        self.config = {
            "dictionary" : self.INPUT_BASE_PATH / "dictionary.txt",
            "subprocess_timeout" : "20",
        }
        self.aligner = SentenceAligner(os.environ["HUNALIGNPATH"])
        self.output_src = io.StringIO()
        self.output_tgt = io.StringIO()

    def tearDown(self):
        """Release both in-memory output buffers."""
        for buffer in (self.output_src, self.output_tgt):
            buffer.close()

    def run_test(self, input_file_src, input_file_tgt, output_expected_src, output_expected_tgt):
        """Align one fixture pair and compare both outputs with expectations.

        ``input_file_src`` / ``input_file_tgt`` are file names relative to
        ``INPUT_BASE_PATH``; ``output_expected_src`` / ``output_expected_tgt``
        are the exact texts the two output buffers must hold afterwards.
        """
        src_path = self.INPUT_BASE_PATH / input_file_src
        tgt_path = self.INPUT_BASE_PATH / input_file_tgt

        # Artefacts go to a throwaway directory that is removed on exit.
        with tempfile.TemporaryDirectory() as artefact_dir:
            self.aligner.align_files(
                src_path,
                tgt_path,
                self.output_src,
                self.output_tgt,
                artefact_dir,
                self.config,
            )

        self.assertEqual(self.output_src.getvalue(), output_expected_src)
        self.assertEqual(self.output_tgt.getvalue(), output_expected_tgt)

    def _assert_unaligned_run_fails(self):
        """Run the unaligned fixture, expecting SentalignError and no output."""
        with self.assertRaises(SentalignError):
            self.run_test(
                "unaligned.src.txt",
                "unaligned.tgt.txt",
                "aniseed\nbasil cinnamon dill elderflower\nfennel\nginger horseradish\n\n",
                "ánísééd\nbásíl cínnámón díll éldérflówér\nfénnél\ngíngér hórsérádísh\n\n",
            )
        # Checked after the context manager exits so that these actually run.
        self.assertEqual(self.output_src.getvalue(), "")
        self.assertEqual(self.output_tgt.getvalue(), "")

    def test_empty(self):
        """Empty inputs produce a single newline on each side."""
        self.run_test("empty.src.txt", "empty.tgt.txt", "\n", "\n")

    def test_already_aligned(self):
        """The already-aligned fixture must yield the expected line pairs."""
        expected_src = "aniseed\nbasil\ncinnamon dill\nelderflower fennel ginger\nhorseradish\n\n"
        expected_tgt = "ánísééd\nbásíl\ncínnámón díll\néldérflówér fénnél gíngér\nhórsérádísh\n\n"
        self.run_test("aligned.src.txt", "aligned.tgt.txt", expected_src, expected_tgt)

    def test_unaligned(self):
        """The unaligned fixture must yield the expected merged lines."""
        expected_src = "aniseed\nbasil cinnamon dill elderflower\nfennel\nginger horseradish\n\n"
        expected_tgt = "ánísééd\nbásíl cínnámón díll éldérflówér\nfénnél\ngíngér hórsérádísh\n\n"
        self.run_test("unaligned.src.txt", "unaligned.tgt.txt", expected_src, expected_tgt)

    def test_timeout(self):
        """A zero subprocess timeout must raise SentalignError, writing nothing."""
        self.config["subprocess_timeout"] = 0
        self._assert_unaligned_run_fails()

    def test_subprocess_error(self):
        """A nonexistent dictionary path must raise SentalignError, writing nothing."""
        self.config["dictionary"] = self.INPUT_BASE_PATH / "nonexistent-dictionary.txt"
        self._assert_unaligned_run_fails()
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|