# Unit tests for DocumentAligner (toolchain.docalign.document_aligner).
- import pathlib
- import tempfile
- import unittest
- from toolchain.docalign.document_aligner import DocumentAligner
class TestDocumentAligner(unittest.TestCase):
    """Directory-driven tests for ``DocumentAligner.align``.

    Every sub-directory of ``INPUT_BASE_DIR`` is treated as one scenario.
    Within a scenario, a ``<name>.src.txt`` file and a ``<name>.tgt.txt`` file
    that share the same ``<name>`` are expected to be aligned with each other;
    any file without such a counterpart is expected to be reported as
    unmatched.
    """

    # Root of the fixture tree, resolved against the current working directory.
    INPUT_BASE_DIR = pathlib.Path("test-res/docalign").resolve()

    def setUp(self):
        """Create a fresh aligner with an empty configuration for each test."""
        self.config = {}
        self.aligner = DocumentAligner(self.config)

    def tearDown(self):
        """Nothing to release; ``run_test`` cleans up its own temp directory."""
        pass

    def run_test(self, test_dir):
        """Align the documents in *test_dir* and verify the aligner's output.

        Expected results are derived from the file names alone, then compared
        (order-insensitively) with what ``self.aligner.align`` returns. The
        artefact files written by the aligner are checked to contain one line
        per returned entry.

        Args:
            test_dir: ``pathlib.Path`` of a scenario directory holding
                ``*.src.txt`` and ``*.tgt.txt`` files.
        """
        file_list_src = list(test_dir.glob("*.src.txt"))
        file_list_tgt = list(test_dir.glob("*.tgt.txt"))

        # Set lookups keep the pairing linear instead of scanning lists.
        available_tgt = set(file_list_tgt)
        expected_matches = []
        expected_unmatched_src = []
        for file_src in file_list_src:
            # Swap only the trailing "src.txt" of the file name. Replacing the
            # substring in the whole path string would also rewrite any
            # directory (or earlier name part) that happens to contain it.
            name_prefix = file_src.name[: -len("src.txt")]
            file_tgt = file_src.with_name(name_prefix + "tgt.txt")
            if file_tgt in available_tgt:
                expected_matches.append((file_src, file_tgt))
            else:
                expected_unmatched_src.append(file_src)

        matched_tgt = {tgt for _, tgt in expected_matches}
        expected_unmatched_tgt = [
            file_tgt for file_tgt in file_list_tgt if file_tgt not in matched_tgt
        ]

        with tempfile.TemporaryDirectory() as docalign_artefact_dir:
            docalign_artefact_dir = pathlib.Path(docalign_artefact_dir)
            matches, unmatched_src, unmatched_tgt = self.aligner.align(
                file_list_src, file_list_tgt, docalign_artefact_dir
            )

            self.assertEqual(set(matches), set(expected_matches))
            self.assertEqual(set(unmatched_src), set(expected_unmatched_src))
            self.assertEqual(set(unmatched_tgt), set(expected_unmatched_tgt))

            # The artefact files live in the temporary directory, so they must
            # be inspected before the ``with`` block removes it.
            self.check_length_matches(
                docalign_artefact_dir.joinpath("alignments.txt"), matches
            )
            self.check_length_matches(
                docalign_artefact_dir.joinpath("unmatched_src.txt"), unmatched_src
            )
            self.check_length_matches(
                docalign_artefact_dir.joinpath("unmatched_tgt.txt"), unmatched_tgt
            )

    def check_length_matches(self, filepath, document_list):
        """Assert that *filepath* has exactly one line per item in *document_list*.

        Args:
            filepath: Path of the text file to count lines in.
            document_list: Sized collection whose length is the expected count.

        Raises:
            AssertionError: If the line count and the collection size differ.
        """
        with open(filepath) as f:
            linecount = sum(1 for _ in f)
        # A unittest assertion (rather than a bare ``assert``) still runs under
        # ``python -O`` and reports both values on failure.
        self.assertEqual(
            linecount,
            len(document_list),
            "unexpected number of lines in {0}".format(filepath),
        )

    def test_document_aligner(self):
        """Run ``run_test`` as a sub-test for every scenario directory."""
        test_count = 0
        # Sorted so that sub-tests run in the same order on every platform.
        for test_dir in sorted(self.INPUT_BASE_DIR.iterdir()):
            if not test_dir.is_dir():
                continue
            test_count += 1
            with self.subTest(msg=test_dir.name):
                self.run_test(test_dir)
        print("\nTests run for document aligner: {0}".format(test_count))
# Allow this module to be executed directly; unittest discovers and runs
# the test cases defined above.
if __name__ == "__main__":
    unittest.main()
|