tmx_creator.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import argparse
  2. import logging
  3. import time
  4. from jinja2 import Template
  5. logger = logging.getLogger(__name__)
  6. class TmxCreator:
  7. DATE_FORMAT = "%Y%m%dT%H%M%SZ" # To meet ISO 8601 and as recommended by 1.4b
  8. REQUIRED_ARGS = [
  9. "adminlang",
  10. "datatype",
  11. "o_tmf",
  12. "segtype",
  13. "srclang",
  14. "tgtlang",
  15. ]
  16. def create(self, input_path_template, input_path_src, input_path_tgt, output_path, additional_args={}):
  17. logger.info("Creating TMX file from {0} and {1} to {2}.".format(input_path_src, input_path_tgt, output_path))
  18. logger.debug("Creating TMX file to {0} using template {1} and additional args {2}.".format(output_path, input_path_template, additional_args))
  19. with open(input_path_template) as template_file, open(input_path_src) as contents_src,\
  20. open(input_path_tgt) as contents_tgt, open(output_path, "w") as output_file:
  21. self.create_tmx(template_file.read(), contents_src, contents_tgt, output_file, additional_args)
  22. def create_tmx(self, template_input, input_src, input_tgt, output, additional_args):
  23. template = Template(template_input, autoescape=True)
  24. creation_date = self.make_creation_date()
  25. input_src = map(str.strip, input_src)
  26. input_tgt = map(str.strip, input_tgt)
  27. tus=zip(input_src, input_tgt)
  28. args = {
  29. "creation_date" : creation_date,
  30. "tus" : tus,
  31. }
  32. for k, v in additional_args.items():
  33. args[k] = v
  34. for required_arg in self.REQUIRED_ARGS:
  35. if not required_arg in args:
  36. logger.warning("Attribute {0} required by TMX 1.4b not given.".format(required_arg))
  37. output.write(template.render(args))
  38. def make_creation_date(self):
  39. return time.strftime(self.DATE_FORMAT)
  40. if __name__ == "__main__":
  41. argparser = argparse.ArgumentParser()
  42. argparser.add_argument("template_path", help="path to template file")
  43. argparser.add_argument("input_path_src", help="path to input file of source language")
  44. argparser.add_argument("input_path_tgt", help="path to input file of target language")
  45. argparser.add_argument("output_path", help="path to output TMX file")
  46. argparser.add_argument("adminlang", help="language code of administrative language")
  47. argparser.add_argument("datatype", help="type of data contained")
  48. argparser.add_argument("disclaimer", help="disclaimer text")
  49. argparser.add_argument("distributor", help="resource distributor")
  50. argparser.add_argument("licence", help="licence name")
  51. argparser.add_argument("o_tmf", help="original translation memory format")
  52. argparser.add_argument("segtype", help="segmentation type")
  53. argparser.add_argument("srclang", help="language code of source language")
  54. argparser.add_argument("tgtlang", help="language code of target language")
  55. args = argparser.parse_args()
  56. additional_args = {
  57. "adminlang" : args.adminlang,
  58. "datatype" : args.datatype,
  59. "disclaimer" : args.disclaimer,
  60. "distributor" : args.distributor,
  61. "licence" : args.licence,
  62. "o_tmf" : args.o_tmf,
  63. "segtype" : args.segtype,
  64. "srclang" : args.srclang,
  65. "tgtlang" : args.tgtlang,
  66. }
  67. TmxCreator().create(args.template_path, args.input_path_src, args.input_path_tgt, args.output_path, additional_args)