"""Render a TMX file from a Jinja2 template and two line-parallel text files."""

import argparse
import logging
import time

from jinja2 import Template

logger = logging.getLogger(__name__)

# Sentinel returned by next() once one of the paired inputs is exhausted.
_EXHAUSTED = object()

# Positional CLI arguments (name, help text) that are forwarded to the template
# as render variables, in the order they must be given on the command line.
_TEMPLATE_ARGUMENTS = (
    ("adminlang", "language code of administrative language"),
    ("datatype", "type of data contained"),
    ("disclaimer", "disclaimer text"),
    ("distributor", "resource distributor"),
    ("licence", "licence name"),
    ("o_tmf", "original translation memory format"),
    ("segtype", "segmentation type"),
    ("srclang", "language code of source language"),
    ("tgtlang", "language code of target language"),
)


class TmxCreator:
    """Build TMX output by rendering a Jinja2 template with paired segments."""

    # The trailing "Z" designates UTC, so timestamps must be built from UTC.
    DATE_FORMAT = "%Y%m%dT%H%M%SZ"  # To meet ISO 8601 and as recommended by 1.4b

    # Template variables whose absence triggers a warning in create_tmx().
    REQUIRED_ARGS = [
        "adminlang",
        "datatype",
        "o_tmf",
        "segtype",
        "srclang",
        "tgtlang",
    ]

    def create(self, input_path_template, input_path_src, input_path_tgt,
               output_path, additional_args=None):
        """Read the template and both input files and write the rendered TMX.

        Args:
            input_path_template: Path of the Jinja2 template file.
            input_path_src: Path of the source-language file, one segment per line.
            input_path_tgt: Path of the target-language file, one segment per line.
            output_path: Path of the file to write; it is opened in "w" mode,
                so an existing file is truncated.
            additional_args: Optional mapping of extra template variables.

        Raises:
            IOError/OSError: If any of the files cannot be opened.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        if additional_args is None:
            additional_args = {}

        logger.info("Creating TMX file from %s and %s to %s.",
                    input_path_src, input_path_tgt, output_path)
        logger.debug("Creating TMX file to %s using template %s and additional args %s.",
                     output_path, input_path_template, additional_args)

        # NOTE(review): all files are opened with the platform default encoding;
        # confirm that this matches the encoding the template declares.
        with open(input_path_template) as template_file, \
                open(input_path_src) as contents_src, \
                open(input_path_tgt) as contents_tgt, \
                open(output_path, "w") as output_file:
            self.create_tmx(template_file.read(), contents_src, contents_tgt,
                            output_file, additional_args)

    def create_tmx(self, template_input, input_src, input_tgt, output, additional_args):
        """Render the template text and write the result to ``output``.

        The template receives ``creation_date``, ``tus`` (a single-pass
        iterator of ``(source, target)`` pairs with surrounding whitespace
        stripped) and every entry of ``additional_args``. Entries of
        ``additional_args`` take precedence over the two built-in variables.

        Args:
            template_input: Template source text.
            input_src: Iterable of source-language segments.
            input_tgt: Iterable of target-language segments.
            output: Object with a ``write`` method receiving the rendered text.
            additional_args: Mapping of extra template variables (may be empty
                or None).
        """
        template = Template(template_input, autoescape=True)

        args = {
            "creation_date": self.make_creation_date(),
            "tus": self._pair_segments(input_src, input_tgt),
        }
        args.update(additional_args or {})

        for required_arg in self.REQUIRED_ARGS:
            if required_arg not in args:
                logger.warning("Attribute %s required by TMX 1.4b not given.", required_arg)

        output.write(template.render(args))

    def _pair_segments(self, input_src, input_tgt):
        """Yield stripped (source, target) pairs, warning on unequal lengths."""
        src_iter = iter(input_src)
        tgt_iter = iter(input_tgt)
        pair_count = 0
        while True:
            src = next(src_iter, _EXHAUSTED)
            tgt = next(tgt_iter, _EXHAUSTED)
            if src is _EXHAUSTED or tgt is _EXHAUSTED:
                # Pairing stops at the shorter input, exactly like zip(), but a
                # length mismatch means segments were dropped, so report it.
                if src is not tgt:
                    logger.warning(
                        "Source and target inputs differ in length; "
                        "only the first %d segment pairs were used.",
                        pair_count)
                return
            pair_count += 1
            yield src.strip(), tgt.strip()

    def make_creation_date(self):
        """Return the current UTC time formatted according to DATE_FORMAT."""
        # time.strftime() defaults to local time; DATE_FORMAT ends in "Z" (UTC).
        return time.strftime(self.DATE_FORMAT, time.gmtime())


def main(argv=None):
    """Parse the command line and create the TMX file.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("template_path", help="path to template file")
    argparser.add_argument("input_path_src", help="path to input file of source language")
    argparser.add_argument("input_path_tgt", help="path to input file of target language")
    argparser.add_argument("output_path", help="path to output TMX file")
    for name, help_text in _TEMPLATE_ARGUMENTS:
        argparser.add_argument(name, help=help_text)
    args = argparser.parse_args(argv)

    additional_args = {name: getattr(args, name) for name, _ in _TEMPLATE_ARGUMENTS}

    TmxCreator().create(args.template_path, args.input_path_src,
                        args.input_path_tgt, args.output_path, additional_args)


if __name__ == "__main__":
    main()