create_index.py 817 B

12345678910111213141516171819202122232425262728293031
  1. #!/usr/bin/env python3
  2. import argparse
  3. import pyterrier as pt
  4. def parse_arguments():
  5. parser = argparse.ArgumentParser()
  6. parser.add_argument('xml_list')
  7. parser.add_argument('index_dir')
  8. return parser.parse_args()
  9. if __name__=="__main__":
  10. args = parse_arguments()
  11. pt.init()
  12. xml_list = []
  13. with open(args.xml_list) as ifile:
  14. for line in ifile:
  15. line = line.rstrip('\n')
  16. xml_list.append(line)
  17. indexer = pt.TRECCollectionIndexer('./'+args.index_dir)
  18. index_properies = {
  19. "block.indexing":"true",
  20. "invertedfile.lexiconscanner":"pointers",
  21. 'indexer.meta.forward.keys': 'docno',
  22. 'indexer.meta.forward.keylens': '50'}
  23. indexer.setProperties(**index_properies)
  24. indexref = indexer.index(xml_list)