12345678910111213141516171819202122232425262728293031 |
- #!/usr/bin/env python3
- import argparse
- import pyterrier as pt
def parse_arguments(argv=None):
    """Parse the command-line arguments of the indexing script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what callers got before this
            parameter existed.

    Returns:
        An ``argparse.Namespace`` with two string attributes:
        ``xml_list`` and ``index_dir``.
    """
    parser = argparse.ArgumentParser(
        description="Index the collection files named in a list file.",
    )
    parser.add_argument(
        "xml_list",
        help="text file naming one collection file to index per line",
    )
    parser.add_argument(
        "index_dir",
        help="directory handed to the indexer as the index location",
    )
    return parser.parse_args(argv)
def read_xml_list(list_path):
    """Return the file paths listed in *list_path*, one per line.

    Only the trailing newline is removed from each line, so any other
    whitespace that is part of a path is preserved. Empty lines are
    skipped because an empty string is not a usable file path.
    """
    with open(list_path) as list_file:
        stripped = (line.rstrip('\n') for line in list_file)
        return [path for path in stripped if path]


def main():
    """Index the files named in the list file into the requested directory."""
    import os  # local import: only this entry point needs it

    args = parse_arguments()
    pt.init()

    xml_list = read_xml_list(args.xml_list)

    # Resolve the index directory against the current working directory.
    # Prefixing './' (the previous approach) gives the same location for a
    # relative path but silently turns an absolute path such as '/data/idx'
    # into the relative './/data/idx'.
    index_path = os.path.abspath(args.index_dir)
    indexer = pt.TRECCollectionIndexer(index_path)

    # Properties applied to the indexer before indexing starts.
    # NOTE(review): the two 'indexer.meta.forward.*' entries presumably
    # store 'docno' as a metadata key with a maximum length of 50 --
    # confirm against the Terrier property documentation.
    index_properties = {
        "block.indexing": "true",
        "invertedfile.lexiconscanner": "pointers",
        'indexer.meta.forward.keys': 'docno',
        'indexer.meta.forward.keylens': '50',
    }
    indexer.setProperties(**index_properties)
    indexer.index(xml_list)


if __name__ == "__main__":
    main()
|