12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- #!/usr/bin/env python3
- import argparse
- import pandas as pd
- import pyterrier as pt
# Template for the identifiers written to the output.  The writer functions
# fill the two placeholders with the episode part of a docno and the segment
# start time, in that order.
id_template = "spotify:episode:{}_{}"
def parse_arguments():
    """Parse this script's command-line arguments.

    Three positional arguments are required, in this order:
    ``data_properties``, ``topics`` and ``run_id``.  The optional
    ``--format`` flag accepts ``trec`` (the default) or ``submission``.

    Returns:
        argparse.Namespace: the values parsed from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    for positional in ('data_properties', 'topics'):
        cli.add_argument(positional)
    cli.add_argument('run_id', type=str)
    cli.add_argument(
        '--format',
        default='trec',
        choices=['trec', 'submission'],
    )
    return cli.parse_args()
def write_submission(df, run_id=None):
    """Print the ranking in ``df`` to stdout in the submission layout.

    A header line ``RUNID QUERYID RANK SCORE EPISODEID OFFSET`` is printed
    first, followed by one space-separated line per row of ``df``.

    Args:
        df: pandas DataFrame whose first four columns are, in order, the
            query id, the rank, the docno and the score.  Each docno must
            have the form ``<episode>_<timestamp>``, where the part of
            ``<timestamp>`` before the first ``-`` parses as a float (the
            segment start time).
        run_id: Value for the RUNID column.  When omitted, the module-level
            ``args.run_id`` is used, which is how the script entry point
            calls this function.

    Raises:
        ValueError: if a docno does not contain exactly one ``_`` or its
            start time is not a valid float.
        NameError: if ``run_id`` is omitted and no module-level ``args``
            exists (e.g. the module was imported rather than run).
    """
    print('RUNID QUERYID RANK SCORE EPISODEID OFFSET')
    for row in df.itertuples():
        # Resolve the run tag per row so an explicit argument wins, while the
        # original fallback to the script's global ``args`` keeps working.
        tag = args.run_id if run_id is None else run_id
        # row[0] is the DataFrame index; the data columns start at row[1].
        qid, rank, docno, score = row[1], row[2], row[3], row[4]
        episode, timestamp = docno.split('_')
        # Keep only the start of the "<start>-<end>" span, normalised
        # through float() so e.g. "60" is written as "60.0".
        start_time = str(float(timestamp.split('-')[0]))
        episode_id = id_template.format(episode, start_time)
        print('{} {} {} {} {} {}'.format(
            tag, qid, rank, score, episode_id, start_time))
def write_trec(df, run_id=None):
    """Print the ranking in ``df`` to stdout in the TREC-style layout.

    A header line ``query-id Q0 document-id rank score STANDARD`` is printed
    first, followed by one space-separated line per row of ``df`` with the
    fields: query id, the literal ``0``, document id, rank, score, run tag.

    Args:
        df: pandas DataFrame whose first four columns are, in order, the
            query id, the rank, the docno and the score.  Each docno must
            have the form ``<episode>_<timestamp>``, where the part of
            ``<timestamp>`` before the first ``-`` parses as a float (the
            segment start time).
        run_id: Run tag written in the last column.  When omitted, the
            module-level ``args.run_id`` is used, which is how the script
            entry point calls this function.

    Raises:
        ValueError: if a docno does not contain exactly one ``_`` or its
            start time is not a valid float.
        NameError: if ``run_id`` is omitted and no module-level ``args``
            exists (e.g. the module was imported rather than run).
    """
    print('query-id Q0 document-id rank score STANDARD')
    for row in df.itertuples():
        # Resolve the run tag per row so an explicit argument wins, while the
        # original fallback to the script's global ``args`` keeps working.
        tag = args.run_id if run_id is None else run_id
        # row[0] is the DataFrame index; the data columns start at row[1].
        qid, rank, docno, score = row[1], row[2], row[3], row[4]
        episode, timestamp = docno.split('_')
        # Keep only the start of the "<start>-<end>" span, normalised
        # through float() so e.g. "60" is written as "60.0".
        start_time = str(float(timestamp.split('-')[0]))
        document_id = id_template.format(episode, start_time)
        # NOTE(review): the second column is written as '0' although the
        # header calls it Q0 -- left unchanged; confirm what consumers expect.
        print('{} {} {} {} {} {}'.format(
            qid, '0', document_id, rank, score, tag))
if __name__ == "__main__":
    # ``args`` must remain a module-level name: the write_* functions look
    # up ``args.run_id`` from the module globals.
    args = parse_arguments()
    pt.init()

    # Open the index whose location is given relative to the current
    # working directory.
    index = pt.IndexFactory.of(pt.IndexRef.of('./' + args.data_properties))

    # Run every topic from the TREC XML topics file against the index and
    # keep only the columns the writers consume, in the order they expect.
    topics = pt.Utils.parse_trecxml_topics_file(args.topics)
    results = pt.BatchRetrieve(index).transform(topics)
    df = pd.DataFrame(results, columns=['qid', 'rank', 'docno', 'score'])

    # Pick the writer that matches the requested output format.
    writers = {'trec': write_trec, 'submission': write_submission}
    writer = writers.get(args.format)
    if writer is not None:
        writer(df)
|