|
@@ -0,0 +1,58 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+
|
|
|
+import argparse
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+import pyterrier as pt
|
|
|
+
|
|
|
+id_template = 'spotify:episode:{}_{}'
|
|
|
+
|
|
|
def parse_arguments():
    """Parse the command-line arguments of this script.

    Three positional arguments are expected, in order: ``data_properties``,
    ``topics`` and ``run_id``. The optional ``--format`` flag accepts
    ``trec`` or ``submission`` and defaults to ``trec``.

    Returns:
        argparse.Namespace: the parsed values, read from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()

    # Untyped positionals, registered in the order they must be given.
    for positional in ('data_properties', 'topics'):
        cli.add_argument(positional)
    cli.add_argument('run_id', type=str)

    cli.add_argument(
        '--format',
        default='trec',
        choices=['trec', 'submission'],
    )
    return cli.parse_args()
|
|
|
+
|
|
|
def write_submission(df, run_id=None):
    """Print ranked results to stdout in the six-column submission layout.

    The header ``RUNID QUERYID RANK SCORE EPISODEID OFFSET`` is printed
    first, followed by one space-separated line per row of *df*.

    Args:
        df: pandas DataFrame whose first four columns are, in this order,
            the query id, the rank, the document number and the score.
            Each document number must contain an underscore: the text
            before the last underscore is taken as the episode, and the
            text after it, up to the first ``-``, is parsed as a float
            start offset.
        run_id: value for the RUNID column. When omitted, the module-level
            ``args.run_id`` is used (``args`` is set when the file is run
            as a script).

    Raises:
        ValueError: if a document number has no underscore or its start
            offset is not a valid float.
        NameError: if *run_id* is omitted and no global ``args`` exists.
    """
    if run_id is None:
        run_id = args.run_id

    print('RUNID QUERYID RANK SCORE EPISODEID OFFSET')
    # index=False/name=None yields plain tuples without the index column.
    for row in df.itertuples(index=False, name=None):
        qid, rank, docno, score = row[:4]

        # Split on the LAST underscore so an underscore inside the episode
        # part does not break the two-way unpacking.
        episode, timestamp = docno.rsplit('_', 1)
        start_time = str(float(timestamp.split('-')[0]))
        episode_id = id_template.format(episode, start_time)
        print('{} {} {} {} {} {}'.format(
            run_id, qid, rank, score, episode_id, start_time))
|
|
|
+
|
|
|
def write_trec(df, run_id=None):
    """Print ranked results to stdout in the six-column TREC-style layout.

    The header ``query-id Q0 document-id rank score STANDARD`` is printed
    first, followed by one space-separated line per row of *df* holding
    the query id, a literal ``0``, the formatted document id, the rank,
    the score and the run id.

    Args:
        df: pandas DataFrame whose first four columns are, in this order,
            the query id, the rank, the document number and the score.
            Each document number must contain an underscore: the text
            before the last underscore is taken as the episode, and the
            text after it, up to the first ``-``, is parsed as a float
            start offset.
        run_id: value for the last column. When omitted, the module-level
            ``args.run_id`` is used (``args`` is set when the file is run
            as a script).

    Raises:
        ValueError: if a document number has no underscore or its start
            offset is not a valid float.
        NameError: if *run_id* is omitted and no global ``args`` exists.
    """
    if run_id is None:
        run_id = args.run_id

    print('query-id Q0 document-id rank score STANDARD')
    # index=False/name=None yields plain tuples without the index column.
    for row in df.itertuples(index=False, name=None):
        qid, rank, docno, score = row[:4]

        # Split on the LAST underscore so an underscore inside the episode
        # part does not break the two-way unpacking.
        episode, timestamp = docno.rsplit('_', 1)
        start_time = str(float(timestamp.split('-')[0]))
        episode_id = id_template.format(episode, start_time)
        print('{} {} {} {} {} {}'.format(
            qid, '0', episode_id, rank, score, run_id))
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: load the index, run every topic through a
    # default BatchRetrieve, and print the ranking in the chosen format.
    import os

    # Must stay a module-level name: the writer functions read args.run_id.
    args = parse_arguments()

    pt.init()

    # A relative path keeps its './' prefix; an absolute path is used as
    # given (prefixing it with './' would point somewhere under the CWD).
    if os.path.isabs(args.data_properties):
        index_dir = args.data_properties
    else:
        index_dir = './' + args.data_properties
    index_ref = pt.IndexRef.of(index_dir)
    index = pt.IndexFactory.of(index_ref)

    topics = pt.Utils.parse_trecxml_topics_file(args.topics)
    retr = pt.BatchRetrieve(index)
    res = retr.transform(topics)

    # Keep only the four columns the writers consume, in the positional
    # order they expect.
    df = pd.DataFrame(res, columns=['qid', 'rank', 'docno', 'score'])
    if args.format == 'trec':
        write_trec(df)
    elif args.format == 'submission':
        write_submission(df)
|