123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- #! /usr/bin/python
- ## This script gets parser accuracy scores for a set of sentences, along
- ## with their individual translation scores by various systems (e.g. TER
- ## scores) and outputs statistics collected from the input.
- ## These statistics include:
- ## - the number of sentences on which each system performed the best in
- ## the ranges of specified intervals of parser accuracy
- ## - the percentage share of each system in each range
- ## - the average score of each system in each range
- ##
- ## Systems are identified automatically from the input file, by considering
- ## the first column being the parser accuracy and the following N, where
- ## N is supplied as argument, being scores of each system for each sentence.
- ## The first row is supposed to be the title row containing system names.
- ##
- ## Current version: 1.0
- ##
- from collections import namedtuple
- from operator import itemgetter
- from decimal import *
- from random import choice
- import sys, optparse
- ##----------------------------------------------------------------------
- ## Returns the index of best score in the provided list of scores.
- ## For tie-breaking, when more than one score is equally the best, it
- ## optionally chooses the first best score in the ascending order of the
- ## score list or selects one of them randomly
def getBestScoreIdx(pScores, pTieBreak):
    """Return the index of the best (i.e. lowest) score in pScores.

    pScores   -- non-empty sequence of mutually comparable scores
    pTieBreak -- tie-breaking method used when several scores share the
                 best value: 'r' picks one of the tied indexes at random,
                 any other value (e.g. 'a') picks the first tied index in
                 list order

    Raises ValueError when pScores is empty.
    """
    if not pScores:
        raise ValueError("pScores must contain at least one score")

    vBestScore = min(pScores)

    # all positions are collected, since a random tie-break needs to know
    # every candidate and not only the first one
    vlBestScoreIdxs = [vScoreIdx for vScoreIdx, vScore in enumerate(pScores)
                       if vScore == vBestScore]

    # the random generator is only consulted for a real tie
    if pTieBreak == 'r' and len(vlBestScoreIdxs) > 1:
        return choice(vlBestScoreIdxs)

    # a single best score, or ascending-order tie-breaking
    return vlBestScoreIdxs[0]
- ##----------------------------------------------------------------------
- ## Writes the sorted input data into file
def writeSortedData(pSortedFileName, pData, pSystems, pDelim):
    """Write the (already sorted) score records into a file.

    pSortedFileName -- path of the file to create (overwritten if it exists)
    pData           -- iterable of records laid out as (sentence number,
                       parser accuracy, score of system 1, ..., best system)
    pSystems        -- system names, used for the title row
    pDelim          -- string placed between the columns of every row

    The title row is "PA", "sentence", the system names and "best system".
    Each record is written with its parser accuracy first, then its
    sentence number, then the remaining fields in record order.

    Exits the program with a message when the file can't be created.
    """
    try:
        vfSortFile = open(pSortedFileName, 'w')
    except IOError:
        sys.exit('Can\'t create output sorted file: ' + pSortedFileName)

    # the with-block closes the file even if a write fails half way
    with vfSortFile:
        # writing header
        vfSortFile.write(pDelim.join(["PA",
                                      "sentence",
                                      pDelim.join(pSystems),
                                      "best system"]) + '\n')

        # writing data; the fields are numbers (int / Decimal), so they
        # have to be converted to text before being joined
        for vRecord in pData:
            vlFields = [vRecord[1], vRecord[0]] + list(vRecord[2:])
            vfSortFile.write(pDelim.join(str(vField) for vField in vlFields) +
                             '\n')
- ##======================================================================
- ## main
def _loadScores(pfScores, pDelim, pSysNum, pTieBreak):
    """Read system names and per-sentence records from an open scores file.

    The first row is the title row; its columns 1..pSysNum are taken as the
    system names.  Every following non-blank row yields one record
    (sentence number, parser accuracy, score of system 1, ..., score of
    system pSysNum, index of the best system).  Sentence numbers start at 1
    and count the non-blank data rows.

    Returns the pair (list of system names, list of records).
    Exits the program with a message when a row has too few columns or
    holds a value that is not a number.
    """
    # the line terminator is stripped so that it can't end up inside the
    # last system name when the title row has exactly pSysNum + 1 columns
    vlSystems = pfScores.readline().rstrip('\r\n').split(pDelim)[1:pSysNum + 1]
    if len(vlSystems) < pSysNum:
        sys.exit('The title row of the scores file has fewer than ' +
                 str(pSysNum) + ' system names')

    vlData = []
    # data rows start at the second line of the file
    for vLineNo, vLine in enumerate(pfScores, 2):
        if not vLine.strip():
            continue  # tolerating blank lines (e.g. at the end of the file)

        vlLine = vLine.rstrip('\r\n').split(pDelim)
        if len(vlLine) < pSysNum + 1:
            sys.exit('Line ' + str(vLineNo) + ' of the scores file has fewer than ' +
                     str(pSysNum + 1) + ' columns')
        try:
            vPA = Decimal(vlLine[0])
            vlSysScores = [Decimal(x) for x in vlLine[1:pSysNum + 1]]
        except InvalidOperation:
            sys.exit('Line ' + str(vLineNo) +
                     ' of the scores file contains a value which is not a number')

        vBestSysIdx = getBestScoreIdx(vlSysScores, pTieBreak)
        vlData.append((len(vlData) + 1, vPA) +
                      tuple(vlSysScores) +
                      (vBestSysIdx,))

    return vlSystems, vlData


def _computeRangeStats(pData, pSysNum, pPAInterval, pEmptyRange):
    """Compute the per-range statistics of the systems.

    pData       -- non-empty list of records as built by _loadScores, sorted
                   by parser accuracy (field 1) in ascending order
    pSysNum     -- number of systems (at least 1)
    pPAInterval -- width of each parser accuracy range (at least 1)
    pEmptyRange -- when true, ranges containing no data are reported too

    Returns a list of PAIvalSysStat tuples (rLower, rUpper, sysCounts,
    sysPercents, avgScores), one per reported range.
    """
    PAIvalSysStat = namedtuple('PAIvalSysStat', 'rLower, rUpper, sysCounts, sysPercents, avgScores')
    vlPAIvalSysStat = []

    ## calculating the upper bound of the last parser accuracy range based
    ## on the interval and the maximum accuracy in data; it is kept an int
    ## as range() doesn't accept a Decimal
    vMaxPA = pData[-1][1]
    vLRUpper = int(vMaxPA / pPAInterval) * pPAInterval
    if vMaxPA % pPAInterval != 0:
        vLRUpper += pPAInterval
    # at least one range is needed, even if the maximum accuracy is 0
    vLRUpper = max(vLRUpper, pPAInterval)

    vDataIdx = 0
    for vRLower in range(0, vLRUpper, pPAInterval):
        vRUpper = vRLower + pPAInterval

        ## The data are sorted, so everything not consumed by the previous
        ## ranges is above their upper bounds and only the upper bound of
        ## this range has to be checked.  As a result the first range also
        ## includes its lower bound (i.e. [0, 5], (5, 10], ...) and anything
        ## below it.
        vRangeStartIdx = vDataIdx
        while vDataIdx < len(pData) and pData[vDataIdx][1] <= vRUpper:
            vDataIdx += 1
        vlRangeData = pData[vRangeStartIdx:vDataIdx]
        vRangeItemsCntr = len(vlRangeData)

        ## counting the sentences on which each system was the best (the
        ## index of the best system is the last field of a record)
        vlRangeBSysCounts = [0] * pSysNum
        for vDatum in vlRangeData:
            vlRangeBSysCounts[vDatum[-1]] += 1

        if vRangeItemsCntr == 0:
            vlRangeAvgScores = [0] * pSysNum
            vlRangePercents = [0] * pSysNum
        else:
            ## average score of each system (scores start at field 2)
            vlRangeAvgScores = [
                (sum((vDatum[vSysIdx + 2] for vDatum in vlRangeData), Decimal(0)) /
                 vRangeItemsCntr).quantize(Decimal('0.01'))
                for vSysIdx in range(pSysNum)]

            ## share of each system in the best counts of the range.
            ## Note that the last one is not directly calculated.  Instead
            ## it's calculated by subtracting the percentages so far from
            ## 100, so that the rounded shares always add up to 100.
            vlRangePercents = [
                (Decimal(vCount) * 100 / vRangeItemsCntr).quantize(Decimal('0.1'))
                for vCount in vlRangeBSysCounts[:-1]]
            vlRangePercents.append(
                (Decimal(100) - sum(vlRangePercents)).quantize(Decimal('0.1')))

        ## inserting the statistics for the range
        ## optionally, empty ranges (range with no data) will also be inserted
        if pEmptyRange or vRangeItemsCntr > 0:
            vlPAIvalSysStat.append(PAIvalSysStat(vRLower, vRUpper, vlRangeBSysCounts,
                                                 vlRangePercents, vlRangeAvgScores))

    return vlPAIvalSysStat


def main(argv=None):
    """Parse the command line, load the scores file and print statistics.

    argv -- full argument list including the program name at index 0;
            defaults to sys.argv

    Prints, for each parser accuracy range, the number of sentences on
    which each system was the best, the percentage share of each system
    and the average score of each system.  The delimiter of the input file
    is also used to format the output.  When requested, the input sorted
    by parser accuracy is written into a file as well.

    Exits the program with a message on invalid arguments or unusable
    input.
    """
    if argv is None:
        argv = sys.argv

    parser = optparse.OptionParser(usage="%prog <SCORES FILE> <NUMBER OF SYSTEMS> <PARSER ACCURACY INTERVAL> [options]" +
                                   "\nThis script extracts statistics for parser-accuracy-based system combination.", version="%prog 1.0")
    parser.add_option("-d", "--delimiter", help="the delimiter string separating score columns", metavar="DELIMITER", dest="delim", default=" ", action="store")
    parser.add_option("-e", "--emptyrange", help="include empty ranges (ranges with no data)", metavar="EMPTY RANGES", dest="emptyRange", action="store_true")
    parser.add_option("-t", "--tiebreak", help="the tie breaking method (a: ascending system order, r: random)", metavar="TIEBREAK", dest="tieBreak", default="a", action="store")
    parser.add_option("-s", "--sortfile", help="the file name to output the input sorted by parser accuracy", metavar="SORTFILE", dest="sortedFileName", action="store")
    # the supplied argument list is parsed (without the program name), so
    # that callers passing their own argv are honoured
    (opts, posArgs) = parser.parse_args(argv[1:])

    # checking arguments (parser.error() terminates the program)
    if len(posArgs) < 3:
        parser.error("At least 3 arguments are required!")

    if not (posArgs[1].isdigit() and posArgs[2].isdigit()):
        parser.error("The second and third arguments should be a number!")

    vSysNum = int(posArgs[1])
    vPAInterval = int(posArgs[2])
    # a zero interval can't be used to build ranges, and statistics need
    # at least one system
    if vSysNum < 1 or vPAInterval < 1:
        parser.error("The second and third arguments should be greater than zero!")

    # opening scores file
    vScoresFileName = posArgs[0]
    try:
        vfScores = open(vScoresFileName, 'r')
    except IOError:
        sys.exit('Can\'t open scores file: ' + vScoresFileName)

    ## loading system names and data; the with-block closes the file even
    ## if loading terminates the program
    with vfScores:
        vlSystems, vlData = _loadScores(vfScores, opts.delim, vSysNum, opts.tieBreak)

    if not vlData:
        sys.exit('No scores found in scores file: ' + vScoresFileName)

    # sorting data based on pa
    vlData.sort(key=itemgetter(1))

    # writing the sorted data into a file if requested
    if opts.sortedFileName is not None:
        writeSortedData(opts.sortedFileName, vlData, vlSystems, opts.delim)

    # computing interval statistics
    vlPAIvalSysStat = _computeRangeStats(vlData, vSysNum, vPAInterval, opts.emptyRange)

    ## printing the statistics
    ## The same delimiter in input file is used to format output.

    # printing header
    print(opts.delim.join(["range"] +
                          vlSystems +
                          ['%' + vSysName for vSysName in vlSystems] +
                          ['avg(' + vSysName + ')' for vSysName in vlSystems]))

    # printing data
    for vStat in vlPAIvalSysStat:
        vlFields = [str(vStat.rLower) + '-' + str(vStat.rUpper)]
        vlFields.extend(str(x) for x in vStat.sysCounts)
        vlFields.extend(str(x) for x in vStat.sysPercents)
        vlFields.extend(str(x) for x in vStat.avgScores)
        print(opts.delim.join(vlFields))
- ##======================================================================
- ## calling main
if __name__ == "__main__":
    # running the script only when executed directly (not when imported)
    # and handing the value returned by main() to the interpreter as the
    # exit status
    vExitStatus = main()
    sys.exit(vExitStatus)
|