extract-pap-sc-stats.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. #! /usr/bin/python
  2. ## This script gets parser accuracy scores for a set of sentences, along
  3. ## with their individual translation scores by various systems (e.g. TER
  4. ## scores) and outputs statistics collected from the input.
  5. ## These statistics include:
  6. ## - the number of sentences on which each system performed the best in
  7. ## the ranges of specified intervals of parser accuracy
  8. ## - the percentage share of each system in each range
  9. ## - the average score of each system in each range
  10. ##
  11. ## Systems are identified automatically from the input file, by considering
  12. ## the first column being the parser accuracy and the following N, where
  13. ## N is supplied as argument, being scores of each system for each sentence.
  14. ## The first row is supposed to be the title row containing system names.
  15. ##
  16. ## Current version: 1.0
  17. ##
  18. from collections import namedtuple
  19. from operator import itemgetter
  20. from decimal import *
  21. from random import choice
  22. import sys, optparse
  23. ##----------------------------------------------------------------------
  24. ## Returns the index of best score in the provided list of scores.
  25. ## For tie-breaking, when more than one score is equally the best, it
  26. ## either chooses the first best score in the ascending order of the score
  27. ## list or selects one of the best scores randomly.
  28. def getBestScoreIdx(pScores, pTieBreak):
  29. vlBestScoreIdxs = []
  30. vBestScore = min(pScores)
  31. # we need to check all scores, since we need to know all best scores
  32. vScoreIdx = 0
  33. for vScore in pScores:
  34. if vScore == vBestScore:
  35. vlBestScoreIdxs.append(vScoreIdx)
  36. vScoreIdx += 1
  37. # now tiebreaking if needed
  38. if len(vlBestScoreIdxs) > 1:
  39. if pTieBreak == 'r':
  40. return choice(vlBestScoreIdxs)
  41. else:
  42. return vlBestScoreIdxs[0] ## choose the first in ascending order
  43. ## (equal to pTieBreak == 'a')
  44. else:
  45. return vlBestScoreIdxs[0]
  46. ##----------------------------------------------------------------------
  47. ## Writes the sorted input data into file
  48. def writeSortedData(pSortedFileName, pData, pSystems, pDelim):
  49. try:
  50. vfSortFile = open(pSortedFileName, 'w')
  51. except IOError:
  52. sys.exit('Can\'t create output sorted file: ' + pSortedFileName)
  53. # writing header
  54. vfSortFile.write("PA" + pDelim +
  55. "sentence" + pDelim +
  56. opts.delim.join(pSystems) + pDelim +
  57. "best system\n")
  58. # writing data
  59. for data in pData:
  60. vfSortFile.write(data[1] + pDelim +
  61. data[0] + pDelim +
  62. ','.join(data[2:]) +
  63. '\n')
  64. vfSortFile.close()
  65. ##======================================================================
  66. ## main
  67. def main(argv=None):
  68. if argv is None:
  69. argv = sys.argv
  70. parser = optparse.OptionParser(usage="%prog <SCORES FILE> <NUMBER OF SYSTEMS> <PARSER ACCURACY INTERVAL> [options]" +
  71. "\nThis script extracts statistics for parser-accuracy-based system combination.", version="%prog 1.0")
  72. parser.add_option("-d", "--delimiter", help="the delimiter string separating score columns", metavar="DELIMITER", dest="delim", default=" ", action="store")
  73. parser.add_option("-e", "--emptyrange", help="include empty ranges (ranges with no data)", metavar="EMPTY RANGES", dest="emptyRange", action="store_true")
  74. parser.add_option("-t", "--tiebreak", help="the tie breaking method (a: ascending system order, r: random)", metavar="TIEBREAK", dest="tieBreak", default="a", action="store")
  75. parser.add_option("-s", "--sortfile", help="the file name to output the input sorted by parser accuracy", metavar="SORTFILE", dest="sortedFileName", action="store")
  76. (opts, posArgs) = parser.parse_args()
  77. # checking arguments
  78. if len(posArgs) < 3:
  79. parser.error("At least 3 arguments are required!")
  80. if not (posArgs[1].isdigit() and posArgs[2].isdigit()):
  81. parser.error("The second and third arguments should be a number!")
  82. else:
  83. vSysNum = int(posArgs[1])
  84. vPAInterval = int(posArgs[2])
  85. # opening scores file
  86. vScoresFileName = posArgs[0]
  87. try:
  88. vfScores = open(vScoresFileName, 'r')
  89. except IOError:
  90. sys.exit('Can\'t open scores file: ' + vScoresFileName)
  91. # extracting system names from the first row of scores file
  92. vlSystems = vfScores.readline().split(opts.delim)[1:vSysNum + 1]
  93. ## loading data into a list of tuples (no, pa, score of system 1, ...,
  94. ## best system)
  95. vlData = []
  96. vSentenceCntr = 1
  97. for line in vfScores.readlines():
  98. vlLine = line.split(opts.delim)
  99. vlSysScores = [Decimal(x) for x in vlLine[1:vSysNum + 1]]
  100. vBestSysIdx = int(getBestScoreIdx(vlSysScores, opts.tieBreak))
  101. vlData.append(tuple([int(vSentenceCntr)] +
  102. [Decimal(vlLine[0])] +
  103. vlSysScores +
  104. [vBestSysIdx]))
  105. vSentenceCntr += 1
  106. # sorting data based on pa
  107. vlData.sort(key=itemgetter(1))
  108. # writing the sorted data into a file if requested
  109. if opts.sortedFileName != None:
  110. writeSortedData(opts.sortedFileName, vlData, vlSystems, opts.delim)
  111. # creating data structure to store pa interval statistics of systems
  112. PAIvalSysStat = namedtuple('PAIvalSysStat', 'rLower, rUpper, sysCounts, sysPercents, avgScores')
  113. vlPAIvalSysStat = []
  114. # creating ranges
  115. ## calculating the upper bound of last parser accuracy range based on
  116. ## interval and maximum accuracy in data
  117. vMaxPA = vlData[-1][1]
  118. if vMaxPA % vPAInterval != 0:
  119. vLRUpper = (int(vMaxPA / vPAInterval) + 1) * vPAInterval
  120. else:
  121. vLRUpper = vMaxPA
  122. # computing interval statistics
  123. vDataIdx = 0
  124. ## looping through possible ranges, creating them, and computing statistics
  125. ## for each range
  126. for vRange in range(0, vLRUpper, vPAInterval):
  127. ## 0 is treated as special case since it's included in the range as
  128. ## opposed to the other range lower bounds (i.e. [0, 5], (5,10], ...)
  129. if vRange == 0:
  130. vRLower = -1
  131. vRUpper = vPAInterval
  132. else:
  133. vRLower = vRange
  134. vRUpper = vRLower + vPAInterval
  135. vRangeItemsCntr = 0
  136. vlRangeBSysCounts = [0]*vSysNum
  137. vlRangeSysScoreSum = [0]*vSysNum
  138. # looping through data to compute the best system counts and average scores
  139. while vDataIdx < len(vlData):
  140. if vRLower < vlData[vDataIdx][1] <= vRUpper:
  141. ## adding one to the statistics of the best system (its index is in
  142. ## vDatum[-1])
  143. vlRangeBSysCounts[vlData[vDataIdx][-1]] += 1
  144. ## computing the sum of scores for each system (this will be divided
  145. ## by the number of sentences in range later when it was found)
  146. for sysIdx in range(0, len(vlSystems)):
  147. vlRangeSysScoreSum[sysIdx] = Decimal(vlRangeSysScoreSum[sysIdx]) + Decimal(vlData[vDataIdx][sysIdx + 2])
  148. #print vlRangeSysScoreSum[-1],
  149. vDataIdx += 1
  150. vRangeItemsCntr += 1
  151. else:
  152. break
  153. ## finalizing computing the average scores by dividing collected sums
  154. if vRangeItemsCntr == 0:
  155. vlRangeAvgScores = [0]*vSysNum
  156. else:
  157. vlRangeAvgScores = [Decimal(score / vRangeItemsCntr).quantize(Decimal('0.01')) for score in vlRangeSysScoreSum]
  158. ## computing ratios of counts of each system with respect to the sum of
  159. ## counts of all system
  160. ## Note that the last one is not directly calculated. Insread it's
  161. ## cacluated bu sybtracting the percentage so far from 100.
  162. vlRangePercents = []
  163. if sum(vlRangeBSysCounts) == 0:
  164. vlRangePercents = [0] * len(vlSystems)
  165. else:
  166. for idx in range (0, len(vlSystems) - 1):
  167. vlRangePercents.append((Decimal(vlRangeBSysCounts[idx]) * 100 / sum(vlRangeBSysCounts)).quantize(Decimal('0.1')))
  168. vlRangePercents.append((100 - sum(vlRangePercents)).quantize(Decimal('0.1')))
  169. # returning back the vRLower to 0 after treating 0 special case
  170. if vRLower == -1:
  171. vRLower = 0
  172. ## inserting the statistics for the range
  173. ## optionally, empty ranges (range with no data) will also be inserted
  174. if opts.emptyRange or sum(vlRangeBSysCounts) > 0:
  175. vlPAIvalSysStat.append(PAIvalSysStat(vRLower, vRUpper, vlRangeBSysCounts,
  176. vlRangePercents, vlRangeAvgScores))
  177. ## printing the statistics
  178. ## The same delimiter in input file is used to format output.
  179. # printing header
  180. print ("range" + opts.delim +
  181. opts.delim.join(vlSystems) + opts.delim +
  182. opts.delim.join('%' + sys for sys in vlSystems) + opts.delim +
  183. opts.delim.join('avg(' + sys + ')' for sys in vlSystems))
  184. # printing data
  185. for vStat in vlPAIvalSysStat:
  186. print (str(vStat.rLower) + '-' +
  187. str(vStat.rUpper) + opts.delim +
  188. opts.delim.join(str(x) for x in vStat.sysCounts) + opts.delim +
  189. opts.delim.join(str(x) for x in vStat.sysPercents) + opts.delim +
  190. opts.delim.join(str(x) for x in vStat.avgScores))
  191. vfScores.close()
  192. ##======================================================================
  193. ## calling main
  194. if __name__ == "__main__":
  195. sys.exit(main())