lorgparses.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. #! /usr/bin/python
  2. ## This module defines functions to process LORG parser output.
  3. ##
  4. ## Current version: 1.1
  5. ##
  6. ## Added:
  7. ## - replaceUnscorable: replaces the problematic parses which evalb couldn't
  8. ## process with a dummy parse tree
  9. ##
  10. from collections import namedtuple
  11. import sys, shutil, os
  12. ##----------------------------------------------------------------------
  13. ## Types
  14. # data structure for returning best parse
  15. Parse = namedtuple('Parse', 'sentenceNo tree')
  16. ##----------------------------------------------------------------------
  17. ## Extracts the parse trees from LORG output, and optionally adds TOP
  18. ## tag. It also optionally ignores those line marked by LORG as comments
  19. ## (lines starting with "### comment: ". LORG considers input sentences
  20. ## starting with # as comments, ignores parsing them and outputs
  21. ## "### comment: " followed by the sentence. This option is useful (by
  22. ## setting to false) when this treatment by LORG is problematic. In this
  23. ## cases, the sentence numbering is jumped by one, so that the calling
  24. ## program can recognize that a parse tree is missing and takes necessary
  25. ## action. This function also consider line starting with 'no parse found'
  26. ## as those could not been parsed and jumps by one sentence number.
  27. def extractParses(pParseFileName, pflgAddTOP, pflgIgnoreComment):
  28. try:
  29. vfParse = open(pParseFileName, 'r')
  30. except IOError:
  31. sys.exit('Can\'t open parse file: ' + pParseFileName)
  32. vlParses = []
  33. vParseCntr = 0
  34. for line in vfParse:
  35. if line.startswith('### comment:') and not pflgIgnoreComment:
  36. vParseCntr += 1
  37. elif line.startswith('no parse found'):
  38. vParseCntr += 1
  39. elif line.startswith('( ('):
  40. vParseCntr += 1
  41. if pflgAddTOP:
  42. vlParses.append(Parse(vParseCntr, "(TOP" + line.strip('\n')[1:]))
  43. else:
  44. vlParses.append(Parse(vParseCntr, line.strip('\n')))
  45. elif not line.startswith('###'):
  46. sys.exit("Unknown line opening: " + line)
  47. break
  48. vfParse.close()
  49. return vlParses
  50. ##----------------------------------------------------------------------
  51. ## This function takes the parse file (tree per line) and replaces the
  52. ## problematic parses which evalb couldn't process with a dummy parse tree.
  53. ## Currently, the sentence is identified by its line number in the file,
  54. ## which is an argument to this function. It can be improved to detect such
  55. ## sentences itself!
  56. ## Note that this function is not specific to LORG output, as its input
  57. ## is considered in a tree-per-line format.
  58. def replaceUnscorable(pParseFileName, pSentenceLineNo, pflgOverwrite):
  59. # opening the input file
  60. try:
  61. vfParse = open(pParseFileName, 'r')
  62. except IOError:
  63. sys.exit('Can\'t open parse file: ' + pParseFileName)
  64. ## creating the output file
  65. ## The new file is named the same as input file followed by .new suffix.
  66. ## However, if pflgOverwrite is set to true, it will later overwrite the
  67. ## input file
  68. try:
  69. vReplacedFileName = pParseFileName + '.new'
  70. vfReplaced = open(vReplacedFileName, 'w')
  71. except IOError:
  72. print 'Can\'t create output file'
  73. sys.exit(2)
  74. vLineCntr = 1
  75. for vSentence in vfParse:
  76. if vLineCntr == pSentenceLineNo:
  77. # write the dummy pasre into file
  78. vfReplaced.write("(TOP (UNSCORABLE UNSCORABLE))\n")
  79. else:
  80. # if this is not the target sentence, write it as it is
  81. vfReplaced.write(vSentence)
  82. vLineCntr += 1
  83. vfParse.close()
  84. vfReplaced.close()
  85. ## if pflgOverwrite is true, the input file will be overwritten
  86. if pflgOverwrite:
  87. shutil.copy(vReplacedFileName, pParseFileName)
  88. os.remove(vReplacedFileName)