123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- #! /usr/bin/python
- ## This module defines functions to process LORG parser output.
- ##
- ## Current version: 1.1
- ##
- ## Added:
- ## - replaceUnscorable: replaces the problematic parses which evalb couldn't
- ## process with a dummy parse tree
- ##
- from collections import namedtuple
- import sys, shutil, os
- ##----------------------------------------------------------------------
- ## Types
# Record returned for every extracted parse: the 1-based position of the
# sentence in the parser output (sentenceNo) and its bracketed tree (tree).
Parse = namedtuple('Parse', 'sentenceNo tree')


##----------------------------------------------------------------------
def extractParses(pParseFileName, pflgAddTOP, pflgIgnoreComment):
    """Extract the parse trees from a LORG output file.

    Every line of the file is classified by its prefix:

    * ``( (``            -- a parse tree. The sentence counter is advanced
                            and the tree is recorded under that number.
    * ``no parse found`` -- the sentence could not be parsed. The counter
                            is advanced but nothing is recorded.
    * ``### comment:``   -- LORG treats input sentences starting with ``#``
                            as comments: it does not parse them and echoes
                            them with this prefix. When pflgIgnoreComment
                            is false such a line advances the counter, so
                            that the caller can recognize from the gap in
                            the numbering that a parse tree is missing and
                            take the necessary action. When it is true the
                            line is skipped without counting.
    * other ``###``      -- skipped without counting.
    * anything else      -- the program is terminated.

    Parameters:
        pParseFileName    -- path of the LORG output file.
        pflgAddTOP        -- if true, the empty root label of every tree is
                             replaced by TOP, i.e. the leading "(" becomes
                             "(TOP".
        pflgIgnoreComment -- if true, "### comment:" lines do not consume a
                             sentence number (see above).

    Returns:
        A list of Parse tuples in file order.

    Exits (via sys.exit) when the file cannot be opened or when a line with
    an unknown opening is met.
    """
    try:
        vfParse = open(pParseFileName, 'r')
    except IOError:
        sys.exit('Can\'t open parse file: ' + pParseFileName)

    vlParses = []
    vParseCntr = 0

    # The with-block closes the file on every exit path, including the
    # sys.exit() raised for an unknown line.
    with vfParse:
        for line in vfParse:
            if line.startswith('( ('):
                vParseCntr += 1
                vTree = line.strip('\n')
                if pflgAddTOP:
                    # drop the opening bracket of the unlabeled root and
                    # re-open it with the TOP label
                    vTree = "(TOP" + vTree[1:]
                vlParses.append(Parse(vParseCntr, vTree))
            elif line.startswith('no parse found'):
                vParseCntr += 1
            elif line.startswith('###'):
                # only LORG's echoed comment sentences may consume a number
                if line.startswith('### comment:') and not pflgIgnoreComment:
                    vParseCntr += 1
            else:
                sys.exit("Unknown line opening: " + line)

    return vlParses
-
-
- ##----------------------------------------------------------------------
- ## This function takes the parse file (tree per line) and replaces the
- ## problematic parses which evalb couldn't process with a dummy parse tree.
- ## Currently, the sentence is identified by its line number in the file,
- ## which is passed as an argument to this function. It can be improved to
- ## detect such sentences itself!
- ## Note that this function is not specific to LORG output, as its input
- ## is considered in a tree-per-line format.
def replaceUnscorable(pParseFileName, pSentenceLineNo, pflgOverwrite):
    """Replace one line of a tree-per-line parse file with a dummy parse.

    The input file is copied line by line to ``<pParseFileName>.new``; the
    line whose 1-based number equals pSentenceLineNo is written as
    ``(TOP (UNSCORABLE UNSCORABLE))`` instead of its original content.

    Parameters:
        pParseFileName  -- path of the tree-per-line parse file.
        pSentenceLineNo -- 1-based line number of the parse to replace. It
                           is compared with ``==`` against an integer
                           counter, so it must be an int; a number that
                           matches no line leaves the content unchanged.
        pflgOverwrite   -- if true, the new file is copied over the input
                           file and then removed; otherwise the result is
                           left in the ``.new`` file.

    Exits (via sys.exit) when the input file cannot be opened, or with
    status 2 when the output file cannot be created.
    """
    # opening the input file
    try:
        vfParse = open(pParseFileName, 'r')
    except IOError:
        sys.exit('Can\'t open parse file: ' + pParseFileName)

    ## creating the output file
    ## The new file is named the same as input file followed by .new suffix.
    vReplacedFileName = pParseFileName + '.new'
    try:
        vfReplaced = open(vReplacedFileName, 'w')
    except IOError:
        # do not leak the already opened input file
        vfParse.close()
        # written through sys.stderr instead of a print statement so that
        # the module can be imported by both Python 2 and Python 3
        sys.stderr.write('Can\'t create output file\n')
        sys.exit(2)

    # both files are closed even if reading or writing fails midway
    with vfParse, vfReplaced:
        for vLineNo, vSentence in enumerate(vfParse, 1):
            if vLineNo == pSentenceLineNo:
                # write the dummy parse into file
                vfReplaced.write("(TOP (UNSCORABLE UNSCORABLE))\n")
            else:
                # if this is not the target sentence, write it as it is
                vfReplaced.write(vSentence)

    ## if pflgOverwrite is true, the input file will be overwritten
    if pflgOverwrite:
        shutil.copy(vReplacedFileName, pParseFileName)
        os.remove(vReplacedFileName)
-
-
|