123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- #! /usr/bin/python
- ## This script changes the syntactic tags in the input syntax trees to
- ## tags from another tag set according to the specified mapping.
- ##
- ## Mappings are defined in berkeleyparses.py.
- ##
- ## Current version: 1.0
- import sys, optparse, berkeleyparses
- ##======================================================================
- ## main
def main(argv=None):
    """Re-tag every syntax tree in a parse file using the selected mapping.

    The first positional argument names the input parse file, which is read
    one line (one tree) at a time:

    * a line equal to ``(())`` once surrounding spaces and newlines are
      removed is echoed to standard output with only its newlines stripped;
    * any other non-blank line is handed to
      ``berkeleyparses.mapToCommonTagSet``;
    * blank lines are skipped.

    Args:
        argv: Full argument vector, program name at index 0. Defaults to
            ``sys.argv`` when None.

    Returns:
        None, so ``sys.exit(main())`` terminates with status 0.

    Raises:
        SystemExit: if no positional argument is given (through
            ``parser.error``) or if the input file cannot be opened.
    """
    if argv is None:
        argv = sys.argv

    parser = optparse.OptionParser(
        usage="%prog <INPUT PARSE FILE>"
        + "\nChanges the syntactic tags in the input syntax trees to tags from another tag set according to the specified mapping.",
        version="%prog 1.0")

    parser.add_option(
        "-m", "--mapping",
        help="mapping to be used (1: FTB-trained Berkeley to a common tag set over itself and frenchFactored-trained Stanford)",
        metavar="MAPPING", dest="mapping", default='1', action="store")
    parser.add_option(
        "-t", "--toptag",
        help="top node tag to be used in output (unchanged if not specified)",
        metavar="OUTPUT TOP NODE TAG", dest="outToptag", action="store")

    # Parse the vector we were given (minus the program name) so that callers
    # passing an explicit argv are honoured instead of silently reading
    # sys.argv.
    (opts, posArgs) = parser.parse_args(argv[1:])

    if len(posArgs) < 1:
        parser.error("At least 1 arguments are required")

    # opening input parse file
    try:
        vfInput = open(posArgs[0])
    except IOError:
        sys.exit('Can\'t open input parse file: ' + posArgs[0])

    # The top node tag is replaced only when -t/--toptag was supplied.
    vChangeTopNode = opts.outToptag is not None

    # The context manager closes the file even if processing a tree raises.
    with vfInput:
        for vTree in vfInput:
            if vTree.strip('\n ') == '(())':  # treating Berkeley unparsed sentences
                # Single-argument print() behaves the same on Python 2 and 3.
                print(vTree.strip('\n'))
            elif len(vTree.strip()) != 0:
                # NOTE(review): the return value is ignored, so the helper
                # presumably emits the mapped tree itself (final argument
                # True) -- confirm in berkeleyparses.
                berkeleyparses.mapToCommonTagSet(
                    vTree, opts.mapping, vChangeTopNode, opts.outToptag, True)
- ##======================================================================
- ## calling main
if __name__ == "__main__":
    # Run the command-line entry point and hand its result to the
    # interpreter as the process exit status.
    vExitStatus = main()
    sys.exit(vExitStatus)
|