rszk
/
scripts


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737
							#! /usr/bin/python
# -*- coding: utf-8 -*-

"""	
	This module defines classes for sentence-level sentiment analysis (SLSA).
	
	Version 0.1                                 (06-Jul-2016)
	- SLSASet, SLSASent are added.
"""


from parse import constparse, depparse
from ml import tk
from nlp import nlp


class SLSASet:
	'''
	Class for sentence level sentiment analysis dataset.
	
	A data set is an ordered list of SLSASent objects plus, optionally, a
	WordVector object holding the vectors of the data set vocabulary.
	'''
	
	
	def __init__(self):
		'''
		Constructor
		
		Creates an empty data set. Use load() or addSentences() to fill it.
		'''
		
		# SLSASent objects in the order they were loaded/added
		self.sentences = []
		
		# WordVector object containing the word vectors of the vocabulary in the dataset
		self.wv = None
		
	
	def load(self, pTxtFilename, pPolarityFilename, pflgTokenize = False, pLanguage = "en"):
		'''
		Loads the data from input files
		
		This is the basic loader of SLSA data which expects the sentences
		(text) and polarity scores to be provided in separate parallel files,
		one sentence/score per line. Previously loaded sentences are dropped.
		
		pflgTokenize and pLanguage are passed on to SLSASent.load().
		
		An Exception is raised if the two files do not have the same number
		of lines.
		'''
		
		self.sentences = []
		
		# the files are read inside context managers so that they are closed
		# even when reading fails
		with open(pTxtFilename) as vTxtFile:
			vlTxtLines = vTxtFile.read().strip().split('\n')
		
		with open(pPolarityFilename) as vPolarityFile:
			vlPolarityScores = vPolarityFile.read().strip().split('\n')
		
		if len(vlTxtLines) != len(vlPolarityScores):
			raise Exception("Number of sentences does not match number of scores: %s vs. %s" % (len(vlTxtLines), len(vlPolarityScores)))
		
		for vTxtLine, vPolScore in zip(vlTxtLines, vlPolarityScores):
			vSLSASent = SLSASent(pSLSASet = self)
			vSLSASent.load(vTxtLine, vPolScore, pflgTokenize = pflgTokenize, pLanguage = pLanguage)
			self.sentences.append(vSLSASent)
		
	
	@property
	def size(self):
		'''
		Returns the size of the data set which is the number of its sentences
		'''
		
		return len(self.sentences)
		
	
	@property
	def tokenLength(self):
		'''
		Returns the number of tokens in the data set
		
		It is the sum of the length property of all sentences.
		'''
		
		return sum(vSent.length for vSent in self.sentences)
		
	
	def getSentences(self, pSort = ''):
		'''
		Returns the SLSA sentences in a new list
		
		The sort options are:
		- "text" (case-insensitive): in sentence text order
		- None, '' or any other string: in document order
		'''
		
		# None is accepted explicitly since extractSentenceForms() uses it as
		# its default sort option
		if pSort is not None and pSort.lower() == "text":
			return sorted(self.sentences, key = lambda vSent: vSent.getText())
		
		return list(self.sentences)
		
	
	def addSentences(self, plSentences):
		'''
		Adds SLSA sentences to existing sentences
		
		Each added sentence is re-attached to this data set through its
		dataset attribute.
		
		Currently, no care is taken regarding ID duplication.
		'''
		
		for vSent in plSentences:
			vSent.dataset = self
			self.sentences.append(vSent)
		
	
	def getVocabualry(self):
		'''
		Extracts and returns the vocabulary of the dataset 
		
		The vocabulary is the sorted list of the distinct tokens of all
		sentences.
		'''
		
		vsVocab = set()
		for vSent in self.sentences:
			vsVocab.update(vSent.getTokens())
		
		return sorted(vsVocab)
		
	
	def extractSentenceForms(self, pSort = None):
		'''
		Returns the surface form of the sentences
		
		The sort options are those of getSentences():
		- None: in document order
		- text: in sentence text order
		'''
		
		return [vSent.getText() for vSent in self.getSentences(pSort = pSort)]
		
	
	def loadConstTrees(self, plConstTrees):
		'''
		Loads the constituency parse trees of the sentences
		
		It assumes that the provided constituency trees are in the order
		in which the sentences are loaded. Trees and sentences are paired
		with zip(), so extra items on either side are silently ignored.
		
		The constituency trees can be provided in bracketing format or as
		constparse.ConstTree objects (in a list).
		'''
		
		for vSent, vCTree in zip(self.sentences, plConstTrees):
			vSent.loadConstTree(vCTree)
		
	
	def loadDepTrees(self, plDepTrees):
		'''
		Loads the dependency parse trees of the sentences
		
		It assumes that the provided dependency trees are in the order in
		which the sentences are loaded. Trees and sentences are paired with
		zip(), so extra items on either side are silently ignored.
		
		The dependency trees are assumed to be provided as a list of 
		depparse.DepTree objects.
		'''
		
		for vSent, vDTree in zip(self.sentences, plDepTrees):
			vSent.loadDepTree(vDTree)
		
	
	def loadSentimentScores(self, pSentexicon, pNeutralScore = None):
		'''
		Loads polarity scores to sentences from a sentiment lexicon which
		is a Sentexicon object
		
		It returns a tuple of the two counts returned by
		SLSASent.loadSentimentScores(), each summed over all sentences.
		
		For details about Sentexicon object, see sentexicon.py
		'''
		
		vTotalWordNum = 0          # total number of words in the data set
		vTotalEntryWordNum = 0     # number of words found in the lexicon
		
		for vSent in self.sentences:
			vWordNum, vEntryWordNum = vSent.loadSentimentScores(pSentexicon, pNeutralScore)
			
			vTotalWordNum += vWordNum
			vTotalEntryWordNum += vEntryWordNum
		
		return vTotalWordNum, vTotalEntryWordNum
		
	
	def getSentimentScores(self):
		'''
		Returns a dictionary of words in the data set and the sentiment 
		scores attached to them 
		
		If a word occurs more than once, the score of its last occurrence 
		is kept.
		'''
		
		vdResult = {}
		
		for vSent in self.sentences:
			vdResult.update(zip(vSent.getTokens(), vSent.getSentimentScores()))
		
		return vdResult
		
	
	def loadSSInDTrees(self):
		'''
		Loads sentiment scores into dependency tree nodes of all sentences
		'''
		
		for vSent in self.sentences:
			vSent.loadSSInDTree()
		
	
	def loadSSInCTrees(self, pPropagation = None):
		'''
		Loads sentiment scores into constituency tree nodes of all sentences
		
		pPropagation is passed on to SLSASent.loadSSInCTree().
		'''
		
		for vSent in self.sentences:
			vSent.loadSSInCTree(pPropagation = pPropagation)
		
	
	def loadWordVectors(self, pWordVectors, pflgFilter = True):
		''' 
		Loads word vectors from a file or WordVector object, whichever is given
		
		If a string is given, it is taken as the name of the word vector file
		and loaded into a new WordVector object. By default, the words not in
		the data vocabulary are filtered out, which can be changed to not 
		filter (e.g. when the input is already filtered).
		
		Any other object is stored as is; pflgFilter has no effect on it.
		'''
		
		if isinstance(pWordVectors, str):
			# imported locally so that ml.wv is only needed when vectors are
			# actually read from a file
			from ml import wv
			
			vWV = wv.WordVector()
			
			if pflgFilter:
				vWV.load(pWVFilename = pWordVectors, plFilterVocab = self.getVocabualry())
			else:
				# NOTE(review): assumes WordVector.load() loads everything when
				# plFilterVocab is omitted - confirm against ml/wv.py
				vWV.load(pWVFilename = pWordVectors)
			
			self.wv = vWV
		else:
			self.wv = pWordVectors
		
	
class SLSASent:
	'''
	Class for SLSA sentence
	
	A sentence holds its (tokenized) text, its polarity label, optional
	constituency and dependency trees and the sentiment scores of its tokens.
	'''
	
	
	def __init__(self, pSLSASet):
		'''
		Constructor
		
		pSLSASet is the data set the sentence belongs to.
		'''
		
		# SLSASet the sentence belongs to
		self.dataset = pSLSASet
		
		self.text = None
		self.polarity = None 
		
		self.cTree = None        # SLSACTree, set by loadConstTree()
		self.dTree = None        # SLSADTree, set by loadDepTree()
		self.sentScores = []     # sentiment score, one per word in the tokenized self.text
		
	
	def load(self, pText, pPolarity = None, pflgTokenize = False, pLanguage = "en"):
		'''
		Loads sentence form and its label
		
		It optionally tokenizes the text using nlp.tokenizeSegment(). The 
		polarity is stored as given.
		'''
		
		if pflgTokenize:
			self.text = nlp.tokenizeSegment(pText, pLang = pLanguage, pflgTokenizeFSlash = False)
		else:
			self.text = pText
		
		self.polarity = pPolarity
		
	
	def getText(self):
		'''
		Returns sentence text (form)
		'''
		
		return self.text
		
	
	def getTokens(self):
		'''
		Returns the tokenization of the sentence
		
		The sentence text is assumed to be in tokenized format and is only 
		split on whitespace.
		'''
		
		return self.getText().split()
		
	
	def getPolarity(self):
		'''
		Returns the polarity of the sentence
		'''
		
		return self.polarity
		
	
	@property
	def length(self):
		'''
		Returns the sentence length which is the number of its tokens
		'''
		
		return len(self.getTokens())
		
	
	def getConstTree(self):
		'''
		Returns the constituency parse tree of the sentence
		
		The returned object is the SLSACTree created by loadConstTree() or
		None if no tree has been loaded.
		'''
		
		return self.cTree
		
	
	def getPOSTags(self):
		'''
		Returns the list of POS tags which matches the token list
		
		The POS tags are extracted from the constituency tree if it is 
		loaded, otherwise from the dependency tree. If neither is loaded, an
		empty list is returned.
		'''
		
		if self.cTree is not None:
			return self.cTree.getPOSs()
		
		if self.dTree is not None:
			return self.dTree.getPOSs()
		
		return []
		
	
	def getDepTree(self):
		'''
		Returns the dependency parse tree of the sentence
		
		The returned object is the SLSADTree created by loadDepTree() or None
		if no tree has been loaded.
		'''
		
		return self.dTree
		
	
	def loadConstTree(self, pConstTree):
		'''
		Loads the constituency parse tree of the sentence
		
		The constituency tree can be provided in bracketing format or as
		constparse.ConstTree object. In the latter case, it is first 
		converted to bracketing format and then loaded into a new SLSACTree.
		'''
		
		# loading the tree
		
		if isinstance(pConstTree, constparse.ConstTree):
			vConstTree = pConstTree.getPTBFormat()
		else:
			vConstTree = pConstTree
		
		self.cTree = SLSACTree()
		self.cTree.loadPTBTree(vConstTree, pflgExpandTerminal = True)
		
		# sanity check; comment out
		#if self.cTree.surface != self.getText():
		#	print "Sentence and tree mismatch:\nSentence: %s\nTree: %s\n" % (self.getText(), self.cTree.surface)
		
	
	def loadDepTree(self, pDepTree):
		'''
		Loads the dependency parse tree of the sentence
		
		The dependency tree must be a depparse.DepTree object, otherwise an
		Exception is raised. Its content is loaded into a new SLSADTree.
		'''
		
		# loading the tree
		
		if not isinstance(pDepTree, depparse.DepTree):
			raise Exception("A DepTree object is expected!")
		
		self.dTree = SLSADTree()
		self.dTree.loadFromDepTree(pDepTree = pDepTree)
		
		# sanity check; comment out
		#if self.dTree.surface != self.getText():
		#	print "Sentence and tree mismatch:\nSentence: %s\nTree:%s\n" % (self.getText(), self.dTree.surface)
		
	
	def getPTBConstTree(self):
		'''
		Returns the constituency tree of the sentence in PTB bracketing format
		
		The constituency tree must have been loaded.
		'''
		
		return self.cTree.getPTBFormat()
		
	
	def loadSentimentScores(self, pSentexicon, pNeutralScore = None):
		'''
		Loads sentiment scores to words in the sentence from a sentiment
		lexicon which is a Sentexicon object
		
		It returns a tuple of the total number of words in the sentence 
		and the number of words found in the lexicon.
		
		If the word is not found in the lexicon (i.e. the lexicon returns
		None for it), the neutral score will be used, which is None unless 
		set otherwise. Words receiving the neutral score are not counted as 
		found in the lexicon.
		
		For details about Sentexicon object, see sentexicon.py
		'''
		
		self.sentScores = []
		vEntryWordNum = 0
		
		for vWord in self.getTokens():
			vScore = pSentexicon.getScore(vWord)
			
			if vScore is not None:
				# counted here, before any neutral score is filled in, so that
				# only real lexicon hits are reported
				vEntryWordNum += 1
			elif pNeutralScore is not None:
				vScore = pNeutralScore
			
			self.sentScores.append(vScore)
		
		return len(self.sentScores), vEntryWordNum
		
	
	def getSentimentScores(self):
		'''
		Returns the sentiment scores in a list corresponding to the token list
		
		The list is empty until loadSentimentScores() is called.
		'''
		
		return self.sentScores
		
	
	def loadSSInDTree(self):
		'''
		Loads sentiment scores into the dependency tree of the sentence
		
		The dependency tree must have been loaded.
		'''
		
		self.dTree.loadSentScores(self.sentScores)
		
	
	def loadSSInCTree(self, pPropagation = None):
		'''
		Loads sentiment scores into the constituency tree of the sentence
		
		The constituency tree must have been loaded. pPropagation is passed 
		on to the loadSentScores() method of the tree.
		'''
		
		self.cTree.loadSentScores(self.sentScores, pPropagation = pPropagation)
		
	
	def generateNGramKTree(self, pNodeContentType = "word",  pFormat = "binary", pdOptions = None):
		'''
		Generates and returns the tree representation of the surface form
		of the sentence
		
		The tree is generated by a SLSANGramKTree object created for this
		sentence.
		'''
		
		vSLSANGramKTree = SLSANGramKTree(pSLSASent = self, pNodeContentType = pNodeContentType, pdOptions = pdOptions)
		
		return vSLSANGramKTree.generateNGramKTree(pFormat = pFormat, pdOptions = pdOptions)
		
	
	def getAvgSentScore(self):
		'''
		Calculates and returns the average sentiment score of sentence tokens
		
		0.0 is returned if there is no sentiment score (e.g. empty sentence 
		or scores not loaded yet). 
		
		All scores must be numbers, i.e. scores should be loaded with a 
		neutral score so that no None is left in the list.
		'''
		
		vlSentScores = self.getSentimentScores()
		
		if len(vlSentScores) == 0:
			return 0.0
		
		# float() avoids integer (floor) division under Python 2
		return sum(vlSentScores) / float(len(vlSentScores))
		
	
	def getPolarScores(self, pNeutralScore = None):
		'''
		Returns sentiment scores with non-neutral polarity
		
		Neutral polarity score can be set as parameter. It is None by default meaning that no polarity score is assigned
		to neutral words. Every score not equal to the neutral score is returned. 
		'''
		
		return [vScore for vScore in self.sentScores if vScore != pNeutralScore]
		
	
	def getWordVectors(self):
		'''
		Returns the word vectors of the sentence tokens
		
		The vectors are looked up in the WordVector object of the data set 
		the sentence belongs to, which must have been loaded.
		'''
		
		vWV = self.dataset.wv
		
		return [vWV.getVector(vToken) for vToken in self.getTokens()]
		
	
class SLSACTree(constparse.ConstTree):
	'''
	Constituency parse tree of a SLSA sentence
	
	It extends constparse.ConstTree with SLSA-specific operations, all of 
	which are delegated to the root node of the tree.
	'''
	
	
	def __init__(self):
		'''
		Constructor
		
		Only initializes the parent class.
		'''
		
		constparse.ConstTree.__init__(self)
	
	
	def _createNewTree(self):
		'''
		Factory method returning a new, empty SLSACTree
		 
		Overriding it lets inherited code create trees of this class.
		'''
		
		vNewTree = SLSACTree()
		
		return vNewTree
	
	
	def _createRoot(self):
		'''
		Factory method returning a new SLSACNode to be used as root node
		'''
		
		vRoot = SLSACNode()
		
		return vRoot
		
	
	def modifyNPStruct(self):
		'''
		Restructures the flat noun phrases of the tree in order to avoid 
		term/constituent mismatch
		
		The work is done by the modifyNPStruct() method of the root node.
		'''
		
		vRoot = self.root
		vRoot.modifyNPStruct()
		
	
	def loadSentScores(self, plSentScores, pPropagation = None):
		'''
		Loads sentiment scores to tree nodes
		
		plSentScores is the list of sentiment scores which corresponds to 
		the terminal (i.e. token) list.
		
		pPropagation specifies the method by which the scores are propagated
		from terminals up to the root node. With None, scores are only
		assigned to the terminal nodes. See the loadSentScores() method of 
		SLSACNode for the available methods.
		'''
		
		vRoot = self.root
		vRoot.loadSentScores(plSentScores, pPropagation = pPropagation)
		
	
	def extractTopmostVP(self, pSpan):
		'''
		Extracts and returns the topmost verb phrase node for the given span
		
		The search is done by the extractTopmostVP() method of the root node
		and its result is returned as is.
		'''
		
		vTopVP = self.root.extractTopmostVP(pSpan)
		
		return vTopVP
		
	
class SLSACNode(constparse.ConstNode):
	'''
	Class for constituency tree node of a SLSA sentence
	
	It extends constparse.ConstNode with a sentiment score and a list of
	opinion expressions.
	'''
	
	
	# Rules used by modifyNPStruct(): each rule is a pattern over the space
	# separated tags of the children of a NP and the list of (tag, span) 
	# arguments of the insertIntermChild() calls to make, in order, when the 
	# pattern matches. The first matching rule is applied.
	_NP_RESTRUCT_RULES = (
		(r"^(DT|PRP\$) NN[A-Z]* NN[A-Z]*$", (("NP", (2, 3)),)),
		(r"^(DT|PRP\$) JJ[A-Z]* NN[A-Z]*$", (("ADJP", (2, 3)),)),
		(r"^(DT|PRP\$) VBG NN[A-Z]*$", (("NP", (2, 3)),)),
		(r"^(DT|PRP\$) NN[A-Z]* NN[A-Z]* NN[A-Z]*$", (("NP", (2, 4)),)),
		(r"^(DT|PRP\$) JJ[A-Z]* NN[A-Z]* NN[A-Z]*$", (("NP", (3, 4)), ("NP", (2, 3)))),
		(r"^(DT|PRP\$) ADJP NN[A-Z]*$", (("NP", (2, 3)),)),
		(r"^(DT|PRP\$) CD NN[A-Z]* NN[A-Z]*$", (("NP", (2, 4)),)),
		(r"^(DT|PRP\$) JJ[A-Z]* JJ[A-Z]* NN[A-Z]*$", (("NP", (3, 4)), ("NP", (2, 3)))),
	)
	
	
	def __init__(self):
		'''
		Constructor
		'''
		
		constparse.ConstNode.__init__(self)
		
		# sentiment score
		self.sentScore = None
		
		# list of type OpinionExpression
		self.oe = []
		
	
	def deepCopy(self, pflgCopyTree = False):
		'''
		NOTE: it seems python deepcopy() works better. Try the idea before
		using this method.
		
		Creates and returns a deep copy of the node optionally including 
		the sub tree under it
		
		The copy is made by the parent class and the sentiment score of this
		node is then set on it. The opinion expression list is not copied 
		here.
		'''
		
		vNodeCopy = constparse.ConstNode.deepCopy(self, pflgCopyTree)
		
		# copying the sentiment score
		vNodeCopy.sentScore = self.sentScore
		
		return vNodeCopy
		
	
	def shallowCopy(self, pflgCopyTree = False):
		'''
		NOTE: before using, check if python shallowcopy() does not work
		as expected.
		
		Creates and returns a shallow copy of the node
		
		The shallow copy does not have a parent and children. The copy is 
		made by the parent class and the sentiment score of this node is 
		then set on it.
		'''
		
		vNodeCopy = constparse.ConstNode.shallowCopy(self, pflgCopyTree)
		
		# copying the sentiment score
		vNodeCopy.sentScore = self.sentScore
		
		return vNodeCopy
		
	
	def _getNewNode(self):
		'''
		Creates and returns a new, empty SLSACNode
		'''
		
		return SLSACNode()
		
	
	def setSentScore(self, pScore):
		'''
		Sets the value of sentiment score of the node
		'''
		
		self.sentScore = pScore
		
	
	def getSentScore(self):
		'''
		Returns the sentiment score of the node
		'''
		
		return self.sentScore
		
	
	def modifyNPStruct(self):
		'''
		Modifies the structure of the noun phrases in order to avoid
		term/constituent mismatch caused by a flat NP structure
		
		If the node is a NP and the tag sequence of its children matches one
		of the rules in _NP_RESTRUCT_RULES, the intermediate nodes of that 
		rule are inserted and the subtree is not searched any further. 
		Otherwise, the children are processed recursively.
		'''
		
		# imported locally since re is not among the module-level imports
		import re
		
		if self.getSynTag() == "NP":
			vChildLabelSeq = ' '.join(self.getChildrenTags())
			
			for vPattern, vlInsertions in self._NP_RESTRUCT_RULES:
				if re.search(vPattern, vChildLabelSeq):
					for vTag, vSpan in vlInsertions:
						self.insertIntermChild(vTag, vSpan)
					return
		
		for vChild in self.getChildren():
			vChild.modifyNPStruct()
	
	
	def loadSentScores(self, plSentScores, pPropagation = None):
		'''
		Loads sentiment scores to nodes in the subtree and returns the 
		sentiment score of this node
		
		The sentiment scores are given in a list which corresponds to the
		terminal (i.e. token) list. A terminal node takes the score at the
		position given by its token span. 
		
		Propagation argument specifies the method by which the score are 
		propagated from terminals up to this node. None (or an unknown 
		method) means scores are only assigned to the terminal nodes and the 
		score of the phrase nodes is left untouched. The following are the 
		possible methods (case-insensitive):
		- sum: node score is the sum of its children score; children without
		       a score (None) do not contribute to the sum
		- vote: node score is the dominant positive or negative score in the 
		        children nodes (i.e. more +1: score is +1, more -1: score -1) 
		
		An Exception is raised if the token span of a terminal node covers 
		more than one token.
		'''
		
		vTokenSpan = self.getTokenSpan()
		
		if self.isTerminal():
			# sanity check
			if vTokenSpan[0] != vTokenSpan[1]:
				raise Exception("Either the node is not terminal or its span is wrong: %s" % self)
			
			# the span position is shifted by one to index the score list
			self.setSentScore(plSentScores[vTokenSpan[0] - 1])
		else:
			vlChildrenScores = [vChild.loadSentScores(plSentScores, pPropagation) for vChild in self.getChildren()]
			
			# calculating the sentiment score of the node based on its children's (propagation)
			if pPropagation is not None:
				vPropagation = pPropagation.lower()
				
				if vPropagation == "sum":
					# None scores are skipped, otherwise sum() fails on them
					self.setSentScore(sum([vScore for vScore in vlChildrenScores if vScore is not None]))
				elif vPropagation == "vote":
					self.setSentScore(self._getDominantSentiment(vlChildrenScores))
		
		return self.getSentScore()
	
	
	def _getDominantSentiment(self, plScores):
		'''
		Returns +1 or -1 whichever is dominant in the given list of sentiment 
		scores
		
		Only scores equal to +1 and -1 are counted. If the same number of 
		both sentiment scores exist, 0 is returned.
		'''
		
		vPosCount = sum(1 for vScore in plScores if vScore == 1)
		vNegCount = sum(1 for vScore in plScores if vScore == -1)
		
		if vPosCount > vNegCount:
			return 1
		
		if vPosCount < vNegCount:
			return -1
		
		return 0
		
	
	def extractTopmostVP(self, pSpan):
		'''
		Extracts and returns the topmost verb phrase node which overlaps the given span in the node subtree
		
		Overlap means that the given span and the span of the VP must not be disjoint. So, left and right crossing will
		also be considered.
		
		The subtree is searched depth-first and the search does not go below a VP node. None is returned if no such
		VP is found.
		'''
		
		if self.getSynTag() == 'VP':
			# NOTE(review): -4 is taken to be the code getTokenSpanRelation() 
			# returns for disjoint spans - confirm against constparse
			if self.getTokenSpanRelation(pSpan) != -4:
				return self
			
			return None
		
		for vChild in self.getChildren():
			vTopVP = vChild.extractTopmostVP(pSpan)
			if vTopVP is not None:
				return vTopVP
		
		return None
		
	
class SLSADTree(depparse.DepTree):
	'''
	Class for dependency parse tree of a SLSA sentence
	
	It extends depparse.DepTree with nodes of type SLSADNode which carry a
	sentiment score.
	'''
	
	
	def __init__(self, pLanguage = ''):
		'''
		Constructor
		
		Only initializes the parent class with the given language.
		'''
		
		depparse.DepTree.__init__(self, pLanguage = pLanguage)
		
	
	def loadFromDepTree(self, pDepTree):
		'''
		Loads the tree from DepTree object
		
		A new SLSADNode is created for every node of the given tree. The SRL
		and language attributes are taken over from the given tree as they
		are (not copied).
		'''
		
		# 1. nodes
		
		self.nodes = []
		
		for vDepNode in pDepTree.nodes:
			vNode = SLSADNode()
			self.nodes.append(vNode)
			vNode.loadFromDepNode(pSLSADTree = self, pDepNode = vDepNode)
		
		# 2. SRL
		
		self.srl = pDepTree.srl
		
		# 3. language
		
		self.language = pDepTree.language
		
	
	def _createNewTree(self, pLanguage = ''):
		'''
		Creates and returns a new, empty SLSADTree
		'''
		
		return SLSADTree(pLanguage = pLanguage)
		
	
	def loadSentScores(self, plSentScores):
		'''
		Loads sentiment scores to tree nodes
		
		The sentiment scores are given in a list which corresponds to the
		tree node list as returned by getNodes(). Nodes and scores are paired
		with zip(), so extra items on either side are silently ignored.
		'''
		
		for vNode, vSentScore in zip(self.getNodes(), plSentScores):
			vNode.setSentScore(vSentScore)
		
	
	def generateDepKTree(self, pFormat = "(rel form)", pdOptions = None):
		'''
		Generates the dependency tree representation in PTB bracketing 
		for tree kernels
		
		The tree is generated from the root by a SLSADKTree object created 
		for this tree.
		
		pdOptions provides options specific to each format. An empty 
		dictionary is used if it is not given.
		'''
		
		# a new dictionary per call instead of a mutable default argument
		# which would be shared between calls
		if pdOptions is None:
			pdOptions = {}
		
		vDepKTree = SLSADKTree(pDepTree = self)
		
		return vDepKTree.generateDepKTree(pNode = "root", pFormat = pFormat, pdOptions = pdOptions)
		
	
class SLSADNode(depparse.DepNode):
	'''
	Class for dependency parse node of a SLSA sentence
	
	It extends depparse.DepNode with a sentiment score.
	'''
	
	
	def __init__(self, pSLSADTree = None, pForm = "", pPosition = 0, plHeadDeps = None, plDependents = None, pPOSTag = "", plPredRoles = None, pSentScore = None):
		'''
		Constructor
		
		All arguments but pSentScore are passed on to the constructor of the
		parent class. pSentScore is the sentiment score of the node.
		'''
		
		depparse.DepNode.__init__(self, pDepTree = pSLSADTree, pForm = pForm, pPosition = pPosition, plHeadDeps = plHeadDeps, plDependents = plDependents, pPOSTag = pPOSTag, plPredRoles = plPredRoles)
		
		# sentiment score
		self.sentScore = pSentScore
		
	
	@staticmethod
	def _copyList(plItems):
		'''
		Returns a copy of the given list or None if it is None
		'''
		
		if plItems is None:
			return None
		
		return plItems[:]
		
	
	def loadFromDepNode(self, pSLSADTree, pDepNode):
		'''
		Loads the node data from DepNode object
		
		The attributes of the given node are taken over as they are (lists
		are not copied). The sentiment score is not changed.
		'''
		
		self.depTree = pSLSADTree                   # SLSA dependency tree the node belongs to
		self.form = pDepNode.form                   # token surface form
		self.position = pDepNode.position           # token position in the sentence
		self.headDeps = pDepNode.headDeps           # list of head and dependency tuples
		self.dependents = pDepNode.dependents       # children
		self.posTag = pDepNode.posTag               # POS tag
		self.predRoles = pDepNode.predRoles         # list of (predicate position, semantic role) tuples
		
	
	def _createNewNode(self, pDepTree = None, pForm = "", pPosition = 0, plHeadDeps = None, plDependents = None, pPOSTag = "", plPredRoles = None, pSentScore = None):
		'''
		Creates and returns a new node
		
		The list arguments are copied so that the new node does not share 
		them with the caller. A list argument which is None (the default) is 
		passed on as None.
		'''
		
		return SLSADNode(
			pSLSADTree = pDepTree,
			pForm = pForm,
			pPosition = pPosition,
			plHeadDeps = self._copyList(plHeadDeps),
			plDependents = self._copyList(plDependents),
			pPOSTag = pPOSTag,
			plPredRoles = self._copyList(plPredRoles),
			pSentScore = pSentScore,
		)
		
	
	def deepCopy(self, pDepTree):
		'''
		NOTE: it seems python deepcopy() works better. Try the idea before
		using this method.
		
		Creates and returns a new dependency node which is a deep copy of 
		the current node
		
		The new node is created by _createNewNode() and belongs to the given
		tree. The lists of the node are copied; a list attribute which is 
		None stays None in the copy.
		'''
		
		return self._createNewNode(
			pDepTree = pDepTree,
			pForm = self.form,
			pPosition = self.position,
			plHeadDeps = self._copyList(self.headDeps),
			plDependents = self._copyList(self.dependents),
			pPOSTag = self.posTag,
			plPredRoles = self._copyList(self.predRoles),
			pSentScore = self.sentScore,
		)
		
	
	def setSentScore(self, pScore):
		'''
		Sets the value of sentiment score of the node's word
		'''
		
		self.sentScore = pScore
		
	
	def getSentScore(self):
		'''
		Returns the sentiment score of the node's word
		'''
		
		return self.sentScore
		
	
class SLSADKTree(tk.DepKTree):
	'''
	The class for SLSA dependency tree for use in tree kernels.
	
	For use in tree kernels, the tree is represented in PTB bracketing 
	format.
	
	All the SLSA-specific formats share the same layout: a chain of nested
	labels closed by a terminal, where the subtrees of the dependents of 
	the node (if any) are attached under the terminal. The format-specific
	methods only decide which labels and terminal to use; the bracketing
	itself is done by _nestSLSALabels().
	'''
	
	
	def generateDepKTree(self, pNode = "root", pFormat = "(rel form)", pdOptions = None):
		'''
		Generates dependency kernel tree or subtree under a given node in
		the required format 
		
		NOTE: The subtree here should not be confused with the notion of
		subtree as a tree kernel variation used in parallel to subset tree
		kernel.
		
		The default format is (rel form) which is the pure dependency tree
		with only dependency relations and token forms as node labels. In
		general, the format string is the representation of the innermost
		treelet in the format. See each format-specific method for exact
		details.
		
		pNode is either a node of the dependency tree or the string "root"
		(case-insensitive) standing for the root of the tree.
		
		pdOptions provides options specific to each format. It is only used
		by the formats handled in this class and is not passed to the parent
		class for the other formats.
		
		The generated tree is stored in self.kTree and returned.
		
		See the documentation of the parent class.
		'''
		
		if isinstance(pNode, str) and pNode.lower() == "root":
			pNode = self.depTree.root
		
		# formats handled here, each mapped to its subtree generator
		dGenerators = {"(score (rel (pos form)))": self._generateSLSADKSubtree1,
			"(rel (pos score))": self._generateSLSADKSubtree2,
			"(score (rel (roles (pos form))))": self._generateSLSADKSubtree3,
			"(score (rel (pos_roles form)))": self._generateSLSADKSubtree4,
			"(rel_score (pos form))": self._generateSLSADKSubtree5}
		
		if pFormat in dGenerators:
			vGenerator = dGenerators[pFormat]
			# the given node itself is not represented; 0 is passed as the
			# head position of its dependents 
			self.kTree = "( %s)" % ''.join([vGenerator(self.depTree.getNode(d), 0, pdOptions) for d in pNode.dependents])
		else:
			self.kTree = tk.DepKTree.generateDepKTree(self, pNode = pNode, pFormat = pFormat)
		
		return self.kTree
		
	
	def _getSLSALabelOption(self, pdOptions, pKey):
		'''
		Returns the value of the given label option or None if the option
		is not provided, is None or contains only white space
		'''
		
		if pdOptions is None or pKey not in pdOptions:
			return None
		
		vLabel = pdOptions[pKey]
		
		if vLabel is None or vLabel.strip() == '':
			return None
		
		return vLabel
		
	
	def _getSLSAScoreLabel(self, pNode, pdOptions):
		'''
		Returns the label representing the sentiment of the node: its 
		sentiment score if it has one, otherwise the value of the "neutral"
		option, and None if neither of them is available
		'''
		
		vScore = pNode.getSentScore()
		
		if vScore is not None:
			return vScore
		
		return self._getSLSALabelOption(pdOptions, "neutral")
		
	
	def _generateSLSADependents(self, pGenerator, pNode, pdOptions):
		'''
		Returns the concatenation of the kernel subtrees of the dependents
		of the given node, each generated by pGenerator with the given node
		as the current head
		'''
		
		return ''.join([pGenerator(self.depTree.getNode(d), pNode.position, pdOptions) for d in pNode.dependents])
		
	
	def _nestSLSALabels(self, plLabels, pTerminal, pNode, pDependents):
		'''
		Returns the bracketing in which the terminal is nested under the 
		given labels, listed from the outermost to the innermost: 
		(l1 (l2 (... (ln terminal))))
		
		If the node has dependents, their subtrees are attached under the
		terminal, i.e. terminal is replaced with (terminal dependents).
		'''
		
		if len(pNode.dependents) == 0:
			vKSubtree = pTerminal
		else:
			vKSubtree = "(%s %s)" % (pTerminal, pDependents)
		
		# wrap from the innermost label outwards
		for vLabel in reversed(plLabels):
			vKSubtree = "(%s %s)" % (vLabel, vKSubtree)
		
		return vKSubtree
		
	
	# (score (rel (pos form))) -> no equivalent number format in version 0.3
	def _generateSLSADKSubtree1(self, pNode, pCurrentHead, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (rel (pos form))) format
		
		pCurrentHead identifies which head is calling this method in case
		the node has multiple heads.
		
		pdOptions contains the following options to be used in formatting:
		- neutral: the way the neutral words, i.e. those without a sentiment 
		           score should be treated. The possible values include
		           an empty string which means do not add any node for such
		           words, and a string value which will be used as a node
		           to be inserted in the same way the scores are.
		'''
		
		vDependents = self._generateSLSADependents(self._generateSLSADKSubtree1, pNode, pdOptions)
		
		lLabels = [pNode.getDepRel(pCurrentHead), pNode.getPOSTag()]
		
		# the score node is left out for neutral words with no neutral label
		vScoreLabel = self._getSLSAScoreLabel(pNode, pdOptions)
		if vScoreLabel is not None:
			lLabels.insert(0, vScoreLabel)
		
		return self._nestSLSALabels(lLabels, pNode.form, pNode, vDependents)
		
	
	# (rel (pos score)) -> no equivalent number format in version 0.3
	def _generateSLSADKSubtree2(self, pNode, pCurrentHead, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (rel (pos score)) format
		
		pCurrentHead identifies which head is calling this method in case
		the node has multiple heads.
		
		pdOptions contains the following options to be used in formatting:
		- neutral: the way the neutral words, i.e. those without a sentiment 
		           score should be treated. The possible values include
		           an empty string which means do not add any node for such
		           words (the POS tag becomes the terminal), and a string 
		           value which will be used in place of the score.
		'''
		
		vDependents = self._generateSLSADependents(self._generateSLSADKSubtree2, pNode, pdOptions)
		
		vScoreLabel = self._getSLSAScoreLabel(pNode, pdOptions)
		
		if vScoreLabel is not None:
			lLabels = [pNode.getDepRel(pCurrentHead), pNode.getPOSTag()]
			vTerminal = vScoreLabel
		else:
			# no score node: the POS tag takes the place of the terminal
			lLabels = [pNode.getDepRel(pCurrentHead)]
			vTerminal = pNode.getPOSTag()
		
		return self._nestSLSALabels(lLabels, vTerminal, pNode, vDependents)
		
	
	# (score (rel (roles (pos form)))) -> no equivalent number format in version 0.3
	def _generateSLSADKSubtree3(self, pNode, pCurrentHead, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (rel (roles (pos form)))) format
		
		pCurrentHead identifies which head is calling this method in case
		the node has multiple heads.
		
		pdOptions contains the following options to be used in formatting:
		- neutral: the way the neutral words, i.e. those without a sentiment 
		           score should be treated. The possible values include
		           an empty string which means do not add any node for such
		           words, and a string value which will be used as a node
		           to be inserted in the same way the scores are.
		- no-arg: the way non-argument nodes are represented. The possible
		          values include an empty string which means the node will 
		          be represented as in non-semantic format, and a string 
		          value (e.g. null) which will be used as the label for 
		          representing semantic role of such nodes.  
		'''
		
		vDependents = self._generateSLSADependents(self._generateSLSADKSubtree3, pNode, pdOptions)
		
		# roles of an argument are joined into one label; non-arguments get
		# the no-arg label, if any, or no role node at all
		if pNode.isArgument():
			vRoleLabel = '_'.join(pNode.getArgRoles())
		else:
			vRoleLabel = self._getSLSALabelOption(pdOptions, "no-arg")
		
		lLabels = []
		
		vScoreLabel = self._getSLSAScoreLabel(pNode, pdOptions)
		if vScoreLabel is not None:
			lLabels.append(vScoreLabel)
		
		lLabels.append(pNode.getDepRel(pCurrentHead))
		
		if vRoleLabel is not None:
			lLabels.append(vRoleLabel)
		
		lLabels.append(pNode.getPOSTag())
		
		return self._nestSLSALabels(lLabels, pNode.form, pNode, vDependents)
		
	
	# (score (rel (pos_roles form))) -> no equivalent number format in version 0.3
	def _generateSLSADKSubtree4(self, pNode, pCurrentHead, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (rel (pos_roles form))) format
		
		pCurrentHead identifies which head is calling this method in case
		the node has multiple heads.
		
		pdOptions contains the following options to be used in formatting:
		- neutral: the way the neutral words, i.e. those without a sentiment 
		           score should be treated. The possible values include
		           an empty string which means do not add any node for such
		           words, and a string value which will be used as a node
		           to be inserted in the same way the scores are.
		'''
		
		vDependents = self._generateSLSADependents(self._generateSLSADKSubtree4, pNode, pdOptions)
		
		# roles of an argument are appended to its POS tag; the POS tag of 
		# a non-argument is used as is
		vPOSLabel = pNode.getPOSTag()
		if pNode.isArgument():
			vPOSLabel = "%s_%s" % (vPOSLabel, '_'.join(pNode.getArgRoles()))
		
		lLabels = [pNode.getDepRel(pCurrentHead), vPOSLabel]
		
		vScoreLabel = self._getSLSAScoreLabel(pNode, pdOptions)
		if vScoreLabel is not None:
			lLabels.insert(0, vScoreLabel)
		
		return self._nestSLSALabels(lLabels, pNode.form, pNode, vDependents)
		
	
	# (rel_score (pos form)) -> no equivalent number format in version 0.3
	def _generateSLSADKSubtree5(self, pNode, pCurrentHead, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (rel_score (pos form)) format
		
		pCurrentHead identifies which head is calling this method in case
		the node has multiple heads.
		
		pdOptions contains the following options to be used in formatting:
		- neutral: the way the neutral words, i.e. those without a sentiment 
		           score should be treated. The possible values include
		           an empty string which means the dependency relation is
		           used alone for such words, and a string value which will
		           be appended to the relation in the same way the scores 
		           are.
		'''
		
		vDependents = self._generateSLSADependents(self._generateSLSADKSubtree5, pNode, pdOptions)
		
		vRelLabel = pNode.getDepRel(pCurrentHead)
		
		vScoreLabel = self._getSLSAScoreLabel(pNode, pdOptions)
		if vScoreLabel is not None:
			vRelLabel = "%s_%s" % (vRelLabel, vScoreLabel)
		
		return self._nestSLSALabels([vRelLabel, pNode.getPOSTag()], pNode.form, pNode, vDependents)
		
	
class SLSACKTree(tk.ConstKTree):
	'''
	SLSA constituency tree for use in tree kernels
	
	The tree is represented in PTB bracketing format, as required by 
	tree kernels.
	'''
	
	
	def generateConstKTree(self, pFormat = "(phrase (pos form))", pdOptions = None):
		'''
		Generates the constituency kernel tree in the required format, 
		stores it in self.kTree and returns it
		
		NOTE: The subtree here should not be confused with the notion of
		subtree as a tree kernel variation used in parallel to subset tree
		kernel.
		
		The format string is matched case-insensitively against the 
		formats supported by this class. Any other format, including the 
		default (phrase (pos form)) which is the pure constituency tree in
		bracketing (s-expression) format, is delegated to the parent class
		without pdOptions. See each format-specific method for details.
		
		pdOptions contains specific options to each format.
		'''
		
		dGenerators = {"(score (score score))": self._generateSLSACKSubtree1,
			"(score (phrase (score (pos form))))": self._generateSLSACKSubtree2,
			"(phrase_score (pos_score form))": self._generateSLSACKSubtree3,
			"(score (phrase_args (score (pos_args form))))": self._generateSLSACKSubtree4,
			"(phrase (score )(pos (score )(form )))": self._generateSLSACKSubtree5}
		
		vGenerator = dGenerators.get(pFormat.lower())
		
		if vGenerator is None:
			self.kTree = tk.ConstKTree.generateConstKTree(self, pFormat = pFormat)
		else:
			self.kTree = "( %s)" % vGenerator(self.constTree.root, pdOptions = pdOptions)
		
		return self.kTree
		
	
	def _keepsSLSAATLabel(self, pNode, pdOptions):
		'''
		Returns true if the "keep-at" option is set to True and the 
		syntactic tag of the node is AT (case-insensitive)
		'''
		
		if pdOptions is None or "keep-at" not in pdOptions:
			return False
		
		if not (pdOptions["keep-at"] == True):
			return False
		
		return pNode.getSynTag().upper() == "AT"
		
	
	def _joinSLSAChildren(self, pGenerator, pNode, pdOptions):
		'''
		Returns the concatenation of the kernel subtrees generated by 
		pGenerator for the children of the node
		'''
		
		return ''.join([pGenerator(vChild, pdOptions) for vChild in pNode.children])
		
	
	def _generateSLSACKSubtree1(self, pNode, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (score score)) format
		
		Every node is replaced by its sentiment score. A preterminal or 
		terminal node is followed by the score of its first terminal node.
		
		pdOptions contains the following options:
		- keep-at: if true will keep the AT label and wont use sentiment 
		           scores to replace it (default false) 
		'''
		
		if pNode.isPreTerminal() or pNode.isTerminal():
			vNodeScore = pNode.getSentScore()
			vTerminalScore = pNode.getTerminalNodes()[0].getSentScore()
			return "(%s %s)" % (vNodeScore, vTerminalScore)
		
		if self._keepsSLSAATLabel(pNode, pdOptions):
			return "(AT %s)" % self._joinSLSAChildren(self._generateSLSACKSubtree1, pNode, pdOptions)
		
		vNodeScore = pNode.getSentScore()
		vChildren = self._joinSLSAChildren(self._generateSLSACKSubtree1, pNode, pdOptions)
		
		return "(%s %s)" % (vNodeScore, vChildren)
		
	
	def _generateSLSACKSubtree2(self, pNode, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (phrase (score (pos form)))) 
		format
		
		Every node is placed under a node carrying its sentiment score.
		
		pdOptions contains the following options:
		- keep-at: if true will keep the AT label and wont add a sentiment 
		           score node above it (default false) 
		'''
		
		if pNode.isPreTerminal():
			vContent = (pNode.getSentScore(), pNode.getSynTag(), pNode.getTerminal())
			return "(%s (%s %s))" % vContent
		
		if self._keepsSLSAATLabel(pNode, pdOptions):
			return "(AT %s)" % self._joinSLSAChildren(self._generateSLSACKSubtree2, pNode, pdOptions)
		
		vNodeScore = pNode.getSentScore()
		vSynTag = pNode.getSynTag()
		vChildren = self._joinSLSAChildren(self._generateSLSACKSubtree2, pNode, pdOptions)
		
		return "(%s (%s %s))" % (vNodeScore, vSynTag, vChildren)
		
	
	def _generateSLSACKSubtree3(self, pNode, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (phrase_score (pos_score form))
		format
		
		The sentiment score of every node is appended to its syntactic tag.
		
		pdOptions contains the following options:
		- keep-at: if true will keep the AT label as is and wont append the
		           sentiment score to it (default false) 
		'''
		
		if pNode.isPreTerminal():
			vContent = (pNode.getSynTag(), pNode.getSentScore(), pNode.getTerminal())
			return "(%s_%s %s)" % vContent
		
		if self._keepsSLSAATLabel(pNode, pdOptions):
			return "(AT %s)" % self._joinSLSAChildren(self._generateSLSACKSubtree3, pNode, pdOptions)
		
		vSynTag = pNode.getSynTag()
		vNodeScore = pNode.getSentScore()
		vChildren = self._joinSLSAChildren(self._generateSLSACKSubtree3, pNode, pdOptions)
		
		return "(%s_%s %s)" % (vSynTag, vNodeScore, vChildren)
		
	
	def _generateSLSACKSubtree4(self, pNode, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (score (phrase_args (score (pos_args form)))) 
		format
		
		Every node is placed under a node carrying its sentiment score and
		its argument roles are appended to its syntactic tag.
		
		pdOptions contains the following options:
		- keep-at: if true will keep the AT label and wont add a sentiment 
		           score node above it (default false) 
		'''
		
		if pNode.isPreTerminal():
			vNodeScore = pNode.getSentScore()
			vLabel = '_'.join([pNode.getSynTag()] + pNode.getArgRoles())
			return "(%s (%s %s))" % (vNodeScore, vLabel, pNode.getTerminal())
		
		if self._keepsSLSAATLabel(pNode, pdOptions):
			return "(AT %s)" % self._joinSLSAChildren(self._generateSLSACKSubtree4, pNode, pdOptions)
		
		vNodeScore = pNode.getSentScore()
		vLabel = '_'.join([pNode.getSynTag()] + pNode.getArgRoles())
		vChildren = self._joinSLSAChildren(self._generateSLSACKSubtree4, pNode, pdOptions)
		
		return "(%s (%s %s))" % (vNodeScore, vLabel, vChildren)
		
	
	def _generateSLSACKSubtree5(self, pNode, pdOptions = None):
		'''
		Recursively generates the kernel subtree of the given node in
		bracketing representation in (phrase (score )(pos (score )(form ))) 
		format
		
		The sentiment score of every node is added as its first child.
		
		pdOptions contains the following options:
		- keep-at: if true will keep the AT label and wont add a sentiment 
		           score child to it (default false) 
		'''
		
		if pNode.isPreTerminal():
			vContent = (pNode.getSynTag(), pNode.getSentScore(), pNode.getTerminal())
			return "(%s (%s )(%s ))" % vContent
		
		if self._keepsSLSAATLabel(pNode, pdOptions):
			return "(AT %s)" % self._joinSLSAChildren(self._generateSLSACKSubtree5, pNode, pdOptions)
		
		vSynTag = pNode.getSynTag()
		vNodeScore = pNode.getSentScore()
		vChildren = self._joinSLSAChildren(self._generateSLSACKSubtree5, pNode, pdOptions)
		
		return "(%s (%s )%s)" % (vSynTag, vNodeScore, vChildren)
		
	
class SLSACDKTree(tk.ConstDepKTree):
	'''
	Class for integrating SLSA dependency subtrees in SLSA constituency 
	trees for tree kernel use
	'''
	
	
	def _createDepKTree(self):
		'''
		Creates and returns a new SLSADKTree over the dependency tree of
		this object
		'''
		
		return SLSADKTree(pDepTree = self.depTree)
		
	
	def generateConstDepKTree(self, pFormat = "(phrase (pos (form (rel head))))", pdOptions = None):
		'''
		Generates the tree representation in the required format, stores
		it in self.kTree and returns it
		
		The generation is fully delegated to the parent class.
		
		pdOptions contains specific options to each format. If it is not
		provided, a new empty dictionary is passed to the parent class on
		every call, so that no dictionary is shared between calls.
		
		See the parent class for more details.
		'''
		
		if pdOptions is None:
			pdOptions = {}
		
		self.kTree = tk.ConstDepKTree.generateConstDepKTree(self, pFormat = pFormat, pdOptions = pdOptions)
		
		return self.kTree
		
	
class SLSANGramKTree(tk.NGramKTree):
	'''
	Class for implementing n-gram tree for SLSA sentences
	'''
	
	
	def __init__(self, pSLSASent, pNodeContentType = "word", pdOptions = None):
		'''
		Constructor
		
		pNodeContentType is matched case-insensitively and selects what 
		the n-gram tree nodes are made of:
		- word: the tokens of the sentence, i.e. nodes are word forms
		- word-lower: the tokens of the sentence in lower case
		- POS: the POS tags of the sentence
		- sentiment or polarity: the sentiment scores of the sentence, 
		  converted to strings
		- wvp: the tokens with a prefix prepended, for word vector 
		  similarity computation (e.g. with svmlight-tk-we)
		
		pdOptions contains specific options to each content type:
		- prefix: the prefix used by wvp (default "___")
		
		Raises an Exception when the node content type is none of the above.
		'''
		
		vdOptions = {} if pdOptions is None else pdOptions
		vContentType = pNodeContentType.lower()
		
		# pick the token list first, then initialise the parent only once
		if vContentType == "word":
			vlTokens = pSLSASent.getTokens()
		elif vContentType == "word-lower":
			vlTokens = [vToken.lower() for vToken in pSLSASent.getTokens()]
		elif vContentType == "pos":
			vlTokens = pSLSASent.getPOSTags()
		elif vContentType in ("sentiment", "polarity"):
			vlTokens = [str(vScore) for vScore in pSLSASent.getSentimentScores()]
		elif vContentType == "wvp":
			vPrefix = vdOptions["prefix"] if "prefix" in vdOptions else "___"
			vlTokens = [vPrefix + vToken for vToken in pSLSASent.getTokens()]
		else:
			raise Exception("%s is an invalid node content type!" % pNodeContentType)
		
		tk.NGramKTree.__init__(self, plTokens = vlTokens)
		
		self.slsaSent = pSLSASent
		
	
	def generateNGramKTree(self, pFormat = "binary", pdOptions = None):
		'''
		Generates the tree representation of the sentence tokens through 
		the parent class, stores it in self.kTree and returns it
		
		pFormat is passed on to the parent class when it is one of unary, 
		bigram or binary; for any other value the parent class is called 
		without a format, i.e. with its own default.
		
		pdOptions is meant for options specific to each format; it is 
		accepted but not read by this method.
		'''
		
		# only the formats listed here are forwarded explicitly
		vlMatches = [vKnown for vKnown in ("unary", "bigram", "binary") if pFormat == vKnown]
		vdFormatArg = {"pFormat": vlMatches[0]} if vlMatches else {}
		
		self.kTree = tk.NGramKTree.generateNGramKTree(self, **vdFormatArg)
		
		return self.kTree