diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py index f2c9a16..d48a1fb 100644 --- a/corenlp/corenlp.py +++ b/corenlp/corenlp.py @@ -45,7 +45,7 @@ WORD_PATTERN = re.compile('\[([^\]]+)\]') CR_PATTERN = re.compile(r"\((\d*),(\d)*,\[(\d*),(\d*)\)\) -> \((\d*),(\d)*,\[(\d*),(\d*)\)\), that is: \"(.*)\" -> \"(.*)\"") -DIRECTORY = "stanford-corenlp-full-2013-06-20" +DIRECTORY = "stanford-corenlp-full-2015-04-20" class bc: @@ -158,7 +158,11 @@ def parse_parser_results(text): line = line.strip() if line.startswith("Sentence #"): - sentence = {'words': [], 'parsetree': [], 'dependencies': []} + index = line.index("sentiment: ") + length = len("sentiment: ") + sentiment = line[(index+length):-2] + sentence = {'sentiment': [], 'words': [], 'parsetree': [], 'dependencies': []} + sentence['sentiment'] = sentiment results["sentences"].append(sentence) state = STATE_TEXT diff --git a/corenlp/default.properties b/corenlp/default.properties index 70ac093..ba3bd67 100644 --- a/corenlp/default.properties +++ b/corenlp/default.properties @@ -1,4 +1,4 @@ -annotators = tokenize, ssplit, pos, lemma, depparse +annotators = tokenize, ssplit, pos, lemma, depparse, parse, sentiment # specify Stanford Dependencies format for backwards compatibility # (new default is Universal Dependencies in 3.5.2)