Skip to content

Commit dc1c337

Browse files
authored Nov 28, 2017
Merge pull request RasaHQ#1 from RasaHQ/master
Update from upstream repo RasaHQ/rasa_nlu@master
2 parents 5316dc3 + d9e4d80 commit dc1c337

9 files changed

+111
-57
lines changed
 

‎README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ In general the process is rather simple:
123123
2. write your code, tests and documentation
124124
3. create a pull request describing your changes
125125

126-
You pull request will be reviewed by a maintainer, who might get back to you about any necessary changes or questions.
126+
Your pull request will be reviewed by a maintainer, who might get back to you about any necessary changes or questions. You will also be asked to sign the [Contributor License Agreement](https://cla-assistant.io/RasaHQ/rasa_nlu).
127127

128128
# Advanced installation
129129
### Advanced Python

‎docs/pipeline.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ intent_featurizer_mitie
8080
~~~~~~~~~~~~~~~~~~~~~~~
8181

8282
:Short: MITIE intent featurizer
83-
:Outputs: nothing, used used as an input to intent classifiers that need intent features (e.g. ``intent_classifier_sklearn``)
83+
:Outputs: nothing, used as an input to intent classifiers that need intent features (e.g. ``intent_classifier_sklearn``)
8484
:Description:
8585
Creates features for intent classification using the MITIE featurizer.
8686

@@ -94,7 +94,7 @@ intent_featurizer_spacy
9494
~~~~~~~~~~~~~~~~~~~~~~~
9595

9696
:Short: spacy intent featurizer
97-
:Outputs: nothing, used used as an input to intent classifiers that need intent features (e.g. ``intent_classifier_sklearn``)
97+
:Outputs: nothing, used as an input to intent classifiers that need intent features (e.g. ``intent_classifier_sklearn``)
9898
:Description:
9999
Creates features for intent classification using the spacy featurizer.
100100

‎rasa_nlu/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"max_number_of_ngrams": 7,
3434
"pipeline": [],
3535
"response_log": "logs",
36+
"storage": None,
3637
"aws_endpoint_url": None,
3738
"duckling_dimensions": None,
3839
"duckling_http_url": None,

‎rasa_nlu/data_router.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def _create_emulator(self):
139139
elif mode.lower() == 'luis':
140140
from rasa_nlu.emulators.luis import LUISEmulator
141141
return LUISEmulator()
142-
elif mode.lower() == 'api':
142+
elif mode.lower() == 'dialogflow':
143143
from rasa_nlu.emulators.dialogflow import DialogflowEmulator
144144
return DialogflowEmulator()
145145
else:

‎rasa_nlu/extractors/duckling_extractor.py

+84-43
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,19 @@
2828
from duckling import DucklingWrapper
2929

3030

31+
def extract_value(match):
32+
if match["value"].get("type") == "interval":
33+
value = {"to": match["value"].get("to", {}).get("value"),
34+
"from": match["value"].get("from", {}).get("value")}
35+
else:
36+
value = match["value"].get("value")
37+
38+
return value
39+
40+
3141
class DucklingExtractor(EntityExtractor):
32-
"""Adds entity normalization by analyzing found entities and transforming them into regular formats."""
42+
"""Adds entity normalization by analyzing found entities and
43+
transforming them into regular formats."""
3344

3445
name = "ner_duckling"
3546

@@ -38,26 +49,34 @@ class DucklingExtractor(EntityExtractor):
3849
@staticmethod
3950
def available_dimensions():
4051
from duckling.dim import Dim
41-
return [m[1] for m in getmembers(Dim) if not m[0].startswith("__") and not m[0].endswith("__")]
52+
return [m[1]
53+
for m in getmembers(Dim)
54+
if not m[0].startswith("__") and not m[0].endswith("__")]
4255

4356
def __init__(self, duckling, dimensions=None):
4457
# type: (DucklingWrapper, Optional[List[Text]]) -> None
4558

46-
self.dimensions = dimensions if dimensions is not None else self.available_dimensions()
59+
super(DucklingExtractor, self).__init__()
4760
self.duckling = duckling
4861

62+
if dimensions is not None:
63+
self.dimensions = dimensions
64+
else:
65+
self.dimensions = self.available_dimensions()
66+
4967
@classmethod
5068
def required_packages(cls):
5169
# type: () -> List[Text]
5270
return ["duckling"]
5371

5472
@classmethod
55-
def _create_duckling_wrapper(cls, language):
73+
def create_duckling_wrapper(cls, language):
5674
from duckling import DucklingWrapper
5775

5876
try:
59-
return DucklingWrapper(language=language) # languages in duckling are eg "de$core"
60-
except ValueError as e: # pragma: no cover
77+
# languages in duckling are eg "de$core"
78+
return DucklingWrapper(language=language)
79+
except ValueError as e: # pragma: no cover
6180
raise Exception("Duckling error. {}".format(e))
6281

6382
@classmethod
@@ -66,12 +85,17 @@ def create(cls, config):
6685

6786
dims = config["duckling_dimensions"]
6887
if dims:
69-
unknown_dimensions = [dim for dim in dims if dim not in cls.available_dimensions()]
88+
unknown_dimensions = [dim
89+
for dim in dims
90+
if dim not in cls.available_dimensions()]
7091
if len(unknown_dimensions) > 0:
71-
raise ValueError("Invalid duckling dimension. Got '{}'. Allowed: {}".format(
72-
", ".join(unknown_dimensions), ", ".join(cls.available_dimensions())))
92+
raise ValueError(
93+
"Invalid duckling dimension. Got '{}'. Allowed: {}"
94+
"".format(", ".join(unknown_dimensions),
95+
", ".join(cls.available_dimensions())))
7396

74-
return DucklingExtractor(cls._create_duckling_wrapper(config["language"]), dims)
97+
wrapper = cls.create_duckling_wrapper(config["language"])
98+
return DucklingExtractor(wrapper, dims)
7599

76100
@classmethod
77101
def cache_key(cls, model_metadata):
@@ -82,55 +106,72 @@ def cache_key(cls, model_metadata):
82106
def process(self, message, **kwargs):
83107
# type: (Message, **Any) -> None
84108

109+
if self.duckling is None:
110+
return
111+
85112
extracted = []
86-
if self.duckling is not None:
87-
ref_time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S+00:00')
88-
if message.time is not None:
89-
# check if time given is valid
90-
try:
91-
ref_time = datetime.datetime\
92-
.utcfromtimestamp(int(message.time)/1000.0)\
93-
.strftime('%Y-%m-%dT%H:%M:%S+00:00')
94-
logging.debug(
95-
"Passing reference time {} to duckling".format(ref_time))
96-
except Exception as e:
97-
logging.warning(
98-
"Could not parse timestamp {}. "
99-
"Instead current UTC time {} will be passed to duckling".format(message.time, ref_time))
100-
101-
matches = self.duckling.parse(message.text, reference_time=ref_time)
102-
relevant_matches = [match for match in matches if match["dim"] in self.dimensions]
103-
for match in relevant_matches:
104-
entity = {"start": match["start"],
105-
"end": match["end"],
106-
"text": match["text"],
107-
"value": match["value"]["value"],
108-
"additional_info": match["value"],
109-
"entity": match["dim"]}
110-
111-
extracted.append(entity)
113+
114+
current_time = datetime.datetime.utcnow()
115+
ref_time = current_time.strftime('%Y-%m-%dT%H:%M:%S+00:00')
116+
if message.time is not None:
117+
# check if time given is valid
118+
try:
119+
ref_time = datetime.datetime \
120+
.utcfromtimestamp(int(message.time) / 1000.0) \
121+
.strftime('%Y-%m-%dT%H:%M:%S+00:00')
122+
logging.debug("Passing reference time {} "
123+
"to duckling".format(ref_time))
124+
except Exception as e:
125+
logging.warning("Could not parse timestamp {}. Instead "
126+
"current UTC time {} will be passed to "
127+
"duckling. Error: {}"
128+
"".format(message.time, ref_time, e))
129+
130+
matches = self.duckling.parse(message.text, reference_time=ref_time)
131+
relevant_matches = [match
132+
for match in matches
133+
if match["dim"] in self.dimensions]
134+
135+
for match in relevant_matches:
136+
value = extract_value(match)
137+
entity = {"start": match["start"],
138+
"end": match["end"],
139+
"text": match["text"],
140+
"value": value,
141+
"additional_info": match["value"],
142+
"entity": match["dim"]}
143+
144+
extracted.append(entity)
112145

113146
extracted = self.add_extractor_name(extracted)
114-
message.set("entities", message.get("entities", []) + extracted, add_to_output=True)
147+
message.set("entities", message.get("entities", []) + extracted,
148+
add_to_output=True)
115149

116150
def persist(self, model_dir):
117151
# type: (Text) -> Dict[Text, Any]
118152

119-
file_name = self.name+".json"
153+
file_name = self.name + ".json"
120154
full_name = os.path.join(model_dir, file_name)
121155
with io.open(full_name, 'w') as f:
122156
f.write(str(json.dumps({"dimensions": self.dimensions})))
123157
return {"ner_duckling_persisted": file_name}
124158

125159
@classmethod
126-
def load(cls, model_dir, model_metadata, cached_component, **kwargs):
127-
# type: (Text, Metadata, Optional[DucklingExtractor], **Any) -> DucklingExtractor
128-
129-
persisted = os.path.join(model_dir, model_metadata.get("ner_duckling_persisted"))
160+
def load(cls,
161+
model_dir=None, # type: Text
162+
model_metadata=None, # type: Metadata
163+
cached_component=None, # type:Optional[DucklingExtractor]
164+
**kwargs # type: **Any
165+
):
166+
# type: (...) -> DucklingExtractor
167+
168+
persisted = os.path.join(model_dir,
169+
model_metadata.get("ner_duckling_persisted"))
130170
if cached_component:
131171
duckling = cached_component.duckling
132172
else:
133-
duckling = cls._create_duckling_wrapper(model_metadata.get("language"))
173+
language = model_metadata.get("language")
174+
duckling = cls.create_duckling_wrapper(language)
134175

135176
if os.path.isfile(persisted):
136177
with io.open(persisted, encoding='utf-8') as f:

‎rasa_nlu/extractors/duckling_http_extractor.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from rasa_nlu.extractors import EntityExtractor
2020
from rasa_nlu.model import Metadata
2121
from rasa_nlu.training_data import Message
22+
from rasa_nlu.extractors.duckling_extractor import extract_value
2223

2324
logger = logging.getLogger(__name__)
2425

@@ -92,11 +93,12 @@ def process(self, message, **kwargs):
9293
matches = self._duckling_parse(message.text)
9394
relevant_matches = self._filter_irrelevant_matches(matches)
9495
for match in relevant_matches:
96+
value = extract_value(match)
9597
entity = {
9698
"start": match["start"],
9799
"end": match["end"],
98100
"text": match["body"],
99-
"value": match["value"]["value"],
101+
"value": value,
100102
"additional_info": match["value"],
101103
"entity": match["dim"]}
102104

‎rasa_nlu/server.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import argparse
77
import logging
88
import os
9+
import six
910
from functools import wraps
1011

1112
import simplejson
@@ -74,7 +75,10 @@ def decorated(*args, **kwargs):
7475
request.setResponseCode(403)
7576
return 'forbidden'
7677

77-
return f(*args, **kwargs)
78+
if request.method.decode('utf-8', 'strict') == 'OPTIONS':
79+
return '' # if this is an options call we skip running `f`
80+
else:
81+
return f(*args, **kwargs)
7882

7983
return decorated
8084

@@ -86,8 +90,10 @@ def requires_auth(f):
8690
def decorated(*args, **kwargs):
8791
self = args[0]
8892
request = args[1]
89-
token = str(request.args.get('token', [''])[0])
90-
93+
if six.PY3:
94+
token = request.args.get(b'token', [b''])[0].decode("utf8")
95+
else:
96+
token = str(request.args.get('token', [''])[0])
9197
if self.data_router.token is None or token == self.data_router.token:
9298
return f(*args, **kwargs)
9399
request.setResponseCode(401)
@@ -116,13 +122,13 @@ def __init__(self, config, component_builder=None, testing=False):
116122
def _create_data_router(self, config, component_builder):
117123
return DataRouter(config, component_builder)
118124

119-
@app.route("/", methods=['GET'])
125+
@app.route("/", methods=['GET', 'OPTIONS'])
120126
@check_cors
121127
def hello(self, request):
122128
"""Main Rasa route to check if the server is online"""
123129
return "hello from Rasa NLU: " + __version__
124130

125-
@app.route("/parse", methods=['GET', 'POST'])
131+
@app.route("/parse", methods=['GET', 'POST', 'OPTIONS'])
126132
@requires_auth
127133
@check_cors
128134
@inlineCallbacks
@@ -158,7 +164,7 @@ def parse_get(self, request):
158164
logger.exception(e)
159165
returnValue(simplejson.dumps({"error": "{}".format(e)}))
160166

161-
@app.route("/version", methods=['GET'])
167+
@app.route("/version", methods=['GET', 'OPTIONS'])
162168
@requires_auth
163169
@check_cors
164170
def version(self, request):
@@ -167,7 +173,7 @@ def version(self, request):
167173
request.setHeader('Content-Type', 'application/json')
168174
return simplejson.dumps({'version': __version__})
169175

170-
@app.route("/config", methods=['GET'])
176+
@app.route("/config", methods=['GET', 'OPTIONS'])
171177
@requires_auth
172178
@check_cors
173179
def rasaconfig(self, request):
@@ -176,14 +182,14 @@ def rasaconfig(self, request):
176182
request.setHeader('Content-Type', 'application/json')
177183
return simplejson.dumps(self.config.as_dict())
178184

179-
@app.route("/status", methods=['GET'])
185+
@app.route("/status", methods=['GET', 'OPTIONS'])
180186
@requires_auth
181187
@check_cors
182188
def status(self, request):
183189
request.setHeader('Content-Type', 'application/json')
184190
return simplejson.dumps(self.data_router.get_status())
185191

186-
@app.route("/train", methods=['POST'])
192+
@app.route("/train", methods=['POST', 'OPTIONS'])
187193
@requires_auth
188194
@check_cors
189195
@inlineCallbacks

‎rasa_nlu/training_data.py

+3
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ def validate(self):
167167
for intent, group in groupby(examples, lambda e: e.get("intent")):
168168
size = len(list(group))
169169
different_intents.append(intent)
170+
if intent == "":
171+
warnings.warn("Found empty intent, please check your training data."
172+
"This may result in wrong intent predictions.")
170173
if size < self.MIN_EXAMPLES_PER_INTENT:
171174
template = "Intent '{}' has only {} training examples! minimum is {}, training may fail."
172175
warnings.warn(template.format(intent, size, self.MIN_EXAMPLES_PER_INTENT))

‎sample_configs/config_defaults.json

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"max_training_processes": 1,
88
"path": "projects",
99
"response_log": "logs",
10+
"storage": null,
1011
"config": "config.json",
1112
"log_level": "INFO",
1213
"port": 5000,

0 commit comments

Comments
 (0)