28
28
from duckling import DucklingWrapper
29
29
30
30
31
def extract_value(match):
    """Return the normalized value of a single duckling match.

    For matches whose ``match["value"]["type"]`` is ``"interval"`` the
    result is a dict with the keys ``"to"`` and ``"from"``, each holding
    the ``"value"`` of the corresponding interval bound, or ``None`` when
    that bound is absent. For every other match the result is
    ``match["value"].get("value")``.

    :param match: a duckling match; must contain a dict under ``"value"``.
    :return: a ``{"to": ..., "from": ...}`` dict for intervals, otherwise
        the plain value (``None`` if there is none).
    """
    parsed = match["value"]
    if parsed.get("type") != "interval":
        return parsed.get("value")

    # A bound may be missing entirely or be present but set to None
    # (open-ended interval). ``or {}`` covers both, whereas a ``.get``
    # default only covers the missing-key case.
    upper = parsed.get("to") or {}
    lower = parsed.get("from") or {}
    return {"to": upper.get("value"),
            "from": lower.get("value")}
39
+
40
+
31
41
class DucklingExtractor (EntityExtractor ):
32
- """Adds entity normalization by analyzing found entities and transforming them into regular formats."""
42
+ """Adds entity normalization by analyzing found entities and
43
+ transforming them into regular formats."""
33
44
34
45
name = "ner_duckling"
35
46
@@ -38,26 +49,34 @@ class DucklingExtractor(EntityExtractor):
38
49
@staticmethod
39
50
def available_dimensions():
    """List the values of all non-dunder members of duckling's ``Dim``.

    A member is left out when its name starts with, or ends with, a
    double underscore.
    """
    from duckling.dim import Dim

    dimensions = []
    for member_name, member_value in getmembers(Dim):
        if member_name.startswith("__") or member_name.endswith("__"):
            continue
        dimensions.append(member_value)
    return dimensions
42
55
43
56
def __init__(self, duckling, dimensions=None):
    # type: (DucklingWrapper, Optional[List[Text]]) -> None
    """Store the duckling wrapper and the dimensions to extract.

    When ``dimensions`` is ``None`` every dimension reported by
    ``available_dimensions()`` is used.
    """
    super(DucklingExtractor, self).__init__()
    self.duckling = duckling

    # Only ``None`` triggers the fallback; an explicitly passed empty
    # list is kept as given.
    self.dimensions = (dimensions
                       if dimensions is not None
                       else self.available_dimensions())
66
+
49
67
@classmethod
50
68
def required_packages(cls):
    # type: () -> List[Text]
    """Return the names of the packages this component requires."""
    packages = ["duckling"]
    return packages
53
71
54
72
@classmethod
55
def create_duckling_wrapper(cls, language):
    """Build a ``DucklingWrapper`` for the given language.

    A ``ValueError`` raised while constructing the wrapper is re-raised
    as a plain ``Exception`` whose message starts with
    ``"Duckling error."``.
    """
    from duckling import DucklingWrapper

    try:
        # languages in duckling are eg "de$core"
        wrapper = DucklingWrapper(language=language)
    except ValueError as e:  # pragma: no cover
        raise Exception("Duckling error. {}".format(e))
    return wrapper
62
81
63
82
@classmethod
@@ -66,12 +85,17 @@ def create(cls, config):
66
85
67
86
dims = config ["duckling_dimensions" ]
68
87
if dims :
69
- unknown_dimensions = [dim for dim in dims if dim not in cls .available_dimensions ()]
88
+ unknown_dimensions = [dim
89
+ for dim in dims
90
+ if dim not in cls .available_dimensions ()]
70
91
if len (unknown_dimensions ) > 0 :
71
- raise ValueError ("Invalid duckling dimension. Got '{}'. Allowed: {}" .format (
72
- ", " .join (unknown_dimensions ), ", " .join (cls .available_dimensions ())))
92
+ raise ValueError (
93
+ "Invalid duckling dimension. Got '{}'. Allowed: {}"
94
+ "" .format (", " .join (unknown_dimensions ),
95
+ ", " .join (cls .available_dimensions ())))
73
96
74
- return DucklingExtractor (cls ._create_duckling_wrapper (config ["language" ]), dims )
97
+ wrapper = cls .create_duckling_wrapper (config ["language" ])
98
+ return DucklingExtractor (wrapper , dims )
75
99
76
100
@classmethod
77
101
def cache_key (cls , model_metadata ):
@@ -82,55 +106,72 @@ def cache_key(cls, model_metadata):
82
106
def process(self, message, **kwargs):
    # type: (Message, **Any) -> None
    """Run duckling over ``message.text`` and append the found entities.

    Does nothing when no duckling wrapper is set. Otherwise the text is
    parsed relative to a reference time: ``message.time`` (interpreted
    as a millisecond timestamp) when it is set and can be converted,
    else the current UTC time. Matches whose ``"dim"`` is listed in
    ``self.dimensions`` are turned into entity dicts and added to the
    message's ``"entities"``.
    """
    if self.duckling is None:
        return

    time_format = '%Y-%m-%dT%H:%M:%S+00:00'

    # Default to "now"; only replaced if the message timestamp is usable.
    ref_time = datetime.datetime.utcnow().strftime(time_format)
    if message.time is not None:
        # check if time given is valid
        try:
            seconds = int(message.time) / 1000.0
            ref_time = datetime.datetime.utcfromtimestamp(
                seconds).strftime(time_format)
            logging.debug("Passing reference time {} "
                          "to duckling".format(ref_time))
        except Exception as e:
            # Best effort: keep the current-time default and report why.
            logging.warning("Could not parse timestamp {}. Instead "
                            "current UTC time {} will be passed to "
                            "duckling. Error: {}"
                            "".format(message.time, ref_time, e))

    found = self.duckling.parse(message.text, reference_time=ref_time)

    extracted = []
    for match in found:
        if match["dim"] not in self.dimensions:
            continue
        value = extract_value(match)
        extracted.append({"start": match["start"],
                          "end": match["end"],
                          "text": match["text"],
                          "value": value,
                          # full duckling value, kept next to the
                          # condensed "value" above
                          "additional_info": match["value"],
                          "entity": match["dim"]})

    extracted = self.add_extractor_name(extracted)
    combined = message.get("entities", []) + extracted
    message.set("entities", combined, add_to_output=True)
115
149
116
150
def persist(self, model_dir):
    # type: (Text) -> Dict[Text, Any]
    """Write the configured dimensions to a JSON file in ``model_dir``.

    The file is named ``<self.name>.json`` and contains a single object
    with the key ``"dimensions"``.

    :param model_dir: directory the file is written to.
    :return: ``{"ner_duckling_persisted": <file name>}``, where the file
        name is relative to ``model_dir``.
    """
    file_name = self.name + ".json"
    full_name = os.path.join(model_dir, file_name)

    # Write with an explicit encoding so the file does not depend on the
    # platform default; ``load`` reads it back as UTF-8.
    with io.open(full_name, 'w', encoding='utf-8') as f:
        f.write(str(json.dumps({"dimensions": self.dimensions})))
    return {"ner_duckling_persisted": file_name}
124
158
125
159
@classmethod
126
- def load (cls , model_dir , model_metadata , cached_component , ** kwargs ):
127
- # type: (Text, Metadata, Optional[DucklingExtractor], **Any) -> DucklingExtractor
128
-
129
- persisted = os .path .join (model_dir , model_metadata .get ("ner_duckling_persisted" ))
160
+ def load (cls ,
161
+ model_dir = None , # type: Text
162
+ model_metadata = None , # type: Metadata
163
+ cached_component = None , # type:Optional[DucklingExtractor]
164
+ ** kwargs # type: **Any
165
+ ):
166
+ # type: (...) -> DucklingExtractor
167
+
168
+ persisted = os .path .join (model_dir ,
169
+ model_metadata .get ("ner_duckling_persisted" ))
130
170
if cached_component :
131
171
duckling = cached_component .duckling
132
172
else :
133
- duckling = cls ._create_duckling_wrapper (model_metadata .get ("language" ))
173
+ language = model_metadata .get ("language" )
174
+ duckling = cls .create_duckling_wrapper (language )
134
175
135
176
if os .path .isfile (persisted ):
136
177
with io .open (persisted , encoding = 'utf-8' ) as f :
0 commit comments