File tree Expand file tree Collapse file tree 4 files changed +26
-7
lines changed Expand file tree Collapse file tree 4 files changed +26
-7
lines changed Original file line number Diff line number Diff line change 1- from typing import Any , Dict
1+ from typing import Any , Dict , List
22
33from typing_extensions import Literal
44
@@ -28,6 +28,7 @@ def create_component(
2828 name : str = "cim10" ,
2929 * ,
3030 attr : str = "NORM" ,
31+ cim10 : List [str ] = None ,
3132 ignore_excluded : bool = False ,
3233 ignore_space_tokens : bool = False ,
3334 term_matcher : Literal ["exact" , "simstring" ] = "exact" ,
@@ -75,6 +76,9 @@ def create_component(
7576 The pipeline object
7677 name : str
7778 The name of the component
79+ cim10 : List[str], optional
80+ List of CIM-10 codes to retrieve. If None, all CIM-10 codes are searched,
81+ which results in a higher computation time.
7882 attr : str
7983 The default attribute to use for matching.
8084 ignore_excluded : bool
@@ -104,7 +108,7 @@ def create_component(
104108 nlp = nlp ,
105109 name = name ,
106110 regex = dict (),
107- terms = get_patterns (),
111+ terms = get_patterns (cim10 ),
108112 attr = attr ,
109113 ignore_excluded = ignore_excluded ,
110114 ignore_space_tokens = ignore_space_tokens ,
Original file line number Diff line number Diff line change 55from edsnlp import BASE_DIR
66
77
8- def get_patterns () -> Dict [str , List [str ]]:
8+ def get_patterns (cim10 : List [ str ] = None ) -> Dict [str , List [str ]]:
99 df = pd .read_csv (BASE_DIR / "resources" / "cim10.csv.gz" )
1010
1111 df ["code_pattern" ] = df ["code" ]
@@ -30,4 +30,6 @@ def get_patterns() -> Dict[str, List[str]]:
3030
3131 patterns = df .groupby ("code" )["patterns" ].agg (list ).to_dict ()
3232
33+ patterns = {k : v for k , v in patterns .items () if k in cim10 } if cim10 else patterns
34+
3335 return patterns
Original file line number Diff line number Diff line change 1- from typing import Any , Dict
1+ from typing import Any , Dict , List
22
33from typing_extensions import Literal
44
@@ -28,6 +28,7 @@ def create_component(
2828 name : str = "drugs" ,
2929 * ,
3030 attr : str = "NORM" ,
31+ atc : List [str ] = None ,
3132 ignore_excluded : bool = False ,
3233 ignore_space_tokens : bool = False ,
3334 term_matcher : Literal ["exact" , "simstring" ] = "exact" ,
@@ -83,6 +84,9 @@ def create_component(
8384 The name of the component
8485 attr : str
8586 The default attribute to use for matching.
87+ atc : List[str], optional
88+ List of ATC codes to retrieve. If None, all ATC codes are searched,
89+ which results in a higher computation time.
8690 ignore_excluded : bool
8791 Whether to skip excluded tokens (requires an upstream
8892 pipeline to mark excluded tokens).
@@ -111,7 +115,7 @@ def create_component(
111115 nlp = nlp ,
112116 name = name ,
113117 regex = dict (),
114- terms = get_patterns (),
118+ terms = get_patterns (atc ),
115119 attr = attr ,
116120 ignore_excluded = ignore_excluded ,
117121 ignore_space_tokens = ignore_space_tokens ,
Original file line number Diff line number Diff line change 66drugs_file = BASE_DIR / "resources" / "drugs.json"
77
88
9- def get_patterns () -> Dict [str , List [str ]]:
def filter_dict_by_keys(
    D: Dict[str, List[str]],
    L: List[str],
) -> Dict[str, List[str]]:
    """
    Keep only the entries of a dictionary whose key starts with a given prefix.

    Parameters
    ----------
    D : Dict[str, List[str]]
        Dictionary to filter.
    L : List[str]
        Prefixes to keep. A key is retained as soon as it starts with at
        least one of them. An empty collection retains nothing, while
        `None` disables filtering.

    Returns
    -------
    Dict[str, List[str]]
        A new dictionary; `D` itself is left untouched.
    """
    if L is None:
        return dict(D)

    # Materialise the prefixes once: `str.startswith` accepts a tuple, and a
    # one-shot iterable (e.g. a generator) would otherwise be exhausted after
    # the first key has been checked, silently dropping later matches.
    prefixes = tuple(L)
    return {k: v for k, v in D.items() if k.startswith(prefixes)}
14+
15+
def get_patterns(atc: List[str] = None) -> Dict[str, List[str]]:
    """
    Load the drug patterns, optionally restricted to a selection of keys.

    Parameters
    ----------
    atc : List[str], optional
        Keys to keep (presumably ATC codes, to be confirmed against the
        content of the resource file). Only entries whose key is exactly
        one of these values are returned. If None or empty, every entry
        is returned.

    Returns
    -------
    Dict[str, List[str]]
        The mapping loaded from `drugs_file`, filtered by key when `atc`
        is provided.
    """
    # JSON is UTF-8 by specification: decode it explicitly instead of relying
    # on the platform's default locale encoding.
    with open(drugs_file, "r", encoding="utf-8") as f:
        patterns = json.load(f)

    if not atc:
        return patterns

    # NOTE(review): keys are matched exactly here, whereas the sibling helper
    # `filter_dict_by_keys` matches by prefix -- confirm which one is intended.
    wanted = set(atc)  # built once for constant-time membership tests
    return {k: v for k, v in patterns.items() if k in wanted}
You can’t perform that action at this time.
0 commit comments