|
1 | 1 | tumour_pattern = (
|
2 | 2 | r"(?P<tumour_prefix>[cpyramP]{1,2}\s?)?" # Optional tumour prefix
|
3 | 3 | r"T\s?" # 'T' followed by optional space
|
4 |
| - r"(?P<tumour>([0-4]|is|[Xx]))" # Tumour size (required if 'T' is present) |
| 4 | + r"(?P<tumour>([0-4]|is|[Xx]|[Oo]))" # Tumour size (required if 'T' is present) |
5 | 5 | r"(?:\s?(?P<tumour_specification>[abcdx]|mi))?" # Optional tumour specification
|
6 | 6 | r"(?:\s?\((?P<tumour_suffix>[^()]{1,10})\))?" # Optional tumour suffix
|
7 | 7 | )
|
8 | 8 |
|
9 | 9 | node_pattern = (
|
10 | 10 | r"(?P<node_prefix>[cpyraP]{1,2}\s?)?" # Optional node prefix
|
11 | 11 | r"N\s?" # 'N' followed by optional space
|
12 |
| - r"(?P<node>[Xx01234\+])" # Node size/status (required if 'N' is present) |
| 12 | + r"(?P<node>[Xx01234\+]|[Oo])" # Node size/status (required if 'N' is present) |
13 | 13 | r"(?:\s?(?P<node_specification>"
|
14 | 14 | r"[abcdx]|mi|sn|i[-,+]|mol[-,+]|\(mi\)|\(sn\)|"
|
15 | 15 | r"\(i[-,+]\)|\(mol[-,+]\)|\(\d+\s*/\s*\d+\)))?" # Optional specification
|
|
19 | 19 | metastasis_pattern = (
|
20 | 20 | r"(?P<metastasis_prefix>[cpyraP]{1,2}\s?)?" # Optional metastasis prefix
|
21 | 21 | r"M\s?" # 'M' followed by optional space
|
22 |
| - r"(?P<metastasis>[Xx0123\+])" # Metastasis status (required if 'M' is present) |
| 22 | + r"(?P<metastasis>[Xx0123\+]|[Oo])" # Metastasis status (required if 'M' is present) |
23 | 23 | r"(?:\s?(?P<metastasis_specification>"
|
24 | 24 | r"[abcd]|i\+|mol\+|cy\+|\(i\+\)|\(mol\+\)|"
|
25 | 25 | r"\(cy\+\)|PUL|OSS|HEP|BRA|LYM|OTH|MAR|PLE|PER|ADR|SKI))?" # Optional specification
|
|
31 | 31 |
|
32 | 32 | resection_pattern = (
|
33 | 33 | r"R\s?"
|
34 |
| - r"(?P<resection>[Xx012])?" # Optional resection completeness |
| 34 | + r"(?P<resection>[Xx012]|[Oo])?" # Optional resection completeness |
35 | 35 | r"(?:\s?(?P<resection_specification>is|cy\+|\(is\)|\(cy\+\)))?" # Optional specification
|
36 | 36 | r"(?:\s?(?P<resection_loc>(\((?P<r_loc>[a-z]+)\)[,;\s]*)*))?" # Optional localization with space
|
37 | 37 | )
|
|
46 | 46 |
|
47 | 47 | # We need to exclude patterns like 'T1', 'T2' if they are not followed by node or
|
48 | 48 | # metastasis sections.
|
| 49 | + |
49 | 50 | exclude_pattern = (
|
50 | 51 | r"(?!T\s*[0-4]\s*[.,\/](?!\s*"
|
51 | 52 | + node_pattern
|
|
57 | 58 | + "))"
|
58 | 59 | )
|
59 | 60 |
|
| 61 | +exclude_pattern = ( |
| 62 | + r"(?!" |
| 63 | + r"(?:[cpyramP]{0,2}\s*)?" # Optional prefix like p, yp, PT |
| 64 | + r"T\s*" |
| 65 | + r"(?:[0-4]|is|[xXoO])" # T stage (includes is, x, o) |
| 66 | + r"(?:[abcdx]|mi)?" # Optional specification |
| 67 | + r"(?:\s*\([^()]{1,10}\))?" # Optional suffix |
| 68 | + r"(?:\s*[\s,\/\.\(\)]|$)" # <-- KEY ADDITION: allow end-of-string ($) |
| 69 | + r"(?!\s*" |
| 70 | + + node_pattern + "?" + TNM_space + "?" + metastasis_pattern + "?" |
| 71 | + + ")" |
| 72 | + + ")" |
| 73 | +) |
| 74 | + |
60 | 75 | tnm_pattern_new = (
|
61 | 76 | r"(?:\b|^)"
|
62 | 77 | + exclude_pattern
|
|
90 | 105 | + version_pattern
|
91 | 106 | + ")?"
|
92 | 107 | + r")"
|
93 |
| - + r"(?:\b|$|\n)" |
| 108 | + + r"(?=[\s\(\)\.,;:/]|$)" |
| 109 | + #+ r"(?:\b|$|\n)" |
94 | 110 | )
|
0 commit comments