From 46386a0af69b9e3e66671ef10444b1792f33a1bb Mon Sep 17 00:00:00 2001 From: "Alban Peyrat (Archi)" Date: Thu, 11 Apr 2024 13:14:21 +0200 Subject: [PATCH] 1.17.5 : bug fix --- CHANGELOG.md | 7 +++++++ fcr_classes.py | 6 +++++- fcr_func.py | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca49d63..6c1f2ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ _Some previous changes will be added_ ## [Unreleased] +## [1.17.5] - 2024-04-11 + +### Fixed + +* `Universal_Data_Extractor.extract_list_of_strings` method no longer crashes when a subfield has no value +* ASCII ranges (hexadecimal) `21-2F`, `3A-40`, `5B-60`, `7B-7F` added to the noise list + ## [1.17.4] - 2024-03-28 ### Added diff --git a/fcr_classes.py b/fcr_classes.py index e1d39db..e0456ec 100644 --- a/fcr_classes.py +++ b/fcr_classes.py @@ -2884,7 +2884,11 @@ def extract_list_of_strings(self,marc_field: Marc_Fields_Data, filter_value: Opt output = [] extraction = self.extract_data_from_marc_field(marc_field, filter_value) for field_value in extraction: - output.append(" ".join(field_value)) + valid_values = [] + for value in field_value: + if value: + valid_values.append(value) + output.append(" ".join(valid_values)) return output def extract_list_of_lists(self,marc_field: Marc_Fields_Data, filter_value: Optional[str] = "") -> List[str]: diff --git a/fcr_func.py b/fcr_func.py index 7d3b05d..8ea773a 100644 --- a/fcr_func.py +++ b/fcr_func.py @@ -16,7 +16,7 @@ def prep_string(_str:str, _noise = True, _multiplespaces = True) -> str: """ # remove noise (punctuation) if asked (by default yes) if _noise: - _str = re.sub(r"\.|\,|\?|\!|\;|\/|\:|\=|\[|\]|\'|\-|\(|\)|\||\"|\<|\>|\+|\°|[\u2010-\u2015]", " ", _str, flags=re.IGNORECASE) + _str = re.sub(r"[\x21-\x2F]|[\x3A-\x40]|[\x5B-\x60]|[\x7B-\x7F]|[\u2010-\u2015]|\.|\,|\?|\!|\;|\/|\:|\=|\[|\]|\'|\-|\(|\)|\||\"|\<|\>|\+|\°", " ", _str, flags=re.IGNORECASE) # replace multiple spaces by ine in string
if requested (default yes) if _multiplespaces: _str = re.sub("\s+", " ", _str).strip()