diff --git a/pipelines/common_utils.py b/pipelines/common_utils.py index 89906080..c065741d 100644 --- a/pipelines/common_utils.py +++ b/pipelines/common_utils.py @@ -44,6 +44,76 @@ def get_available_reports( ] +def parse_exclude_date_ranges( + exclude_date_ranges_str: str | None, +) -> list[tuple[dt.date, dt.date]] | None: + """ + Parse comma-separated date ranges from string to list of tuples. + + This utility function is useful for parsing date exclusion parameters + that may be used by various forecasting models to exclude periods with + known reporting problems or other data quality issues. + + Parameters + ---------- + exclude_date_ranges_str : str | None + Comma-separated list of date ranges in format 'start:end'. + Example: '2024-01-15:2024-01-20,2024-03-01:2024-03-07' + + Returns + ------- + list[tuple[dt.date, dt.date]] | None + List of (start_date, end_date) tuples where both dates are inclusive, + or None if input is None/empty. + + Raises + ------ + ValueError + If date range format is invalid, dates can't be parsed as YYYY-MM-DD, + or start_date > end_date. + + Examples + -------- + >>> parse_exclude_date_ranges("2024-01-15:2024-01-20") + [(datetime.date(2024, 1, 15), datetime.date(2024, 1, 20))] + + >>> parse_exclude_date_ranges("2024-01-15:2024-01-20,2024-03-01:2024-03-07") + [(datetime.date(2024, 1, 15), datetime.date(2024, 1, 20)), + (datetime.date(2024, 3, 1), datetime.date(2024, 3, 7))] + + >>> parse_exclude_date_ranges(None) + None + """ + if exclude_date_ranges_str is None or not exclude_date_ranges_str.strip(): + return None + + parsed_ranges = [] + for date_range_str in exclude_date_ranges_str.split(","): + date_range_str = date_range_str.strip() + if ":" not in date_range_str: + raise ValueError( + f"Invalid date range format: '{date_range_str}'. " + "Expected format: 'start_date:end_date' (e.g., '2024-01-15:2024-01-20')" + ) + start_str, end_str = date_range_str.split(":", 1) + try: + start_date = dt.datetime.strptime(start_str.strip(), "%Y-%m-%d").date() + end_date = dt.datetime.strptime(end_str.strip(), "%Y-%m-%d").date() + except ValueError as e: + raise ValueError( + f"Invalid date format in range '{date_range_str}'. " + f"Expected YYYY-MM-DD format. Error: {e}" + ) from e + if start_date > end_date: + raise ValueError( + f"Invalid date range '{date_range_str}': " + f"start_date ({start_date}) must be <= end_date ({end_date})" + ) + parsed_ranges.append((start_date, end_date)) + + return parsed_ranges + + def parse_and_validate_report_date( report_date: str, available_facility_level_reports: list[dt.date],