diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f80a820d..78056e24 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,10 +13,11 @@ jobs: ruff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: astral-sh/ruff-action@v3 with: version: "latest" + tests: needs: ruff strategy: @@ -26,10 +27,10 @@ jobs: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7998fd2c..f4f71f8c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,11 +9,9 @@ repos: # Run the formatter. - id: ruff-format - # - repo: local - # hooks: - # - id: pyright-verifytypes - # name: pyright verifytypes - # entry: bash -c 'pip install . >/dev/null && pyright --verifytypes swvo --ignoreexternal' - # language: system - # types: [python] - # pass_filenames: false + - repo: local + hooks: + - id: ty + name: ty check + entry: ty check swvo --ignore unresolved-import + language: python diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d652ef9b..7d6ec719 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,7 +26,16 @@ We use [Ruff](https://docs.astral.sh/ruff/) for linting, formatting, and import pip install pre-commit pre-commit install ``` - This ensures Ruff runs automatically on changed files before each commit. + This ensures Ruff runs automatically on changed files before each commit (this will also run Ty type checking, see below). + +## Type Checking +We use [Ty](https://ty.sh/) for type checking. +1. To check your code for type issues: + ```bash + ty check . + ``` +2. Address any type errors reported by Ty. +3. [Optional] Ty is also integrated with `pre-commit`. 
If you have set up `pre-commit` as described above, Ty will run automatically on changed files before each commit. ## Running Tests We use `pytest` for testing. diff --git a/pyproject.toml b/pyproject.toml index c610dc13..26e56204 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,3 +91,13 @@ push = true [tool.bumpver.file_patterns] "pyproject.toml" = ['current_version = "{version}"'] "swvo/__init__.py" = ['__version__ = "{version}"'] + +[tool.ty.src] +include = ["swvo"] +exclude = ["tests", "swvo/io/RBMDataSet"] + +[[tool.ty.overrides]] +include = ["swvo"] + +[tool.ty.overrides.rules] +invalid-return-type = "ignore" diff --git a/swvo/io/dst/wdc.py b/swvo/io/dst/wdc.py index 907a6b36..865bcf71 100644 --- a/swvo/io/dst/wdc.py +++ b/swvo/io/dst/wdc.py @@ -53,7 +53,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) diff --git a/swvo/io/f10_7/omni.py b/swvo/io/f10_7/omni.py index 6c03bc3e..bb4ffa87 100644 --- a/swvo/io/f10_7/omni.py +++ b/swvo/io/f10_7/omni.py @@ -68,7 +68,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False) f107_df["file_name"] = data_out["file_name"] # we return it just every 24 hours - f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0) + f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0) # ty: ignore[possibly-missing-attribute] f107_df = f107_df.replace(999.9, np.nan) f107_df = f107_df.truncate( before=start_time - timedelta(hours=23.9999), diff --git a/swvo/io/f10_7/swpc.py b/swvo/io/f10_7/swpc.py index 5abf39a5..1b0e2e03 100644 --- a/swvo/io/f10_7/swpc.py +++ b/swvo/io/f10_7/swpc.py @@ -55,13 +55,30 @@ def 
__init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!" raise ValueError(msg) - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) logger.info(f"SWPC F10.7 data directory: {self.data_dir}") + def _is_within_download_range(self, target_date: datetime) -> bool: + """Check if a date is within the last 30 days. + + Parameters + ---------- + target_date : datetime + Date to check. + + Returns + ------- + bool + True if the date is within the last 30 days, False otherwise. + """ + now = datetime.now(timezone.utc) + thirty_days_ago = now - timedelta(days=30) + return target_date >= thirty_days_ago + def _get_processed_file_list( self, start_time: datetime, end_time: datetime ) -> tuple[list[Path], list[tuple[datetime, datetime]]]: @@ -111,8 +128,7 @@ def download_and_process(self) -> None: logger.debug(f"Updating {file_path}...") existing_data = pd.read_csv(file_path, parse_dates=["date"]) - existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None) - + existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None) # ty: ignore[unresolved-attribute] combined_data = pd.concat([existing_data, year_data]) combined_data = combined_data.drop_duplicates(subset=["date"], keep="last") combined_data = combined_data.sort_values("date") @@ -238,6 +254,15 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal for file_path in file_paths: if not file_path.exists(): if download: + year = int(file_path.stem.split("_")[-1]) + year_end = datetime(year, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + if not self._is_within_download_range(year_end): + logger.warning( + f"Cannot download data for year {year}. 
" + f"Only data from the last 30 days can be downloaded from SWPC." + ) + continue + self.download_and_process() else: warnings.warn(f"File {file_path} not found") @@ -247,8 +272,8 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal data_out = df_one_file.combine_first(data_out) if not data_out.empty: - if data_out.index.tzinfo is None: - data_out.index = data_out.index.tz_localize("UTC") + if data_out.index.tzinfo is None: # ty: ignore[possibly-missing-attribute] + data_out.index = data_out.index.tz_localize("UTC") # ty: ignore[possibly-missing-attribute] data_out.drop("date", axis=1, inplace=True) data_out = data_out.truncate( before=start_time - timedelta(hours=23.9999), diff --git a/swvo/io/hp/ensemble.py b/swvo/io/hp/ensemble.py index 5e4e99a7..62a53396 100755 --- a/swvo/io/hp/ensemble.py +++ b/swvo/io/hp/ensemble.py @@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None: msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!" 
raise ValueError(msg) - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) @@ -178,7 +178,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N if end_time is not None and not end_time.tzinfo: end_time = end_time.replace(tzinfo=timezone.utc) - if not (0 <= horizon <= 72): + if not (0 <= horizon <= 72): # ty: ignore[unsupported-operator] raise ValueError("Horizon must be between 0 and 72 hours") if self.index == "hp30": @@ -187,7 +187,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N raise ValueError("Horizon for hp30 must be in 0.5 hour increments") elif self.index == "hp60": freq = "1h" - if horizon % 1 != 0: + if horizon % 1 != 0: # ty: ignore[unsupported-operator] raise ValueError("Horizon for hp60 must be in 1 hour increments") align_start_to_hp_hr = start_time.replace(hour=start_time.hour, minute=0, second=0, microsecond=0) diff --git a/swvo/io/hp/gfz.py b/swvo/io/hp/gfz.py index 0b89eff9..309d5174 100755 --- a/swvo/io/hp/gfz.py +++ b/swvo/io/hp/gfz.py @@ -10,7 +10,7 @@ from ftplib import FTP from pathlib import Path from shutil import rmtree -from typing import Optional +from typing import List, Optional import numpy as np import pandas as pd @@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None: msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!" 
raise ValueError(msg) - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -263,15 +263,15 @@ def _get_processed_file_list(self, start_time: datetime, end_time: datetime) -> return file_paths, time_intervals - def _process_single_file(self, temp_dir: str, filenames: str) -> pd.DataFrame: + def _process_single_file(self, temp_dir: Path, filenames: List[str]) -> pd.DataFrame: """Process HpGFZ file to a DataFrame. Parameters ---------- - temp_dir : str + temp_dir : Path Temporary directory to store the file. - file_path : Path - Path to the file. + filenames : List[str] + List of filenames to process. Returns ------- diff --git a/swvo/io/hp/read_hp_from_multiple_models.py b/swvo/io/hp/read_hp_from_multiple_models.py index 5582dee1..0dd54816 100644 --- a/swvo/io/hp/read_hp_from_multiple_models.py +++ b/swvo/io/hp/read_hp_from_multiple_models.py @@ -87,7 +87,7 @@ def read_hp_from_multiple_models( # noqa: PLR0913 start_time, end_time, historical_data_cutoff_time, - reduce_ensemble, + reduce_ensemble, # ty: ignore[invalid-argument-type] download=download, ) @@ -128,7 +128,7 @@ def _read_from_model( # noqa: PLR0913 num_ens_members = len(data_one_model) if num_ens_members > 0 and reduce_ensemble is not None: - data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index) + data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index) # ty: ignore[invalid-argument-type] return data_one_model diff --git a/swvo/io/kp/ensemble.py b/swvo/io/kp/ensemble.py index 9296c7c2..27573900 100755 --- a/swvo/io/kp/ensemble.py +++ b/swvo/io/kp/ensemble.py @@ -49,7 +49,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = 
os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) @@ -108,7 +108,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]: freq=timedelta(hours=3), ) data_out = pd.DataFrame(index=t) - data_out.index = data_out.index.tz_localize(timezone.utc) + data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute] data_out["kp"] = np.array([np.nan] * len(t)) data_out = data_out.truncate( before=start_time - timedelta(hours=2.9999), @@ -129,7 +129,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]: df["file_name"] = file df.loc[df["kp"].isna(), "file_name"] = None - df.index = df.index.tz_localize("UTC") + df.index = df.index.tz_localize("UTC") # ty: ignore[possibly-missing-attribute] df = df.truncate( before=start_time - timedelta(hours=2.9999), diff --git a/swvo/io/kp/niemegk.py b/swvo/io/kp/niemegk.py index d49ed4a0..6a32de1f 100755 --- a/swvo/io/kp/niemegk.py +++ b/swvo/io/kp/niemegk.py @@ -55,7 +55,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -163,7 +163,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False) freq=timedelta(hours=3), ) data_out = pd.DataFrame(index=t) - data_out.index = data_out.index.tz_localize(timezone.utc) + data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute] data_out["kp"] = np.array([np.nan] * len(t)) data_out["file_name"] = np.array([None] * len(t)) @@ -254,8 +254,8 @@ def _read_single_file(self, file_path) -> pd.DataFrame: 
df["t"] = pd.to_datetime(df["t"]) df.index = df["t"] df.drop(labels=["t"], axis=1, inplace=True) - if not df.index.tzinfo: - df.index = df.index.tz_localize(timezone.utc) + if not df.index.tzinfo: # ty: ignore[possibly-missing-attribute] + df.index = df.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute] df["file_name"] = file_path df.loc[df["kp"].isna(), "file_name"] = None @@ -302,7 +302,7 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame: ) data.index.rename("t", inplace=True) data.index = data["t"] - data.index = data.index.tz_localize(timezone.utc) + data.index = data.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute] data.drop(labels=["t"], axis=1, inplace=True) data.dropna(inplace=True) data = data[data["kp"] != -1.0] diff --git a/swvo/io/kp/omni.py b/swvo/io/kp/omni.py index 9a95475c..a166ebc2 100755 --- a/swvo/io/kp/omni.py +++ b/swvo/io/kp/omni.py @@ -63,7 +63,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False) kp_df["kp"] = data_out["kp"] kp_df["file_name"] = data_out["file_name"] # we return it just every 3 hours - kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0) + kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0) # ty: ignore[possibly-missing-attribute] kp_df = kp_df.truncate( before=start_time - timedelta(hours=2.9999), after=end_time + timedelta(hours=2.9999), diff --git a/swvo/io/kp/read_kp_from_multiple_models.py b/swvo/io/kp/read_kp_from_multiple_models.py index cfa4ae6a..cdbe3cca 100644 --- a/swvo/io/kp/read_kp_from_multiple_models.py +++ b/swvo/io/kp/read_kp_from_multiple_models.py @@ -91,7 +91,7 @@ def read_kp_from_multiple_models( # noqa: PLR0913 start_time, end_time, historical_data_cutoff_time, - reduce_ensemble, + reduce_ensemble, # ty: ignore[invalid-argument-type] download=download, ) data_out = construct_updated_data_frame(data_out, data_one_model, model.LABEL) @@ -172,7 +172,7 @@ def 
_read_from_model( # noqa: PLR0913 num_ens_members = len(data_one_model) if num_ens_members > 0 and reduce_ensemble is not None: - data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble) + data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble) # ty: ignore[invalid-argument-type] return data_one_model diff --git a/swvo/io/kp/swpc.py b/swvo/io/kp/swpc.py index 7ed057a5..d2f62fac 100755 --- a/swvo/io/kp/swpc.py +++ b/swvo/io/kp/swpc.py @@ -57,7 +57,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -180,7 +180,7 @@ def read(self, start_time: datetime, end_time: Optional[datetime] = None, downlo freq=timedelta(hours=3), ) data_out = pd.DataFrame(index=t) - data_out.index = data_out.index.tz_localize(timezone.utc) + data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute] data_out["kp"] = np.array([np.nan] * len(t)) data_out["file_name"] = np.array([np.nan] * len(t)) @@ -252,7 +252,7 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame: lines = f.readlines() for line in lines: if ":Issued:" in line: - year = int(re.search(r"(\d{4})", line).group(1)) + year = int(re.search(r"(\d{4})", line).group(1)) # ty: ignore[possibly-missing-attribute] break for i, line in enumerate(lines): @@ -260,19 +260,19 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame: first_line = i + 2 break - headers = lines[first_line].split() + headers = lines[first_line].split() # ty: ignore[invalid-argument-type] headers = [headers[i] + " " + headers[i + 1] for i in range(0, len(headers), 2)] for d in headers: try: if any("Dec" in month for month in 
headers) and "Jan" in d: - parsed_date = self._parse_date(d, year + 1) + parsed_date = self._parse_date(d, year + 1) # ty: ignore[unsupported-operator] else: parsed_date = self._parse_date(d, year) dates.append(parsed_date) except ValueError: raise - for line in lines[first_line + 1 : first_line + 9]: + for line in lines[first_line + 1 : first_line + 9]: # ty: ignore[unsupported-operator] values = [float(val) for val in line.split()[1:] if re.match(r"^\d+\.\d+$", val)] kp_data.append(values) diff --git a/swvo/io/omni/omni_high_res.py b/swvo/io/omni/omni_high_res.py index 2c632ad2..cc2d5d0c 100644 --- a/swvo/io/omni/omni_high_res.py +++ b/swvo/io/omni/omni_high_res.py @@ -51,7 +51,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -62,7 +62,7 @@ def download_and_process( self, start_time: datetime, end_time: datetime, - cadence_min: float = 1, + cadence_min: int = 1, reprocess_files: bool = False, ) -> None: """Download and process OMNI High Resolution data files. @@ -73,7 +73,7 @@ def download_and_process( Start time for data download. end_time : datetime End time for data download. - cadence_min : float, optional + cadence_min : int, optional Cadence of the data in minutes, defaults to 1 reprocess_files : bool, optional Downloads and processes the files again, defaults to False, by default False @@ -127,7 +127,7 @@ def read( self, start_time: datetime, end_time: datetime, - cadence_min: float = 1, + cadence_min: int = 1, download: bool = False, ) -> pd.DataFrame: """ @@ -139,7 +139,7 @@ def read( Start time for reading data. end_time : datetime End time for reading data. 
- cadence_min : float, optional + cadence_min : int, optional Cadence of the data in minutes, defaults to 1 download : bool, optional Download data on the go, defaults to False. diff --git a/swvo/io/omni/omni_low_res.py b/swvo/io/omni/omni_low_res.py index 64ca0909..f1775fd1 100755 --- a/swvo/io/omni/omni_low_res.py +++ b/swvo/io/omni/omni_low_res.py @@ -110,7 +110,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) diff --git a/swvo/io/solar_wind/ace.py b/swvo/io/solar_wind/ace.py index 528dfde0..ee458cd3 100644 --- a/swvo/io/solar_wind/ace.py +++ b/swvo/io/solar_wind/ace.py @@ -62,7 +62,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -107,7 +107,7 @@ def download_and_process(self, request_time: datetime) -> None: logger.debug("Processing file ...") processed_df = self._process_single_file(temporary_dir) - unique_dates = np.unique(processed_df.index.date) + unique_dates = np.unique(processed_df.index.date) # ty: ignore[possibly-missing-attribute] for date in unique_dates: file_path = self.data_dir / date.strftime("%Y/%m") / f"ACE_SW_NOWCAST_{date.strftime('%Y%m%d')}.csv" @@ -301,7 +301,7 @@ def _update_filename(self, row: pd.Series) -> str: file_date_str = Path(row["file_name"]).stem.split("_")[-1] file_date = pd.to_datetime(file_date_str, format="%Y%m%d").date() - 
index_date = row.name.date() + index_date = row.name.date() # ty: ignore[unresolved-attribute] return "propagated from previous ACE NOWCAST file" if file_date != index_date else row["file_name"] def _read_single_file(self, file_path) -> pd.DataFrame: diff --git a/swvo/io/solar_wind/dscovr.py b/swvo/io/solar_wind/dscovr.py index 4d2aa812..36373334 100644 --- a/swvo/io/solar_wind/dscovr.py +++ b/swvo/io/solar_wind/dscovr.py @@ -60,7 +60,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) self.data_dir.mkdir(parents=True, exist_ok=True) @@ -103,7 +103,7 @@ def download_and_process(self, request_time: datetime) -> None: logger.debug("Processing file ...") processed_df = self._process_single_file(temporary_dir) - unique_dates = np.unique(processed_df.index.date) + unique_dates = np.unique(processed_df.index.date) # ty: ignore[possibly-missing-attribute] for date in unique_dates: file_path = self.data_dir / date.strftime("%Y/%m") / f"DSCOVR_SW_NOWCAST_{date.strftime('%Y%m%d')}.csv" @@ -403,5 +403,5 @@ def _update_filename(self, row: pd.Series) -> str: file_date_str = Path(row["file_name"]).stem.split("_")[-1] file_date = pd.to_datetime(file_date_str, format="%Y%m%d").date() - index_date = row.name.date() + index_date = row.name.date() # ty: ignore[unresolved-attribute] return "propagated from previous DSCOVR NOWCAST file" if file_date != index_date else row["file_name"] diff --git a/swvo/io/solar_wind/read_solar_wind_from_multiple_models.py b/swvo/io/solar_wind/read_solar_wind_from_multiple_models.py index 99b3f724..ebecb53b 100644 --- a/swvo/io/solar_wind/read_solar_wind_from_multiple_models.py +++ b/swvo/io/solar_wind/read_solar_wind_from_multiple_models.py @@ -101,7 +101,7 
@@ def read_solar_wind_from_multiple_models( # noqa: PLR0913 start_time, end_time, historical_data_cutoff_time, - reduce_ensemble, + reduce_ensemble, # ty: ignore[invalid-argument-type] download=download, do_interpolation=do_interpolation, ) @@ -213,7 +213,7 @@ def _read_from_model( # noqa: PLR0913 num_ens_members = len(data_one_model) if num_ens_members > 0 and reduce_ensemble is not None: - data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble) + data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble) # ty: ignore[invalid-argument-type] return data_one_model diff --git a/swvo/io/solar_wind/swift.py b/swvo/io/solar_wind/swift.py index 9de5b09a..14c9e3be 100644 --- a/swvo/io/solar_wind/swift.py +++ b/swvo/io/solar_wind/swift.py @@ -56,7 +56,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None: if self.ENV_VAR_NAME not in os.environ: raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!") - data_dir = os.environ.get(self.ENV_VAR_NAME) + data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment] self.data_dir: Path = Path(data_dir) @@ -249,7 +249,7 @@ def _update_filename(self, row: pd.Series) -> str: file_date_str = Path(row["file_name"]).stem.split("_")[-1] file_date = pd.to_datetime(file_date_str, format="%Y-%m-%dt0000").date() - index_date = row.name.date() + index_date = row.name.date() # ty: ignore[unresolved-attribute] return "propagated from previous SWIFT FORECAST file" if file_date != index_date else row["file_name"] def _nan_dataframe(self, start_time, end_time): diff --git a/swvo/io/symh/omni.py b/swvo/io/symh/omni.py index 66380d50..1eecfe55 100644 --- a/swvo/io/symh/omni.py +++ b/swvo/io/symh/omni.py @@ -43,7 +43,7 @@ def read( self, start_time: datetime, end_time: datetime, - cadence_min: float = 1, + cadence_min: int = 1, download: bool = True, ) -> pd.DataFrame: """ @@ -55,7 +55,7 @@ def read( Start time of the data to read. Must be timezone-aware. 
end_time : datetime End time of the data to read. Must be timezone-aware. - cadence_min : float, optional + cadence_min : int, optional Cadence of the data in minutes, defaults to 1 download : bool, optional Download data on the go, defaults to True. diff --git a/swvo/io/utils.py b/swvo/io/utils.py index d5fe6575..b85ab11d 100644 --- a/swvo/io/utils.py +++ b/swvo/io/utils.py @@ -212,7 +212,7 @@ def sw_mag_propagation(sw_data: pd.DataFrame) -> pd.DataFrame: Data frame with propagated solar wind data, indexed by time. """ - sw_data["t"] = [t.timestamp() for t in sw_data.index.to_pydatetime()] + sw_data["t"] = [t.timestamp() for t in sw_data.index.to_pydatetime()] # ty: ignore[possibly-missing-attribute] sw_data = sw_data.dropna(how="any") distance = 1.5e6