GFZ · sahiljhawar · Feb 12, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -13,10 +13,11 @@ jobs:
   ruff:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - uses: astral-sh/ruff-action@v3
         with:
           version: "latest"
+
   tests:
     needs: ruff
     strategy:
@@ -26,10 +27,10 @@ jobs:
         os: [ubuntu-latest, macos-latest]
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,11 +9,9 @@ repos:
       # Run the formatter.
       - id: ruff-format
 
-  # - repo: local
-  #   hooks:
-  #     - id: pyright-verifytypes
-  #       name: pyright verifytypes
-  #       entry: bash -c 'pip install . >/dev/null && pyright --verifytypes swvo --ignoreexternal'
-  #       language: system
-  #       types: [python]
-  #       pass_filenames: false
+  - repo: local
+    hooks:
+      - id: ty
+        name: ty check
+        entry: ty check swvo --ignore unresolved-import
+        language: python
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -26,7 +26,16 @@ We use [Ruff](https://docs.astral.sh/ruff/) for linting, formatting, and import
     pip install pre-commit
     pre-commit install
     ```
-    This ensures Ruff runs automatically on changed files before each commit.
+    This ensures Ruff runs automatically on changed files before each commit (this will also run Ty type checking; see below).
+
+## Type Checking
+We use [Ty](https://docs.astral.sh/ty/) for type checking.
+1.  To check your code for type issues:
+    ```bash
+    ty check .
+    ```
+2.  Address any type errors reported by Ty.
+3.  [Optional] Ty is also integrated with `pre-commit`. If you have set up `pre-commit` as described above, Ty will run automatically on changed files before each commit.
 
 ## Running Tests
 We use `pytest` for testing.

diff --git a/pyproject.toml b/pyproject.toml
@@ -91,3 +91,13 @@ push = true
 [tool.bumpver.file_patterns]
 "pyproject.toml" = ['current_version = "{version}"']
 "swvo/__init__.py" = ['__version__ = "{version}"']
+
+[tool.ty.src]
+include = ["swvo"]
+exclude = ["tests", "swvo/io/RBMDataSet"]
+
+[[tool.ty.overrides]]
+include = ["swvo"]
+
+[tool.ty.overrides.rules]
+invalid-return-type = "ignore"
diff --git a/swvo/io/dst/wdc.py b/swvo/io/dst/wdc.py
@@ -53,7 +53,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
             if self.ENV_VAR_NAME not in os.environ:
                 raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
         self.data_dir.mkdir(parents=True, exist_ok=True)

diff --git a/swvo/io/f10_7/omni.py b/swvo/io/f10_7/omni.py
@@ -68,7 +68,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
         f107_df["file_name"] = data_out["file_name"]
 
         # we return it just every 24 hours
-        f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0)
+        f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0)  # ty: ignore[possibly-missing-attribute]
         f107_df = f107_df.replace(999.9, np.nan)
         f107_df = f107_df.truncate(
             before=start_time - timedelta(hours=23.9999),

diff --git a/swvo/io/f10_7/swpc.py b/swvo/io/f10_7/swpc.py
@@ -55,13 +55,30 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
             if self.ENV_VAR_NAME not in os.environ:
                 msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
                 raise ValueError(msg)
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
         self.data_dir.mkdir(parents=True, exist_ok=True)
 
         logger.info(f"SWPC F10.7 data directory: {self.data_dir}")
 
+    def _is_within_download_range(self, target_date: datetime) -> bool:
+        """Check whether a date is no older than 30 days before the current UTC time.
+
+        Parameters
+        ----------
+        target_date : datetime
+            Date to check; it is compared against a timezone-aware UTC cutoff.
+
+        Returns
+        -------
+        bool
+            True if ``target_date`` is at or after now minus 30 days (future dates included).
+        """
+        now = datetime.now(timezone.utc)
+        thirty_days_ago = now - timedelta(days=30)  # oldest date still considered in range
+        return target_date >= thirty_days_ago
+
     def _get_processed_file_list(
         self, start_time: datetime, end_time: datetime
     ) -> tuple[list[Path], list[tuple[datetime, datetime]]]:
@@ -111,8 +128,7 @@ def download_and_process(self) -> None:
                     logger.debug(f"Updating {file_path}...")
 
                     existing_data = pd.read_csv(file_path, parse_dates=["date"])
-                    existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None)
-
+                    existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None)  # ty: ignore[unresolved-attribute]
                     combined_data = pd.concat([existing_data, year_data])
                     combined_data = combined_data.drop_duplicates(subset=["date"], keep="last")
                     combined_data = combined_data.sort_values("date")
@@ -238,6 +254,15 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal
         for file_path in file_paths:
             if not file_path.exists():
                 if download:
+                    year = int(file_path.stem.split("_")[-1])
+                    year_end = datetime(year, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
+                    if not self._is_within_download_range(year_end):
+                        logger.warning(
+                            f"Cannot download data for year {year}. "
+                            f"Only data from the last 30 days can be downloaded from SWPC."
+                        )
+                        continue
+
                     self.download_and_process()
                 else:
                     warnings.warn(f"File {file_path} not found")
@@ -247,8 +272,8 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal
             data_out = df_one_file.combine_first(data_out)
 
         if not data_out.empty:
-            if data_out.index.tzinfo is None:
-                data_out.index = data_out.index.tz_localize("UTC")
+            if data_out.index.tzinfo is None:  # ty: ignore[possibly-missing-attribute]
+                data_out.index = data_out.index.tz_localize("UTC")  # ty: ignore[possibly-missing-attribute]
         data_out.drop("date", axis=1, inplace=True)
         data_out = data_out.truncate(
             before=start_time - timedelta(hours=23.9999),

diff --git a/swvo/io/hp/ensemble.py b/swvo/io/hp/ensemble.py
@@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None:
                 msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
                 raise ValueError(msg)
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
 
@@ -178,7 +178,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N
         if end_time is not None and not end_time.tzinfo:
             end_time = end_time.replace(tzinfo=timezone.utc)
 
-        if not (0 <= horizon <= 72):
+        if not (0 <= horizon <= 72):  # ty: ignore[unsupported-operator]
             raise ValueError("Horizon must be between 0 and 72 hours")
 
         if self.index == "hp30":
@@ -187,7 +187,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N
                 raise ValueError("Horizon for hp30 must be in 0.5 hour increments")
         elif self.index == "hp60":
             freq = "1h"
-            if horizon % 1 != 0:
+            if horizon % 1 != 0:  # ty: ignore[unsupported-operator]
                 raise ValueError("Horizon for hp60 must be in 1 hour increments")
 
         align_start_to_hp_hr = start_time.replace(hour=start_time.hour, minute=0, second=0, microsecond=0)

diff --git a/swvo/io/hp/gfz.py b/swvo/io/hp/gfz.py
@@ -10,7 +10,7 @@
 from ftplib import FTP
 from pathlib import Path
 from shutil import rmtree
-from typing import Optional
+from typing import List, Optional
 
 import numpy as np
 import pandas as pd
@@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None:
                 msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
                 raise ValueError(msg)
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
         self.data_dir.mkdir(parents=True, exist_ok=True)
@@ -263,15 +263,15 @@ def _get_processed_file_list(self, start_time: datetime, end_time: datetime) ->
 
         return file_paths, time_intervals
 
-    def _process_single_file(self, temp_dir: str, filenames: str) -> pd.DataFrame:
+    def _process_single_file(self, temp_dir: Path, filenames: List[str]) -> pd.DataFrame:
         """Process HpGFZ file to a DataFrame.
 
         Parameters
         ----------
-        temp_dir : str
+        temp_dir : Path
             Temporary directory to store the file.
-        file_path : Path
-            Path to the file.
+        filenames : List[str]
+            List of filenames to process.
 
         Returns
         -------

diff --git a/swvo/io/hp/read_hp_from_multiple_models.py b/swvo/io/hp/read_hp_from_multiple_models.py
@@ -87,7 +87,7 @@ def read_hp_from_multiple_models(  # noqa: PLR0913
             start_time,
             end_time,
             historical_data_cutoff_time,
-            reduce_ensemble,
+            reduce_ensemble,  # ty: ignore[invalid-argument-type]
             download=download,
         )
 
@@ -128,7 +128,7 @@ def _read_from_model(  # noqa: PLR0913
         num_ens_members = len(data_one_model)
 
         if num_ens_members > 0 and reduce_ensemble is not None:
-            data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index)
+            data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index)  # ty: ignore[invalid-argument-type]
 
     return data_one_model
 

diff --git a/swvo/io/kp/ensemble.py b/swvo/io/kp/ensemble.py
@@ -49,7 +49,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
             if self.ENV_VAR_NAME not in os.environ:
                 raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
 
@@ -108,7 +108,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]:
                 freq=timedelta(hours=3),
             )
             data_out = pd.DataFrame(index=t)
-            data_out.index = data_out.index.tz_localize(timezone.utc)
+            data_out.index = data_out.index.tz_localize(timezone.utc)  # ty: ignore[possibly-missing-attribute]
             data_out["kp"] = np.array([np.nan] * len(t))
             data_out = data_out.truncate(
                 before=start_time - timedelta(hours=2.9999),
@@ -129,7 +129,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]:
                 df["file_name"] = file
                 df.loc[df["kp"].isna(), "file_name"] = None
 
-                df.index = df.index.tz_localize("UTC")
+                df.index = df.index.tz_localize("UTC")  # ty: ignore[possibly-missing-attribute]
 
                 df = df.truncate(
                     before=start_time - timedelta(hours=2.9999),

diff --git a/swvo/io/kp/niemegk.py b/swvo/io/kp/niemegk.py
@@ -55,7 +55,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
             if self.ENV_VAR_NAME not in os.environ:
                 raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
         self.data_dir.mkdir(parents=True, exist_ok=True)
@@ -163,7 +163,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
             freq=timedelta(hours=3),
         )
         data_out = pd.DataFrame(index=t)
-        data_out.index = data_out.index.tz_localize(timezone.utc)
+        data_out.index = data_out.index.tz_localize(timezone.utc)  # ty: ignore[possibly-missing-attribute]
         data_out["kp"] = np.array([np.nan] * len(t))
         data_out["file_name"] = np.array([None] * len(t))
 
@@ -254,8 +254,8 @@ def _read_single_file(self, file_path) -> pd.DataFrame:
         df["t"] = pd.to_datetime(df["t"])
         df.index = df["t"]
         df.drop(labels=["t"], axis=1, inplace=True)
-        if not df.index.tzinfo:
-            df.index = df.index.tz_localize(timezone.utc)
+        if not df.index.tzinfo:  # ty: ignore[possibly-missing-attribute]
+            df.index = df.index.tz_localize(timezone.utc)  # ty: ignore[possibly-missing-attribute]
 
         df["file_name"] = file_path
         df.loc[df["kp"].isna(), "file_name"] = None
@@ -302,7 +302,7 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame:
         )
         data.index.rename("t", inplace=True)
         data.index = data["t"]
-        data.index = data.index.tz_localize(timezone.utc)
+        data.index = data.index.tz_localize(timezone.utc)  # ty: ignore[possibly-missing-attribute]
         data.drop(labels=["t"], axis=1, inplace=True)
         data.dropna(inplace=True)
         data = data[data["kp"] != -1.0]

diff --git a/swvo/io/kp/omni.py b/swvo/io/kp/omni.py
@@ -63,7 +63,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
         kp_df["kp"] = data_out["kp"]
         kp_df["file_name"] = data_out["file_name"]
         # we return it just every 3 hours
-        kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0)
+        kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0)  # ty: ignore[possibly-missing-attribute]
         kp_df = kp_df.truncate(
             before=start_time - timedelta(hours=2.9999),
             after=end_time + timedelta(hours=2.9999),

diff --git a/swvo/io/kp/read_kp_from_multiple_models.py b/swvo/io/kp/read_kp_from_multiple_models.py
@@ -91,7 +91,7 @@ def read_kp_from_multiple_models(  # noqa: PLR0913
             start_time,
             end_time,
             historical_data_cutoff_time,
-            reduce_ensemble,
+            reduce_ensemble,  # ty: ignore[invalid-argument-type]
             download=download,
         )
         data_out = construct_updated_data_frame(data_out, data_one_model, model.LABEL)
@@ -172,7 +172,7 @@ def _read_from_model(  # noqa: PLR0913
         num_ens_members = len(data_one_model)
 
         if num_ens_members > 0 and reduce_ensemble is not None:
-            data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble)
+            data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble)  # ty: ignore[invalid-argument-type]
 
     return data_one_model
 

diff --git a/swvo/io/kp/swpc.py b/swvo/io/kp/swpc.py
@@ -57,7 +57,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
             if self.ENV_VAR_NAME not in os.environ:
                 raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")
 
-            data_dir = os.environ.get(self.ENV_VAR_NAME)
+            data_dir = os.environ.get(self.ENV_VAR_NAME)  # ty: ignore[invalid-assignment]
 
         self.data_dir: Path = Path(data_dir)
         self.data_dir.mkdir(parents=True, exist_ok=True)
@@ -180,7 +180,7 @@ def read(self, start_time: datetime, end_time: Optional[datetime] = None, downlo
             freq=timedelta(hours=3),
         )
         data_out = pd.DataFrame(index=t)
-        data_out.index = data_out.index.tz_localize(timezone.utc)
+        data_out.index = data_out.index.tz_localize(timezone.utc)  # ty: ignore[possibly-missing-attribute]
         data_out["kp"] = np.array([np.nan] * len(t))
         data_out["file_name"] = np.array([np.nan] * len(t))
 
@@ -252,27 +252,27 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame:
             lines = f.readlines()
             for line in lines:
                 if ":Issued:" in line:
-                    year = int(re.search(r"(\d{4})", line).group(1))
+                    year = int(re.search(r"(\d{4})", line).group(1))  # ty: ignore[possibly-missing-attribute]
                     break
 
             for i, line in enumerate(lines):
                 if "NOAA Kp index breakdown" in line:
                     first_line = i + 2
                     break
 
-            headers = lines[first_line].split()
+            headers = lines[first_line].split()  # ty: ignore[invalid-argument-type]
             headers = [headers[i] + " " + headers[i + 1] for i in range(0, len(headers), 2)]
             for d in headers:
                 try:
                     if any("Dec" in month for month in headers) and "Jan" in d:
-                        parsed_date = self._parse_date(d, year + 1)
+                        parsed_date = self._parse_date(d, year + 1)  # ty: ignore[unsupported-operator]
                     else:
                         parsed_date = self._parse_date(d, year)
                     dates.append(parsed_date)
                 except ValueError:
                     raise
 
-            for line in lines[first_line + 1 : first_line + 9]:
+            for line in lines[first_line + 1 : first_line + 9]:  # ty: ignore[unsupported-operator]
                 values = [float(val) for val in line.split()[1:] if re.match(r"^\d+\.\d+$", val)]
 
                 kp_data.append(values)