Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
09becdc
chore(pre-commit): remove pyright from pre-commit
sahiljhawar Feb 11, 2026
0e659c6
chore(CI): add pyright to CI
sahiljhawar Feb 11, 2026
a2f9e33
chore(CI): add pyright as a standalone workflow and corresponding bad…
sahiljhawar Feb 11, 2026
9e93436
chore(CI): run CI on main
sahiljhawar Feb 11, 2026
1e5780c
chore: try ty for typecheckin
sahiljhawar Feb 11, 2026
cc0d3ea
style(type-checking): add type ignore for invalid assignment in data_dir
sahiljhawar Feb 12, 2026
13acd85
chore(ty): add ty ignore comment
sahiljhawar Feb 12, 2026
6d9f0b4
fix: do not fail on when day difference is larger than 30 days
sahiljhawar Feb 12, 2026
d4c1773
chore: add ty options
sahiljhawar Feb 12, 2026
1256679
Merge branch 'main' into pyright-ci
sahiljhawar Feb 12, 2026
71d7b9b
Merge branch 'main' into pyright-ci
sahiljhawar Feb 12, 2026
447f8fb
chore(hp): add ty ignore comments
sahiljhawar Feb 12, 2026
3124667
chore(kp): add ty ignore comments
sahiljhawar Feb 12, 2026
2334eff
chore(omni): fix type in high res
sahiljhawar Feb 12, 2026
c918789
chore(sw): add ty ignore comments
sahiljhawar Feb 12, 2026
ea5509f
chore: add ty ignore comments
sahiljhawar Feb 12, 2026
df9ccef
chore: add ty as pre-commit
sahiljhawar Feb 12, 2026
f411272
ci: remove pyright ci
sahiljhawar Feb 12, 2026
96b21e3
docs: add ty in contrib
sahiljhawar Feb 12, 2026
65ccd84
Update swvo/io/solar_wind/ace.py
sahiljhawar Feb 12, 2026
189db9a
Update swvo/io/solar_wind/read_solar_wind_from_multiple_models.py
sahiljhawar Feb 12, 2026
43ce4b6
Update swvo/io/kp/read_kp_from_multiple_models.py
sahiljhawar Feb 12, 2026
cb867a6
Update swvo/io/solar_wind/read_solar_wind_from_multiple_models.py
sahiljhawar Feb 12, 2026
7127e14
Update swvo/io/hp/read_hp_from_multiple_models.py
sahiljhawar Feb 12, 2026
762e261
Update swvo/io/hp/read_hp_from_multiple_models.py
sahiljhawar Feb 12, 2026
ec9fe2b
Update swvo/io/hp/gfz.py
sahiljhawar Feb 12, 2026
05e1e99
chore: remove pyright badge and add 'swvo' in ty check
sahiljhawar Feb 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
Comment thread
sahiljhawar marked this conversation as resolved.
- uses: astral-sh/ruff-action@v3
with:
version: "latest"

tests:
needs: ruff
strategy:
Expand All @@ -26,10 +27,10 @@ jobs:
os: [ubuntu-latest, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
Comment thread
sahiljhawar marked this conversation as resolved.
Comment thread
sahiljhawar marked this conversation as resolved.
python-version: ${{ matrix.python-version }}

Expand Down
14 changes: 6 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ repos:
# Run the formatter.
- id: ruff-format

# - repo: local
# hooks:
# - id: pyright-verifytypes
# name: pyright verifytypes
# entry: bash -c 'pip install . >/dev/null && pyright --verifytypes swvo --ignoreexternal'
# language: system
# types: [python]
# pass_filenames: false
- repo: local
hooks:
- id: ty
name: ty check
entry: ty check swvo --ignore unresolved-import
language: python
Comment thread
sahiljhawar marked this conversation as resolved.
11 changes: 10 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,16 @@ We use [Ruff](https://docs.astral.sh/ruff/) for linting, formatting, and import
pip install pre-commit
pre-commit install
```
This ensures Ruff runs automatically on changed files before each commit.
This ensures Ruff runs automatically on changed files before each commit (this will also run Ty type checking, see below).

## Type Checking
We use [Ty](https://docs.astral.sh/ty/) for type checking.
1. To check your code for type issues:
```bash
ty check .
```
2. Address any type errors reported by Ty.
3. [Optional] Ty is also integrated with `pre-commit`. If you have set up `pre-commit` as described above, Ty will run automatically on changed files before each commit.

## Running Tests
We use `pytest` for testing.
Expand Down
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,13 @@ push = true
[tool.bumpver.file_patterns]
"pyproject.toml" = ['current_version = "{version}"']
"swvo/__init__.py" = ['__version__ = "{version}"']

[tool.ty.src]
include = ["swvo"]
exclude = ["tests", "swvo/io/RBMDataSet"]

[[tool.ty.overrides]]
include = ["swvo"]

[tool.ty.overrides.rules]
invalid-return-type = "ignore"
2 changes: 1 addition & 1 deletion swvo/io/dst/wdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
if self.ENV_VAR_NAME not in os.environ:
raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion swvo/io/f10_7/omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
f107_df["file_name"] = data_out["file_name"]

# we return it just every 24 hours
f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0)
f107_df = f107_df.drop(f107_df[data_out.index.hour % 24 != 0].index, axis=0) # ty: ignore[possibly-missing-attribute]
f107_df = f107_df.replace(999.9, np.nan)
f107_df = f107_df.truncate(
before=start_time - timedelta(hours=23.9999),
Expand Down
35 changes: 30 additions & 5 deletions swvo/io/f10_7/swpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,30 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
if self.ENV_VAR_NAME not in os.environ:
msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
raise ValueError(msg)
data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)

logger.info(f"SWPC F10.7 data directory: {self.data_dir}")

def _is_within_download_range(self, target_date: datetime) -> bool:
"""Check if a date is within the last 30 days.

Parameters
----------
target_date : datetime
Date to check.

Returns
-------
bool
True if the date is within the last 30 days, False otherwise.
"""
now = datetime.now(timezone.utc)
thirty_days_ago = now - timedelta(days=30)
return target_date >= thirty_days_ago

def _get_processed_file_list(
self, start_time: datetime, end_time: datetime
) -> tuple[list[Path], list[tuple[datetime, datetime]]]:
Expand Down Expand Up @@ -111,8 +128,7 @@ def download_and_process(self) -> None:
logger.debug(f"Updating {file_path}...")

existing_data = pd.read_csv(file_path, parse_dates=["date"])
existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None)

existing_data["date"] = pd.to_datetime(existing_data["date"]).dt.tz_localize(None) # ty: ignore[unresolved-attribute]
combined_data = pd.concat([existing_data, year_data])
combined_data = combined_data.drop_duplicates(subset=["date"], keep="last")
combined_data = combined_data.sort_values("date")
Expand Down Expand Up @@ -238,6 +254,15 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal
for file_path in file_paths:
if not file_path.exists():
if download:
year = int(file_path.stem.split("_")[-1])
year_end = datetime(year, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
if not self._is_within_download_range(year_end):
logger.warning(
f"Cannot download data for year {year}. "
f"Only data from the last 30 days can be downloaded from SWPC."
)
continue

self.download_and_process()
else:
warnings.warn(f"File {file_path} not found")
Expand All @@ -247,8 +272,8 @@ def read(self, start_time: datetime, end_time: datetime, *, download: bool = Fal
data_out = df_one_file.combine_first(data_out)

if not data_out.empty:
if data_out.index.tzinfo is None:
data_out.index = data_out.index.tz_localize("UTC")
if data_out.index.tzinfo is None: # ty: ignore[possibly-missing-attribute]
data_out.index = data_out.index.tz_localize("UTC") # ty: ignore[possibly-missing-attribute]
data_out.drop("date", axis=1, inplace=True)
data_out = data_out.truncate(
before=start_time - timedelta(hours=23.9999),
Expand Down
6 changes: 3 additions & 3 deletions swvo/io/hp/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None:
msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
raise ValueError(msg)

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)

Expand Down Expand Up @@ -178,7 +178,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N
if end_time is not None and not end_time.tzinfo:
end_time = end_time.replace(tzinfo=timezone.utc)

if not (0 <= horizon <= 72):
if not (0 <= horizon <= 72): # ty: ignore[unsupported-operator]
raise ValueError("Horizon must be between 0 and 72 hours")

if self.index == "hp30":
Expand All @@ -187,7 +187,7 @@ def read_with_horizon(self, start_time: datetime, end_time: datetime, horizon: N
raise ValueError("Horizon for hp30 must be in 0.5 hour increments")
elif self.index == "hp60":
freq = "1h"
if horizon % 1 != 0:
if horizon % 1 != 0: # ty: ignore[unsupported-operator]
raise ValueError("Horizon for hp60 must be in 1 hour increments")

align_start_to_hp_hr = start_time.replace(hour=start_time.hour, minute=0, second=0, microsecond=0)
Expand Down
12 changes: 6 additions & 6 deletions swvo/io/hp/gfz.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ftplib import FTP
from pathlib import Path
from shutil import rmtree
from typing import Optional
from typing import List, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -56,7 +56,7 @@ def __init__(self, index: str, data_dir: Optional[Path] = None) -> None:
msg = f"Necessary environment variable {self.ENV_VAR_NAME} not set!"
raise ValueError(msg)

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -263,15 +263,15 @@ def _get_processed_file_list(self, start_time: datetime, end_time: datetime) ->

return file_paths, time_intervals

def _process_single_file(self, temp_dir: str, filenames: str) -> pd.DataFrame:
def _process_single_file(self, temp_dir: Path, filenames: List[str]) -> pd.DataFrame:
"""Process HpGFZ file to a DataFrame.

Parameters
----------
temp_dir : str
temp_dir : Path
Temporary directory to store the file.
file_path : Path
Path to the file.
filenames : List[str]
List of filenames to process.

Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions swvo/io/hp/read_hp_from_multiple_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def read_hp_from_multiple_models( # noqa: PLR0913
start_time,
end_time,
historical_data_cutoff_time,
reduce_ensemble,
reduce_ensemble, # ty: ignore[invalid-argument-type]
download=download,
)

Expand Down Expand Up @@ -128,7 +128,7 @@ def _read_from_model( # noqa: PLR0913
num_ens_members = len(data_one_model)

if num_ens_members > 0 and reduce_ensemble is not None:
data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index)
data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble, model.index) # ty: ignore[invalid-argument-type]

return data_one_model

Expand Down
6 changes: 3 additions & 3 deletions swvo/io/kp/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
if self.ENV_VAR_NAME not in os.environ:
raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)

Expand Down Expand Up @@ -108,7 +108,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]:
freq=timedelta(hours=3),
)
data_out = pd.DataFrame(index=t)
data_out.index = data_out.index.tz_localize(timezone.utc)
data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute]
data_out["kp"] = np.array([np.nan] * len(t))
data_out = data_out.truncate(
before=start_time - timedelta(hours=2.9999),
Expand All @@ -129,7 +129,7 @@ def read(self, start_time: datetime, end_time: datetime) -> list[pd.DataFrame]:
df["file_name"] = file
df.loc[df["kp"].isna(), "file_name"] = None

df.index = df.index.tz_localize("UTC")
df.index = df.index.tz_localize("UTC") # ty: ignore[possibly-missing-attribute]

df = df.truncate(
before=start_time - timedelta(hours=2.9999),
Expand Down
10 changes: 5 additions & 5 deletions swvo/io/kp/niemegk.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
if self.ENV_VAR_NAME not in os.environ:
raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -163,7 +163,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
freq=timedelta(hours=3),
)
data_out = pd.DataFrame(index=t)
data_out.index = data_out.index.tz_localize(timezone.utc)
data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute]
data_out["kp"] = np.array([np.nan] * len(t))
data_out["file_name"] = np.array([None] * len(t))

Expand Down Expand Up @@ -254,8 +254,8 @@ def _read_single_file(self, file_path) -> pd.DataFrame:
df["t"] = pd.to_datetime(df["t"])
df.index = df["t"]
df.drop(labels=["t"], axis=1, inplace=True)
if not df.index.tzinfo:
df.index = df.index.tz_localize(timezone.utc)
if not df.index.tzinfo: # ty: ignore[possibly-missing-attribute]
df.index = df.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute]

df["file_name"] = file_path
df.loc[df["kp"].isna(), "file_name"] = None
Expand Down Expand Up @@ -302,7 +302,7 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame:
)
data.index.rename("t", inplace=True)
data.index = data["t"]
data.index = data.index.tz_localize(timezone.utc)
data.index = data.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute]
data.drop(labels=["t"], axis=1, inplace=True)
data.dropna(inplace=True)
data = data[data["kp"] != -1.0]
Expand Down
2 changes: 1 addition & 1 deletion swvo/io/kp/omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def read(self, start_time: datetime, end_time: datetime, download: bool = False)
kp_df["kp"] = data_out["kp"]
kp_df["file_name"] = data_out["file_name"]
# we return it just every 3 hours
kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0)
kp_df = kp_df.drop(kp_df[data_out.index.hour % 3 != 0].index, axis=0) # ty: ignore[possibly-missing-attribute]
kp_df = kp_df.truncate(
before=start_time - timedelta(hours=2.9999),
after=end_time + timedelta(hours=2.9999),
Expand Down
4 changes: 2 additions & 2 deletions swvo/io/kp/read_kp_from_multiple_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def read_kp_from_multiple_models( # noqa: PLR0913
start_time,
end_time,
historical_data_cutoff_time,
reduce_ensemble,
reduce_ensemble, # ty: ignore[invalid-argument-type]
download=download,
)
data_out = construct_updated_data_frame(data_out, data_one_model, model.LABEL)
Expand Down Expand Up @@ -172,7 +172,7 @@ def _read_from_model( # noqa: PLR0913
num_ens_members = len(data_one_model)

if num_ens_members > 0 and reduce_ensemble is not None:
data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble)
data_one_model = _reduce_ensembles(data_one_model, reduce_ensemble) # ty: ignore[invalid-argument-type]

return data_one_model

Expand Down
12 changes: 6 additions & 6 deletions swvo/io/kp/swpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(self, data_dir: Optional[Path] = None) -> None:
if self.ENV_VAR_NAME not in os.environ:
raise ValueError(f"Necessary environment variable {self.ENV_VAR_NAME} not set!")

data_dir = os.environ.get(self.ENV_VAR_NAME)
data_dir = os.environ.get(self.ENV_VAR_NAME) # ty: ignore[invalid-assignment]

self.data_dir: Path = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -180,7 +180,7 @@ def read(self, start_time: datetime, end_time: Optional[datetime] = None, downlo
freq=timedelta(hours=3),
)
data_out = pd.DataFrame(index=t)
data_out.index = data_out.index.tz_localize(timezone.utc)
data_out.index = data_out.index.tz_localize(timezone.utc) # ty: ignore[possibly-missing-attribute]
data_out["kp"] = np.array([np.nan] * len(t))
data_out["file_name"] = np.array([np.nan] * len(t))

Expand Down Expand Up @@ -252,27 +252,27 @@ def _process_single_file(self, temporary_dir: Path) -> pd.DataFrame:
lines = f.readlines()
for line in lines:
if ":Issued:" in line:
year = int(re.search(r"(\d{4})", line).group(1))
year = int(re.search(r"(\d{4})", line).group(1)) # ty: ignore[possibly-missing-attribute]
break

for i, line in enumerate(lines):
if "NOAA Kp index breakdown" in line:
first_line = i + 2
break

headers = lines[first_line].split()
headers = lines[first_line].split() # ty: ignore[invalid-argument-type]
headers = [headers[i] + " " + headers[i + 1] for i in range(0, len(headers), 2)]
for d in headers:
try:
if any("Dec" in month for month in headers) and "Jan" in d:
parsed_date = self._parse_date(d, year + 1)
parsed_date = self._parse_date(d, year + 1) # ty: ignore[unsupported-operator]
else:
parsed_date = self._parse_date(d, year)
dates.append(parsed_date)
except ValueError:
raise

for line in lines[first_line + 1 : first_line + 9]:
for line in lines[first_line + 1 : first_line + 9]: # ty: ignore[unsupported-operator]
values = [float(val) for val in line.split()[1:] if re.match(r"^\d+\.\d+$", val)]

kp_data.append(values)
Expand Down
Loading
Loading