Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 59 additions & 5 deletions src/zyra/processing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,51 @@ def register_cli(subparsers: Any) -> None:
is_netcdf_bytes,
)
from zyra.utils.cli_helpers import (
read_all_bytes as _read_bytes,
read_all_bytes as _read_all_bytes,
)

def _read_input_bytes(
    path_or_url: str,
    *,
    idx_pattern: str | None = None,
    unsigned: bool = False,
) -> bytes:
    """Return the bytes of ``path_or_url``, optionally GRIB ``.idx``-subset.

    Without ``idx_pattern`` this is a plain full read. With a pattern, HTTP(S)
    and S3 sources are subset via byte-range requests against their ``.idx``
    sidecar (returning ``b""`` when no entries match); any other scheme —
    including local files — falls back to reading everything.

    Raises ``SystemExit`` if the ``.idx`` subsetting path fails for any
    reason (missing optional dependencies, network/environment errors).
    """
    if not idx_pattern:
        return _read_all_bytes(path_or_url)

    from urllib.parse import urlparse

    scheme = (urlparse(path_or_url).scheme or "").lower()
    try:
        if scheme in ("http", "https"):
            # Lazy imports: only pay for these when subsetting over HTTP(S).
            from zyra.connectors.backends import http as http_backend
            from zyra.utils.grib import idx_to_byteranges

            byteranges = idx_to_byteranges(
                http_backend.get_idx_lines(path_or_url), idx_pattern
            )
            if not byteranges:
                return b""
            return http_backend.download_byteranges(
                path_or_url, byteranges.keys()
            )
        if scheme == "s3":
            from zyra.connectors.backends import s3 as s3_backend
            from zyra.utils.grib import idx_to_byteranges

            byteranges = idx_to_byteranges(
                s3_backend.get_idx_lines(path_or_url, unsigned=unsigned),
                idx_pattern,
            )
            if not byteranges:
                return b""
            return s3_backend.download_byteranges(
                path_or_url, None, byteranges.keys(), unsigned=unsigned
            )
    except Exception as exc:  # pragma: no cover - optional deps/env specific
        raise SystemExit(f"Failed to subset via .idx: {exc}") from exc

    # Any other scheme (local files, etc.) cannot be .idx-subset: full read.
    return _read_all_bytes(path_or_url)

def cmd_decode_grib2(args: argparse.Namespace) -> int:
# Per-command verbosity/trace mapping
import os
Expand All @@ -93,7 +135,11 @@ def cmd_decode_grib2(args: argparse.Namespace) -> int:
# fast and to avoid importing heavy modules unnecessarily, we load the
# decoder utilities only after we've successfully read the input bytes
# and determined that we actually need to decode.
data = _read_bytes(args.file_or_url)
data = _read_input_bytes(
args.file_or_url,
idx_pattern=getattr(args, "pattern", None),
unsigned=bool(getattr(args, "unsigned", False)),
)
import logging

if os.environ.get("ZYRA_SHELL_TRACE"):
Expand Down Expand Up @@ -134,7 +180,7 @@ def cmd_extract_variable(args: argparse.Namespace) -> int:
extract_variable,
)

data = _read_bytes(args.file_or_url)
data = _read_all_bytes(args.file_or_url)
if getattr(args, "stdout", False):
out_fmt = (args.format or "netcdf").lower()
if out_fmt not in ("netcdf", "grib2"):
Expand Down Expand Up @@ -269,7 +315,11 @@ def cmd_convert_format(args: argparse.Namespace) -> int:
outdir_p.mkdir(parents=True, exist_ok=True)
wrote = []
for src in args.inputs:
data = _read_bytes(src)
data = _read_input_bytes(
src,
idx_pattern=getattr(args, "pattern", None),
unsigned=bool(getattr(args, "unsigned", False)),
)
# Fast-path: NetCDF passthrough when converting to NetCDF
if args.format == "netcdf" and is_netcdf_bytes(data):
# Write source name with .nc extension
Expand Down Expand Up @@ -302,7 +352,11 @@ def cmd_convert_format(args: argparse.Namespace) -> int:

# Single-input flow
# Read input first so we can short-circuit pass-through without heavy imports
data = _read_bytes(args.file_or_url)
data = _read_input_bytes(
args.file_or_url,
idx_pattern=getattr(args, "pattern", None),
unsigned=bool(getattr(args, "unsigned", False)),
)
# If reading NetCDF and writing NetCDF with --stdout, pass-through
if (
getattr(args, "stdout", False)
Expand Down
4 changes: 4 additions & 0 deletions src/zyra/transform/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Commands
- `metadata` — Compute frames metadata JSON from a directory of images (alias: `scan-frames`).
- `enrich-metadata` — Enrich frames metadata JSON with dataset id, Vimeo URI, and timestamp.
- `update-dataset-json` — Update dataset JSON fields from CLI args or another file.
- `shapefile-to-geojson` — Convert shapefiles to GeoJSON with optional time normalization.
- `csv-to-geojson` — Convert CSV point tables to GeoJSON with optional time normalization.

metadata
- `--frames-dir DIR` — directory containing images
Expand All @@ -14,3 +16,5 @@ metadata

Examples
- `zyra transform scan-frames --frames-dir ./frames --pattern '\\.(png|jpg)$' --period-seconds 300 -o frames.json`
- `zyra transform shapefile-to-geojson --input data/250314_FRW.shp --timezone America/Chicago --default-year 2025 -o warnings.geojson`
- `zyra transform csv-to-geojson --input data/250314_regional_hotspots.csv --timezone America/Chicago --event-date 2025-03-14 -o hotspots.geojson`
Loading
Loading