diff --git a/src/zyra/processing/__init__.py b/src/zyra/processing/__init__.py index cbd74b10..df158c19 100644 --- a/src/zyra/processing/__init__.py +++ b/src/zyra/processing/__init__.py @@ -66,9 +66,51 @@ def register_cli(subparsers: Any) -> None: is_netcdf_bytes, ) from zyra.utils.cli_helpers import ( - read_all_bytes as _read_bytes, + read_all_bytes as _read_all_bytes, ) + def _read_input_bytes( + path_or_url: str, + *, + idx_pattern: str | None = None, + unsigned: bool = False, + ) -> bytes: + """Load bytes from path/URL with optional GRIB ``.idx`` subsetting.""" + + if not idx_pattern: + return _read_all_bytes(path_or_url) + + from urllib.parse import urlparse + + parsed = urlparse(path_or_url) + scheme = (parsed.scheme or "").lower() + try: + if scheme in {"http", "https"}: + from zyra.connectors.backends import http as http_backend + from zyra.utils.grib import idx_to_byteranges + + lines = http_backend.get_idx_lines(path_or_url) + ranges = idx_to_byteranges(lines, idx_pattern) + if not ranges: + return b"" + return http_backend.download_byteranges(path_or_url, ranges.keys()) + if scheme == "s3": + from zyra.connectors.backends import s3 as s3_backend + from zyra.utils.grib import idx_to_byteranges + + lines = s3_backend.get_idx_lines(path_or_url, unsigned=unsigned) + ranges = idx_to_byteranges(lines, idx_pattern) + if not ranges: + return b"" + return s3_backend.download_byteranges( + path_or_url, None, ranges.keys(), unsigned=unsigned + ) + except Exception as exc: # pragma: no cover - optional deps/env specific + raise SystemExit(f"Failed to subset via .idx: {exc}") from exc + + # Fallback: local files/other schemes fall back to full read + return _read_all_bytes(path_or_url) + def cmd_decode_grib2(args: argparse.Namespace) -> int: # Per-command verbosity/trace mapping import os @@ -93,7 +135,11 @@ def cmd_decode_grib2(args: argparse.Namespace) -> int: # fast and to avoid importing heavy modules unnecessarily, we load the # decoder utilities only after we've successfully read the input bytes # and determined that we actually need to decode. - data = _read_bytes(args.file_or_url) + data = _read_input_bytes( + args.file_or_url, + idx_pattern=getattr(args, "pattern", None), + unsigned=bool(getattr(args, "unsigned", False)), + ) import logging if os.environ.get("ZYRA_SHELL_TRACE"): @@ -134,7 +180,7 @@ def cmd_extract_variable(args: argparse.Namespace) -> int: extract_variable, ) - data = _read_bytes(args.file_or_url) + data = _read_all_bytes(args.file_or_url) if getattr(args, "stdout", False): out_fmt = (args.format or "netcdf").lower() if out_fmt not in ("netcdf", "grib2"): @@ -269,7 +315,11 @@ def cmd_convert_format(args: argparse.Namespace) -> int: outdir_p.mkdir(parents=True, exist_ok=True) wrote = [] for src in args.inputs: - data = _read_bytes(src) + data = _read_input_bytes( + src, + idx_pattern=getattr(args, "pattern", None), + unsigned=bool(getattr(args, "unsigned", False)), + ) # Fast-path: NetCDF passthrough when converting to NetCDF if args.format == "netcdf" and is_netcdf_bytes(data): # Write source name with .nc extension @@ -302,7 +352,11 @@ def cmd_convert_format(args: argparse.Namespace) -> int: # Single-input flow # Read input first so we can short-circuit pass-through without heavy imports - data = _read_bytes(args.file_or_url) + data = _read_input_bytes( + args.file_or_url, + idx_pattern=getattr(args, "pattern", None), + unsigned=bool(getattr(args, "unsigned", False)), + ) # If reading NetCDF and writing NetCDF with --stdout, pass-through if ( getattr(args, "stdout", False) diff --git a/src/zyra/transform/README.md b/src/zyra/transform/README.md index 15ee65ce..6b3efe5f 100644 --- a/src/zyra/transform/README.md +++ b/src/zyra/transform/README.md @@ -4,6 +4,8 @@ Commands - `metadata` — Compute frames metadata JSON from a directory of images (alias: `scan-frames`). - `enrich-metadata` — Enrich frames metadata JSON with dataset id, Vimeo URI, and timestamp. - `update-dataset-json` — Update dataset JSON fields from CLI args or another file. +- `shapefile-to-geojson` — Convert shapefiles to GeoJSON with optional time normalization. +- `csv-to-geojson` — Convert CSV point tables to GeoJSON with optional time normalization. metadata - `--frames-dir DIR` — directory containing images @@ -14,3 +16,5 @@ metadata Examples - `zyra transform scan-frames --frames-dir ./frames --pattern '\\.(png|jpg)$' --period-seconds 300 -o frames.json` +- `zyra transform shapefile-to-geojson --input data/250314_FRW.shp --timezone America/Chicago --default-year 2025 -o warnings.geojson` +- `zyra transform csv-to-geojson --input data/250314_regional_hotspots.csv --timezone America/Chicago --event-date 2025-03-14 -o hotspots.geojson` diff --git a/src/zyra/transform/__init__.py b/src/zyra/transform/__init__.py index 4186412b..05d25018 100644 --- a/src/zyra/transform/__init__.py +++ b/src/zyra/transform/__init__.py @@ -3,12 +3,20 @@ import argparse import json +import logging import os import re from datetime import datetime, timedelta from pathlib import Path from typing import Any +from rasterio.enums import Resampling + +from zyra.transform.geospatial import ( + write_csv_points_geojson, + write_shapefile_geojson, +) +from zyra.transform.raster import convert_geotiff_to_cog from zyra.utils.cli_helpers import configure_logging_from_env from zyra.utils.date_manager import DateManager from zyra.utils.io_utils import open_output @@ -133,8 +141,216 @@ def _cmd_metadata(ns: argparse.Namespace) -> int: return 0 +_RESAMPLING_MAP = {item.name.lower(): item for item in Resampling} +_RESAMPLING_CHOICES = sorted(_RESAMPLING_MAP.keys()) + + +def _parse_resampling( + value: str | None, *, default: Resampling | None = None +) -> Resampling | None: + if value is None: + return default + key = value.strip().lower() + if key not in _RESAMPLING_MAP: + raise SystemExit(f"Unsupported resampling value: {value}") + return _RESAMPLING_MAP[key] + + +def _parse_overview_levels(value: str | None) -> tuple[int, ...] | None: + if value is None: + return None + token = value.strip().lower() + if not token or token == "auto": + return None + levels: list[int] = [] + for part in token.split(","): + part = part.strip() + if not part: + continue + try: + level = int(part) + except ValueError as exc: + raise SystemExit("Overview levels must be integers (e.g., 2,4,8)") from exc + if level <= 1: + raise SystemExit("Overview levels must be greater than 1") + levels.append(level) + if not levels: + return None + return tuple(sorted(set(levels))) + + +def _collect_raster_inputs( + inputs: list[str] | None, + input_dir: str | None, + pattern: str, + recursive: bool, +) -> list[Path]: + pattern = pattern or "*.tif" + gathered: list[Path] = [] + seen: set[Path] = set() + + def _add_candidate(path: Path) -> None: + if not path.exists(): + raise SystemExit(f"Input not found: {path}") + if not path.is_file(): + raise SystemExit(f"Input is not a file: {path}") + resolved = path.resolve() + if resolved not in seen: + seen.add(resolved) + gathered.append(path) + + if inputs: + for item in inputs: + candidate = Path(item) + if candidate.is_dir(): + iterator = ( + candidate.rglob(pattern) if recursive else candidate.glob(pattern) + ) + for child in sorted(iterator): + if child.is_file(): + _add_candidate(child) + else: + _add_candidate(candidate) + + if input_dir: + directory = Path(input_dir) + if not directory.exists() or not directory.is_dir(): + raise SystemExit(f"Input directory not found: {directory}") + iterator = directory.rglob(pattern) if recursive else directory.glob(pattern) + for child in sorted(iterator): + if child.is_file(): + _add_candidate(child) + + if not gathered: + raise SystemExit("No GeoTIFF inputs found for geotiff-to-cog") + return gathered + + +def _render_output_name( + src_path: Path, + *, + template: str, + output_dir: Path, + dm: DateManager, + timestamp_format: str | None, +) -> Path: + if output_dir is None: + raise SystemExit("--output-dir is required when using --name-template") + values = { + "stem": src_path.stem, + "name": src_path.name, + "timestamp": "", + } + if "{timestamp" in template: + ts_iso = dm.extract_date_time(src_path.name) + dt_value: datetime | None = None + if ts_iso: + try: + dt_value = datetime.fromisoformat(ts_iso) + except ValueError: + dt_value = None + if dt_value is None: + raise SystemExit( + f"Could not derive timestamp for '{src_path.name}' to populate {{timestamp}}" + ) + out_fmt = timestamp_format or "%Y%m%dT%H%M%SZ" + values["timestamp"] = dt_value.strftime(out_fmt) + + try: + filename = template.format(**values) + except KeyError as exc: + missing = exc.args[0] + raise SystemExit( + f"Unknown placeholder '{{{missing}}}' in --name-template" + ) from exc + + return output_dir / filename + + +def _cmd_geotiff_to_cog(ns: argparse.Namespace) -> int: + if getattr(ns, "verbose", False): + os.environ["ZYRA_VERBOSITY"] = "debug" + elif getattr(ns, "quiet", False): + os.environ["ZYRA_VERBOSITY"] = "quiet" + if getattr(ns, "trace", False): + os.environ["ZYRA_SHELL_TRACE"] = "1" + configure_logging_from_env() + + inputs = _collect_raster_inputs(ns.inputs, ns.input_dir, ns.pattern, ns.recursive) + multi_inputs = len(inputs) > 1 + + if ns.output and ns.output_dir: + raise SystemExit("Specify either --output or --output-dir, not both") + if multi_inputs and ns.output: + raise SystemExit("--output can only be used with a single input GeoTIFF") + if multi_inputs and not ns.output_dir: + raise SystemExit("--output-dir is required when converting multiple files") + if not ns.output and not ns.output_dir: + raise SystemExit( + "Specify --output or --output-dir for the COG conversion output" + ) + + resampling = _parse_resampling(ns.resampling, default=Resampling.bilinear) + overview_resampling = _parse_resampling(ns.overview_resampling, default=None) + overview_levels = _parse_overview_levels(ns.overview_levels) + + if ns.predictor and ns.predictor.lower() != "auto": + try: + predictor_value: int | None = int(ns.predictor) + except ValueError as exc: + raise SystemExit("--predictor must be an integer or 'auto'") from exc + else: + predictor_value = None + + timestamp_dm = DateManager([ns.datetime_format] if ns.datetime_format else None) + + output_dir_path = Path(ns.output_dir) if ns.output_dir else None + if output_dir_path and not ns.dry_run: + output_dir_path.mkdir(parents=True, exist_ok=True) + + for input_path in inputs: + if ns.output: + target_path = Path(ns.output) + if not ns.dry_run: + target_path.parent.mkdir(parents=True, exist_ok=True) + else: + target_path = _render_output_name( + input_path, + template=ns.name_template, + output_dir=output_dir_path, + dm=timestamp_dm, + timestamp_format=ns.output_datetime_format, + ) + + if ns.dry_run: + print(f"[dry-run] {input_path} -> {target_path}") + continue + + try: + result_path = convert_geotiff_to_cog( + input_path, + target_path, + dst_crs=ns.dst_crs, + resampling=resampling or Resampling.bilinear, + overview_levels=overview_levels, + overview_resampling=overview_resampling, + blocksize=ns.blocksize, + compression=ns.compression, + predictor=predictor_value, + bigtiff=ns.bigtiff, + num_threads=ns.num_threads, + overwrite=bool(ns.overwrite), + ) + except Exception as exc: # pragma: no cover - surface error to CLI + logging.error(str(exc)) + return 2 + print(result_path) + + return 0 + + def register_cli(subparsers: Any) -> None: - """Register transform subcommands (metadata, enrich-metadata, enrich-datasets, update-dataset-json).""" + """Register transform subcommands (metadata, enrich, dataset tools, geo helpers).""" from zyra.cli_common import add_output_option @@ -190,6 +406,129 @@ def _configure_metadata_parser( ) _configure_metadata_parser(p_scan, alias_name="scan-frames") + p_cog = subparsers.add_parser( + "geotiff-to-cog", + help="Convert GeoTIFFs to Cloud Optimized GeoTIFFs", + description=( + "Convert one or more GeoTIFF inputs into Cloud Optimized GeoTIFFs (COGs) with optional reprojection, " + "tiled compression, and overview generation." + ), + ) + p_cog.add_argument( + "--input", + dest="inputs", + action="append", + help="Input GeoTIFF path (repeatable). Directories will be scanned using --pattern", + ) + p_cog.add_argument( + "--input-dir", + dest="input_dir", + help="Directory containing GeoTIFF files to convert", + ) + p_cog.add_argument( + "--pattern", + default="*.tif", + help="Glob pattern used when scanning directories (default: *.tif)", + ) + p_cog.add_argument( + "-o", + "--output", + dest="output", + help="Output path for a single input file", + ) + p_cog.add_argument( + "--output-dir", + dest="output_dir", + help="Directory to write outputs (required for multiple inputs)", + ) + p_cog.add_argument( + "--name-template", + default="{stem}.tif", + help="Filename template when writing to --output-dir. Supports {stem}, {name}, {timestamp}", + ) + p_cog.add_argument( + "--datetime-format", + help="Datetime format to extract timestamps from source filenames (for {timestamp})", + ) + p_cog.add_argument( + "--output-datetime-format", + default="%Y%m%dT%H%M%SZ", + help="Strftime pattern used when formatting {timestamp} (default: %Y%m%dT%H%M%SZ)", + ) + p_cog.add_argument( + "--dst-crs", + help="Destination CRS (e.g., EPSG:4326). Defaults to the source CRS", + ) + p_cog.add_argument( + "--resampling", + default="bilinear", + choices=_RESAMPLING_CHOICES, + help="Resampling kernel to use when reprojecting (default: bilinear)", + ) + p_cog.add_argument( + "--overview-resampling", + choices=_RESAMPLING_CHOICES, + help="Resampling kernel to use for overview generation (default: match --resampling)", + ) + p_cog.add_argument( + "--overview-levels", + help="Comma-separated overview decimation levels (default: auto)", + ) + p_cog.add_argument( + "--blocksize", + type=int, + default=512, + help="Internal tile size in pixels (default: 512)", + ) + p_cog.add_argument( + "--compression", + default="DEFLATE", + help="Compression codec for output tiles (default: DEFLATE)", + ) + p_cog.add_argument( + "--predictor", + default="auto", + help="Predictor to use (auto, 1, 2, or 3)", + ) + p_cog.add_argument( + "--bigtiff", + default="IF_SAFER", + choices=["YES", "NO", "IF_NEEDED", "IF_SAFER"], + help="Control BigTIFF creation (default: IF_SAFER)", + ) + p_cog.add_argument( + "--num-threads", + dest="num_threads", + help="Value for GDAL NUM_THREADS option (e.g., ALL_CPUS)", + ) + p_cog.add_argument( + "--overwrite", + action="store_true", + help="Allow overwriting existing outputs", + ) + p_cog.add_argument( + "--dry-run", + action="store_true", + help="Report planned outputs without writing files", + ) + p_cog.add_argument( + "--recursive", + action="store_true", + help="Recurse into subdirectories when scanning directories", + ) + p_cog.add_argument( + "--verbose", action="store_true", help="Verbose logging for this command" + ) + p_cog.add_argument( + "--quiet", action="store_true", help="Quiet logging for this command" + ) + p_cog.add_argument( + "--trace", + action="store_true", + help="Shell-style trace of key steps and external commands", + ) + p_cog.set_defaults(func=_cmd_geotiff_to_cog) + # Enrich metadata with dataset_id, vimeo_uri, and updated_at def _cmd_enrich(ns: argparse.Namespace) -> int: """CLI: enrich a frames metadata JSON with dataset id and Vimeo URI. @@ -618,3 +957,231 @@ def _update_entry(entry: dict) -> bool: help="Shell-style trace of key steps and external commands", ) p3.set_defaults(func=_cmd_update_dataset) + + def _cmd_shapefile_to_geojson(ns: argparse.Namespace) -> int: + if getattr(ns, "verbose", False): + os.environ["ZYRA_VERBOSITY"] = "debug" + elif getattr(ns, "quiet", False): + os.environ["ZYRA_VERBOSITY"] = "quiet" + if getattr(ns, "trace", False): + os.environ["ZYRA_SHELL_TRACE"] = "1" + configure_logging_from_env() + time_fields = ns.time_fields if ns.time_fields else ["LocalTime"] + time_formats = ns.time_formats if ns.time_formats else None + fallback_date = None + if ns.fallback_date: + try: + fallback_date = datetime.strptime(ns.fallback_date, "%Y-%m-%d").date() + except ValueError as exc: # pragma: no cover - CLI validation + raise SystemExit( + f"Invalid --fallback-date: {ns.fallback_date}" + ) from exc + try: + write_shapefile_geojson( + ns.input, + ns.output, + indent=ns.indent, + time_fields=time_fields, + time_formats=time_formats, + timezone_name=ns.timezone, + year_field=ns.year_field, + month_field=ns.month_field, + day_field=ns.day_field, + default_year=ns.default_year, + fallback_date=fallback_date, + local_time_property=ns.local_time_property, + utc_time_property=ns.utc_time_property, + ) + except FileNotFoundError as exc: + raise SystemExit(str(exc)) from exc + return 0 + + p_sf = subparsers.add_parser( + "shapefile-to-geojson", + help="Convert shapefile polygons/polylines to GeoJSON with optional time normalization", + description=( + "Convert a shapefile to GeoJSON. Optionally normalize local time fields into ISO timestamps using timezone hints." + ), + ) + p_sf.add_argument("--input", required=True, help="Path to input .shp file") + p_sf.add_argument( + "--time-field", + dest="time_fields", + action="append", + help="Field containing local time strings (repeatable)", + ) + p_sf.add_argument( + "--time-format", + dest="time_formats", + action="append", + help="Custom strptime format for local time (repeatable)", + ) + p_sf.add_argument( + "--timezone", + default="UTC", + help="Timezone name for local time parsing (default: UTC)", + ) + p_sf.add_argument( + "--year-field", + default="Year", + help="Field containing year values used when time strings omit a year", + ) + p_sf.add_argument( + "--month-field", + default="Month", + help="Field containing month values used when time strings omit a month", + ) + p_sf.add_argument( + "--day-field", + default="Day", + help="Field containing day values used when time strings omit a day", + ) + p_sf.add_argument( + "--default-year", + type=int, + help="Fallback year when no year field is present", + ) + p_sf.add_argument( + "--fallback-date", + help="Fallback date (YYYY-MM-DD) when month/day are missing", + ) + p_sf.add_argument( + "--local-time-property", + default="time_local_iso", + help="Output property name for normalized local timestamp", + ) + p_sf.add_argument( + "--utc-time-property", + default="time_utc_iso", + help="Output property name for normalized UTC timestamp", + ) + p_sf.add_argument( + "--indent", + type=int, + default=2, + help="Indentation level for GeoJSON output", + ) + add_output_option(p_sf) + p_sf.add_argument( + "--verbose", action="store_true", help="Verbose logging for this command" + ) + p_sf.add_argument( + "--quiet", action="store_true", help="Quiet logging for this command" + ) + p_sf.add_argument( + "--trace", + action="store_true", + help="Shell-style trace of key steps and external commands", + ) + p_sf.set_defaults(func=_cmd_shapefile_to_geojson) + + def _cmd_csv_to_geojson(ns: argparse.Namespace) -> int: + if getattr(ns, "verbose", False): + os.environ["ZYRA_VERBOSITY"] = "debug" + elif getattr(ns, "quiet", False): + os.environ["ZYRA_VERBOSITY"] = "quiet" + if getattr(ns, "trace", False): + os.environ["ZYRA_SHELL_TRACE"] = "1" + configure_logging_from_env() + local_fields = ns.local_time_fields if ns.local_time_fields else ["Time (CDT)"] + utc_fields = ns.utc_time_fields if ns.utc_time_fields else ["Time (Z)"] + time_formats = ns.time_formats if ns.time_formats else None + event_date = None + if ns.event_date: + try: + event_date = datetime.strptime(ns.event_date, "%Y-%m-%d").date() + except ValueError as exc: # pragma: no cover - CLI validation + raise SystemExit(f"Invalid --event-date: {ns.event_date}") from exc + try: + write_csv_points_geojson( + ns.input, + ns.output, + indent=ns.indent, + lat_field=ns.lat_field, + lon_field=ns.lon_field, + local_time_fields=local_fields, + utc_time_fields=utc_fields, + time_formats=time_formats, + timezone_name=ns.timezone, + event_date=event_date, + local_time_property=ns.local_time_property, + utc_time_property=ns.utc_time_property, + ) + except FileNotFoundError as exc: + raise SystemExit(str(exc)) from exc + return 0 + + p_csv = subparsers.add_parser( + "csv-to-geojson", + help="Convert CSV latitude/longitude rows to GeoJSON points", + description=( + "Convert a tabular CSV with latitude/longitude columns into GeoJSON points, optionally normalizing local timestamps." + ), + ) + p_csv.add_argument("--input", required=True, help="Path to CSV file") + p_csv.add_argument( + "--lat-field", + default="Lat", + help="Column containing latitude values (default: Lat)", + ) + p_csv.add_argument( + "--lon-field", + default="Lon", + help="Column containing longitude values (default: Lon)", + ) + p_csv.add_argument( + "--local-time-field", + dest="local_time_fields", + action="append", + help="Column containing local time strings (repeatable)", + ) + p_csv.add_argument( + "--utc-time-field", + dest="utc_time_fields", + action="append", + help="Column containing UTC time strings (HH:MM, repeatable)", + ) + p_csv.add_argument( + "--time-format", + dest="time_formats", + action="append", + help="Custom strptime format for local time strings (repeatable)", + ) + p_csv.add_argument( + "--timezone", + default="UTC", + help="Local timezone name for interpreting local time strings", + ) + p_csv.add_argument( + "--event-date", + help="Event date (YYYY-MM-DD) used when local/UTC fields omit a date", + ) + p_csv.add_argument( + "--local-time-property", + default="time_local_iso", + help="Output property name for normalized local timestamp", + ) + p_csv.add_argument( + "--utc-time-property", + default="time_utc_iso", + help="Output property name for normalized UTC timestamp", + ) + p_csv.add_argument( + "--indent", + type=int, + default=2, + help="Indentation level for GeoJSON output", + ) + add_output_option(p_csv) + p_csv.add_argument( + "--verbose", action="store_true", help="Verbose logging for this command" + ) + p_csv.add_argument( + "--quiet", action="store_true", help="Quiet logging for this command" + ) + p_csv.add_argument( + "--trace", + action="store_true", + help="Shell-style trace of key steps and external commands", + ) + p_csv.set_defaults(func=_cmd_csv_to_geojson) diff --git a/src/zyra/transform/geospatial.py b/src/zyra/transform/geospatial.py new file mode 100644 index 00000000..d4124f6d --- /dev/null +++ b/src/zyra/transform/geospatial.py @@ -0,0 +1,355 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Utilities for preparing geospatial datasets (shapefiles, CSV points) for animation.""" + +from __future__ import annotations + +import csv +import json +import math +from datetime import date, datetime, timezone +from pathlib import Path +from typing import Any, Iterable, Sequence + +import shapefile # pyshp + +try: # Python 3.9+ + from zoneinfo import ZoneInfo +except ImportError: # pragma: no cover - fallback for very old interpreters + from backports.zoneinfo import ZoneInfo # type: ignore + +from zyra.utils.io_utils import open_output + +_DEFAULT_SHAPE_TIME_FORMATS: Sequence[str] = ( + "%m/%d %I:%M %p", + "%m/%d %I%M %p", + "%m/%d %H:%M", + "%m/%d %H%M", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %I:%M %p", +) + +_DEFAULT_CSV_TIME_FORMATS: Sequence[str] = ( + "%I:%M:%S %p", + "%I:%M %p", + "%H:%M:%S", + "%H:%M", +) + + +def _coerce_int(value: Any) -> int | None: + try: + if value is None: + return None + if isinstance(value, float) and math.isnan(value): + return None + return int(value) + except (ValueError, TypeError): + return None + + +def _prepare_fallback_datetime( + *, + year: int | None, + month: int | None, + day: int | None, + fallback_date: date | None, +) -> datetime | None: + base_date = fallback_date + if base_date is None: + if year is None and month is None and day is None: + return None + year = year if year is not None else 1900 + month = month if month is not None else 1 + day = day if day is not None else 1 + try: + base_date = date(year, month, day) + except ValueError: + return None + return datetime.combine(base_date, datetime.min.time()) + + +def _parse_local_datetime( + value: str, + *, + timezone_name: str, + formats: Sequence[str], + fallback_dt: datetime | None, +) -> tuple[str | None, str | None]: + value = (value or "").strip() + if not value: + return None, None + zone = ZoneInfo(timezone_name) + for fmt in formats: + try: + parsed = datetime.strptime(value, fmt) + except ValueError: + continue + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=zone) + else: # pragma: no cover - rarely used, but handle tz-aware strings + parsed = parsed.astimezone(zone) + # Patch in fallback components when strptime supplied defaults + if fallback_dt is not None: + if parsed.year == 1900: + parsed = parsed.replace(year=fallback_dt.year) + if parsed.month == 1 and parsed.day == 1: + parsed = parsed.replace( + month=fallback_dt.month, + day=fallback_dt.day, + ) + # Avoid propagating 1900-01-01 without fallback context + if parsed.year == 1900 and fallback_dt is None: + continue + local_iso = parsed.isoformat() + utc_iso = parsed.astimezone(timezone.utc).isoformat() + return local_iso, utc_iso + return None, None + + +def shapefile_to_geojson( + input_path: str | Path, + *, + time_fields: Iterable[str] | None = None, + time_formats: Sequence[str] | None = None, + timezone_name: str = "UTC", + year_field: str | None = None, + month_field: str | None = None, + day_field: str | None = None, + default_year: int | None = None, + fallback_date: date | None = None, + local_time_property: str = "time_local_iso", + utc_time_property: str = "time_utc_iso", +) -> dict[str, Any]: + """Convert a shapefile into a GeoJSON FeatureCollection. + + Parameters enable basic timestamp normalization by specifying which fields + contain local time strings (``time_fields``) and optional contextual clues + (`year_field`, `month_field`, `day_field`, or ``fallback_date``). + """ + + shp_path = Path(input_path) + if not shp_path.exists(): + raise FileNotFoundError(f"Shapefile not found: {shp_path}") + + time_fields = tuple(time_fields or ("LocalTime",)) + time_formats = time_formats or _DEFAULT_SHAPE_TIME_FORMATS + + reader = shapefile.Reader(str(shp_path)) + field_names = [f[0] for f in reader.fields[1:]] # skip deletion flag + + features: list[dict[str, Any]] = [] + for idx in range(len(reader)): + record = reader.record(idx) + shape = reader.shape(idx) + props: dict[str, Any] = {} + for f_idx, name in enumerate(field_names): + value = record[f_idx] + if isinstance(value, float) and math.isnan(value): + value = None + props[name] = value + + local_str: str | None = None + for candidate in time_fields: + if candidate in props and props[candidate]: + local_str = str(props[candidate]) + break + + year_val = _coerce_int(props.get(year_field)) if year_field else default_year + month_val = _coerce_int(props.get(month_field)) if month_field else None + day_val = _coerce_int(props.get(day_field)) if day_field else None + fallback_dt = _prepare_fallback_datetime( + year=year_val, + month=month_val, + day=day_val, + fallback_date=fallback_date, + ) + local_iso, utc_iso = (None, None) + if local_str is not None: + local_iso, utc_iso = _parse_local_datetime( + local_str, + timezone_name=timezone_name, + formats=time_formats, + fallback_dt=fallback_dt, + ) + props[local_time_property] = local_iso + props[utc_time_property] = utc_iso + + geometry = getattr(shape, "__geo_interface__", None) + if not geometry: + geometry = { + "type": "Polygon", + "coordinates": shape.points, + } + feature: dict[str, Any] = { + "type": "Feature", + "geometry": geometry, + "properties": props, + } + bbox = getattr(shape, "bbox", None) + if bbox: + feature["bbox"] = [float(v) for v in bbox] + features.append(feature) + + collection: dict[str, Any] = { + "type": "FeatureCollection", + "features": features, + "metadata": { + "source": str(shp_path), + "feature_count": len(features), + "timezone": timezone_name, + "time_fields": list(time_fields), + }, + } + if getattr(reader, "bbox", None): + collection["bbox"] = [float(v) for v in reader.bbox] + return collection + + +def write_shapefile_geojson( + input_path: str | Path, + output_path: str | Path, + *, + indent: int = 2, + **kwargs: Any, +) -> None: + payload = shapefile_to_geojson(input_path, **kwargs) + with open_output(str(output_path)) as fh: + fh.write(json.dumps(payload, indent=indent).encode("utf-8")) + + +def csv_points_to_geojson( + input_csv: str | Path, + *, + lat_field: str = "Lat", + lon_field: str = "Lon", + local_time_fields: Iterable[str] | None = None, + utc_time_fields: Iterable[str] | None = None, + time_formats: Sequence[str] | None = None, + timezone_name: str = "UTC", + event_date: date | None = None, + local_time_property: str = "time_local_iso", + utc_time_property: str = "time_utc_iso", +) -> dict[str, Any]: + """Convert a CSV table with latitude/longitude columns to GeoJSON points.""" + + csv_path = Path(input_csv) + if not csv_path.exists(): + raise FileNotFoundError(f"CSV file not found: {csv_path}") + + local_time_fields = tuple(local_time_fields or ("Time (CDT)",)) + utc_time_fields = tuple(utc_time_fields or ("Time (Z)",)) + time_formats = time_formats or _DEFAULT_CSV_TIME_FORMATS + + features: list[dict[str, Any]] = [] + with csv_path.open(newline="", encoding="utf-8") as fh: + reader = csv.DictReader(fh) + for row in reader: + lat_val = row.get(lat_field) + lon_val = row.get(lon_field) + if not lat_val or not lon_val: + continue + try: + lat = float(lat_val) + lon = float(lon_val) + except ValueError: + continue + + local_str: str | None = None + for field in local_time_fields: + value = row.get(field) + if value: + local_str = value + break + + fallback_dt = None + if event_date is not None: + fallback_dt = datetime.combine(event_date, datetime.min.time()) + + local_iso, utc_iso = (None, None) + if local_str: + local_iso, utc_iso = _parse_local_datetime( + local_str, + timezone_name=timezone_name, + formats=time_formats, + fallback_dt=fallback_dt, + ) + + if utc_iso is None: + for field in utc_time_fields: + value = (row.get(field) or "").strip() + if not value: + continue + try: + parsed = datetime.strptime(value, "%H:%M") + except ValueError: + continue + if event_date is None: + break + parsed = parsed.replace( + year=event_date.year, + month=event_date.month, + day=event_date.day, + tzinfo=timezone.utc, + ) + utc_iso = parsed.isoformat() + if local_iso is None: + local_iso = parsed.astimezone( + ZoneInfo(timezone_name) + ).isoformat() + break + + props = dict(row) + props[local_time_property] = local_iso + props[utc_time_property] = utc_iso + + features.append( + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [lon, lat], + }, + "properties": props, + } + ) + + bbox = None + if features: + lons = [feat["geometry"]["coordinates"][0] for feat in features] + lats = [feat["geometry"]["coordinates"][1] for feat in features] + bbox = [min(lons), min(lats), max(lons), max(lats)] + + return { + "type": "FeatureCollection", + "features": features, + "bbox": bbox, + "metadata": { + "source": str(csv_path), + "feature_count": len(features), + "timezone": timezone_name, + "lat_field": lat_field, + "lon_field": lon_field, + "local_time_fields": list(local_time_fields), + "utc_time_fields": list(utc_time_fields), + }, + } + + +def write_csv_points_geojson( + input_csv: str | Path, + output_path: str | Path, + *, + indent: int = 2, + **kwargs: Any, +) -> None: + payload = csv_points_to_geojson(input_csv, **kwargs) + with open_output(str(output_path)) as fh: + fh.write(json.dumps(payload, indent=indent).encode("utf-8")) + + +__all__ = [ + "shapefile_to_geojson", + "write_shapefile_geojson", + "csv_points_to_geojson", + "write_csv_points_geojson", +] diff --git a/src/zyra/transform/raster.py b/src/zyra/transform/raster.py new file mode 100644 index 00000000..99da973a --- /dev/null +++ b/src/zyra/transform/raster.py @@ -0,0 +1,178 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Raster transform helpers (e.g., GeoTIFF → Cloud Optimized GeoTIFF).""" + +from __future__ import annotations + +from pathlib import Path +from typing import Iterable + +import rasterio +from rasterio.crs import CRS +from rasterio.enums import Resampling +from rasterio.errors import RasterioIOError +from rasterio.warp import calculate_default_transform, reproject + +DEFAULT_BLOCKSIZE = 512 +DEFAULT_COMPRESSION = "DEFLATE" + + +def _auto_overview_levels(width: int, height: int) -> tuple[int, ...]: + levels: list[int] = [] + max_dim = max(width, height) + factor = 2 + while max_dim / factor > 256 and factor <= 512: + levels.append(factor) + factor *= 2 + return tuple(levels) + + +def _detect_predictor(dtypes: Iterable[str]) -> int | None: + """Return a suitable predictor value for the dataset.""" + + try: + import numpy + except ( + ModuleNotFoundError + ): # pragma: no cover - numpy is a required dep via rasterio + return None + + predictor: int | None = None + for dtype in dtypes: + if numpy.issubdtype(numpy.dtype(dtype), numpy.floating): + predictor = 3 + break + if numpy.issubdtype(numpy.dtype(dtype), numpy.integer): + predictor = 2 + return predictor + + +def convert_geotiff_to_cog( + input_path: str | Path, + output_path: str | Path, + *, + dst_crs: CRS | None = None, + resampling: Resampling = Resampling.bilinear, + overview_levels: Iterable[int] | None = None, + overview_resampling: Resampling | None = None, + blocksize: int = DEFAULT_BLOCKSIZE, + compression: str = DEFAULT_COMPRESSION, + predictor: int | None = None, + bigtiff: str | None = "IF_SAFER", + num_threads: str | None = None, + overwrite: bool = False, +) -> Path: + """Convert a GeoTIFF into a Cloud Optimized GeoTIFF. + + Parameters mirror the CLI, covering reprojection, tiling, and overview + generation. The function raises ``FileExistsError`` if the output already + exists and ``overwrite`` is ``False``. + """ + + src_path = Path(input_path) + if not src_path.exists(): + raise FileNotFoundError(f"GeoTIFF not found: {src_path}") + + dst_path = Path(output_path) + if dst_path.exists() and not overwrite: + raise FileExistsError(f"Output already exists: {dst_path}") + + dst_path.parent.mkdir(parents=True, exist_ok=True) + + try: + src = rasterio.open(src_path) + except RasterioIOError as exc: # pragma: no cover - surface upstream error + raise RasterioIOError(str(exc)) from exc + + with src: + src_crs = src.crs + if dst_crs is None: + if src_crs is None: + raise ValueError( + "Source dataset has no CRS; specify --dst-crs to define the target reference system" + ) + dst_crs_obj = src_crs + else: + dst_crs_obj = CRS.from_user_input(dst_crs) + + if src_crs is None and dst_crs_obj is not None and dst_crs_obj != src_crs: + # We cannot reproject without a source CRS; treat as metadata assignment only. + need_reproject = False + else: + need_reproject = src_crs is not None and src_crs != dst_crs_obj + + if need_reproject: + transform, width, height = calculate_default_transform( + src_crs, + dst_crs_obj, + src.width, + src.height, + *src.bounds, + ) + else: + transform = src.transform + width = src.width + height = src.height + + profile = src.profile.copy() + profile.update( + driver="GTiff", + height=height, + width=width, + transform=transform, + crs=dst_crs_obj, + tiled=True, + blockxsize=blocksize, + blockysize=blocksize, + compress=compression, + bigtiff=bigtiff, + ) + if num_threads: + profile["NUM_THREADS"] = num_threads + else: + profile.pop("NUM_THREADS", None) + + if predictor is None: + predictor = _detect_predictor(src.dtypes) + if predictor: + profile["predictor"] = predictor + + if overview_levels: + levels = tuple( + sorted(int(level) for level in overview_levels if int(level) > 1) + ) + else: + levels = _auto_overview_levels(width, height) + + overview_resampling = overview_resampling or resampling + + with rasterio.open(dst_path, "w", **profile) as dst: + for idx in range(1, src.count + 1): + if need_reproject: + reproject( + source=rasterio.band(src, idx), + destination=rasterio.band(dst, idx), + src_transform=src.transform, + src_crs=src_crs, + dst_transform=transform, + dst_crs=dst_crs_obj, + resampling=resampling, + src_nodata=src.nodata, + dst_nodata=profile.get("nodata"), + ) + else: + data = src.read(idx) + dst.write(data, idx) + + if levels: + dst.build_overviews(levels, overview_resampling) + dst.update_tags(ns="rio_overview", resampling=overview_resampling.name) + + # Ensure overviews were flushed; reopen in read mode to validate + with rasterio.open(dst_path) as check_ds: + if levels and not check_ds.overviews(1): + raise RuntimeError("Overview generation failed for COG output") + + return dst_path + + +__all__ = ["convert_geotiff_to_cog"] diff --git a/src/zyra/utils/cli_helpers.py b/src/zyra/utils/cli_helpers.py index cbfba098..6cb52df7 100644 --- a/src/zyra/utils/cli_helpers.py +++ b/src/zyra/utils/cli_helpers.py @@ -4,13 +4,35 @@ import contextlib import re import tempfile +from pathlib import Path from typing import Iterable, Iterator +from urllib.parse import urlparse + +import requests from .io_utils import open_input # re-export def read_all_bytes(path_or_dash: str) -> bytes: - """Read all bytes from a path or '-' (stdin).""" + """Read all bytes from a file path, URL, or '-' (stdin).""" + + parsed = urlparse(path_or_dash) + if parsed.scheme in {"http", "https"}: + resp = requests.get(path_or_dash, timeout=60) + resp.raise_for_status() + return resp.content + + if parsed.scheme and parsed.scheme != "file": + with open_input(path_or_dash) as f: + return f.read() + + try: + p = Path(path_or_dash) + except Exception: + p = None + if p and p.exists(): + return p.read_bytes() + with open_input(path_or_dash) as f: return f.read() diff --git a/src/zyra/utils/iso8601.py b/src/zyra/utils/iso8601.py index 83695075..55f825bc 100644 --- a/src/zyra/utils/iso8601.py +++ b/src/zyra/utils/iso8601.py @@ -9,6 +9,12 @@ import re from datetime import datetime, timedelta, timezone +from typing import Any + +try: # NumPy is an optional dependency in a few lightweight environments + import numpy as _np +except Exception: # pragma: no cover - fallback when numpy unavailable + _np = None def iso_to_ms(s: str) -> int: @@ -24,6 +30,48 @@ def iso_to_ms(s: str) -> int: return int(dt.timestamp() * 1000) +def to_datetime(value: Any) -> datetime | None: + """Coerce ``value`` into a timezone-aware ``datetime`` where possible. + + Accepts ``datetime`` objects (naive assumed UTC), ISO strings with optional + ``Z`` suffix, and ``numpy.datetime64`` values. Returns ``None`` when the + input is empty or cannot be interpreted as a timestamp. + """ + + if value is None: + return None + if isinstance(value, datetime): + return value if value.tzinfo else value.replace(tzinfo=timezone.utc) + if isinstance(value, str): + token = value.strip() + if not token: + return None + if token.endswith("Z"): + token = token[:-1] + "+00:00" + try: + dt = datetime.fromisoformat(token) + except ValueError: + for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"): + try: + dt = datetime.strptime(token, fmt) + break + except ValueError: + continue + else: + return None + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) + if _np is not None and isinstance(value, _np.datetime64): # pragma: no cover + # Use nanosecond precision to preserve sub-second detail when present + ts = value.astype("datetime64[ns]").astype("int64") + return datetime.fromtimestamp(ts / 1_000_000_000, tz=timezone.utc) + if hasattr(value, "item"): + try: + return to_datetime(value.item()) + except Exception: + return None + return None + + # Supported duration subset: P[nD]T[nH][nM][nS] # Examples: PT30M, PT2H, PT1H30M, P1DT30M # Notes: diff --git a/src/zyra/visualization/animate_manager.py b/src/zyra/visualization/animate_manager.py index 674f79b3..d625e321 100644 --- a/src/zyra/visualization/animate_manager.py +++ b/src/zyra/visualization/animate_manager.py @@ -8,13 +8,17 @@ from __future__ import annotations from dataclasses import asdict, dataclass +from datetime import datetime, timedelta, timezone from io import BytesIO from pathlib import Path from typing import Any, Sequence +from zyra.utils.iso8601 import to_datetime + from .base import Renderer from .contour_manager import ContourManager from .heatmap_manager import HeatmapManager +from .overlays import OverlayRenderer, OverlaySpec, build_overlay_datasets from .styles import DEFAULT_EXTENT, FIGURE_DPI from .vector_field_manager import VectorFieldManager @@ -138,6 +142,11 @@ def render(self, data: Any = None, **kwargs: Any): import matplotlib.pyplot as plt import numpy as np + overlay_specs: list[OverlaySpec] = kwargs.pop("overlay_specs", None) or [] + overlay_time_map: dict[str, str] = kwargs.pop("overlay_time_map", None) or {} + overlay_time_default: str | None = kwargs.pop("overlay_time_default", None) + overlay_time_tolerance = int(kwargs.pop("overlay_time_tolerance", 900) or 900) + width = int(kwargs.get("width", 1024)) height = int(kwargs.get("height", 512)) dpi = int(kwargs.get("dpi", FIGURE_DPI)) @@ -172,6 +181,10 @@ def render(self, data: Any = None, **kwargs: Any): output_dir.mkdir(parents=True, exist_ok=True) frames: list[FrameInfo] = [] + if overlay_specs and mode not in ("heatmap", "contour"): + raise ValueError( + "Overlays are supported only for heatmap and contour modes" + ) if mode == "vector": import numpy as np @@ -277,13 +290,35 @@ def render(self, data: Any = None, **kwargs: Any): warn_if_mismatch(in_crs, reproject=reproject, context="animate") except Exception: pass + frame_times_dt: list[datetime] = [] + base_start = datetime(1970, 1, 1, tzinfo=timezone.utc) + for idx in range(stack.shape[0]): + ts_val = timestamps[idx] if idx < len(timestamps) else None + dt = to_datetime(ts_val) + if dt is None: + dt = base_start + timedelta(seconds=idx) + frame_times_dt.append(dt) + + overlay_renderer: OverlayRenderer | None = None + overlay_manifest: list[dict[str, str]] | None = None + if overlay_specs: + datasets = build_overlay_datasets( + overlay_specs, + frame_times=frame_times_dt, + default_time_key=overlay_time_default, + time_keys=overlay_time_map, + tolerance=timedelta(seconds=overlay_time_tolerance), + ) + overlay_renderer = OverlayRenderer(datasets) + overlay_manifest = overlay_renderer.describe() + for i in range(stack.shape[0]): arr = stack[i] if mode == "contour": mgr = ContourManager( basemap=self.basemap, extent=self.extent, cmap=cmap, filled=True ) - mgr.render( + fig = mgr.render( arr, width=width, height=height, @@ -306,7 +341,7 @@ def render(self, data: Any = None, **kwargs: Any): mgr = HeatmapManager( basemap=self.basemap, extent=self.extent, cmap=cmap ) - mgr.render( + fig = mgr.render( arr, width=width, height=height, @@ -327,6 +362,12 @@ def render(self, data: Any = None, **kwargs: Any): tile_zoom=tile_zoom, ) + if overlay_renderer and hasattr(mgr, "_fig"): + fig_obj = getattr(mgr, "_fig", None) + if fig_obj and fig_obj.axes: + ax = fig_obj.axes[0] + overlay_renderer.draw(ax, i) + fname = self.filename_template.format(index=i) fpath = output_dir / fname mgr.save(str(fpath)) @@ -344,6 +385,8 @@ def render(self, data: Any = None, **kwargs: Any): "count": len(frames), "frames": [asdict(f) for f in frames], } + if overlay_specs: + self._manifest["overlays"] = overlay_manifest or [] return self._manifest def save(self, output_path: str | None = None, *, as_buffer: bool = False): diff --git a/src/zyra/visualization/cli_animate.py b/src/zyra/visualization/cli_animate.py index bfd67f2e..9ea996f7 100644 --- a/src/zyra/visualization/cli_animate.py +++ b/src/zyra/visualization/cli_animate.py @@ -8,6 +8,7 @@ from zyra.utils.cli_helpers import configure_logging_from_env from zyra.visualization.cli_utils import features_from_ns, resolve_basemap_ref +from zyra.visualization.overlays import OverlaySpec, parse_overlay_spec def handle_animate(ns) -> int: @@ -19,8 +20,29 @@ def handle_animate(ns) -> int: if getattr(ns, "trace", False): os.environ["ZYRA_SHELL_TRACE"] = "1" configure_logging_from_env() + overlay_args = getattr(ns, "overlays", None) or [] + overlay_specs: list[OverlaySpec] = [] + overlay_time_default: str | None = None + overlay_time_map: dict[str, str] = {} + if overlay_args: + for item in overlay_args: + spec = parse_overlay_spec(item) + overlay_specs.append(spec) + overlay_time_default, overlay_time_map = _parse_overlay_time_keys( + getattr(ns, "overlay_time_keys", None) + ) + for spec in overlay_specs: + if spec.time_key is None: + spec.time_key = overlay_time_map.get(spec.alias, overlay_time_default) + overlay_time_tolerance = int(getattr(ns, "overlay_time_tolerance", 900) or 900) + + if overlay_specs and getattr(ns, "inputs", None): + raise SystemExit( + "--overlay is not supported with --inputs batch mode; specify a single --input" + ) + # Batch mode for animate: --inputs with --output-dir - if getattr(ns, "inputs", None): + if getattr(ns, "inputs", None) and not overlay_specs: if not ns.output_dir: raise SystemExit("--output-dir is required when using --inputs") from zyra.processing.video_processor import VideoProcessor @@ -203,6 +225,14 @@ def handle_animate(ns) -> int: mode=ns.mode, basemap=bmap, extent=ns.extent, output_dir=ns.output_dir ) features = features_from_ns(ns) + overlay_kwargs = {} + if overlay_specs: + overlay_kwargs = { + "overlay_specs": overlay_specs, + "overlay_time_map": overlay_time_map, + "overlay_time_default": overlay_time_default, + "overlay_time_tolerance": overlay_time_tolerance, + } manifest = mgr.render( input_path=ns.input, var=ns.var, @@ -237,6 +267,7 @@ def handle_animate(ns) -> int: # CRS crs=getattr(ns, "crs", None), reproject=getattr(ns, "reproject", False), + **overlay_kwargs, ) out = mgr.save(ns.manifest) if out: @@ -356,3 +387,23 @@ def _build_ffmpeg_grid_args( ] ) return args + + +def _parse_overlay_time_keys(values) -> tuple[str | None, dict[str, str]]: + default_key: str | None = None + mapping: dict[str, str] = {} + if not values: + return default_key, mapping + for raw in values: + token = (raw or "").strip() + if not token: + continue + if "=" in token: + alias, key = token.split("=", 1) + alias = alias.strip() + key = key.strip() + if alias and key: + mapping[alias] = key + else: + default_key = token + return default_key, mapping diff --git a/src/zyra/visualization/cli_register.py b/src/zyra/visualization/cli_register.py index 12d3411a..b8cfd7f4 100644 --- a/src/zyra/visualization/cli_register.py +++ b/src/zyra/visualization/cli_register.py @@ -289,6 +289,25 @@ def register_cli(subparsers: Any) -> None: p_anim.add_argument("--units") p_anim.add_argument("--show-timestamp", action="store_true") p_anim.add_argument("--timestamps-csv") + p_anim.add_argument( + "--overlay", + action="append", + dest="overlays", + help="Overlay specification: [alias=]path[:style] (repeatable)", + ) + p_anim.add_argument( + "--overlay-time-key", + action="append", + dest="overlay_time_keys", + help="Overlay time property mapping (alias=property or property)", + ) + p_anim.add_argument( + "--overlay-time-tolerance", + type=int, + default=900, + dest="overlay_time_tolerance", + help="Seconds to allow when matching overlay timestamps (default: 900)", + ) p_anim.add_argument( "--timestamp-loc", choices=["upper_left", "upper_right", "lower_left", "lower_right"], @@ -300,6 +319,13 @@ def register_cli(subparsers: Any) -> None: p_anim.add_argument("--xarray-engine") p_anim.add_argument("--crs") p_anim.add_argument("--reproject", action="store_true") + p_anim.add_argument( + "--features", help="Comma-separated features: coastline,borders,gridlines" + ) + p_anim.add_argument( + "--manifest", + help="Optional manifest output path (JSON); when omitted, manifest is only logged", + ) p_anim.add_argument("--to-video") p_anim.add_argument("--fps", type=int, default=30) p_anim.add_argument( diff --git a/src/zyra/visualization/overlays.py b/src/zyra/visualization/overlays.py new file mode 100644 index 00000000..54ff8ad8 --- /dev/null +++ b/src/zyra/visualization/overlays.py @@ -0,0 +1,340 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Overlay rendering helpers for ``visualize animate``.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any, Iterable, Iterator, MutableMapping, Optional + +import cartopy.crs as ccrs +from matplotlib.axes import Axes + +from zyra.utils.iso8601 import to_datetime + +# ---------------------------------------------------------------------------- +# Data models + + +@dataclass +class OverlayFeature: + geometry: dict[str, Any] + properties: dict[str, Any] + timestamp: Optional[datetime] = None + + +@dataclass +class OverlayDataset: + alias: str + path: Path + style: str + static_features: list[OverlayFeature] = field(default_factory=list) + features_by_index: MutableMapping[int, list[OverlayFeature]] = field( + default_factory=dict + ) + + def features_for_index(self, index: int) -> Iterable[OverlayFeature]: + items = [] + items.extend(self.static_features) + items.extend(self.features_by_index.get(index, [])) + return items + + +@dataclass +class OverlaySpec: + alias: str + path: Path + style: str + time_key: Optional[str] + + +# ---------------------------------------------------------------------------- +# Style registry + + +class OverlayStyleError(ValueError): + pass + + +def _coord_pair(coord: Iterable[Any]) -> tuple[float, float]: + seq = list(coord) + if len(seq) < 2: + raise ValueError("Coordinate must have at least two values") + return float(seq[0]), float(seq[1]) + + +def _iter_points(geometry: dict[str, Any]) -> Iterator[tuple[float, float]]: + geometry = geometry or {} + gtype = geometry.get("type") + if gtype == "Point": + coords = geometry.get("coordinates") + if coords: + try: + yield _coord_pair(coords) + except ValueError: + return + elif gtype == "MultiPoint": + for coords in geometry.get("coordinates", []): + try: + yield _coord_pair(coords) + except ValueError: + continue + elif gtype == "GeometryCollection": + for geom in geometry.get("geometries", []): + yield from _iter_points(geom) + + +def _iter_polygon_rings( + geometry: dict[str, Any], +) -> Iterator[list[tuple[float, float]]]: + gtype = geometry.get("type") + if gtype == "Polygon": + coords = geometry.get("coordinates", []) + if coords: + points = [] + for coord in coords[0]: + try: + points.append(_coord_pair(coord)) + except ValueError: + continue + if points: + yield points + elif gtype == "MultiPolygon": + for polygon in geometry.get("coordinates", []): + if not polygon: + continue + points = [] + for coord in polygon[0]: + try: + points.append(_coord_pair(coord)) + except ValueError: + continue + if points: + yield points + elif gtype == "GeometryCollection": + for geom in geometry.get("geometries", []): + yield from _iter_polygon_rings(geom) + + +def draw_red_dots(ax: Axes, features: Iterable[OverlayFeature]) -> None: + lons: list[float] = [] + lats: list[float] = [] + for feat in features: + for lon, lat in _iter_points(feat.geometry): + lons.append(lon) + lats.append(lat) + if not lons: + return + ax.scatter( + lons, + lats, + transform=ccrs.PlateCarree(), + color="#ff3b30", + edgecolor="white", + linewidth=0.5, + s=24, + zorder=5, + ) + + +def draw_magenta_outline(ax: Axes, features: Iterable[OverlayFeature]) -> None: + for feat in features: + for ring in _iter_polygon_rings(feat.geometry): + if len(ring) < 2: + continue + lons = [lon for lon, _ in ring] + lats = [lat for _, lat in ring] + ax.plot( + lons, + lats, + transform=ccrs.PlateCarree(), + color="#ff00ff", + linewidth=1.2, + linestyle="-", + zorder=4, + ) + + +STYLE_REGISTRY: dict[str, Any] = { + "red-dots": draw_red_dots, + "magenta-outline": draw_magenta_outline, +} + + +def resolve_style(style: str) -> Any: + if style not in STYLE_REGISTRY: + raise OverlayStyleError(f"Unsupported overlay style: {style}") + return STYLE_REGISTRY[style] + + +# ---------------------------------------------------------------------------- +# Loading and assignment + + +def parse_overlay_spec(text: str) -> OverlaySpec: + alias: Optional[str] = None + payload = text + if "=" in text and not text.strip().startswith("/"): + alias, payload = text.split("=", 1) + if ":" in payload: + path_str, style = payload.rsplit(":", 1) + else: + path_str, style = payload, "auto" + path = Path(path_str).expanduser() + if not path.exists(): + raise FileNotFoundError(f"Overlay file not found: {path}") + resolved_alias = alias or path.stem + resolved_style = style if style != "auto" else infer_default_style(path) + return OverlaySpec(resolved_alias, path, resolved_style, None) + + +def infer_default_style(path: Path) -> str: + if path.suffix.lower() in {".geojson", ".json"}: + try: + with path.open("r", encoding="utf-8") as fp: + data = json.load(fp) + geom_type = data.get("features", [{}])[0].get("geometry", {}).get("type") + if geom_type and "point" in geom_type.lower(): + return "red-dots" + except Exception: + pass + return "magenta-outline" + return "red-dots" + + +def build_overlay_datasets( + specs: list[OverlaySpec], + frame_times: list[datetime], + *, + default_time_key: Optional[str] = None, + time_keys: Optional[dict[str, str]] = None, + tolerance: timedelta = timedelta(minutes=15), +) -> list[OverlayDataset]: + datasets: list[OverlayDataset] = [] + for spec in specs: + time_key = spec.time_key + if time_key is None and time_keys: + time_key = time_keys.get(spec.alias) + if time_key is None: + time_key = default_time_key + dataset = load_overlay_dataset( + spec, + frame_times=frame_times, + time_key=time_key, + tolerance=tolerance, + ) + datasets.append(dataset) + return datasets + + +def load_overlay_dataset( + spec: OverlaySpec, + *, + frame_times: list[datetime], + time_key: Optional[str], + tolerance: timedelta, +) -> OverlayDataset: + with spec.path.open("r", encoding="utf-8") as fp: + payload = json.load(fp) + features_raw = extract_features(payload) + dataset = OverlayDataset(alias=spec.alias, path=spec.path, style=spec.style) + assign_features(dataset, features_raw, frame_times, time_key, tolerance) + return dataset + + +def extract_features(obj: dict[str, Any]) -> list[OverlayFeature]: + features: list[OverlayFeature] = [] + if obj.get("type") == "FeatureCollection": + for feat in obj.get("features", []): + geometry = feat.get("geometry") or {} + properties = feat.get("properties") or {} + features.append( + OverlayFeature( + geometry=geometry, + properties=properties, + ) + ) + elif obj.get("type") == "Feature": + features.append( + OverlayFeature( + geometry=obj.get("geometry") or {}, + properties=obj.get("properties") or {}, + ) + ) + else: + raise ValueError("Overlay data must be a GeoJSON FeatureCollection or Feature") + return features + + +def assign_features( + dataset: OverlayDataset, + features: list[OverlayFeature], + frame_times: list[datetime], + time_key: Optional[str], + tolerance: timedelta, +) -> None: + if not frame_times: + dataset.static_features.extend(features) + return + for feat in features: + timestamp = None + if time_key: + raw = feat.properties.get(time_key) + timestamp = to_datetime(raw) if raw is not None else None + feat.timestamp = timestamp + if timestamp is None: + dataset.static_features.append(feat) + continue + idx = _match_frame_index(frame_times, timestamp, tolerance) + if idx is None: + dataset.static_features.append(feat) + else: + dataset.features_by_index.setdefault(idx, []).append(feat) + + +def _match_frame_index( + frame_times: list[datetime], + target: datetime, + tolerance: timedelta, +) -> Optional[int]: + best_idx = None + best_delta = None + for idx, frame_time in enumerate(frame_times): + delta = abs(frame_time - target) + if best_delta is None or delta < best_delta: + best_delta = delta + best_idx = idx + if best_idx is None or best_delta is None: + return None + if best_delta > tolerance: + return None + return best_idx + + +class OverlayRenderer: + """Draw overlay datasets on Matplotlib axes.""" + + def __init__(self, datasets: list[OverlayDataset]): + self.datasets = datasets + + def draw(self, ax: Axes, frame_index: int) -> None: + for dataset in self.datasets: + style_fn = resolve_style(dataset.style) + features = list(dataset.features_for_index(frame_index)) + if not features: + continue + style_fn(ax, features) + + def describe(self) -> list[dict[str, str]]: + entries: list[dict[str, str]] = [] + for dataset in self.datasets: + entries.append( + { + "alias": dataset.alias, + "path": str(dataset.path), + "style": dataset.style, + } + ) + return entries diff --git a/src/zyra/wizard/zyra_capabilities.json b/src/zyra/wizard/zyra_capabilities.json index 68ada914..4b1ec4cf 100644 --- a/src/zyra/wizard/zyra_capabilities.json +++ b/src/zyra/wizard/zyra_capabilities.json @@ -1967,6 +1967,188 @@ "args_schema": null, "example_args": null }, + "process geotiff-to-cog": { + "description": "Convert one or more GeoTIFF inputs into Cloud Optimized GeoTIFFs (COGs) with optional reprojection, tiled compression, and overview generation.", + "doc": "Convert one or more GeoTIFF inputs into Cloud Optimized GeoTIFFs (COGs) with optional reprojection, tiled compression, and overview generation.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--input-dir", + "--pattern", + "--output", + "--output-dir", + "--name-template", + "--datetime-format", + "--output-datetime-format", + "--dst-crs", + "--resampling", + "--overview-resampling", + "--overview-levels", + "--blocksize", + "--compression", + "--predictor", + "--bigtiff", + "--num-threads", + "--overwrite", + "--dry-run", + "--recursive", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Input GeoTIFF path (repeatable). Directories will be scanned using --pattern", + "path_arg": true, + "type": "path" + }, + "--input-dir": { + "help": "Directory containing GeoTIFF files to convert", + "path_arg": true, + "type": "path" + }, + "--pattern": { + "help": "Glob pattern used when scanning directories (default: *.tif)", + "type": "str", + "default": "*.tif" + }, + "--output": { + "help": "Output path for a single input file", + "path_arg": true, + "type": "path" + }, + "--output-dir": { + "help": "Directory to write outputs (required for multiple inputs)", + "path_arg": true, + "type": "path" + }, + "--name-template": { + "help": "Filename template when writing to --output-dir. Supports {stem}, {name}, {timestamp}", + "type": "str", + "default": "{stem}.tif" + }, + "--datetime-format": "Datetime format to extract timestamps from source filenames (for {timestamp})", + "--output-datetime-format": { + "help": "Strftime pattern used when formatting {timestamp} (default: %Y%m%dT%H%M%SZ)", + "path_arg": true, + "type": "path", + "default": "%Y%m%dT%H%M%SZ" + }, + "--dst-crs": "Destination CRS (e.g., EPSG:4326). Defaults to the source CRS", + "--resampling": { + "help": "Resampling kernel to use when reprojecting (default: bilinear)", + "choices": [ + "average", + "bilinear", + "cubic", + "cubic_spline", + "gauss", + "lanczos", + "max", + "med", + "min", + "mode", + "nearest", + "q1", + "q3", + "rms", + "sum" + ], + "type": "str", + "default": "bilinear" + }, + "--overview-resampling": { + "help": "Resampling kernel to use for overview generation (default: match --resampling)", + "choices": [ + "average", + "bilinear", + "cubic", + "cubic_spline", + "gauss", + "lanczos", + "max", + "med", + "min", + "mode", + "nearest", + "q1", + "q3", + "rms", + "sum" + ], + "type": "str" + }, + "--overview-levels": "Comma-separated overview decimation levels (default: auto)", + "--blocksize": { + "help": "Internal tile size in pixels (default: 512)", + "type": "int", + "default": 512 + }, + "--compression": { + "help": "Compression codec for output tiles (default: DEFLATE)", + "type": "str", + "default": "DEFLATE" + }, + "--predictor": { + "help": "Predictor to use (auto, 1, 2, or 3)", + "type": "str", + "default": "auto" + }, + "--bigtiff": { + "help": "Control BigTIFF creation (default: IF_SAFER)", + "choices": [ + "YES", + "NO", + "IF_NEEDED", + "IF_SAFER" + ], + "type": "str", + "default": "IF_SAFER" + }, + "--num-threads": "Value for GDAL NUM_THREADS option (e.g., ALL_CPUS)", + "--overwrite": { + "help": "Allow overwriting existing outputs", + "type": "bool", + "default": false + }, + "--dry-run": { + "help": "Report planned outputs without writing files", + "type": "bool", + "default": false + }, + "--recursive": { + "help": "Recurse into subdirectories when scanning directories", + "type": "bool", + "default": false + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "process", + "args_schema": null, + "example_args": null + }, "process enrich-metadata": { "description": "Enrich a frames metadata JSON with dataset_id, Vimeo URI, and updated_at; read from file or stdin.", "doc": "Enrich a frames metadata JSON with dataset_id, Vimeo URI, and updated_at; read from file or stdin.", @@ -2222,6 +2404,207 @@ "args_schema": null, "example_args": null }, + "process shapefile-to-geojson": { + "description": "Convert a shapefile to GeoJSON. Optionally normalize local time fields into ISO timestamps using timezone hints.", + "doc": "Convert a shapefile to GeoJSON. Optionally normalize local time fields into ISO timestamps using timezone hints.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--time-field", + "--time-format", + "--timezone", + "--year-field", + "--month-field", + "--day-field", + "--default-year", + "--fallback-date", + "--local-time-property", + "--utc-time-property", + "--indent", + "--output", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Path to input .shp file", + "path_arg": true, + "type": "path", + "required": true + }, + "--time-field": "Field containing local time strings (repeatable)", + "--time-format": "Custom strptime format for local time (repeatable)", + "--timezone": { + "help": "Timezone name for local time parsing (default: UTC)", + "type": "str", + "default": "UTC" + }, + "--year-field": { + "help": "Field containing year values used when time strings omit a year", + "type": "str", + "default": "Year" + }, + "--month-field": { + "help": "Field containing month values used when time strings omit a month", + "type": "str", + "default": "Month" + }, + "--day-field": { + "help": "Field containing day values used when time strings omit a day", + "type": "str", + "default": "Day" + }, + "--default-year": { + "help": "Fallback year when no year field is present", + "type": "int" + }, + "--fallback-date": "Fallback date (YYYY-MM-DD) when month/day are missing", + "--local-time-property": { + "help": "Output property name for normalized local timestamp", + "type": "str", + "default": "time_local_iso" + }, + "--utc-time-property": { + "help": "Output property name for normalized UTC timestamp", + "type": "str", + "default": "time_utc_iso" + }, + "--indent": { + "help": "Indentation level for GeoJSON output", + "type": "int", + "default": 2 + }, + "--output": { + "help": "Output path or '-' for stdout", + "path_arg": true, + "type": "path", + "default": "-" + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "process", + "args_schema": null, + "example_args": null + }, + "process csv-to-geojson": { + "description": "Convert a tabular CSV with latitude/longitude columns into GeoJSON points, optionally normalizing local timestamps.", + "doc": "Convert a tabular CSV with latitude/longitude columns into GeoJSON points, optionally normalizing local timestamps.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--lat-field", + "--lon-field", + "--local-time-field", + "--utc-time-field", + "--time-format", + "--timezone", + "--event-date", + "--local-time-property", + "--utc-time-property", + "--indent", + "--output", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Path to CSV file", + "path_arg": true, + "type": "path", + "required": true + }, + "--lat-field": { + "help": "Column containing latitude values (default: Lat)", + "type": "str", + "default": "Lat" + }, + "--lon-field": { + "help": "Column containing longitude values (default: Lon)", + "type": "str", + "default": "Lon" + }, + "--local-time-field": "Column containing local time strings (repeatable)", + "--utc-time-field": "Column containing UTC time strings (HH:MM, repeatable)", + "--time-format": "Custom strptime format for local time strings (repeatable)", + "--timezone": { + "help": "Local timezone name for interpreting local time strings", + "type": "str", + "default": "UTC" + }, + "--event-date": "Event date (YYYY-MM-DD) used when local/UTC fields omit a date", + "--local-time-property": { + "help": "Output property name for normalized local timestamp", + "type": "str", + "default": "time_local_iso" + }, + "--utc-time-property": { + "help": "Output property name for normalized UTC timestamp", + "type": "str", + "default": "time_utc_iso" + }, + "--indent": { + "help": "Indentation level for GeoJSON output", + "type": "int", + "default": 2 + }, + "--output": { + "help": "Output path or '-' for stdout", + "path_arg": true, + "type": "path", + "default": "-" + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "process", + "args_schema": null, + "example_args": null + }, "visualize heatmap": { "description": "zyra visualize heatmap", "doc": "", @@ -2963,6 +3346,9 @@ "--units", "--show-timestamp", "--timestamps-csv", + "--overlay", + "--overlay-time-key", + "--overlay-time-tolerance", "--timestamp-loc", "--map-type", "--tile-source", @@ -2970,6 +3356,8 @@ "--xarray-engine", "--crs", "--reproject", + "--features", + "--manifest", "--to-video", "--fps", "--grid-mode", @@ -3083,6 +3471,13 @@ "default": false }, "--timestamps-csv": "", + "--overlay": "Overlay specification: [alias=]path[:style] (repeatable)", + "--overlay-time-key": "Overlay time property mapping (alias=property or property)", + "--overlay-time-tolerance": { + "help": "Seconds to allow when matching overlay timestamps (default: 900)", + "type": "int", + "default": 900 + }, "--timestamp-loc": { "help": "", "choices": [ @@ -3116,6 +3511,12 @@ "type": "bool", "default": false }, + "--features": "Comma-separated features: coastline,borders,gridlines", + "--manifest": { + "help": "Optional manifest output path (JSON); when omitted, manifest is only logged", + "path_arg": true, + "type": "path" + }, "--to-video": "", "--fps": { "help": "", @@ -4158,6 +4559,9 @@ "--units", "--show-timestamp", "--timestamps-csv", + "--overlay", + "--overlay-time-key", + "--overlay-time-tolerance", "--timestamp-loc", "--map-type", "--tile-source", @@ -4165,6 +4569,8 @@ "--xarray-engine", "--crs", "--reproject", + "--features", + "--manifest", "--to-video", "--fps", "--grid-mode", @@ -4278,6 +4684,13 @@ "default": false }, "--timestamps-csv": "", + "--overlay": "Overlay specification: [alias=]path[:style] (repeatable)", + "--overlay-time-key": "Overlay time property mapping (alias=property or property)", + "--overlay-time-tolerance": { + "help": "Seconds to allow when matching overlay timestamps (default: 900)", + "type": "int", + "default": 900 + }, "--timestamp-loc": { "help": "", "choices": [ @@ -4311,6 +4724,12 @@ "type": "bool", "default": false }, + "--features": "Comma-separated features: coastline,borders,gridlines", + "--manifest": { + "help": "Optional manifest output path (JSON); when omitted, manifest is only logged", + "path_arg": true, + "type": "path" + }, "--to-video": "", "--fps": { "help": "", @@ -5602,6 +6021,188 @@ "args_schema": null, "example_args": null }, + "transform geotiff-to-cog": { + "description": "Convert one or more GeoTIFF inputs into Cloud Optimized GeoTIFFs (COGs) with optional reprojection, tiled compression, and overview generation.", + "doc": "Convert one or more GeoTIFF inputs into Cloud Optimized GeoTIFFs (COGs) with optional reprojection, tiled compression, and overview generation.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--input-dir", + "--pattern", + "--output", + "--output-dir", + "--name-template", + "--datetime-format", + "--output-datetime-format", + "--dst-crs", + "--resampling", + "--overview-resampling", + "--overview-levels", + "--blocksize", + "--compression", + "--predictor", + "--bigtiff", + "--num-threads", + "--overwrite", + "--dry-run", + "--recursive", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Input GeoTIFF path (repeatable). Directories will be scanned using --pattern", + "path_arg": true, + "type": "path" + }, + "--input-dir": { + "help": "Directory containing GeoTIFF files to convert", + "path_arg": true, + "type": "path" + }, + "--pattern": { + "help": "Glob pattern used when scanning directories (default: *.tif)", + "type": "str", + "default": "*.tif" + }, + "--output": { + "help": "Output path for a single input file", + "path_arg": true, + "type": "path" + }, + "--output-dir": { + "help": "Directory to write outputs (required for multiple inputs)", + "path_arg": true, + "type": "path" + }, + "--name-template": { + "help": "Filename template when writing to --output-dir. Supports {stem}, {name}, {timestamp}", + "type": "str", + "default": "{stem}.tif" + }, + "--datetime-format": "Datetime format to extract timestamps from source filenames (for {timestamp})", + "--output-datetime-format": { + "help": "Strftime pattern used when formatting {timestamp} (default: %Y%m%dT%H%M%SZ)", + "path_arg": true, + "type": "path", + "default": "%Y%m%dT%H%M%SZ" + }, + "--dst-crs": "Destination CRS (e.g., EPSG:4326). Defaults to the source CRS", + "--resampling": { + "help": "Resampling kernel to use when reprojecting (default: bilinear)", + "choices": [ + "average", + "bilinear", + "cubic", + "cubic_spline", + "gauss", + "lanczos", + "max", + "med", + "min", + "mode", + "nearest", + "q1", + "q3", + "rms", + "sum" + ], + "type": "str", + "default": "bilinear" + }, + "--overview-resampling": { + "help": "Resampling kernel to use for overview generation (default: match --resampling)", + "choices": [ + "average", + "bilinear", + "cubic", + "cubic_spline", + "gauss", + "lanczos", + "max", + "med", + "min", + "mode", + "nearest", + "q1", + "q3", + "rms", + "sum" + ], + "type": "str" + }, + "--overview-levels": "Comma-separated overview decimation levels (default: auto)", + "--blocksize": { + "help": "Internal tile size in pixels (default: 512)", + "type": "int", + "default": 512 + }, + "--compression": { + "help": "Compression codec for output tiles (default: DEFLATE)", + "type": "str", + "default": "DEFLATE" + }, + "--predictor": { + "help": "Predictor to use (auto, 1, 2, or 3)", + "type": "str", + "default": "auto" + }, + "--bigtiff": { + "help": "Control BigTIFF creation (default: IF_SAFER)", + "choices": [ + "YES", + "NO", + "IF_NEEDED", + "IF_SAFER" + ], + "type": "str", + "default": "IF_SAFER" + }, + "--num-threads": "Value for GDAL NUM_THREADS option (e.g., ALL_CPUS)", + "--overwrite": { + "help": "Allow overwriting existing outputs", + "type": "bool", + "default": false + }, + "--dry-run": { + "help": "Report planned outputs without writing files", + "type": "bool", + "default": false + }, + "--recursive": { + "help": "Recurse into subdirectories when scanning directories", + "type": "bool", + "default": false + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "transform", + "args_schema": null, + "example_args": null + }, "transform enrich-metadata": { "description": "Enrich a frames metadata JSON with dataset_id, Vimeo URI, and updated_at; read from file or stdin.", "doc": "Enrich a frames metadata JSON with dataset_id, Vimeo URI, and updated_at; read from file or stdin.", @@ -5857,6 +6458,207 @@ "args_schema": null, "example_args": null }, + "transform shapefile-to-geojson": { + "description": "Convert a shapefile to GeoJSON. Optionally normalize local time fields into ISO timestamps using timezone hints.", + "doc": "Convert a shapefile to GeoJSON. Optionally normalize local time fields into ISO timestamps using timezone hints.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--time-field", + "--time-format", + "--timezone", + "--year-field", + "--month-field", + "--day-field", + "--default-year", + "--fallback-date", + "--local-time-property", + "--utc-time-property", + "--indent", + "--output", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Path to input .shp file", + "path_arg": true, + "type": "path", + "required": true + }, + "--time-field": "Field containing local time strings (repeatable)", + "--time-format": "Custom strptime format for local time (repeatable)", + "--timezone": { + "help": "Timezone name for local time parsing (default: UTC)", + "type": "str", + "default": "UTC" + }, + "--year-field": { + "help": "Field containing year values used when time strings omit a year", + "type": "str", + "default": "Year" + }, + "--month-field": { + "help": "Field containing month values used when time strings omit a month", + "type": "str", + "default": "Month" + }, + "--day-field": { + "help": "Field containing day values used when time strings omit a day", + "type": "str", + "default": "Day" + }, + "--default-year": { + "help": "Fallback year when no year field is present", + "type": "int" + }, + "--fallback-date": "Fallback date (YYYY-MM-DD) when month/day are missing", + "--local-time-property": { + "help": "Output property name for normalized local timestamp", + "type": "str", + "default": "time_local_iso" + }, + "--utc-time-property": { + "help": "Output property name for normalized UTC timestamp", + "type": "str", + "default": "time_utc_iso" + }, + "--indent": { + "help": "Indentation level for GeoJSON output", + "type": "int", + "default": 2 + }, + "--output": { + "help": "Output path or '-' for stdout", + "path_arg": true, + "type": "path", + "default": "-" + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "transform", + "args_schema": null, + "example_args": null + }, + "transform csv-to-geojson": { + "description": "Convert a tabular CSV with latitude/longitude columns into GeoJSON points, optionally normalizing local timestamps.", + "doc": "Convert a tabular CSV with latitude/longitude columns into GeoJSON points, optionally normalizing local timestamps.", + "epilog": "", + "groups": [ + { + "title": "options", + "options": [ + "--help", + "--input", + "--lat-field", + "--lon-field", + "--local-time-field", + "--utc-time-field", + "--time-format", + "--timezone", + "--event-date", + "--local-time-property", + "--utc-time-property", + "--indent", + "--output", + "--verbose", + "--quiet", + "--trace" + ] + } + ], + "options": { + "--help": "show this help message and exit", + "--input": { + "help": "Path to CSV file", + "path_arg": true, + "type": "path", + "required": true + }, + "--lat-field": { + "help": "Column containing latitude values (default: Lat)", + "type": "str", + "default": "Lat" + }, + "--lon-field": { + "help": "Column containing longitude values (default: Lon)", + "type": "str", + "default": "Lon" + }, + "--local-time-field": "Column containing local time strings (repeatable)", + "--utc-time-field": "Column containing UTC time strings (HH:MM, repeatable)", + "--time-format": "Custom strptime format for local time strings (repeatable)", + "--timezone": { + "help": "Local timezone name for interpreting local time strings", + "type": "str", + "default": "UTC" + }, + "--event-date": "Event date (YYYY-MM-DD) used when local/UTC fields omit a date", + "--local-time-property": { + "help": "Output property name for normalized local timestamp", + "type": "str", + "default": "time_local_iso" + }, + "--utc-time-property": { + "help": "Output property name for normalized UTC timestamp", + "type": "str", + "default": "time_utc_iso" + }, + "--indent": { + "help": "Indentation level for GeoJSON output", + "type": "int", + "default": 2 + }, + "--output": { + "help": "Output path or '-' for stdout", + "path_arg": true, + "type": "path", + "default": "-" + }, + "--verbose": { + "help": "Verbose logging for this command", + "type": "bool", + "default": false + }, + "--quiet": { + "help": "Quiet logging for this command", + "type": "bool", + "default": false + }, + "--trace": { + "help": "Shell-style trace of key steps and external commands", + "type": "bool", + "default": false + } + }, + "positionals": [], + "domain": "transform", + "args_schema": null, + "example_args": null + }, "search api": { "description": "zyra search api", "doc": "", diff --git a/tests/misc/test_cli_smoke_streaming.py b/tests/misc/test_cli_smoke_streaming.py index bd4a2409..cb6863cb 100644 --- a/tests/misc/test_cli_smoke_streaming.py +++ b/tests/misc/test_cli_smoke_streaming.py @@ -45,6 +45,46 @@ def test_decode_grib2_raw_passthrough_netcdf(monkeypatch, capsysbinary): assert captured.err == b"" +def test_decode_grib2_idx_pattern_uses_http_ranges(monkeypatch, capsysbinary): + from zyra.cli import main + + captured: dict[str, list[str]] = {} + + def fake_get_idx_lines(url, *_, **__): + return [ + "1:0:d=20250314:TMP:2 m above ground:anl:", + "2:1234:d=20250314:MASSDEN:8 m above ground:", + "3:5678:d=20250314:COLMD:entire atmosphere (considered as a single layer):", + ] + + def fake_download_byteranges(url, byte_ranges, *_, **__): + captured["ranges"] = list(byte_ranges) + return b"subset" + + monkeypatch.setattr( + "zyra.connectors.backends.http.get_idx_lines", fake_get_idx_lines + ) + monkeypatch.setattr( + "zyra.connectors.backends.http.download_byteranges", + fake_download_byteranges, + ) + + rc = main( + [ + "process", + "decode-grib2", + "https://example.invalid/hrrr.t12z.wrfsfcf03.grib2", + "--raw", + "--pattern", + ":MASSDEN:8 m above ground:", + ] + ) + assert rc == 0 + out = capsysbinary.readouterr() + assert out.out == b"subset" + assert captured.get("ranges") == ["bytes=1234-5677"] + + def test_extract_variable_stdout_netcdf_simulated(monkeypatch, capsysbinary): # Simulate wgrib2 producing NetCDF bytes for the selected variable from types import SimpleNamespace diff --git a/tests/transform/test_fire_transforms.py b/tests/transform/test_fire_transforms.py new file mode 100644 index 00000000..05cfc9f8 --- /dev/null +++ b/tests/transform/test_fire_transforms.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: Apache-2.0 +from datetime import datetime + +import shapefile + +from zyra.transform.geospatial import csv_points_to_geojson, shapefile_to_geojson + + +def _write_wgs84_prj(path): + path.write_text( + 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]', + encoding="ascii", + ) + + +def test_normalize_fire_warnings(tmp_path): + shp_path = tmp_path / "warnings.shp" + w = shapefile.Writer(str(shp_path)) + w.autoBalance = 1 + w.field("ReqAgency", "C") + w.field("NWSOffice", "C") + w.field("Year", "N", decimal=0) + w.field("Month", "N", decimal=0) + w.field("Day", "N", decimal=0) + w.field("Time", "N", decimal=0) + w.field("Fire_Name", "C") + w.field("WhoInit", "C") + w.field("LocalTime", "C") + ring = [(-97.0, 35.5), (-97.0, 36.0), (-96.5, 36.0), (-96.5, 35.5), (-97.0, 35.5)] + w.poly([ring]) + w.record("FWT", "KOUN", 2025, 3, 14, 934, "Test Fire", "Forecaster", "3/14 934 PM") + w.close() + _write_wgs84_prj(tmp_path / "warnings.prj") + + result = shapefile_to_geojson( + shp_path, + timezone_name="America/Chicago", + default_year=2025, + time_fields=["LocalTime"], + ) + assert result["metadata"]["feature_count"] == 1 + feature = result["features"][0] + assert feature["properties"]["time_utc_iso"] == "2025-03-15T02:34:00+00:00" + assert feature["geometry"]["type"] == "Polygon" + + +def test_convert_hotspots_csv(tmp_path): + csv_path = tmp_path / "hotspots.csv" + rows = [ + [ + "WFO", + "County", + "Location", + "Time (CDT)", + "Time (Z)", + "Lat", + "Lon", + "Type", + "PDS?", + "Comment", + "", + "", + ], + [ + "OUN", + "Payne", + "4 NNW Drumright", + "10:23:00 AM", + "15:23", + "36.0383", + "-96.6275", + "Update", + "", + "", + "", + "", + ], + ] + csv_path.write_text("\n".join(",".join(r) for r in rows), encoding="utf-8") + + result = csv_points_to_geojson( + csv_path, + event_date=datetime(2025, 3, 14).date(), + timezone_name="America/Chicago", + local_time_fields=["Time (CDT)"], + utc_time_fields=["Time (Z)"], + ) + assert result["metadata"]["feature_count"] == 1 + feature = result["features"][0] + assert feature["geometry"]["type"] == "Point" + assert feature["properties"]["time_utc_iso"] == "2025-03-14T15:23:00+00:00" + assert feature["properties"]["WFO"] == "OUN" diff --git a/tests/transform/test_geotiff_cog.py b/tests/transform/test_geotiff_cog.py new file mode 100644 index 00000000..575393ba --- /dev/null +++ b/tests/transform/test_geotiff_cog.py @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import numpy as np +import pytest +import rasterio +from rasterio.enums import Resampling +from rasterio.transform import from_origin + +from zyra.transform.raster import convert_geotiff_to_cog + + +def _create_test_raster(path: str, *, width: int = 256, height: int = 256) -> None: + data = np.arange(width * height, dtype=np.uint16).reshape((height, width)) + profile = { + "driver": "GTiff", + "height": height, + "width": width, + "count": 1, + "dtype": "uint16", + "crs": "EPSG:3857", + "transform": from_origin(-1_000_000, 1_000_000, 1_000, 1_000), + } + with rasterio.open(path, "w", **profile) as dst: + dst.write(data, 1) + + +def test_convert_geotiff_to_cog_reprojects_and_builds_overviews(tmp_path): + src_path = tmp_path / "source.tif" + dst_path = tmp_path / "output" / "cog.tif" + _create_test_raster(str(src_path)) + + result = convert_geotiff_to_cog( + src_path, + dst_path, + dst_crs="EPSG:4326", + resampling=Resampling.bilinear, + overview_levels=(2,), + overview_resampling=Resampling.nearest, + blocksize=128, + compression="LZW", + predictor=2, + overwrite=True, + ) + + assert result == dst_path + assert dst_path.exists() + + with rasterio.open(dst_path) as ds: + assert ds.crs.to_epsg() == 4326 + assert ds.profile.get("tiled") + assert ds.profile.get("blockxsize") == 128 + assert ds.profile.get("blockysize") == 128 + assert (ds.profile.get("compress") or "").upper() == "LZW" + assert ds.overviews(1) == [2] + + +def test_convert_geotiff_to_cog_requires_overwrite(tmp_path): + src_path = tmp_path / "source.tif" + dst_path = tmp_path / "cog.tif" + _create_test_raster(str(src_path)) + + # First conversion creates the file + convert_geotiff_to_cog(src_path, dst_path, overwrite=True) + + with pytest.raises(FileExistsError): + convert_geotiff_to_cog(src_path, dst_path, overwrite=False) diff --git a/tests/visualization/test_overlays.py b/tests/visualization/test_overlays.py new file mode 100644 index 00000000..bc58ccfa --- /dev/null +++ b/tests/visualization/test_overlays.py @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: Apache-2.0 +import json +from datetime import datetime, timedelta + +import cartopy.crs as ccrs +import matplotlib.pyplot as plt + +from zyra.visualization.overlays import ( + OverlayRenderer, + build_overlay_datasets, + parse_overlay_spec, +) + + +def _dt(ts: str) -> datetime: + return datetime.fromisoformat(ts.replace("Z", "+00:00")) + + +def test_build_overlay_datasets_assigns_by_time(tmp_path): + data = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [-97.5, 35.5]}, + "properties": {"issued": "2025-03-14T15:00:00Z"}, + }, + { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [-97.4, 35.6]}, + "properties": {"issued": "2025-03-14T15:05:00Z"}, + }, + { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [-97.3, 35.7]}, + "properties": {}, + }, + ], + } + path = tmp_path / "detections.geojson" + path.write_text(json.dumps(data), encoding="utf-8") + + spec = parse_overlay_spec(f"{path}:red-dots") + spec.time_key = "issued" + + frame_times = [ + _dt("2025-03-14T15:00:00Z"), + _dt("2025-03-14T15:05:00Z"), + ] + + datasets = build_overlay_datasets( + [spec], + frame_times=frame_times, + tolerance=timedelta(minutes=2), + ) + dataset = datasets[0] + + frame0 = list(dataset.features_for_index(0)) + frame1 = list(dataset.features_for_index(1)) + + assert len(frame0) == 2 # includes static feature + assert len(frame1) == 2 + + static = [f for f in dataset.static_features] + assert len(static) == 1 + assert static[0].properties == {} + + +def test_overlay_renderer_draw(tmp_path): + payload = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-97.6, 35.4], + [-97.4, 35.4], + [-97.4, 35.6], + [-97.6, 35.6], + [-97.6, 35.4], + ] + ], + }, + "properties": {}, + } + ], + } + path = tmp_path / "warnings.geojson" + path.write_text(json.dumps(payload), encoding="utf-8") + + spec = parse_overlay_spec(f"{path}:magenta-outline") + datasets = build_overlay_datasets( + [spec], + frame_times=[_dt("2025-03-14T15:00:00Z")], + ) + renderer = OverlayRenderer(datasets) + + fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()}) + renderer.draw(ax, 0) + # Matplotlib adds Line2D for outline + lines = [ + artist for artist in ax.get_children() if artist.__class__.__name__ == "Line2D" + ] + assert lines, "Expected overlay line to be drawn" + plt.close(fig)