diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8b1ae1d1..9738b93b 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -87,25 +87,44 @@ that is only used for development (tests, linting, docs, etc.). - Follow `docs/source/dev/README.rst` for style: maintain numpy-style docstrings, avoid type hints unless pre-existing, and keep module-level imports in the documented order (`numpy as np`, `xarray as xr`, etc.). -- Respect Ruff defaults (line length 79; docstrings 72). Run - `pixi run -e dev ruff check .` and `pixi run -e dev ruff format .` before - opening a PR. -- Favor descriptive names instead of extra comments; only add comments for - non-obvious behavior (e.g., tricky array ops or concurrency concerns). +- Follow Ruff configuration (79 char lines, 72 char lines for docstrings, + double quotes, numpy docstyle). Run locally: `pixi run -e dev ruff check .` + and `pixi run -e dev ruff format .` +- Do not add comments unless they are absolutely critical to understanding; + always prefer descriptive function and variable names instead. If some functionality needs further explanation, add this to the class/function/method docstring under the "Notes" section. - Use absolute imports under `revrt.`. - Surface warnings/errors through `revrt.warn` and `revrt.exceptions` (e.g., raise `revrtValueError` and emit `revrtWarning`) to ensure logging hooks fire. +- Logging: Use `logging.getLogger(__name__)`. Log at appropriate levels + (DEBUG, INFO, WARNING, ERROR, CRITICAL). Avoid print statements. +- Version: Never edit `_version.py` manually (auto-generated by `setuptools_scm`). + Tag releases `vX.Y.Z`. - When touching PyO3 bindings, update both Python shims in `revrt/_rust.py` (if needed) and the Rust exports to keep signatures aligned. ## 6. Docstring Guidelines (Python) - Use numpy-style docstrings; first line must omit a trailing period. -- Keep docstring lines ≤72 characters and avoid short summaries on - `__init__` methods. -- Document parameters in the method/function docstring, not the class-level - docstring; protected helpers (`_name`) should have single-sentence docs. +- Avoid type hints. +- Keep docstring length to 72 characters per line. +- Never include a period (".") at the end of the first line of docstrings. +- Do not add a short summary to __init__ methods. Instead, keep the line blank + and start the "Parameters" section after a second newline. +- Do not document parameters in the class docstring - do that in the __init__ + docstring instead. +- Do not add docstrings to dunder methods (e.g., __str__, __repr__, etc.) + unless absolutely necessary. +- All @property and @cached_property method documentation should be one line + long and should start with the return type followed by a colon + (e.g. `"""str: My string property"""`). +- If the default value for a parameter is **not** `None`, document it using + the format: `param_name : type, default=<value>`. If the default + value for a parameter **is** `None`, use the format: `param_name : type, optional`. +- "Protected" functions and methods (i.e. starting with an underscore) + should always be documented using **only** one-line summary docstrings. +- To exclude functions or classes from the public API documentation, start + the docstring with the token ``[NOT PUBLIC API]``. - Maintain intersphinx references where possible (see dev guide for mappings). ## 7. Coding Guidelines (Rust) @@ -128,16 +147,24 @@ that is only used for development (tests, linting, docs, etc.).
`tests/conftest.py` for shared setup. Keep all test data small (individual files <1MB). The data doesn't need to fully reproduce realistic analysis cases - it just needs to include characteristics of a realistic case. +- All Python test files (e.g. ``test_scenario.py``) should end with the + following block of code: + + .. code-block:: python + + if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) + + This allows the (single) file to be executed, running only the tests contained + within, which is extremely useful when updating/modifying/adding tests in the file. - Pytest options for parallel execution (`-n auto`) are supported; prefer `pixi run -e dev pytest -n auto` for heavier suites. ## 9. Logging, Errors, and Warnings - Do not log-and-raise manually; custom exceptions/warnings already emit log records. -- Prefer `revrt.utilities.log_mem` for memory-sensitive workflows to keep log - output consistent. -- CLI commands should rely on the logging configuration provided in - `revrt._cli.configure_logging` to avoid duplicate handlers. +- CLI commands should surface actionable messages and exit codes without + relying on hidden background logging. ## 10. Common Pitfalls & Gotchas - The PyO3 module must be rebuilt if Rust code changes; run a Pixi task (tests diff --git a/crates/revrt/src/dataset.rs b/crates/revrt/src/dataset.rs index b7c1776a..78c66ab2 100644 --- a/crates/revrt/src/dataset.rs +++ b/crates/revrt/src/dataset.rs @@ -14,7 +14,7 @@ use zarrs::storage::{ use crate::ArrayIndex; use crate::cost::CostFunction; -use crate::error::Result; +use crate::error::{Error, Result}; pub(crate) use lazy_subset::LazySubset; /// Manages the features datasets and calculated total cost @@ -65,15 +65,13 @@ impl Dataset { trace!("Creating a new group for the cost dataset"); zarrs::group::GroupBuilder::new() - .build(swap.clone(), "/") - .unwrap() - .store_metadata() - .unwrap(); + .build(swap.clone(), "/")? + .store_metadata()?; let entries = source .list() .expect("failed to list variables in source dataset"); - let first_entry = entries + let first_entry_opt = entries .into_iter() .map(|entry| entry.to_string()) .find(|entry| { @@ -81,17 +79,34 @@ // Skip coordinate axes when selecting a representative variable for cost storage. const EXCLUDES: [&str; 6] = ["latitude", "longitude", "band", "x", "y", "spatial_ref"]; - !name.ends_with(".json") && !EXCLUDES.iter().any(|needle| name.contains(needle)) - }) - .expect("no suitable variables found in source dataset"); - let varname = first_entry.split('/').next().unwrap().to_string(); + !name.ends_with(".json") && !EXCLUDES.iter().any(|needle| name == *needle) + }); + let first_entry = match first_entry_opt { + Some(e) => e, + None => { + return Err(Error::IO(std::io::Error::other(format!( + "no non-coordinate variables found in source dataset: {:?}", + source.list().ok() + )))); + } + }; + + // Extract the variable name (first path component) from the selected entry.
+ let varname = match first_entry.split('/').next() { + Some(name) => name, + None => { + return Err(Error::IO(std::io::Error::other( + "Could not determine any variable names from source dataset", + ))); + } + }; debug!("Using '{}' to determine shape of cost data", varname); - let tmp = zarrs::array::Array::open(source.clone(), &format!("/{varname}")).unwrap(); + let tmp = zarrs::array::Array::open(source.clone(), &format!("/{varname}"))?; let chunk_grid = tmp.chunk_grid(); debug!("Chunk grid info: {:?}", &chunk_grid); - add_layer_to_data("cost_invariant", chunk_grid, &swap); - add_layer_to_data("cost", chunk_grid, &swap); + add_layer_to_data("cost_invariant", chunk_grid, &swap)?; + add_layer_to_data("cost", chunk_grid, &swap)?; let cost_chunk_idx = ndarray::Array2::from_elem( ( @@ -268,10 +283,11 @@ impl Dataset { // Calculate the average with center point (half grid + other half grid). // Also, apply the diagonal factor for the extra distance. + // Finally, add any invariant costs. let cost_to_neighbors = neighbors .iter() .zip(invariant_neighbors.iter()) - .filter(|(((ir, jr), _), _)| !(*ir == i && *jr == j)) // no center point + .filter(|(((ir, jr), v), _)| !(*ir == i && *jr == j) && *v > 0.) // no center point and only positive costs .map(|(((ir, jr), v), ((inv_ir, inv_jr), inv_cost))| { debug_assert_eq!((ir, jr), (inv_ir, inv_jr)); ((ir, jr), 0.5 * (v + center.1), inv_cost) @@ -283,9 +299,8 @@ } else { v }; - ((ir, jr), scaled + inv_cost) + (ArrayIndex { i: *ir, j: *jr }, scaled + inv_cost) }) - .map(|((ir, jr), v)| (ArrayIndex { i: *ir, j: *jr }, v)) .collect::<Vec<_>>(); trace!("Neighbors {:?}", cost_to_neighbors); @@ -352,29 +367,28 @@ fn add_layer_to_data( layer_name: &str, chunk_shape: &ChunkGrid, swap: &ReadableWritableListableStorage, -) { +) -> Result<()> { trace!("Creating an empty {} array", layer_name); let dataset_path = format!("/{layer_name}"); - zarrs::array::ArrayBuilder::new_with_chunk_grid( - // cost_shape, + let builder = zarrs::array::ArrayBuilder::new_with_chunk_grid( chunk_shape.clone(), zarrs::array::DataType::Float32, zarrs::array::FillValue::from(zarrs::array::ZARR_NAN_F32), - ) - .build(swap.clone(), &dataset_path) - .unwrap() - .store_metadata() - .unwrap(); + ); + + let built = builder.build(swap.clone(), &dataset_path)?; + built.store_metadata()?; - let array = zarrs::array::Array::open(swap.clone(), &dataset_path).unwrap(); + let array = zarrs::array::Array::open(swap.clone(), &dataset_path)?; trace!("'{}' shape: {:?}", layer_name, array.shape().to_vec()); trace!("'{}' chunk shape: {:?}", layer_name, array.chunk_grid()); trace!( "Dataset contents after '{}' creation: {:?}", layer_name, - swap.list().unwrap() + swap.list()?
); + Ok(()) } #[cfg(test)] @@ -388,14 +402,19 @@ mod tests { let path = samples::multi_variable_zarr(); let cost_function = CostFunction::from_json(r#"{"cost_layers": [{"layer_name": "A"}]}"#).unwrap(); - let dataset = - Dataset::open(path, cost_function, 250_000_000).expect("Error opening dataset"); + let dataset = Dataset::open(path, cost_function, 1_000).expect("Error opening dataset"); let test_points = [ArrayIndex { i: 3, j: 1 }, ArrayIndex { i: 2, j: 2 }]; let array = zarrs::array::Array::open(dataset.source.clone(), "/A").unwrap(); for point in test_points { let results = dataset.get_3x3(&point); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); let ArrayIndex { i: ci, j: cj } = point; let center_subset = zarrs::array_subset::ArraySubset::new_with_ranges(&[ 0..1, @@ -471,6 +490,12 @@ mod tests { for point in test_points { let results = dataset.get_3x3(&point); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); let ArrayIndex { i: ci, j: cj } = point; let center_subset = zarrs::array_subset::ArraySubset::new_with_ranges(&[ 0..1, @@ -545,13 +570,20 @@ mod tests { let results = dataset.get_3x3(&ArrayIndex { i: 0, j: 0 }); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); + assert_eq!(results, vec![]); } #[test_case((0, 0), vec![(0, 1, 0.5), (1, 0, 1.0), (1, 1, 1.5 * SQRT_2)] ; "top left corner")] - #[test_case((0, 1), vec![(0, 0, 0.5), (1, 0, 1.5 * SQRT_2), (1, 1, 2.)] ; "top right corner")] - #[test_case((1, 0), vec![(0, 0, 1.), (0, 1, 1.5 * SQRT_2), (1, 1, 2.5)] ; "bottom left corner")] - #[test_case((1, 1), vec![(0, 0, 1.5 * SQRT_2), (0, 1, 2.), (1, 0, 2.5)] ; "bottom right corner")] + #[test_case((0, 1), vec![(1, 0, 1.5 * SQRT_2), (1, 1, 2.)] ; "top right corner")] + #[test_case((1, 0), vec![(0, 1, 1.5 * SQRT_2), (1, 1, 2.5)] ; "bottom left corner")] + #[test_case((1, 1), vec![(0, 1, 2.), (1, 0, 2.5)] ; "bottom right corner")] fn test_get_3x3_two_by_two_array((si, sj): (u64, u64), expected_output: Vec<(u64, u64, f32)>) { let path = samples::cost_as_index_zarr((2, 2), (2, 2)); let cost_function = @@ -561,6 +593,13 @@ mod tests { let results = dataset.get_3x3(&ArrayIndex { i: si, j: sj }); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); + assert_eq!( results, expected_output @@ -571,10 +610,10 @@ mod tests { } #[test_case((0, 0), vec![(0, 1, 0.5), (1, 0, 1.5), (1, 1, 2.0 * SQRT_2)] ; "top left corner")] - #[test_case((0, 1), vec![(0, 0, 0.5), (0, 2, 1.5), (1, 0, 2.0 * SQRT_2), (1, 1, 2.5), (1, 2, 3. * SQRT_2)] ; "top middle")] + #[test_case((0, 1), vec![(0, 2, 1.5), (1, 0, 2.0 * SQRT_2), (1, 1, 2.5), (1, 2, 3. 
* SQRT_2)] ; "top middle")] #[test_case((0, 2), vec![(0, 1, 1.5), (1, 1, 3.0 * SQRT_2), (1, 2, 3.5)] ; "top right corner")] - #[test_case((1, 0), vec![(0, 0, 1.5), (0, 1, 2.0 * SQRT_2), (1, 1, 3.5), (2, 0, 4.5), (2, 1, 5.0 * SQRT_2)] ; "middle left")] - #[test_case((1, 1), vec![(0, 0, 2.0 * SQRT_2), (0, 1, 2.5), (0, 2, 3.0 * SQRT_2), (1, 0, 3.5), (1, 2, 4.5), (2, 0, 5.0 * SQRT_2), (2, 1, 5.5), (2, 2, 6.0 * SQRT_2)] ; "middle middle")] + #[test_case((1, 0), vec![(0, 1, 2.0 * SQRT_2), (1, 1, 3.5), (2, 0, 4.5), (2, 1, 5.0 * SQRT_2)] ; "middle left")] + #[test_case((1, 1), vec![(0, 1, 2.5), (0, 2, 3.0 * SQRT_2), (1, 0, 3.5), (1, 2, 4.5), (2, 0, 5.0 * SQRT_2), (2, 1, 5.5), (2, 2, 6.0 * SQRT_2)] ; "middle middle")] #[test_case((1, 2), vec![(0, 1, 3.0 * SQRT_2), (0, 2, 3.5), (1, 1, 4.5), (2, 1, 6.0 * SQRT_2), (2, 2, 6.5)] ; "middle right")] #[test_case((2, 0), vec![(1, 0, 4.5), (1, 1, 5.0 * SQRT_2), (2, 1, 6.5)] ; "bottom left corner")] #[test_case((2, 1), vec![(1, 0, 5.0 * SQRT_2), (1, 1, 5.5), (1, 2, 6.0 * SQRT_2), (2, 0, 6.5), (2, 2, 7.5)] ; "bottom middle")] @@ -591,6 +630,13 @@ mod tests { let results = dataset.get_3x3(&ArrayIndex { i: si, j: sj }); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); + assert_eq!( results, expected_output @@ -601,10 +647,10 @@ mod tests { } #[test_case((0, 0), vec![(0, 1, 0.5), (1, 0, 2.), (1, 1, 2.5 * SQRT_2)] ; "top left corner")] - #[test_case((0, 1), vec![(0, 0, 0.5), (0, 2, 1.5), (1, 0, 2.5 * SQRT_2), (1, 1, 3.), (1, 2, 3.5 * SQRT_2)] ; "top left edge")] + #[test_case((0, 1), vec![(0, 2, 1.5), (1, 0, 2.5 * SQRT_2), (1, 1, 3.), (1, 2, 3.5 * SQRT_2)] ; "top left edge")] #[test_case((0, 2), vec![(0, 1, 1.5), (0, 3, 2.5), (1, 1, 3.5 * SQRT_2), (1, 2, 4.), (1, 3, 4.5 * SQRT_2)] ; "top right edge")] #[test_case((0, 3), vec![(0, 2, 2.5), (1, 2, 4.5 * SQRT_2), (1, 3, 5.)] ; "top right corner")] - #[test_case((1, 0), vec![(0, 0, 2.), (0, 1, 2.5 * SQRT_2), (1, 1, 4.5), (2, 0, 6.), (2, 1, 6.5 * SQRT_2)] ; "left top edge")] + #[test_case((1, 0), vec![(0, 1, 2.5 * SQRT_2), (1, 1, 4.5), (2, 0, 6.), (2, 1, 6.5 * SQRT_2)] ; "left top edge")] #[test_case((1, 3), vec![(0, 2, 4.5 * SQRT_2), (0, 3, 5.), (1, 2, 6.5), (2, 2, 8.5 * SQRT_2), (2, 3, 9.)] ; "right top edge")] #[test_case((2, 0), vec![(1, 0, 6.), (1, 1, 6.5 * SQRT_2), (2, 1, 8.5), (3, 0, 10.), (3, 1, 10.5 * SQRT_2)] ; "left bottom edge")] #[test_case((2, 3), vec![(1, 2, 8.5 * SQRT_2), (1, 3, 9.), (2, 2, 10.5), (3, 2, 12.5 * SQRT_2), (3, 3, 13.)] ; "right bottom edge")] @@ -624,6 +670,13 @@ mod tests { let results = dataset.get_3x3(&ArrayIndex { i: si, j: sj }); + // index 0, 0 has a cost of 0 and should therefore be filtered out + assert!( + !results + .iter() + .any(|(ArrayIndex { i, j }, _)| *i == 0 && *j == 0) + ); + assert_eq!( results, expected_output diff --git a/crates/revrt/src/error.rs b/crates/revrt/src/error.rs index ee696053..5d455031 100644 --- a/crates/revrt/src/error.rs +++ b/crates/revrt/src/error.rs @@ -5,6 +5,15 @@ pub enum Error { #[error(transparent)] IO(#[from] std::io::Error), + #[error(transparent)] + ZarrsArrayCreate(#[from] zarrs::array::ArrayCreateError), + + #[error(transparent)] + ZarrsGroupCreate(#[from] zarrs::group::GroupCreateError), + + #[error(transparent)] + ZarrsStorage(#[from] zarrs::storage::StorageError), + #[allow(dead_code)] #[error("Undefined error")] // Used during development while it is not clear a category of error diff --git a/crates/revrt/src/ffi.rs 
b/crates/revrt/src/ffi.rs index d060593a..f31a1c09 100644 --- a/crates/revrt/src/ffi.rs +++ b/crates/revrt/src/ffi.rs @@ -12,6 +12,9 @@ impl From<Error> for PyErr { fn from(err: Error) -> PyErr { match err { Error::IO(msg) => PyIOError::new_err(msg), + Error::ZarrsArrayCreate(e) => PyIOError::new_err(e.to_string()), + Error::ZarrsStorage(e) => PyIOError::new_err(e.to_string()), + Error::ZarrsGroupCreate(e) => PyIOError::new_err(e.to_string()), Error::Undefined(msg) => revrtRustError::new_err(msg), } } diff --git a/crates/revrt/src/lib.rs b/crates/revrt/src/lib.rs index 8ff8b2e3..3251f7cf 100644 --- a/crates/revrt/src/lib.rs +++ b/crates/revrt/src/lib.rs @@ -185,7 +185,6 @@ mod tests { #[test_case((1, 1), vec![(1, 3), (3, 1)], 1.; "horizontal and vertical")] #[test_case((3, 3), vec![(3, 5), (1, 1), (3, 1)], 1.; "horizontal")] #[test_case((3, 3), vec![(5, 3), (5, 5), (1, 3)], 1.; "vertical")] - #[test_case((3, 3), vec![(3, 1), (3, 4)], 0.; "zero costs")] fn routing_one_point_to_many_same_cost_and_length( (si, sj): (u64, u64), endpoints: Vec<(u64, u64)>, diff --git a/revrt/__init__.py b/revrt/__init__.py index 106330fc..f0041322 100644 --- a/revrt/__init__.py +++ b/revrt/__init__.py @@ -1,4 +1,4 @@ -"""Routing analysis library for the reV model""" +"""Routing analysis library""" import importlib.metadata diff --git a/revrt/_cli.py b/revrt/_cli.py index a04fdd2d..d75ec32d 100644 --- a/revrt/_cli.py +++ b/revrt/_cli.py @@ -7,6 +7,7 @@ from revrt import __version__ from revrt.spatial_characterization.cli import route_characterizations_command from revrt.costs.cli import build_masks_command, build_routing_layers_command +from revrt.routing.cli import route_points_command from revrt.utilities.cli import ( layers_to_file_command, layers_from_file_command, @@ -24,6 +25,7 @@ build_masks_command, build_routing_layers_command, route_characterizations_command, + route_points_command, ] main = make_cli(commands, info={"name": "reVRt", "version": __version__}) diff --git a/revrt/costs/config/config.py b/revrt/costs/config/config.py index 3c1317cc..768eef43 100644 --- a/revrt/costs/config/config.py +++ b/revrt/costs/config/config.py @@ -54,7 +54,7 @@ def __init__(self, config=None): config : str or dict, optional Dictionary of transmission cost configuration values, or path to JSON/JSON5 file containing this dictionary.
The - dictionary should have the following keys: + dictionary should have a subset of the following keys: - base_line_costs - iso_lookup @@ -65,6 +65,8 @@ - power_to_voltage - transformer_costs - upgrade_substation_costs + - voltage_polarity_mult + - row_width Each of these keys should point to another dictionary or path to JSON/JSON5 file containing a dictionary of diff --git a/revrt/exceptions.py b/revrt/exceptions.py index d727c65c..97271824 100644 --- a/revrt/exceptions.py +++ b/revrt/exceptions.py @@ -14,7 +14,9 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger""" super().__init__(*args, **kwargs) if args: - logger.error(str(args[0]), stacklevel=2) + logger.error( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) class revrtAttributeError(revrtError, AttributeError): diff --git a/revrt/routing/__init__.py b/revrt/routing/__init__.py new file mode 100644 index 00000000..def6ca7a --- /dev/null +++ b/revrt/routing/__init__.py @@ -0,0 +1 @@ +"""reVRt routing logic""" diff --git a/revrt/routing/cli.py b/revrt/routing/cli.py new file mode 100644 index 00000000..da5fc16e --- /dev/null +++ b/revrt/routing/cli.py @@ -0,0 +1,520 @@ +"""reVRt routing CLI functions""" + +import time +import logging +import contextlib +from math import ceil +from pathlib import Path +from copy import deepcopy + +import pandas as pd +import geopandas as gpd +import xarray as xr +from gaps.cli import CLICommandFromFunction + +from revrt.costs.config import parse_config +from revrt.routing.point_to_many import find_all_routes, RoutingScenario +from revrt.routing.utilities import map_to_costs +from revrt.exceptions import revrtKeyError + + +logger = logging.getLogger(__name__) +_MILLION_USD_PER_MILE_TO_USD_PER_PIXEL = 55923.40730136006 +"""Conversion from million dollars/mile to $/pixel + +1,000,000 [$/million dollars] +* 90 [meters/pixel] +/ 1609.344 [meters/mile] += 55923.40730136006 [$/pixel] +""" + + +def compute_lcp_routes( # noqa: PLR0913, PLR0917 + cost_fpath, + route_table, + cost_layers, + out_dir, + job_name, + friction_layers=None, + tracked_layers=None, + cost_multiplier_layer=None, + cost_multiplier_scalar=1, + transmission_config=None, + save_paths=False, + use_hard_barrier=False, + _split_params=None, +): + """Run least-cost path routing for pairs of points + + Parameters + ---------- + cost_fpath : path-like + Path to layered Zarr file containing cost and other required + routing layers. + route_table : path-like + Path to CSV file defining the start and + end points of all routes. Must have the following columns: + + "start_lat": Starting point latitude + "start_lon": Starting point longitude + "end_lat": Ending point latitude + "end_lon": Ending point longitude + + cost_layers : list + List of layers that are summed to determine the total cost + raster used for routing. Costs and distances for each individual + layer are also reported as requested (e.g. wet and dry costs). + Each dict in the list should have the following keys: + + - "layer_name": (REQUIRED) Name of layer in layered file + containing cost data. + - "multiplier_layer": (OPTIONAL) Name of layer in layered + file containing spatially explicit multiplier values to + apply to this cost layer before summing it with the + others. Default is ``None``. + - "multiplier_scalar": (OPTIONAL) Scalar value to multiply + this layer by before summing it with the others. Default + is ``1``.
+ - "is_invariant": (OPTIONAL) Boolean flag indicating whether + this layer is length invariant (i.e. should NOT be + multiplied by path length; values should be $). Default is + ``False``. + - "include_in_report": (OPTIONAL) Boolean flag indicating + whether the costs and distances for this layer should be + output in the final LCP table. Default is ``True``. + - "apply_row_mult": (OPTIONAL) Boolean flag indicating + whether the right-of-way width multiplier should be + applied for this layer. If ``True``, then the transmission + config should have a "row_width" dictionary that maps + voltages to right-of-way width multipliers. Also, the + routing table input should have a "voltage" entry for + every route. Every "voltage" value in the routing table + must be given in the "row_width" dictionary in the + transmission config, otherwise an error will be thrown. + Default is ``False``. + - "apply_polarity_mult": (OPTIONAL) Boolean flag indicating + whether the polarity multiplier should be applied for this + layer. If ``True``, then the transmission config should + have a "voltage_polarity_mult" dictionary that maps + voltages to a new dictionary, the latter mapping + polarities to multipliers. For example, a valid + "voltage_polarity_mult" dictionary might be + ``{"138": {"ac": 1.15, "dc": 2}}``. + In addition, the routing table input should have a + "voltage" **and** a "polarity" entry for every route. + Every "voltage" + "polarity" combination in the routing + table must be given in the "voltage_polarity_mult" + dictionary in the transmission config, otherwise an error + will be thrown. + + ..IMPORTANT:: + The multiplier in this config is assumed to be in units + of million $ per mile and will be converted to + $ per pixel before being applied to the layer!. + + Default is ``False``. + + out_dir : path-like + Directory where routing outputs should be written. + job_name : str + Label used to name the generated output file. + friction_layers : list, optional + Layers to be added to costs to influence routing but NOT + reported in final cost (i.e. friction, barriers, etc.). Each + item in this list should be a dictionary containing the + following keys: + + - "layer_name": (REQUIRED) Name of layer in layered file + containing routing data. This can also be "lcp_agg_costs", + which represents the layer built out using the + `cost_layers` input. + - "multiplier_layer": (OPTIONAL) Name of layer in layered + file containing spatially explicit multiplier values to + apply to this routing layer before summing it with the + others. Default is ``None``. This can also be + "lcp_agg_costs", which represents the layer built out + using the `cost_layers` input. + - "multiplier_scalar": (OPTIONAL) Scalar value to multiply + this layer by before summing it with the others. Default + is ``1``. + - "include_in_report": (OPTIONAL) Boolean flag indicating + whether the routing and distances for this layer should be + output in the final LCP table. Default is ``False``. + - "apply_row_mult": (OPTIONAL) Boolean flag indicating + whether the right-of-way width multiplier should be + applied for this layer. If ``True``, then the transmission + config should have a "row_width" dictionary that maps + voltages to right-of-way width multipliers. Also, the + routing table input should have a "voltage" entry for + every route. Every "voltage" value in the routing table + must be given in the "row_width" dictionary in the + transmission config, otherwise an error will be thrown. + Default is ``False``. 
+ - "apply_polarity_mult": (OPTIONAL) Boolean flag indicating + whether the polarity multiplier should be applied for this + layer. If ``True``, then the transmission config should + have a "voltage_polarity_mult" dictionary that maps + voltages to a new dictionary, the latter mapping + polarities to multipliers. For example, a valid + "voltage_polarity_mult" dictionary might be + ``{"138": {"ac": 1.15, "dc": 2}}``. + In addition, the routing table input should have a + "voltage" **and** a "polarity" entry for every route. + Every "voltage" + "polarity" combination in the routing + table must be given in the "voltage_polarity_mult" + dictionary in the transmission config, otherwise an error + will be thrown. + + ..IMPORTANT:: + The multiplier in this config is assumed to be in units + of million $ per mile and will be converted to + $ per pixel before being applied to the layer!. + + Default is ``False``. + + By default, ``None``. + tracked_layers : dict, optional + Dictionary mapping layer names to strings, where the strings are + dask aggregation methods (similar to what numpy has) that + should be applied to the layer along the LCP to be included as a + characterization column in the output. By default, ``None``. + cost_multiplier_layer : str, optional + Name of the spatial multiplier layer applied to final costs. + By default, ``None``. + cost_multiplier_scalar : int, default=1 + Scalar multiplier applied to the final cost surface. + By default, ``1``. + transmission_config : path-like or dict, optional + Dictionary of transmission cost configuration values, or + path to JSON/JSON5 file containing this dictionary. The + dictionary should have a subset of the following keys: + + - base_line_costs + - iso_lookup + - iso_multipliers + - land_use_classes + - new_substation_costs + - power_classes + - power_to_voltage + - transformer_costs + - upgrade_substation_costs + - voltage_polarity_mult + - row_width + + Each of these keys should point to another dictionary or + path to JSON/JSON5 file containing a dictionary of + configurations for each section. For the expected contents + of each dictionary, see the default config. If ``None``, + values from the default config are used. + By default, ``None``. + save_paths : bool, default=False + Save outputs as a GeoPackage with path geometries when ``True``. + Defaults to ``False``. + use_hard_barrier : bool, optional + Optional flag to treat any cost values of <= 0 as a hard barrier + (i.e. no paths can ever cross this). If ``False``, cost values + of <= 0 are set to a large value to simulate a strong but + permeable barrier. By default, ``False``. + + Returns + ------- + str or None + Path to the output table if any routes were computed. 
+ """ + + start_time = time.time() + + out_dir = Path(out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + + logger.debug("Tracked layers input: %r", tracked_layers) + logger.debug("Transmission config input: %r", transmission_config) + + transmission_config = parse_config(config=transmission_config) + + route_points = _route_points_subset( + route_table, + sort_cols=["start_lat", "start_lon"], + split_params=_split_params, + ) + if len(route_points) == 0: + logger.info("No routes to process!") + return None + + out_fp = ( + out_dir / f"{job_name}.gpkg" + if save_paths + else out_dir / f"{job_name}.csv" + ) + + _run_lcp( + cost_fpath, + route_points, + cost_layers, + out_fp=out_fp, + transmission_config=transmission_config, + cost_multiplier_layer=cost_multiplier_layer, + cost_multiplier_scalar=cost_multiplier_scalar, + friction_layers=friction_layers, + tracked_layers=tracked_layers, + use_hard_barrier=use_hard_barrier, + ) + + elapsed_time = (time.time() - start_time) / 60 + logger.info("Processing took %.2f minutes", elapsed_time) + + return str(out_fp) + + +def _run_lcp( + cost_fpath, + route_points, + cost_layers, + out_fp, + transmission_config=None, + cost_multiplier_layer=None, + cost_multiplier_scalar=1, + friction_layers=None, + tracked_layers=None, + use_hard_barrier=True, +): + """Execute least-cost path routing for the prepared route subset""" + + ts = time.monotonic() + out_fp = Path(out_fp) + save_paths = out_fp.suffix.lower() == ".gpkg" + + if route_points.empty: + logger.info("Found no paths to compute!") + return + + with xr.open_dataset(cost_fpath, consolidated=False, engine="zarr") as ds: + route_points = map_to_costs( + route_points, + crs=ds.rio.crs, + transform=ds.rio.transform(), + shape=ds.rio.shape, + ) + + logger.info("Computing Least Cost Paths") + num_computed = 0 + for polarity, voltage, routes in _paths_to_compute(route_points, out_fp): + route_cl = _update_multipliers( + cost_layers, polarity, voltage, transmission_config + ) + route_fl = _update_multipliers( + friction_layers or [], polarity, voltage, transmission_config + ) + route_definitions = [ + ( + (info["start_row"], info["start_col"]), + [(info["end_row"], info["end_col"])], + info.to_dict(), + ) + for __, info in routes.iterrows() + ] + + scenario = RoutingScenario( + cost_fpath=cost_fpath, + cost_layers=route_cl, + friction_layers=route_fl, + tracked_layers=tracked_layers, + cost_multiplier_layer=cost_multiplier_layer, + cost_multiplier_scalar=cost_multiplier_scalar, + use_hard_barrier=use_hard_barrier, + ) + + out = find_all_routes( + scenario, + route_definitions=route_definitions, + save_paths=save_paths, + ) + + num_computed += len(out) + + if save_paths: + gpd.GeoDataFrame(out, geometry="geometry").to_file( + out_fp, driver="GPKG", mode="a" + ) + else: + pd.DataFrame(out).to_csv(out_fp, mode="a") + + time_elapsed = f"{(time.monotonic() - ts) / 3600:.4f} hour(s)" + logger.info("%d paths were computed in %s", num_computed, time_elapsed) + + +def _paths_to_compute(route_points, out_fp): + """Yield route groups that still require computation""" + existing_routes = _collect_existing_routes(out_fp) + + group_cols = ["start_row", "start_col", "polarity", "voltage"] + for check_col in ["polarity", "voltage"]: + if check_col not in route_points.columns: + route_points[check_col] = "unknown" + + for group_info, routes in route_points.groupby(group_cols): + if existing_routes: + mask = routes.apply( + lambda row: ( + int(row["start_row"]), + int(row["start_col"]), + int(row["end_row"]), + 
int(row["end_col"]), + str(row.get("polarity", "unknown")), + str(row.get("voltage", "unknown")), + ) + not in existing_routes, + axis=1, + ) + routes = routes[mask] # noqa: PLW2901 + + if routes.empty: + continue + + *__, polarity, voltage = group_info + yield polarity, voltage, routes + + +def _collect_existing_routes(out_fp): + """Collect already computed routes from an existing output file""" + + if out_fp is None or not out_fp.exists(): + return set() + + if out_fp.suffix.lower() == ".gpkg": + existing_df = gpd.read_file(out_fp) + else: + existing_df = pd.read_csv(out_fp) + + return { + ( + int(row["start_row"]), + int(row["start_col"]), + int(row["end_row"]), + int(row["end_col"]), + str(row.get("polarity", "unknown")), + str(row.get("voltage", "unknown")), + ) + for __, row in existing_df.iterrows() + } + + +def _update_multipliers(layers, polarity, voltage, transmission_config): + """Update layer multipliers based on user input""" + output_layers = deepcopy(layers) + polarity = str(polarity) + voltage = str(int(voltage)) if voltage != "unknown" else "unknown" + + for layer in output_layers: + if layer.pop("apply_row_mult", False): + row_multiplier = _get_row_multiplier(transmission_config, voltage) + layer["multiplier_scalar"] = ( + layer.get("multiplier_scalar", 1) * row_multiplier + ) + + if layer.pop("apply_polarity_mult", False): + polarity_multiplier = _get_polarity_multiplier( + transmission_config, voltage, polarity + ) + layer["multiplier_scalar"] = ( + layer.get("multiplier_scalar", 1) + * polarity_multiplier + * _MILLION_USD_PER_MILE_TO_USD_PER_PIXEL + ) + + return output_layers + + +def _get_row_multiplier(transmission_config, voltage): + """Get right-of-way width multiplier for a given voltage""" + try: + row_widths = transmission_config["row_width"] + except KeyError as e: + msg = ( + "`apply_row_mult` was set to `True`, but 'row_width' " + "not found in transmission config!" + ) + raise revrtKeyError(msg) from e + + try: + row_multiplier = row_widths[voltage] + except KeyError as e: + msg = ( + "`apply_row_mult` was set to `True`, but voltage ' " + f"{voltage}' not found in transmission config " + "'row_width' settings. Available voltages: " + f"{list(row_widths)}" + ) + raise revrtKeyError(msg) from e + + return row_multiplier + + +def _get_polarity_multiplier(transmission_config, voltage, polarity): + """Get multiplier for a given voltage and polarity""" + try: + polarity_config = transmission_config["voltage_polarity_mult"] + except KeyError as e: + msg = ( + "`apply_polarity_mult` was set to `True`, but " + "'voltage_polarity_mult' not found in transmission config!" + ) + raise revrtKeyError(msg) from e + + try: + polarity_voltages = polarity_config[voltage] + except KeyError as e: + msg = ( + "`apply_polarity_mult` was set to `True`, but voltage ' " + f"{voltage}' not found in polarity config. Available voltages: " + f"{list(polarity_config)}" + ) + raise revrtKeyError(msg) from e + + try: + polarity_multiplier = polarity_voltages[polarity] + except KeyError as e: + msg = ( + "`apply_polarity_mult` was set to `True`, but polarity ' " + f"{polarity}' not found in voltage config. 
Available polarities: " + f"{list(polarity_voltages)}" + ) + raise revrtKeyError(msg) from e + + return polarity_multiplier + + +def _route_points_subset(route_table, sort_cols, split_params): + """Get the subset of route points for this node, sorted by location""" + + # `route_table` may already be a DataFrame, in which case reading + # fails and the input is used as-is + route_points = route_table + with contextlib.suppress(TypeError, UnicodeDecodeError): + route_points = pd.read_csv(route_table) + + route_points = route_points.sort_values(sort_cols).reset_index(drop=True) + + start_ind, n_chunks = split_params or (0, 1) + chunk_size = ceil(len(route_points) / n_chunks) + return route_points.iloc[ + start_ind * chunk_size : (start_ind + 1) * chunk_size + ] + + +def _split_routes(config): + """Compute route split params inside of config""" + exec_control = config.get("execution_control", {}) + if exec_control.get("option") == "local": + num_nodes = 1 + else: + num_nodes = exec_control.pop("nodes", 1) + + config["_split_params"] = [(i, num_nodes) for i in range(num_nodes)] + return config + + +route_points_command = CLICommandFromFunction( + compute_lcp_routes, + name="route-points", + add_collect=False, + split_keys={"_split_params"}, + config_preprocessor=_split_routes, +) diff --git a/revrt/routing/point_to_many.py b/revrt/routing/point_to_many.py new file mode 100644 index 00000000..348de390 --- /dev/null +++ b/revrt/routing/point_to_many.py @@ -0,0 +1,650 @@ +"""reVRt routing from a point to many points""" + +import json +import time +import logging +from warnings import warn +from functools import cached_property + +import rasterio +import numpy as np +import xarray as xr +import dask.array as da +from shapely.geometry import Point +from shapely.geometry.linestring import LineString + +from revrt import find_paths + +from revrt.exceptions import ( + revrtKeyError, + revrtLeastCostPathNotFoundError, +) +from revrt.warn import revrtWarning + +logger = logging.getLogger(__name__) +LCP_AGG_COST_LAYER_NAME = "lcp_agg_costs" +"""Special name reserved for internally-built cost layer""" + + +class RoutingScenario: + """Container for routing scenario configuration""" + + def __init__( + self, + cost_fpath, + cost_layers, + friction_layers=None, + tracked_layers=None, + cost_multiplier_layer=None, + cost_multiplier_scalar=1, + use_hard_barrier=True, + ): + """ + + Parameters + ---------- + cost_fpath : path-like + Path to the cost layer Zarr store used for routing. + cost_layers : list + List of dictionaries containing layer definitions + contributing to the summed routing cost. + friction_layers : list, optional + List of dictionaries defining layers that influence routing + but are excluded from reports. + tracked_layers : dict, optional + Layers to summarize along the path, mapped to aggregation + names. + cost_multiplier_layer : str, optional + Layer name providing spatial multipliers for total cost. + cost_multiplier_scalar : int or float, optional + Scalar multiplier applied to the final cost surface. + use_hard_barrier : bool, optional + Flag indicating whether non-positive costs block traversal.
+ """ + self.cost_fpath = cost_fpath + self.cost_layers = cost_layers + self.friction_layers = friction_layers or [] + self.tracked_layers = tracked_layers or {} + self.cost_multiplier_layer = cost_multiplier_layer + self.cost_multiplier_scalar = cost_multiplier_scalar + self.use_hard_barrier = use_hard_barrier + + def __repr__(self): + return ( + "RoutingScenario:" + f"\n\t- cost_layers: {self.cost_layers}" + f"\n\t- friction_layers: {self.friction_layers}" + f"\n\t- cost_multiplier_layer: {self.cost_multiplier_layer}" + f"\n\t- cost_multiplier_scalar: {self.cost_multiplier_scalar}" + ) + + @cached_property + def cl_as_json(self): + """str: JSON string describing configured cost layers""" + return json.dumps({"cost_layers": self.cost_layers}) + + +class RoutingLayers: + """Class to build a routing layer from user input""" + + SOFT_BARRIER_MULTIPLIER = 100 + """Multiplier to apply to max cost to use for barriers + + This value is only used if ``use_hard_barrier=False``. + """ + + def __init__(self, routing_scenario, chunks="auto"): + """ + + Parameters + ---------- + routing_scenario : RoutingScenario + Scenario containing cost, friction, and tracking metadata. + chunks : str or mapping, default="auto" + Chunk specification forwarded to ``xarray.open_dataset``. + By default, ``"auto"``. + """ + self.routing_scenario = routing_scenario + self._layer_fh = xr.open_dataset( + self.routing_scenario.cost_fpath, + chunks=chunks, + consolidated=False, + engine="zarr", + ) + self.li_cost_layer_map = {} + self.tracked_layers = [] + + self.transform = self._layer_fh.rio.transform() + self._full_shape = self._layer_fh.rio.shape + self.cost_crs = self._layer_fh.rio.crs + self._layers = set(self._layer_fh.variables) + + self.cost = None + self.li_cost = None + self.final_routing_layer = None + + def __repr__(self): + return f"RoutingLayers for {self.routing_scenario!r}" + + @property + def latitudes(self): + """xarray.DataArray: Latitude coordinates for the cost grid""" + return self._layer_fh["latitude"] + + @property + def longitudes(self): + """xarray.DataArray: Longitude coordinates for the cost grid""" + return self._layer_fh["longitude"] + + def _verify_layer_exists(self, layer_name): + """Verify that layer exists in cost file""" + if layer_name not in self._layers: + msg = ( + f"Did not find layer {layer_name!r} in cost " + f"file {str(self.routing_scenario.cost_fpath)!r}" + ) + raise revrtKeyError(msg) + + def close(self): + """Close the underlying xarray file handle""" + self._layer_fh.close() + + def build(self): + """Build lazy routing arrays from layered file""" + + logger.debug("Building %r", self) + self._build_cost_layer() + self._build_final_routing_layer() + self._build_tracked_layers() + + return self + + def _build_cost_layer(self): + """Build out the main cost layer""" + self.cost = da.zeros(self._full_shape, dtype=np.float32) + self.li_cost = da.zeros(self._full_shape, dtype=np.float32) + for layer_info in self.routing_scenario.cost_layers: + layer_name = layer_info["layer_name"] + is_li = layer_info.get("is_invariant", False) + cost = self._extract_and_scale_layer(layer_info) + + if is_li: + self.li_cost += cost + self.li_cost_layer_map[layer_name] = cost + else: + self.cost += cost + + if layer_info.get("include_in_report", True): + self.tracked_layers.append( + CharacterizedLayer( + layer_name, cost, is_length_invariant=is_li + ) + ) + + if mll := self.routing_scenario.cost_multiplier_layer: + self._verify_layer_exists(mll) + self.cost *= 
self._layer_fh[mll].isel(band=0).astype(np.float32) + + self.cost *= self.routing_scenario.cost_multiplier_scalar + self.li_cost += self.cost * 0 + + def _build_final_routing_layer(self): + """Build out the routing array""" + self.final_routing_layer = self.cost.copy() + self.final_routing_layer += self.li_cost + + friction_costs = da.zeros(self._full_shape, dtype=np.float32) + for layer_info in self.routing_scenario.friction_layers: + layer_name = layer_info["layer_name"] + friction_layer = self._extract_and_scale_layer( + layer_info, allow_cl=True + ) + if layer_info.get("include_in_report", False): + self.tracked_layers.append( + CharacterizedLayer(layer_name, friction_layer) + ) + + friction_costs += friction_layer + + # Must happen at end of loop so that "lcp_agg_costs" + # remains constant + self.final_routing_layer += friction_costs + + max_val = ( + da.max(self.final_routing_layer) * self.SOFT_BARRIER_MULTIPLIER + ) + self.final_routing_layer = da.where( + self.final_routing_layer <= 0, + -1 if self.routing_scenario.use_hard_barrier else max_val, + self.final_routing_layer, + ) + (self.cost * 0) + + def _extract_and_scale_layer(self, layer_info, allow_cl=False): + """Extract layer based on name and scale according to input""" + cost = self._extract_layer(layer_info["layer_name"], allow_cl=allow_cl) + + multiplier_layer_name = layer_info.get("multiplier_layer") + if multiplier_layer_name: + cost *= self._extract_layer( + multiplier_layer_name, allow_cl=allow_cl + ) + + cost *= layer_info.get("multiplier_scalar", 1) + return cost + + def _extract_layer(self, layer_name, allow_cl=False): + """Extract layer based on name""" + if allow_cl and layer_name == LCP_AGG_COST_LAYER_NAME: + return self.final_routing_layer.copy() + + self._verify_layer_exists(layer_name) + return self._layer_fh[layer_name].isel(band=0).astype(np.float32) + + def _build_tracked_layers(self): + """Build out a dictionary of tracked layers""" + for ( + tracked_layer, + method, + ) in self.routing_scenario.tracked_layers.items(): + if getattr(da, method, None) is None: + msg = ( + f"Did not find method {method!r} in dask.array! " + f"Skipping tracked layer {tracked_layer!r}" + ) + warn(msg, revrtWarning) + continue + + if tracked_layer not in self._layers: + msg = ( + f"Did not find layer {tracked_layer!r} in cost file " + f"{str(self.routing_scenario.cost_fpath)!r}. " + "Skipping..." + ) + warn(msg, revrtWarning) + continue + + layer = ( + self._layer_fh[tracked_layer].isel(band=0).astype(np.float32) + ) + self.tracked_layers.append( + CharacterizedLayer(tracked_layer, layer, agg_method=method) + ) + + +class CharacterizedLayer: + """Encapsulate tracked routing layer metadata""" + + def __init__( + self, name, layer, is_length_invariant=False, agg_method=None + ): + """ + + Parameters + ---------- + name : str + Identifier used when reporting layer-derived metrics. + layer : xarray.DataArray or dask.array.Array + Data values sampled from the routing stack. + is_length_invariant : bool, default=False + Flag signaling cost values should ignore segment length. + By default, ``False``. + agg_method : str, optional + Name of dask aggregation used when tracking statistics. + By default, ``None``.
+ """ + self.name = name + self.layer = layer + self.is_length_invariant = is_length_invariant + self.agg_method = agg_method + + def __repr__(self): + return ( + f"CharacterizedLayer(name={self.name!r}, " + f"is_length_invariant={self.is_length_invariant}, " + f"agg_method={self.agg_method!r})" + ) + + def compute(self, route, cell_size): + """Compute layer metrics along a route + + Parameters + ---------- + route : sequence + Ordered ``(row, col)`` indices describing the path. + cell_size : float + Raster cell size in meters for distance calculations. + + Returns + ------- + dict + Mapping of metric names to aggregated layer values. + """ + rows, cols = np.array(route).T + layer_values = self.layer.isel( + y=xr.DataArray(rows, dims="points"), + x=xr.DataArray(cols, dims="points"), + ) + + if self.agg_method is None: + return self._compute_total_and_length( + layer_values, route, cell_size + ) + + return self._compute_agg(layer_values) + + def _compute_total_and_length(self, layer_values, route, cell_size): + """Compute total cost and length metrics for the layer""" + lens, __ = _compute_lens(route, cell_size) + + layer_data = getattr(layer_values, "data", layer_values) + if not isinstance(layer_data, da.Array): + layer_data = da.asarray(layer_data) + + if self.is_length_invariant: + layer_cost = da.sum(layer_data) + else: + layer_cost = da.sum(layer_data * lens) + + layer_length = da.sum(lens[layer_data > 0]) * cell_size / 1000 + + return { + f"{self.name}_cost": layer_cost.astype(np.float32).compute(), + f"{self.name}_dist_km": ( + layer_length.astype(np.float32).compute() + ), + } + + def _compute_agg(self, layer_values): + """Compute aggregated statistic for tracked layer""" + aggregate = getattr(da, self.agg_method)(layer_values).astype( + np.float32 + ) + return {f"{self.name}_{self.agg_method}": aggregate.compute()} + + +class RouteResult: + """Class to compute route characteristics given layer cost maps""" + + def __init__( + self, + routing_layers, + route, + optimized_objective, + add_geom=False, + attrs=None, + ): + """ + + Parameters + ---------- + routing_layers : RoutingLayers + Routing layer manager containing cost and tracker arrays. + route : list + Ordered row and column indices defining the path. + optimized_objective : float + Objective value returned by the routing solver. + add_geom : bool, default=False + Include shapely geometry in the output when ``True``. + By default, ``False``. + attrs : dict, optional + Additional metadata merged into the result dictionary. + By default, ``None``. 
+ """ + self._routing_layers = routing_layers + self._route = route + self._optimized_objective = optimized_objective + self.__lens = self._total_path_length = None + self._by_layer_results = {} + self._add_geom = add_geom + self._attrs = attrs or {} + + @property + def cell_size(self): + """float: Raster cell size in meters""" + return abs(self._routing_layers.transform.a) + + @property + def _lens(self): + """array-like: Cached per-cell travel distances""" + if self.__lens is None: + self._compute_path_length() + return self.__lens + + @property + def total_path_length(self): + """float: Total path length in kilometers""" + if self._total_path_length is None: + self._compute_path_length() + return self._total_path_length + + @property + def cost(self): + """float: Optimized objective evaluated over the route""" + rows, cols = np.array(self._route).T + cell_costs = self._routing_layers.cost.isel( + y=xr.DataArray(rows, dims="points"), + x=xr.DataArray(cols, dims="points"), + ) + + # Multiple distance travel through cell by cost and sum it! + return da.sum(cell_costs * self._lens).astype(np.float32).compute() + + @property + def end_lat(self): + """float: Latitude of the terminal path cell""" + row, col = self._route[-1] + return ( + self._routing_layers.latitudes.isel(y=row, x=col) + .astype(np.float32) + .compute() + .item() + ) + + @property + def end_lon(self): + """float: Longitude of the terminal path cell""" + row, col = self._route[-1] + return ( + self._routing_layers.longitudes.isel(y=row, x=col) + .astype(np.float32) + .compute() + .item() + ) + + @property + def geom(self): + """shapely.GeometryType: Geometry for the computed path""" + rows, cols = np.array(self._route).T + x, y = rasterio.transform.xy( + self._routing_layers.transform, rows, cols + ) + geom = Point if len(self._route) == 1 else LineString + return geom(list(zip(x, y, strict=True))) + + def build(self): + """Assemble route metrics and optional geometry payload""" + results = { + "length_km": self.total_path_length, + "cost": self.cost, + "poi_lat": self.end_lat, + "poi_lon": self.end_lon, + "start_row": self._route[0][0], + "start_col": self._route[0][1], + "end_row": self._route[-1][0], + "end_col": self._route[-1][1], + "optimized_objective": self._optimized_objective, + } + for check_key in ["start_row", "start_col", "end_row", "end_col"]: + if ( + check_key in self._attrs + and results[check_key] != self._attrs[check_key] + ): + msg = ( + f"Computed {check_key}={results[check_key]} does " + f"not match expected {check_key}=" + f"{self._attrs[check_key]}!" + ) + warn(msg, revrtWarning) + + results.update(self._attrs) + for layer in self._routing_layers.tracked_layers: + layer_result = layer.compute(self._route, self.cell_size) + results.update(layer_result) + + if self._add_geom: + results["geometry"] = self.geom + + return results + + def _compute_path_length(self): + """Compute the total length and cell by cell length of LCP""" + self.__lens, self._total_path_length = _compute_lens( + self._route, self.cell_size + ) + + +def find_all_routes(routing_scenario, route_definitions, save_paths=False): + """Compute least-cost routes for each start and destination set + + Parameters + ---------- + routing_scenario : RoutingScenario + Scenario describing the cost layers and routing options. + route_definitions : Iterable + Sequence of ``(start_point, end_points, attrs)`` tuples + defining which points to route between. The `attrs` dictionary + will be appended to the output for each route. 
+ save_paths : bool, default=False + Include shapely geometries in the output when ``True``. + By default, ``False``. + + Returns + ------- + list + Route summaries for each successfully computed path. + """ + if not route_definitions: + return [] + + ts = time.monotonic() + + routing_layers = RoutingLayers(routing_scenario).build() + try: + routes = _compute_routes( + routing_scenario, + route_definitions, + routing_layers, + save_paths=save_paths, + ) + finally: + routing_layers.close() + + time_elapsed = f"{(time.monotonic() - ts) / 60:.4f} min" + logger.debug("Least Cost tie-line computed in %s", time_elapsed) + + return routes + + +def _compute_routes( + routing_scenario, route_definitions, routing_layers, save_paths +): + """Evaluate route definitions and build result records""" + routes = [] + for start_point, end_points, attrs in route_definitions: + try: + indices, optimized_objective = _compute_valid_path( + routing_scenario, routing_layers, start_point, end_points + ) + except revrtLeastCostPathNotFoundError: + continue + + route = RouteResult( + routing_layers, + indices, + optimized_objective, + add_geom=save_paths, + attrs=attrs, + ) + routes.append(route.build()) + + return routes + + +def _compute_valid_path( + routing_scenario, routing_layers, start_point, end_points +): + """Validate provided indices then solve for the least-cost path""" + _validate_starting_point(routing_layers, start_point) + _validate_end_points(routing_layers, end_points) + + try: + route_result = find_paths( + zarr_fp=routing_scenario.cost_fpath, + cost_layers=routing_scenario.cl_as_json, + start=[start_point], + end=end_points, + )[0] + except Exception as ex: + msg = ( + f"Unable to find path from {start_point} to any of {end_points}: {ex}" + ) + logger.exception(msg) + + raise revrtLeastCostPathNotFoundError(msg) from ex + + return route_result + + +def _validate_starting_point(routing_layers, start_point): + """Raise when the starting cell lacks a positive traversal cost""" + start_row, start_col = start_point + start_cost = ( + routing_layers.final_routing_layer.isel(y=start_row, x=start_col) + .compute() + .item() + ) + + if start_cost <= 0: + msg = ( + f"Start idx {start_point} does not have a valid cost: " + f"{start_cost:.2f} (must be > 0)!" + ) + raise revrtLeastCostPathNotFoundError(msg) + + +def _validate_end_points(routing_layers, end_points): + """Raise when no end cell provides a positive traversal cost""" + end_rows, end_cols = np.array(end_points).T + end_costs = routing_layers.final_routing_layer.isel( + y=xr.DataArray(end_rows, dims="points"), + x=xr.DataArray(end_cols, dims="points"), + ) + if not np.any(end_costs.compute() > 0): + msg = ( + f"None of the end idx {end_points} have a valid cost " + f"(must be > 0)!" + ) + raise revrtLeastCostPathNotFoundError(msg) + + +def _compute_lens(route, cell_size): + """Compute the total length and cell-by-cell length of LCP""" + # Use the Pythagorean theorem to calculate lengths between cells + # (in cell units; converted to km below): c**2 = a**2 + b**2 for + # each individual segment + lens = np.sqrt(np.sum(np.diff(route, axis=0) ** 2, axis=1)) + total_path_length = np.sum(lens) * cell_size / 1000 + + # Need to determine distance coming into and out of any cell. + # Assume paths start and end at the center of a cell. Therefore, + # distance traveled in the cell is half the distance entering it + # and half the distance exiting it. Duplicate all lengths, + # pad 0s on ends for start and end cells, and divide all + # distances in half.
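+ # Quick illustrative check (added for clarity; not from the original + # source): a 3-cell path with segment lengths [1, sqrt(2)] becomes + # [1, 1, sqrt(2), sqrt(2)] after the repeat below, then + # [0, 1, 1, sqrt(2), sqrt(2), 0] after padding; halving and + # pairwise-summing yields per-cell distances + # [0.5, (1 + sqrt(2)) / 2, sqrt(2) / 2].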
+ lens = np.repeat(lens, 2) + lens = np.insert(np.append(lens, 0), 0, 0) + lens /= 2 + + # Group entrance and exit distances together, and add them + lens = lens.reshape((int(lens.shape[0] / 2), 2)) + lens = np.sum(lens, axis=1) + return lens, total_path_length diff --git a/revrt/routing/utilities.py b/revrt/routing/utilities.py new file mode 100644 index 00000000..6101fbea --- /dev/null +++ b/revrt/routing/utilities.py @@ -0,0 +1,108 @@ +"""reVRt routing module utilities""" + +import logging +from warnings import warn + +import rasterio +import numpy as np +import geopandas as gpd +from shapely.geometry import Point + +from revrt.warn import revrtWarning + + +logger = logging.getLogger(__name__) + + +def map_to_costs(route_points, crs, transform, shape): + """Map route table to cost indices and drop out-of-bounds rows + + Parameters + ---------- + route_points : pandas.DataFrame + Route definitions table with at least `start_lat`, `start_lon`, + `end_lat`, and `end_lon` coordinate columns. + crs : str or pyproj.crs.CRS + Coordinate reference system for the cost raster. + transform : affine.Affine + Rasterio affine transform giving pixel origin and resolution. + shape : tuple + Raster height and width for bounds checking. + + Returns + ------- + pandas.DataFrame + Updated route table filtered to routes within the cost domain. + """ + route_points = _get_start_end_point_cost_indices( + route_points, crs, transform + ) + return _filter_points_outside_cost_domain(route_points, shape) + + +def _get_start_end_point_cost_indices(route_points, cost_crs, transform): + """Populate start/end row and column indices for each route""" + + logger.debug("Map %d routes to cost raster", len(route_points)) + logger.debug("First few routes:\n%s", route_points.head()) + logger.debug("Transform:\n%s", transform) + + start_lat = route_points["start_lat"].astype("float32") + start_lon = route_points["start_lon"].astype("float32") + start_row, start_col = _transform_lat_lon_to_row_col( + transform, cost_crs, start_lat, start_lon + ) + end_lat = route_points["end_lat"].astype("float32") + end_lon = route_points["end_lon"].astype("float32") + end_row, end_col = _transform_lat_lon_to_row_col( + transform, cost_crs, end_lat, end_lon + ) + + logger.debug("Mapping done!") + + route_points["start_row"] = start_row + route_points["start_col"] = start_col + route_points["end_row"] = end_row + route_points["end_col"] = end_col + + return route_points + + +def _filter_points_outside_cost_domain(route_points, shape): + """Drop routes whose indices fall outside the cost domain""" + + logger.debug("Filtering out points outside cost domain...") + mask = route_points["start_row"] >= 0 + mask &= route_points["start_row"] < shape[0] + mask &= route_points["start_col"] >= 0 + mask &= route_points["start_col"] < shape[1] + mask &= route_points["end_row"] >= 0 + mask &= route_points["end_row"] < shape[0] + mask &= route_points["end_col"] >= 0 + mask &= route_points["end_col"] < shape[1] + + logger.debug("Mask computed!") + + if any(~mask): + msg = ( + "The following features are outside of the cost exclusion " + f"domain and will be dropped:\n{route_points.loc[~mask]}" + ) + warn(msg, revrtWarning) + route_points = route_points.loc[mask].reset_index(drop=True) + + return route_points + + +def _transform_lat_lon_to_row_col(transform, cost_crs, lat, lon): + """Convert WGS84 coordinates to cost grid row and column arrays""" + feats = gpd.GeoDataFrame( + geometry=[Point(*p) for p in zip(lon, lat, strict=True)] + ) + coords =
feats.set_crs("EPSG:4326").to_crs(cost_crs)["geometry"].centroid + row, col = rasterio.transform.rowcol( + transform, coords.x.values, coords.y.values + ) + row = np.array(row) + col = np.array(col) + return row, col diff --git a/revrt/warn.py b/revrt/warn.py index 0d6755d3..1b2f642a 100644 --- a/revrt/warn.py +++ b/revrt/warn.py @@ -14,7 +14,9 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger""" super().__init__(*args, **kwargs) if args: - logger.warning(str(args[0]), stacklevel=2) + logger.warning( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) class revrtDeprecationWarning(revrtWarning, DeprecationWarning): diff --git a/tests/python/integration/test_routing_utilities_integration.py b/tests/python/integration/test_routing_utilities_integration.py new file mode 100644 index 00000000..c25cad46 --- /dev/null +++ b/tests/python/integration/test_routing_utilities_integration.py @@ -0,0 +1,101 @@ +"""reVrt tests for routing utilities""" + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +import xarray as xr +from rasterio.transform import from_origin + +from revrt.routing.utilities import map_to_costs +from revrt.utilities import LayeredFile + + +@pytest.fixture(scope="module") +def sample_layered_data(tmp_path_factory): + """Sample layered data files to use across tests""" + data_dir = tmp_path_factory.mktemp("routing_data") + + layered_fp = data_dir / "test_layered.zarr" + layer_file = LayeredFile(layered_fp) + + height, width = (7, 8) + cell_size = 1.0 + x0, y0 = 0.0, float(height) + transform = from_origin(x0, y0, cell_size, cell_size) + x_coords = ( + x0 + np.arange(width, dtype=np.float32) * cell_size + cell_size / 2 + ) + y_coords = ( + y0 - np.arange(height, dtype=np.float32) * cell_size - cell_size / 2 + ) + + layer_1 = np.array( + [ + [ + [7, 7, 8, 0, 9, 9, 9, 0], + [8, 1, 2, 2, 9, 9, 9, 0], + [9, 1, 3, 3, 9, 1, 2, 3], + [9, 1, 2, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 1, 1, 9, 0], + [9, 9, 9, 9, 9, 9, 9, 0], + ] + ], + dtype=np.float32, + ) + + da = xr.DataArray( + layer_1, + dims=("band", "y", "x"), + coords={"y": y_coords, "x": x_coords}, + ) + da = da.rio.write_crs("EPSG:4326") + da = da.rio.write_transform(transform) + + geotiff_fp = data_dir / "layer_1.tif" + da.rio.to_raster(geotiff_fp, driver="GTiff") + + layer_file.write_geotiff_to_file(geotiff_fp, "layer_1", overwrite=True) + return layered_fp + + +def test_basic_map_to_costs_integration(sample_layered_data): + """Basic integration test for mapping to costs from layered data""" + + route_table = pd.DataFrame( + { + "start_lat": 5.5, + "start_lon": [1.5, 2.5], + "end_lat": 4.5, + "end_lon": 6.5, + } + ) + + with xr.open_dataset( + sample_layered_data, consolidated=False, engine="zarr" + ) as ds: + route_table = map_to_costs( + route_table, + crs=ds.rio.crs, + transform=ds.rio.transform(), + shape=ds.rio.shape, + ) + + np.testing.assert_array_equal( + route_table["start_row"].to_numpy(), np.array([1, 1]) + ) + np.testing.assert_array_equal( + route_table["start_col"].to_numpy(), np.array([1, 2]) + ) + np.testing.assert_array_equal( + route_table["end_row"].to_numpy(), np.array([2, 2]) + ) + np.testing.assert_array_equal( + route_table["end_col"].to_numpy(), np.array([6, 6]) + ) + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/routing/test_routing_cli.py b/tests/python/unit/routing/test_routing_cli.py new file mode 100644 index 
00000000..a44ef3df --- /dev/null +++ b/tests/python/unit/routing/test_routing_cli.py @@ -0,0 +1,575 @@ +"""reVRt routing CLI unit tests""" + +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +import xarray as xr +import geopandas as gpd +from shapely.geometry import Point, LineString +from rasterio.transform import from_origin + +from revrt.utilities import LayeredFile +from revrt.routing.utilities import map_to_costs +from revrt.exceptions import revrtKeyError +from revrt.routing.cli import ( + compute_lcp_routes, + _run_lcp, + _collect_existing_routes, + _update_multipliers, + _route_points_subset, + _paths_to_compute, + _split_routes, + _get_row_multiplier, + _get_polarity_multiplier, + _MILLION_USD_PER_MILE_TO_USD_PER_PIXEL, +) + + +@pytest.fixture(scope="module") +def sample_layered_data(tmp_path_factory): + """Create layered routing data mimicking point_to_many tests""" + + data_dir = tmp_path_factory.mktemp("routing_cli_data") + + layered_fp = data_dir / "test_layered.zarr" + layer_file = LayeredFile(layered_fp) + + height, width = (7, 8) + cell_size = 1.0 + x0, y0 = 0.0, float(height) + transform = from_origin(x0, y0, cell_size, cell_size) + x_coords = ( + x0 + np.arange(width, dtype=np.float32) * cell_size + cell_size / 2 + ) + y_coords = ( + y0 - np.arange(height, dtype=np.float32) * cell_size - cell_size / 2 + ) + + layer_values = [ + np.array( + [ + [ + [7, 7, 8, 0, 9, 9, 9, 0], + [8, 1, 2, 2, 9, 9, 9, 0], + [9, 1, 3, 3, 9, 1, 2, 3], + [9, 1, 2, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 1, 1, 9, 0], + [9, 9, 9, 9, 9, 9, 9, 0], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [8, 7, 6, 5, 5, 6, 7, 9], + [7, 1, 1, 2, 3, 3, 2, 8], + [6, 2, 9, 6, 5, 2, 1, 7], + [7, 3, 8, 1, 2, 3, 2, 6], + [8, 4, 7, 2, 8, 4, 3, 5], + [9, 5, 6, 3, 4, 4, 3, 4], + [9, 6, 7, 4, 5, 5, 4, 3], + ] + ], + dtype=np.float32, + ), + np.array( + [ + [ + [6, 6, 6, 6, 6, 7, 8, 9], + [5, 2, 2, 3, 4, 5, 6, 8], + [4, 3, 7, 7, 6, 4, 5, 7], + [5, 4, 6, 2, 3, 4, 4, 6], + [6, 5, 5, 3, 7, 5, 5, 5], + [7, 6, 6, 4, 5, 5, 4, 4], + [8, 7, 7, 5, 6, 5, 4, 3], + ] + ], + dtype=np.float32, + ), + ] + + for ind, routing_layer in enumerate(layer_values, start=1): + da = xr.DataArray( + routing_layer, + dims=("band", "y", "x"), + coords={"y": y_coords, "x": x_coords}, + ) + da = da.rio.write_crs("EPSG:4326") + da = da.rio.write_transform(transform) + + geotiff_fp = data_dir / f"layer_{ind}.tif" + da.rio.to_raster(geotiff_fp, driver="GTiff") + + layer_file.write_geotiff_to_file( + geotiff_fp, f"layer_{ind}", overwrite=True + ) + + return layered_fp + + +def _build_route_table(layered_fp, rows_cols): + """Helper to construct route tables with CRS-aligned coordinates""" + + with xr.open_dataset(layered_fp, consolidated=False, engine="zarr") as ds: + latitudes = ds["latitude"].to_numpy() + longitudes = ds["longitude"].to_numpy() + + records = [] + for idx, (start, end) in enumerate(rows_cols): + s_row, s_col = start + e_row, e_col = end + records.append( + { + "route_id": f"route_{idx}", + "start_lat": float(latitudes[s_row, s_col]), + "start_lon": float(longitudes[s_row, s_col]), + "end_lat": float(latitudes[e_row, e_col]), + "end_lon": float(longitudes[e_row, e_col]), + "voltage": 138, + "polarity": "ac", + } + ) + + return pd.DataFrame.from_records(records) + + +def test_compute_lcp_routes_generates_csv(sample_layered_data, tmp_path): + """compute_lcp_routes should map points and persist CSV outputs""" + + routes = _build_route_table( + sample_layered_data, + rows_cols=[((1, 
1), (2, 3)), ((0, 0), (3, 4))], + ) + route_table_fp = tmp_path / "route_table.csv" + routes.to_csv(route_table_fp, index=False) + + out_dir = tmp_path / "routing_outputs" + cost_layers = [ + { + "layer_name": "layer_1", + "apply_row_mult": True, + "multiplier_scalar": 1, + }, + { + "layer_name": "layer_2", + "apply_polarity_mult": True, + "multiplier_scalar": 1, + }, + ] + + transmission_config = { + "row_width": {"138": 1.5}, + "voltage_polarity_mult": {"138": {"ac": 2e-5}}, + } + + result_fp = compute_lcp_routes( + cost_fpath=sample_layered_data, + route_table=route_table_fp, + cost_layers=cost_layers, + out_dir=out_dir, + job_name="run", + transmission_config=transmission_config, + save_paths=False, + _split_params=(0, 1), + ) + + output_path = Path(result_fp) + assert output_path.exists() + + df = pd.read_csv(output_path) + df = df[df["route_id"] != "route_id"].reset_index(drop=True) + df = df.drop(columns=[c for c in df.columns if c.startswith("Unnamed")]) + + numeric_cols = [ + "cost", + "length_km", + "optimized_objective", + "layer_1_cost", + "layer_1_dist_km", + "layer_2_cost", + "layer_2_dist_km", + ] + for col in numeric_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col]) + + assert len(df) == len(routes) + assert set(df["route_id"]) == set(routes["route_id"]) + + with xr.open_dataset( + sample_layered_data, consolidated=False, engine="zarr" + ) as ds: + mapped_routes = map_to_costs( + routes.copy(), ds.rio.crs, ds.rio.transform(), ds.rio.shape + ) + + merged = df.merge( + mapped_routes[ + [ + "route_id", + "start_row", + "start_col", + "end_row", + "end_col", + ] + ], + on="route_id", + how="left", + suffixes=("", "_expected"), + ) + + for col in ["start_row", "start_col", "end_row", "end_col"]: + assert ( + merged[col].astype(int) == merged[f"{col}_expected"].astype(int) + ).all() + + assert np.allclose( + merged["cost"], merged["layer_1_cost"] + merged["layer_2_cost"] + ) + assert np.all(merged["length_km"] > 0) + assert np.allclose( + merged["cost"], merged["optimized_objective"], rtol=1e-5 + ) + + +def test_compute_lcp_routes_returns_none_on_empty_indices( + sample_layered_data, tmp_path +): + """Ensure compute_lcp_routes short-circuits when route points are empty""" + + route_table_fp = tmp_path / "routes.csv" + _build_route_table(sample_layered_data, [((1, 1), (2, 2))]).to_csv( + route_table_fp, index=False + ) + + result = compute_lcp_routes( + cost_fpath=sample_layered_data, + route_table=route_table_fp, + cost_layers=[{"layer_name": "layer_1"}], + out_dir=tmp_path, + job_name="no_routes", + _split_params=(1000, 1), + ) + + assert result is None + assert not (tmp_path / "no_routes.csv").exists() + + +def test_run_lcp_with_save_paths_filters_existing_routes( + sample_layered_data, tmp_path, monkeypatch +): + """_run_lcp should skip already processed routes and append geometries""" + + routes = _build_route_table( + sample_layered_data, + rows_cols=[((1, 1), (2, 2)), ((2, 2), (4, 4))], + ) + + with xr.open_dataset( + sample_layered_data, consolidated=False, engine="zarr" + ) as ds: + mapped_routes = map_to_costs( + routes.copy(), ds.rio.crs, ds.rio.transform(), ds.rio.shape + ) + + existing_tuple = ( + int(mapped_routes.iloc[0]["start_row"]), + int(mapped_routes.iloc[0]["start_col"]), + int(mapped_routes.iloc[0]["end_row"]), + int(mapped_routes.iloc[0]["end_col"]), + routes.iloc[0]["polarity"], + str(routes.iloc[0]["voltage"]), + ) + + monkeypatch.setattr( + "revrt.routing.cli._collect_existing_routes", + lambda _: {existing_tuple}, + ) + + saved_calls = [] 
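+    # Each stubbed ``to_file`` call below records its path, driver,
+    # mode, and a deep copy of the GeoDataFrame here instead of
+    # writing anything to disk.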
+ + def fake_to_file(self, path, driver=None, mode=None, **_kwargs): + saved_calls.append((path, driver, mode, self.copy(deep=True))) + + monkeypatch.setattr( + "revrt.routing.cli.gpd.GeoDataFrame.to_file", fake_to_file + ) + + out_fp = tmp_path / "routes.gpkg" + + _run_lcp( + cost_fpath=sample_layered_data, + route_points=routes, + cost_layers=[{"layer_name": "layer_1"}], + out_fp=out_fp, + transmission_config={ + "row_width": {"138": 1.0}, + "voltage_polarity_mult": {"138": {"ac": 1.0}}, + }, + cost_multiplier_scalar=1, + friction_layers=[{"layer_name": "layer_2", "apply_row_mult": True}], + tracked_layers={"layer_3": "max"}, + use_hard_barrier=True, + ) + + assert len(saved_calls) == 1 + saved_path, driver, mode, saved_gdf = saved_calls[0] + assert saved_path == out_fp + assert driver == "GPKG" + assert mode == "a" + assert len(saved_gdf) == 1 + assert saved_gdf["route_id"].iloc[0] == routes.iloc[1]["route_id"] + + expected = mapped_routes.iloc[1] + assert int(saved_gdf["start_row"].iloc[0]) == int(expected["start_row"]) + assert int(saved_gdf["start_col"].iloc[0]) == int(expected["start_col"]) + assert int(saved_gdf["end_row"].iloc[0]) == int(expected["end_row"]) + assert int(saved_gdf["end_col"].iloc[0]) == int(expected["end_col"]) + + cost_val = float(saved_gdf["cost"].iloc[0]) + objective_val = float(saved_gdf["optimized_objective"].iloc[0]) + length_val = float(saved_gdf["length_km"].iloc[0]) + + assert cost_val > 0 + assert length_val > 0 + assert objective_val == pytest.approx(cost_val, rel=1e-5) + + geom = saved_gdf.geometry.iloc[0] + assert isinstance(geom, LineString) + assert len(geom.coords) >= 2 + + +def test_run_lcp_returns_immediately_when_no_routes(tmp_path): + """_run_lcp should exit early when route_points is empty""" + + _run_lcp( + cost_fpath="unused", # cost file is ignored in this branch + route_points=pd.DataFrame(), + cost_layers=[], + out_fp=tmp_path / "unused.csv", + ) + + +def test_collect_existing_routes_csv(tmp_path): + """_collect_existing_routes should read CSV outputs""" + + data = pd.DataFrame( + [ + { + "start_row": 0, + "start_col": 1, + "end_row": 2, + "end_col": 3, + "polarity": "ac", + "voltage": "230", + } + ] + ) + csv_fp = tmp_path / "routes.csv" + data.to_csv(csv_fp, index=False) + + result = _collect_existing_routes(csv_fp) + assert result == {(0, 1, 2, 3, "ac", "230")} + + +def test_collect_existing_routes_gpkg(tmp_path): + """_collect_existing_routes should support GeoPackage outputs""" + + gpkg_fp = tmp_path / "routes.gpkg" + gdf = gpd.GeoDataFrame( + { + "start_row": [1], + "start_col": [2], + "end_row": [3], + "end_col": [4], + "polarity": ["unknown"], + "voltage": ["unknown"], + "geometry": [Point(0, 0)], + } + ) + gdf.to_file(gpkg_fp, driver="GPKG") + + result = _collect_existing_routes(gpkg_fp) + assert result == {(1, 2, 3, 4, "unknown", "unknown")} + + +def test_collect_existing_routes_when_missing(tmp_path): + """Missing outputs should result in an empty existing route set""" + + assert _collect_existing_routes(None) == set() + assert _collect_existing_routes(tmp_path / "missing.csv") == set() + + +def test_route_points_subset_with_chunking(tmp_path): + """_route_points_subset should slice sorted features by chunk""" + + test_fp = tmp_path / "features.csv" + features = pd.DataFrame( + { + "start_lat": [5.0, 1.0, 3.0, 7.0], + "start_lon": [0.0, 1.0, 2.0, 3.0], + } + ) + + features.to_csv(test_fp, index=False) + + first_chunk = _route_points_subset( + test_fp, ["start_lat", "start_lon"], (0, 2) + ) + assert 
first_chunk["start_lat"].tolist() == [1.0, 3.0] + assert first_chunk["start_lon"].tolist() == [1.0, 2.0] + + second_chunk = _route_points_subset( + test_fp, ["start_lat", "start_lon"], (1, 2) + ) + assert second_chunk["start_lat"].tolist() == [5.0, 7.0] + assert second_chunk["start_lon"].tolist() == [0.0, 3.0] + + +def test_paths_to_compute_inserts_missing_columns(tmp_path): + """_paths_to_compute should back-fill missing polarity/voltage columns""" + + route_points = pd.DataFrame( + { + "start_row": [0], + "start_col": [1], + "end_row": [2], + "end_col": [3], + } + ) + + groups = list(_paths_to_compute(route_points, tmp_path / "not_there.csv")) + assert groups + polarity, voltage, grouped_routes = groups[0] + assert polarity == "unknown" + assert voltage == "unknown" + assert grouped_routes.iloc[0]["start_row"] == 0 + + +def test_split_routes_handles_local_and_cluster(): + """_split_routes should configure chunking for local and cluster modes""" + + local_config = {"execution_control": {"option": "local", "nodes": 4}} + result_local = _split_routes(local_config) + assert result_local["_split_params"] == [(0, 1)] + assert result_local["execution_control"]["nodes"] == 4 + + cluster_config = {"execution_control": {"nodes": 3}} + result_cluster = _split_routes(cluster_config) + assert result_cluster["_split_params"] == [(0, 3), (1, 3), (2, 3)] + assert "nodes" not in result_cluster["execution_control"] + + +def test_update_multipliers_applies_row_and_polarity(): + """_update_multipliers should apply configured scalar adjustments""" + + layers = [ + { + "layer_name": "layer_1", + "multiplier_scalar": 2, + "apply_row_mult": True, + }, + {"layer_name": "layer_2", "apply_polarity_mult": True}, + ] + + transmission_config = { + "row_width": {"138": 1.5}, + "voltage_polarity_mult": {"138": {"ac": 0.5}}, + } + + updated = _update_multipliers( + layers, + polarity="ac", + voltage=138, + transmission_config=transmission_config, + ) + + # original input remains unchanged + assert layers[0]["apply_row_mult"] is True + assert updated[0]["multiplier_scalar"] == pytest.approx(3) + assert updated[1]["multiplier_scalar"] == pytest.approx( + 0.5 * _MILLION_USD_PER_MILE_TO_USD_PER_PIXEL + ) + + # Voltage marked as unknown should skip multiplier lookups + unchanged = _update_multipliers( + [{"layer_name": "layer_3"}], "dc", "unknown", transmission_config + ) + assert unchanged[0]["layer_name"] == "layer_3" + + +def test_get_row_multiplier_missing_config(): + """_get_row_multiplier should raise when configuration keys are absent""" + + with pytest.raises( + revrtKeyError, + match=( + r"`apply_row_mult` was set to `True`, but 'row_width' not found " + r"in transmission config" + ), + ): + _get_row_multiplier({}, "138") + + +def test_get_row_multiplier_unknown_voltage(): + """_get_row_multiplier should surface available voltages on failure""" + + config = {"row_width": {"230": 1.2}} + with pytest.raises( + revrtKeyError, + match=( + r"`apply_row_mult` was set to `True`, but voltage '\s*138' not " + r"found in transmission config 'row_width' settings. 
" + r"Available voltages: \['230'\]" + ), + ): + _get_row_multiplier(config, "138") + + +def test_get_polarity_multiplier_missing_config(): + """_get_polarity_multiplier should raise when multiplier section missing""" + + with pytest.raises( + revrtKeyError, + match=( + r"`apply_polarity_mult` was set to `True`, but " + r"'voltage_polarity_mult' not found in transmission config" + ), + ): + _get_polarity_multiplier({}, "138", "ac") + + +def test_get_polarity_multiplier_unknown_voltage(): + """_get_polarity_multiplier should guard against unknown voltages""" + + config = {"voltage_polarity_mult": {"230": {"ac": 1.0}}} + with pytest.raises( + revrtKeyError, + match=( + r"`apply_polarity_mult` was set to `True`, but voltage '\s*138' " + r"not found in polarity config. Available voltages: \['230'\]" + ), + ): + _get_polarity_multiplier(config, "138", "ac") + + +def test_get_polarity_multiplier_unknown_polarity(): + """_get_polarity_multiplier should guard against unknown polarities""" + + config = {"voltage_polarity_mult": {"138": {"dc": 1.0}}} + with pytest.raises( + revrtKeyError, + match=( + r"`apply_polarity_mult` was set to `True`, but polarity '\s*ac' " + r"not found in voltage config. Available polarities: \['dc'\]" + ), + ): + _get_polarity_multiplier(config, "138", "ac") + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/routing/test_routing_point_to_many.py b/tests/python/unit/routing/test_routing_point_to_many.py new file mode 100644 index 00000000..de138d27 --- /dev/null +++ b/tests/python/unit/routing/test_routing_point_to_many.py @@ -0,0 +1,981 @@ +"""reVrt tests for routing one point to many endpoints""" + +from pathlib import Path + +import pytest +import numpy as np +import xarray as xr +from rasterio.transform import from_origin + +from revrt.routing import point_to_many +from revrt.utilities import LayeredFile +from revrt.routing.point_to_many import ( + find_all_routes, + LCP_AGG_COST_LAYER_NAME, + RouteResult, + RoutingLayers, + RoutingScenario, +) +from revrt.exceptions import revrtKeyError, revrtLeastCostPathNotFoundError +from revrt.warn import revrtWarning + + +@pytest.fixture(scope="module") +def sample_layered_data(tmp_path_factory): + """Sample layered data files to use across tests""" + data_dir = tmp_path_factory.mktemp("routing_data") + + layered_fp = data_dir / "test_layered.zarr" + layer_file = LayeredFile(layered_fp) + + height, width = (7, 8) + cell_size = 1.0 + x0, y0 = 0.0, float(height) + transform = from_origin(x0, y0, cell_size, cell_size) + x_coords = ( + x0 + np.arange(width, dtype=np.float32) * cell_size + cell_size / 2 + ) + y_coords = ( + y0 - np.arange(height, dtype=np.float32) * cell_size - cell_size / 2 + ) + + layer_1 = np.array( + [ + [ + [7, 7, 8, 0, 9, 9, 9, 0], + [8, 1, 2, 2, 9, 9, 9, 0], + [9, 1, 3, 3, 9, 1, 2, 3], + [9, 1, 2, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 9, 1, 9, 0], + [9, 9, 9, 1, 1, 1, 9, 0], + [9, 9, 9, 9, 9, 9, 9, 0], + ] + ], + dtype=np.float32, + ) + + layer_2 = np.array( + [ + [ + [8, 7, 6, 5, 5, 6, 7, 9], + [7, 1, 1, 2, 3, 3, 2, 8], + [6, 2, 9, 6, 5, 2, 1, 7], + [7, 3, 8, 1, 2, 3, 2, 6], + [8, 4, 7, 2, 8, 4, 3, 5], + [9, 5, 6, 3, 4, 4, 3, 4], + [9, 6, 7, 4, 5, 5, 4, 3], + ] + ], + dtype=np.float32, + ) + + layer_3 = np.array( + [ + [ + [6, 6, 6, 6, 6, 7, 8, 9], + [5, 2, 2, 3, 4, 5, 6, 8], + [4, 3, 7, 7, 6, 4, 5, 7], + [5, 4, 6, 2, 3, 4, 4, 6], + [6, 5, 5, 3, 7, 5, 5, 5], + [7, 6, 6, 4, 5, 5, 4, 4], + [8, 7, 7, 5, 6, 5, 4, 3], + ] + ], + 
dtype=np.float32, + ) + + layer_4 = np.array( + [ + [ + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + ] + ], + dtype=np.float32, + ) + + layer_5 = np.array( + [ + [ + [0, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 1], + [0, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 1, 1, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + ] + ], + dtype=np.float32, + ) + + for ind, routing_layer in enumerate( + [ + layer_1, + layer_2, + layer_3, + layer_4, + layer_5, + ], + start=1, + ): + da = xr.DataArray( + routing_layer, + dims=("band", "y", "x"), + coords={"y": y_coords, "x": x_coords}, + ) + da = da.rio.write_crs("EPSG:4326") + da = da.rio.write_transform(transform) + + geotiff_fp = data_dir / f"layer_{ind}.tif" + da.rio.to_raster(geotiff_fp, driver="GTiff") + + layer_file.write_geotiff_to_file( + geotiff_fp, f"layer_{ind}", overwrite=True + ) + return layered_fp + + +def test_basic_single_route_layered_file_short_path(sample_layered_data): + """Test routing using a LayeredFile-generated cost surface""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(1, 2)], {}), + ], + save_paths=False, + ) + + assert len(output) == 1 + route = output[0] + assert route["cost"] == pytest.approx((1 + 2) / 2) + assert route["length_km"] == 1 / 1000 + assert route["cost"] == route["optimized_objective"] + + +def test_basic_single_route_layered_file(sample_layered_data): + """Test routing using a LayeredFile-generated cost surface""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(2, 6)], {}), + ((1, 2), [(2, 6)], {}), + ], + save_paths=False, + ) + + assert len(output) == 2 + first_route = output[0] + assert first_route["cost"] == pytest.approx(11.192389) + assert first_route["length_km"] == pytest.approx(0.0090710678) + assert np.isclose(first_route["cost"], first_route["optimized_objective"]) + + second_route = output[1] + assert second_route["cost"] == pytest.approx(12.278174) + assert second_route["length_km"] == pytest.approx(0.008656854) + assert np.isclose( + second_route["cost"], second_route["optimized_objective"] + ) + + +def test_multi_layer_route_layered_file(sample_layered_data): + """Test routing across multiple cost layers""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[ + {"layer_name": "layer_1"}, + {"layer_name": "layer_2"}, + ], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(2, 6)], {}), + ((1, 2), [(2, 6)], {}), + ], + save_paths=False, + ) + + assert len(output) == 2 + + first_route = output[0] + assert first_route["cost"] == pytest.approx( + 27.606602, + rel=1e-4, + ) + assert first_route["length_km"] == pytest.approx( + 0.005414, + rel=1e-4, + ) + assert first_route["layer_1_cost"] == pytest.approx( + 17.571068, + rel=1e-4, + ) + assert first_route["layer_2_cost"] == pytest.approx( + 10.035534, + rel=1e-4, + ) + assert np.isclose( + first_route["cost"], first_route["optimized_objective"], rtol=1e-6 + ) + + second_route = output[1] + assert second_route["cost"] == pytest.approx( + 25.106602, + rel=1e-4, + ) + assert second_route["length_km"] == pytest.approx( + 
0.004414, + rel=1e-4, + ) + assert second_route["layer_1_cost"] == pytest.approx( + 16.071068, + rel=1e-4, + ) + assert second_route["layer_2_cost"] == pytest.approx( + 9.035534, + rel=1e-4, + ) + assert np.isclose( + second_route["cost"], second_route["optimized_objective"], rtol=1e-6 + ) + + +def test_save_paths_returns_expected_geometry(sample_layered_data): + """Saving paths returns expected geometries for each route""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(2, 6)], {}), + ((1, 2), [(2, 6)], {}), + ], + save_paths=True, + ) + + assert isinstance(output, list) + assert len(output) == 2 + + route_geoms = [route["geometry"] for route in output] + + expected_geometries = [ + [ + (1.5, 5.5), + (1.5, 4.5), + (2.5, 3.5), + (3.5, 2.5), + (4.5, 1.5), + (5.5, 2.5), + (5.5, 3.5), + (6.5, 4.5), + ], + [ + (2.5, 5.5), + (2.5, 4.5), + (3.5, 3.5), + (3.5, 2.5), + (4.5, 1.5), + (5.5, 2.5), + (5.5, 3.5), + (6.5, 4.5), + ], + ] + + for geom, expected_coords in zip( + route_geoms, expected_geometries, strict=True + ): + assert geom.geom_type == "LineString" + assert np.allclose( + np.asarray(geom.coords), np.asarray(expected_coords) + ) + + +def test_empty_route_definitions_returns_empty_dataframe(sample_layered_data): + """Empty route definitions return an empty dataframe""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[], + save_paths=False, + ) + + assert isinstance(output, list) + assert not output + + +def test_empty_route_definitions_returns_empty_geo_dataframe( + sample_layered_data, +): + """Empty route definitions return an empty dataframe""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[], + save_paths=True, + ) + + assert isinstance(output, list) + assert not output + + +def test_multi_layer_route_with_multiplier(sample_layered_data): + """Test routing with multiple layers and a scalar multiplier""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[ + {"layer_name": "layer_1"}, + { + "layer_name": "layer_2", + "multiplier_scalar": 0.5, + }, + ], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(2, 6)], {}), + ((1, 2), [(2, 6)], {}), + ], + save_paths=False, + ) + + assert len(output) == 2 + + first_route = output[0] + assert first_route["cost"] == pytest.approx( + 22.588835, + rel=1e-4, + ) + assert first_route["length_km"] == pytest.approx( + 0.005414, + rel=1e-4, + ) + assert first_route["layer_1_cost"] == pytest.approx( + 17.571068, + rel=1e-4, + ) + assert first_route["layer_2_cost"] == pytest.approx( + 5.017767, + rel=1e-4, + ) + assert np.isclose( + first_route["cost"], + first_route["optimized_objective"], + rtol=1e-4, + ) + + second_route = output[1] + assert second_route["cost"] == pytest.approx( + 20.588835, + rel=1e-4, + ) + assert second_route["length_km"] == pytest.approx( + 0.004414, + rel=1e-4, + ) + assert second_route["layer_1_cost"] == pytest.approx( + 16.071068, + rel=1e-4, + ) + assert second_route["layer_2_cost"] == pytest.approx( + 4.517767, + rel=1e-4, + ) + assert np.isclose( + second_route["cost"], + second_route["optimized_objective"], + rtol=1e-4, + ) + + +def 
test_multi_layer_route_with_scalar_and_layer_multipliers( + sample_layered_data, +): + """Test routing when combining scalar and layer multipliers""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[ + {"layer_name": "layer_1"}, + {"layer_name": "layer_2", "multiplier_scalar": 0.5}, + { + "layer_name": "layer_3", + "multiplier_layer": "layer_4", + }, + { + "layer_name": "layer_5", + "multiplier_scalar": 2, + "multiplier_layer": "layer_4", + }, + ], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(1, 2)], {}), + ], + save_paths=False, + ) + + assert len(output) == 1 + + route = output[0] + assert route["cost"] == pytest.approx(2.0, rel=1e-4) + assert route["length_km"] == pytest.approx(0.001, rel=1e-4) + assert route["layer_1_cost"] == pytest.approx(1.5, rel=1e-4) + assert route["layer_2_cost"] == pytest.approx(0.5, rel=1e-4) + assert route["layer_3_cost"] == pytest.approx(0.0, abs=1e-8) + assert route["layer_5_cost"] == pytest.approx(0.0, abs=1e-8) + assert route["layer_1_dist_km"] == pytest.approx(0.001, rel=1e-4) + assert route["layer_2_dist_km"] == pytest.approx(0.001, rel=1e-4) + assert route["layer_3_dist_km"] == pytest.approx(0.0, abs=1e-8) + assert route["layer_5_dist_km"] == pytest.approx(0.0, abs=1e-8) + assert np.isclose(route["cost"], route["optimized_objective"], rtol=1e-6) + + +def test_routing_with_tracked_layers(sample_layered_data): + """Tracked layers report aggregated stats alongside routing results""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + tracked_layers={ + "layer_1": "mean", + "layer_2": "max", + "layer_3": "min", + }, + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(1, 2)], {}), + ], + save_paths=False, + ) + + assert len(output) == 1 + route = output[0] + + assert { + "layer_1_mean", + "layer_2_max", + "layer_3_min", + }.issubset(route.keys()) + + assert route["layer_1_mean"] == pytest.approx(1.5) + assert route["layer_2_max"] == pytest.approx(1.0) + assert route["layer_3_min"] == pytest.approx(2.0) + + +def test_start_point_on_barrier_returns_no_route( + sample_layered_data, assert_message_was_logged +): + """If the start point is on a barrier (cost <= 0) no route is returned""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + # (0, 3) in layer_1 is 0 -> treated as barrier + output = find_all_routes( + scenario, + route_definitions=[ + ((0, 3), [(2, 6)], {}), + ], + save_paths=False, + ) + assert_message_was_logged( + "Start idx (0, 3) does not have a valid cost: -1.00 (must be > 0)!", + "ERROR", + ) + + assert isinstance(output, list) + assert not output + + +def test_some_endpoints_include_barriers_but_one_valid(sample_layered_data): + """If some end points <=0 but at least one is valid, route is found""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + # include one barrier end (0,3) and one valid end (2,6) + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(0, 3), (2, 6)], {}), + ], + save_paths=False, + ) + + assert len(output) == 1 + # At least one valid endpoint must be reached and cost must be positive. 
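+    # (0, 3) is a zero-cost barrier cell in layer_1, so the solver
+    # can only terminate the route at the valid endpoint (2, 6).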
+ route = output[0] + assert route["cost"] > 0 + + end_row = int(route["end_row"]) + end_col = int(route["end_col"]) + assert (end_row, end_col) == (2, 6) + + +def test_all_endpoints_are_barriers_returns_no_route( + sample_layered_data, assert_message_was_logged +): + """If all end points are barriers, no route is returned""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(0, 3), (0, 7)], {}), + ], + save_paths=False, + ) + assert_message_was_logged( + "None of the end idx [(0, 3), (0, 7)] have a valid cost (must be > 0)", + "ERROR", + ) + + assert isinstance(output, list) + assert not output + + +def test_routing_scenario_repr_contains_fields(sample_layered_data): + """RoutingScenario repr surfaces configured layer metadata""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + friction_layers=[{"layer_name": "layer_2"}], + cost_multiplier_layer="layer_3", + cost_multiplier_scalar=1.5, + ) + + representation = repr(scenario) + + assert "layer_1" in representation + assert "layer_2" in representation + assert "cost_multiplier_scalar: 1.5" in representation + + +def test_missing_cost_layer_raises_key_error(sample_layered_data): + """Missing layers surface a revrtKeyError during build""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "not_there"}], + ) + + with pytest.raises( + revrtKeyError, match="Did not find layer 'not_there' in cost file" + ): + find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(1, 2)], {}), + ], + save_paths=False, + ) + + +def test_cost_multiplier_layer_and_scalar_applied(sample_layered_data): + """Cost multipliers scale base costs before routing aggregation""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + cost_multiplier_layer="layer_3", + cost_multiplier_scalar=2.0, + ) + + routing_layers = RoutingLayers(scenario).build() + try: + cost_val = routing_layers.cost.isel(y=1, x=1).compute().item() + layer_one = ( + routing_layers._layer_fh["layer_1"] + .isel(band=0, y=1, x=1) + .compute() + .item() + ) + layer_three = ( + routing_layers._layer_fh["layer_3"] + .isel(band=0, y=1, x=1) + .compute() + .item() + ) + expected = layer_one * layer_three * scenario.cost_multiplier_scalar + + assert cost_val == pytest.approx(expected) + finally: + routing_layers.close() + + +def test_length_invariant_layer_costs_ignore_path_length( + sample_layered_data, +): + """Length invariant cost layers ignore per-cell distances""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[ + {"layer_name": "layer_1"}, + {"layer_name": "layer_2", "is_invariant": True}, + ], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + route = [(1, 1), (1, 2)] + result = RouteResult( + routing_layers, + route, + optimized_objective=0.0, + ).build() + + layer_two = ( + routing_layers._layer_fh["layer_2"].isel(band=0).compute().values + ) + expected = sum(layer_two[row, col] for row, col in route) + + assert result["layer_2_cost"] == pytest.approx(expected) + finally: + routing_layers.close() + + +def test_soft_barrier_setting_controls_barrier_value(sample_layered_data): + """Soft barriers convert impassable cells to large positive costs""" + + hard_scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": 
"layer_1"}], + use_hard_barrier=True, + ) + hard_layers = RoutingLayers(hard_scenario).build() + try: + hard_value = ( + hard_layers.final_routing_layer.isel(y=0, x=3).compute().item() + ) + finally: + hard_layers.close() + + soft_scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + use_hard_barrier=False, + ) + soft_layers = RoutingLayers(soft_scenario).build() + try: + soft_value = ( + soft_layers.final_routing_layer.isel(y=0, x=3).compute().item() + ) + assert hard_value == -1 + assert soft_value > 0 + assert soft_value > abs(hard_value) + finally: + soft_layers.close() + + +def test_tracked_layers_invalid_configs_warn( + sample_layered_data, assert_message_was_logged +): + """Tracked layer config issues emit revrtWarning messages""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + tracked_layers={ + "layer_1": "does_not_exist", + "missing_layer": "mean", + }, + ) + + with pytest.warns(revrtWarning) as warning_record: + routing_layers = RoutingLayers(scenario).build() + + assert_message_was_logged("Did not find layer", "WARNING") + assert_message_was_logged("Did not find method", "WARNING") + + try: + assert len(warning_record) == 2 + finally: + routing_layers.close() + + +def test_friction_layers_and_lcp_agg_costs(sample_layered_data): + """Friction layers may include cost stack and tracked layer toggles""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[ + {"layer_name": "layer_1", "include_in_report": False}, + ], + friction_layers=[ + { + "layer_name": "layer_2", + "multiplier_scalar": 0.5, + "include_in_report": True, + }, + { + "layer_name": LCP_AGG_COST_LAYER_NAME, + "multiplier_scalar": 0.1, + }, + ], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + tracked_names = {layer.name for layer in routing_layers.tracked_layers} + assert "layer_1" not in tracked_names + assert "layer_2" in tracked_names + + base_value = routing_layers.cost.isel(y=1, x=1).compute().item() + final_value = ( + routing_layers.final_routing_layer.isel(y=1, x=1).compute().item() + ) + + assert final_value > base_value + finally: + routing_layers.close() + + +def test_route_result_build_warns_on_attr_mismatch( + sample_layered_data, assert_message_was_logged +): + """RouteResult build warns when provided attrs contradict results""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + route = [(1, 1), (1, 2)] + with pytest.warns(revrtWarning): + result = RouteResult( + routing_layers, + route, + optimized_objective=0.0, + add_geom=True, + attrs={"start_row": 0}, + ).build() + + assert_message_was_logged("does not match", "WARNING") + + assert result["geometry"].geom_type == "LineString" + assert result["start_row"] == 0 + finally: + routing_layers.close() + + +def test_route_result_geom_returns_point_for_single_cell(sample_layered_data): + """RouteResult.geom returns a Point geometry for single-cell routes""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + route = [(1, 1)] + result = RouteResult( + routing_layers, + route, + optimized_objective=0.0, + ) + + assert result.geom.geom_type == "Point" + finally: + routing_layers.close() + + +def test_characterized_layer_length_metric_uses_positive_mask( + 
sample_layered_data, +): + """CharacterizedLayer uses positive-value mask when summing lengths""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + layer = next( + tracked + for tracked in routing_layers.tracked_layers + if tracked.name == "layer_1" + ) + route = [(1, 1), (1, 2), (2, 3)] + metrics = layer.compute(route, abs(routing_layers.transform.a)) + + assert metrics["layer_1_dist_km"] >= 0 + finally: + routing_layers.close() + + +def test_route_result_cached_properties_reuse_computed_values( + sample_layered_data, +): + """RouteResult caches per-route lengths after first computation""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + route = [(1, 1), (1, 2), (2, 3)] + result = RouteResult( + routing_layers, + route, + optimized_objective=0.0, + ) + + first_length = result.total_path_length + assert isinstance(first_length, float) + second_length = result.total_path_length + assert second_length == first_length + + first_lens = result._lens + assert np.allclose(result._lens, first_lens) + finally: + routing_layers.close() + + +def test_route_result_cost_property_returns_value(sample_layered_data): + """RouteResult.cost multiplies cell costs by cached travel lengths""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario).build() + try: + route = [(1, 1), (1, 2), (2, 3)] + result = RouteResult( + routing_layers, + route, + optimized_objective=0.0, + ) + + assert result.cost > 0 + finally: + routing_layers.close() + + +def test_characterized_layer_total_length_computation(sample_layered_data): + """CharacterizedLayer computes length-weighted costs for eager data""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + routing_layers = RoutingLayers(scenario, chunks=None).build() + try: + layer = next( + tracked + for tracked in routing_layers.tracked_layers + if tracked.name == "layer_1" + ) + route = [(1, 1), (1, 2), (2, 3)] + metrics = layer.compute(route, abs(routing_layers.transform.a)) + + assert metrics["layer_1_cost"] > 0 + assert metrics["layer_1_dist_km"] >= 0 + finally: + routing_layers.close() + + +def test_find_paths_exception_yields_no_routes( + sample_layered_data, monkeypatch +): + """find_paths failures propagate as revrtLeastCostPathNotFoundError""" + + scenario = RoutingScenario( + cost_fpath=sample_layered_data, + cost_layers=[{"layer_name": "layer_1"}], + ) + + def boom(**_): + msg = "boom" + raise RuntimeError(msg) + + monkeypatch.setattr("revrt.routing.point_to_many.find_paths", boom) + + output = find_all_routes( + scenario, + route_definitions=[ + ((1, 1), [(1, 2)], {}), + ], + save_paths=False, + ) + + assert output == [] + + routing_layers = RoutingLayers(scenario).build() + try: + with pytest.raises( + revrtLeastCostPathNotFoundError, match="Unable to find path" + ): + point_to_many._compute_valid_path( + scenario, + routing_layers, + (1, 1), + [(1, 2)], + ) + finally: + routing_layers.close() + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/routing/test_routing_utilities.py b/tests/python/unit/routing/test_routing_utilities.py new file mode 100644 index 
00000000..20123157 --- /dev/null +++ b/tests/python/unit/routing/test_routing_utilities.py @@ -0,0 +1,215 @@ +"""reVrt tests for routing utilities""" + +from pathlib import Path +import warnings + +import numpy as np +import pandas as pd +import pytest +from rasterio.transform import from_origin, xy + +from revrt.routing import utilities as routing_utils +from revrt.warn import revrtWarning + + +@pytest.fixture +def cost_grid(): + """Simple grid metadata for routing tests""" + + height, width = (4, 5) + cell_size = 1.0 + transform = from_origin(0.0, float(height), cell_size, cell_size) + return "EPSG:4326", transform, (height, width) + + +def test_transform_lat_lon_to_row_col_expected_indices(cost_grid): + """Map lat/lon pairs to expected raster indices""" + + crs, transform, _ = cost_grid + lon_a, lat_a = xy(transform, 0, 0, offset="center") + lon_b, lat_b = xy(transform, 3, 4, offset="center") + + row, col = routing_utils._transform_lat_lon_to_row_col( + transform, crs, np.array([lat_a, lat_b]), np.array([lon_a, lon_b]) + ) + + assert isinstance(row, np.ndarray) + assert isinstance(col, np.ndarray) + np.testing.assert_array_equal(row, np.array([0, 3])) + np.testing.assert_array_equal(col, np.array([0, 4])) + + +def test_get_start_end_point_cost_indices_adds_expected_columns(cost_grid): + """Populate start/end index columns from coordinates""" + + crs, transform, _ = cost_grid + lon_start_a, lat_start_a = xy(transform, 0, 0, offset="center") + lon_start_b, lat_start_b = xy(transform, 1, 2, offset="center") + lon_end_a, lat_end_a = xy(transform, 2, 3, offset="center") + lon_end_b, lat_end_b = xy(transform, 3, 4, offset="center") + + route_points = pd.DataFrame( + { + "start_lat": [str(lat_start_a), str(lat_start_b)], + "start_lon": [str(lon_start_a), str(lon_start_b)], + "end_lat": [str(lat_end_a), str(lat_end_b)], + "end_lon": [str(lon_end_a), str(lon_end_b)], + } + ) + + updated = routing_utils._get_start_end_point_cost_indices( + route_points, crs, transform + ) + + assert updated is route_points + np.testing.assert_array_equal( + route_points["start_row"].to_numpy(), np.array([0, 1]) + ) + np.testing.assert_array_equal( + route_points["start_col"].to_numpy(), np.array([0, 2]) + ) + np.testing.assert_array_equal( + route_points["end_row"].to_numpy(), np.array([2, 3]) + ) + np.testing.assert_array_equal( + route_points["end_col"].to_numpy(), np.array([3, 4]) + ) + + +def test_filter_points_outside_cost_domain_no_warning(cost_grid): + """Do not warn when all routes remain in bounds""" + + _, _, shape = cost_grid + route_points = pd.DataFrame( + { + "start_row": [0, 1], + "start_col": [0, 2], + "end_row": [2, 3], + "end_col": [3, 4], + } + ) + expected = route_points.copy(deep=True) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + filtered = routing_utils._filter_points_outside_cost_domain( + route_points, shape + ) + + assert not caught + pd.testing.assert_frame_equal(filtered, expected) + + +def test_filter_points_outside_cost_domain_warns_and_drops(cost_grid): + """Warn and drop routes that fall outside bounds""" + + _, _, shape = cost_grid + route_points = pd.DataFrame( + { + "start_row": [0, -1], + "start_col": [0, 1], + "end_row": [1, 5], + "end_col": [1, 6], + "label": ["valid", "invalid"], + } + ) + + with pytest.warns(revrtWarning) as record: + filtered = routing_utils._filter_points_outside_cost_domain( + route_points, shape + ) + + assert len(record) == 1 + assert "outside of the cost exclusion domain" in str(record[0].message) + 
assert filtered.index.tolist() == [0] + assert filtered["label"].tolist() == ["valid"] + + +def test_map_to_costs_maps_and_preserves_valid_routes(cost_grid): + """End-to-end mapping keeps valid route intact""" + + crs, transform, shape = cost_grid + lon_start, lat_start = xy(transform, 0, 0, offset="center") + lon_end, lat_end = xy(transform, 2, 3, offset="center") + + route_points = pd.DataFrame( + { + "start_lat": [lat_start], + "start_lon": [lon_start], + "end_lat": [lat_end], + "end_lon": [lon_end], + } + ) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + mapped = routing_utils.map_to_costs( + route_points.copy(deep=True), crs, transform, shape + ) + + revrt_warnings = [w for w in caught if isinstance(w.message, revrtWarning)] + assert not revrt_warnings + + np.testing.assert_array_equal( + mapped["start_row"].to_numpy(), np.array([0]) + ) + np.testing.assert_array_equal( + mapped["start_col"].to_numpy(), np.array([0]) + ) + np.testing.assert_array_equal( + mapped["end_row"].to_numpy(), + np.array([2]), + ) + np.testing.assert_array_equal( + mapped["end_col"].to_numpy(), + np.array([3]), + ) + + +def test_map_to_costs_filters_routes_outside_cost_domain(cost_grid): + """Remove routes that leave the cost domain""" + + crs, transform, shape = cost_grid + lon_valid_start, lat_valid_start = xy(transform, 0, 0, offset="center") + lon_valid_end, lat_valid_end = xy(transform, 1, 1, offset="center") + + # Column beyond grid width keeps latitude inside range but forces filtering + lon_outside = lon_valid_start + shape[1] + lat_outside = lat_valid_start + + route_points = pd.DataFrame( + { + "start_lat": [lat_valid_start, lat_outside], + "start_lon": [lon_valid_start, lon_outside], + "end_lat": [lat_valid_end, lat_outside], + "end_lon": [lon_valid_end, lon_outside], + } + ) + + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + mapped = routing_utils.map_to_costs( + route_points, crs, transform, shape + ) + + revrt_warnings = [w for w in caught if isinstance(w.message, revrtWarning)] + assert len(revrt_warnings) == 1 + np.testing.assert_array_equal( + mapped["start_row"].to_numpy(), np.array([0]) + ) + np.testing.assert_array_equal( + mapped["start_col"].to_numpy(), np.array([0]) + ) + np.testing.assert_array_equal( + mapped["end_row"].to_numpy(), + np.array([1]), + ) + np.testing.assert_array_equal( + mapped["end_col"].to_numpy(), + np.array([1]), + ) + assert mapped.index.tolist() == [0] + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/test_exceptions.py b/tests/python/unit/test_exceptions.py index dc1d6df9..c4dc443c 100644 --- a/tests/python/unit/test_exceptions.py +++ b/tests/python/unit/test_exceptions.py @@ -12,8 +12,8 @@ revrtFileNotFoundError, revrtInvalidStartCostError, revrtKeyError, - revrtNotImplementedError, revrtLeastCostPathNotFoundError, + revrtNotImplementedError, revrtProfileCheckError, revrtRuntimeError, revrtTypeError, @@ -25,7 +25,7 @@ def test_exceptions_log_error(caplog, assert_message_was_logged): - """Test that a raised exception logs message, if any.""" + """Test that a raised exception logs message, if any""" try: raise revrtError # noqa: TRY301 @@ -105,12 +105,13 @@ def test_exceptions_log_uncaught_error(assert_message_was_logged): def test_catching_error_by_type( raise_type, catch_types, assert_message_was_logged ): - """Test that gaps exceptions are caught correctly.""" + """Test that gaps exceptions are 
caught correctly""" for catch_type in catch_types: with pytest.raises(catch_type) as exc_info: raise raise_type(BASIC_ERROR_MESSAGE) assert BASIC_ERROR_MESSAGE in str(exc_info.value) + assert_message_was_logged(raise_type.__name__, "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") diff --git a/tests/python/unit/test_warn.py b/tests/python/unit/test_warn.py index fa2f0a55..c307c45a 100644 --- a/tests/python/unit/test_warn.py +++ b/tests/python/unit/test_warn.py @@ -18,6 +18,7 @@ def test_warnings_log_message(warning_class, assert_message_was_logged): """Test that a raised warning logs message, if any""" warn(BASIC_WARNING_MESSAGE, warning_class) + assert_message_was_logged(warning_class.__name__, "WARNING") assert_message_was_logged(BASIC_WARNING_MESSAGE, "WARNING")