diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 01f7df4e..1c13b28a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -2,7 +2,8 @@ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 - +sphinx: + configuration: docs/conf.py build: os: ubuntu-22.04 tools: diff --git a/environment.yml b/environment.yml index 994ab6cf..4a94c84d 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - pydantic >=2, <3 - pip - pyyaml + - copernicusmarine >= 2 # linting - pre-commit diff --git a/pyproject.toml b/pyproject.toml index f270fb35..b80b42ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "numpy >=1, < 2", "pydantic >=2, <3", "PyYAML", + "copernicusmarine >= 2", ] [project.urls] @@ -104,6 +105,8 @@ ignore = [ "D212", # one-blank-line-before-class "D203", + # First line of docstring should be in imperative mood + "D401", # TODO: Remove later "D100", "D103" diff --git a/scripts/download_data.py b/scripts/download_data.py deleted file mode 100644 index d3034ee7..00000000 --- a/scripts/download_data.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Download data required to run expeditions. - -This is a very crude script, here just as long as we do not properly incorporate it into the library. 
-""" - -import datetime - -import copernicusmarine - -if __name__ == "__main__": - datadir = "input_data" - username = input("username: ") - password = input("password: ") - - copernicusmarine.subset( - dataset_id="cmems_mod_glo_phy_my_0.083deg_static", - force_dataset_part="bathy", - variables=["deptho"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename="bathymetry.nc", - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "default_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "default_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "default_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "drifter_uv.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - 
"output_filename": "drifter_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=0.49402499198913574, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "argo_float_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "argo_float_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "argo_float_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) diff --git a/src/virtualship/cli/_creds.py b/src/virtualship/cli/_creds.py new file mode 100644 index 00000000..cee1c69f --- /dev/null +++ b/src/virtualship/cli/_creds.py @@ -0,0 +1,107 @@ +from __future__ import 
annotations + +from pathlib import Path + +import click +import pydantic +import yaml + +CREDENTIALS_FILE = "credentials.yaml" + + +class CredentialFileError(Exception): + """Exception raised for errors in the input file format.""" + + pass + + +class Credentials(pydantic.BaseModel): + """Credentials to be used in `virtualship fetch` command.""" + + COPERNICUS_USERNAME: str + COPERNICUS_PASSWORD: str + + @classmethod + def from_yaml(cls, path: str | Path) -> Credentials: + """ + Load credentials from a yaml file. + + :param path: Path to the file to load from. + :returns Credentials: The credentials. + """ + with open(path) as file: + data = yaml.safe_load(file) + + if not isinstance(data, dict): + raise CredentialFileError("Credential file is of an invalid format.") + + return cls(**data) + + def dump(self) -> str: + """ + Dump credentials to a yaml string. + + :param creds: The credentials to dump. + :returns str: The yaml string. + """ + return yaml.safe_dump(self.model_dump()) + + def to_yaml(self, path: str | Path) -> None: + """ + Write credentials to a yaml file. + + :param path: Path to the file to write to. + """ + with open(path, "w") as file: + file.write(self.dump()) + + +def get_dummy_credentials_yaml() -> str: + return ( + Credentials( + COPERNICUS_USERNAME="my_username", COPERNICUS_PASSWORD="my_password" + ) + .dump() + .strip() + ) + + +def get_credentials_flow( + username: str | None, password: str | None, creds_path: Path +) -> tuple[str, str]: + """ + Execute flow of getting credentials for use in the `fetch` command. + + - If username and password are provided via CLI, use them (ignore the credentials file if exists). + - If username and password are not provided, try to load them from the credentials file. + - If no credentials are provided, print a message on how to make credentials file and prompt for credentials. + + :param username: The username provided via CLI. + :param password: The password provided via CLI. 
+ :param creds_path: The path to the credentials file. + """ + if username and password: + if creds_path.exists(): + click.echo( + f"Credentials file exists at '{creds_path}', but username and password are already provided. Ignoring credentials file." + ) + return username, password + + try: + creds = Credentials.from_yaml(creds_path) + click.echo(f"Loaded credentials from '{creds_path}'.") + return creds.COPERNICUS_USERNAME, creds.COPERNICUS_PASSWORD + except FileNotFoundError: + msg = f"""Credentials not provided. Credentials can be obtained from https://data.marine.copernicus.eu/register. Either pass in via `--username` and `--password` arguments, or via config file at '{creds_path}'. Config file should be YAML along following format: +### {creds_path} + +{get_dummy_credentials_yaml().strip()} + +### + +Prompting for credentials instead... +""" + click.echo(msg) + username = click.prompt("username") + password = click.prompt("password", hide_input=True) + return username, password diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py new file mode 100644 index 00000000..cfa52a61 --- /dev/null +++ b/src/virtualship/cli/_fetch.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import hashlib +from datetime import datetime +from pathlib import Path +from typing import TYPE_CHECKING + +import click +from pydantic import BaseModel + +from virtualship.utils import _dump_yaml, _generic_load_yaml + +if TYPE_CHECKING: + from virtualship.expedition.space_time_region import SpaceTimeRegion + +DOWNLOAD_METADATA = "download_metadata.yaml" + + +def _hash(s: str, *, length: int) -> str: + """Create a hash of a string.""" + assert length % 2 == 0, "Length must be even." 
+ half_length = length // 2 + + return hashlib.shake_128(s.encode("utf-8")).hexdigest(half_length) + + +def create_hash(s: str) -> str: + """Create an 8 digit hash of a string.""" + return _hash(s, length=8) + + +def hash_model(model: BaseModel, salt: int = 0) -> str: + """ + Hash a Pydantic model. + + :param model: The model to hash. + :param salt: Salt to add to the hash. + :returns: The hash. + """ + return create_hash(model.model_dump_json() + str(salt)) + + +def get_space_time_region_hash(space_time_region: SpaceTimeRegion) -> str: + # Increment salt in the event of breaking data fetching changes with prior versions + # of virtualship where you want to force new hashes (i.e., new data downloads) + salt = 0 + return hash_model(space_time_region, salt=salt) + + +def filename_to_hash(filename: str) -> str: + """Extract hash from filename of the format YYYYMMDD_HHMMSS_{hash}.""" + parts = filename.split("_") + if len(parts) != 3: + raise ValueError( + f"Filename '{filename}' must have 3 parts delimited with underscores." + ) + return parts[-1] + + +def hash_to_filename(hash: str) -> str: + """Return a filename of the format YYYYMMDD_HHMMSS_{hash}.""" + if "_" in hash: + raise ValueError("Hash cannot contain underscores.") + return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash}" + + +class IncompleteDownloadError(Exception): + """Exception raised for incomplete downloads.""" + + pass + + +class DownloadMetadata(BaseModel): + """Metadata for a data download.""" + + download_complete: bool + download_date: datetime | None = None + + def to_yaml(self, file_path: str | Path) -> None: + with open(file_path, "w") as file: + _dump_yaml(self, file) + + @classmethod + def from_yaml(cls, file_path: str | Path) -> DownloadMetadata: + return _generic_load_yaml(file_path, cls) + + +def get_existing_download( + data_folder: Path, space_time_region_hash: str +) -> Path | None: + """Check if a download has already been completed.
If so, return the path for existing download.""" + for download_path in data_folder.iterdir(): + try: + hash = filename_to_hash(download_path.name) + except ValueError: + click.echo( + f"Skipping {download_path.name} as it is not a valid download folder name." + ) + continue + + if hash == space_time_region_hash: + assert_complete_download(download_path) + return download_path + + return None + + +def assert_complete_download(download_path: Path) -> None: + download_metadata = download_path / DOWNLOAD_METADATA + try: + with open(download_metadata) as file: + assert DownloadMetadata.from_yaml(file).download_complete + except (FileNotFoundError, AssertionError) as e: + raise IncompleteDownloadError( + f"Download at {download_path} was found, but looks to be incomplete " + f"(likely due to interrupting it mid-download). Please delete this folder and retry." + ) from e + return + + +def complete_download(download_path: Path) -> None: + """Mark a download as complete.""" + download_metadata = download_path / DOWNLOAD_METADATA + metadata = DownloadMetadata(download_complete=True, download_date=datetime.now()) + metadata.to_yaml(download_metadata) + return diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 9f218e6f..09cb267a 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,21 +1,31 @@ +import shutil from pathlib import Path import click +import copernicusmarine +from copernicusmarine.core_functions.credentials_utils import InvalidUsernameOrPassword +import virtualship.cli._creds as creds from virtualship import utils -from virtualship.expedition.do_expedition import do_expedition +from virtualship.cli._fetch import ( + DOWNLOAD_METADATA, + DownloadMetadata, + complete_download, + get_existing_download, + get_space_time_region_hash, + hash_to_filename, +) +from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG -@click.command(
- help="Initialize a directory for a new expedition, with an example configuration." -) +@click.command() @click.argument( "path", type=click.Path(exists=False, file_okay=False, dir_okay=True), ) def init(path): - """Entrypoint for the tool.""" + """Initialize a directory for a new expedition, with an example schedule and ship config files.""" path = Path(path) path.mkdir(exist_ok=True) @@ -38,23 +48,137 @@ def init(path): click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.") -@click.command( - help="Download the relevant data specified in an expedition directory (i.e., by the expedition config)." -) +@click.command() @click.argument( "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) -def fetch(path): - """Entrypoint for the tool.""" - raise NotImplementedError("Not implemented yet.") +@click.option( + "--username", + type=str, + default=None, + help="Copernicus Marine username.", +) +@click.option( + "--password", + type=str, + default=None, + help="Copernicus Marine password.", +) +def fetch(path: str | Path, username: str | None, password: str | None) -> None: + """ + Download input data for an expedition. + + Entrypoint for the tool to download data based on space-time region provided in the + schedule file. Data is downloaded from Copernicus Marine, credentials for which can be + obtained via registration: https://data.marine.copernicus.eu/register . Credentials can + be provided on prompt, via command line arguments, or via a YAML config file. Run + `virtualship fetch` on a expedition for more info. + """ + if sum([username is None, password is None]) == 1: + raise ValueError("Both username and password must be provided when using CLI.") + + path = Path(path) + + data_folder = path / "data" + data_folder.mkdir(exist_ok=True) + + schedule = _get_schedule(path) + + if schedule.space_time_region is None: + raise ValueError( + "space_time_region not found in schedule, please define it to fetch the data." 
+ ) + + space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) + + existing_download = get_existing_download(data_folder, space_time_region_hash) + if existing_download is not None: + click.echo( + f"Data download for space-time region already completed ('{existing_download}')." + ) + return + + creds_path = path / creds.CREDENTIALS_FILE + username, password = creds.get_credentials_flow(username, password, creds_path) + + # Extract space_time_region details from the schedule + spatial_range = schedule.space_time_region.spatial_range + time_range = schedule.space_time_region.time_range + start_datetime = time_range.start_time + end_datetime = time_range.end_time + # Create download folder and set download metadata + download_folder = data_folder / hash_to_filename(space_time_region_hash) + download_folder.mkdir() + DownloadMetadata(download_complete=False).to_yaml( + download_folder / DOWNLOAD_METADATA + ) + shutil.copyfile(path / SCHEDULE, download_folder / SCHEDULE) -@click.command(help="Do the expedition.") + # Define all datasets to download, including bathymetry + download_dict = { + "Bathymetry": { + "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", + "variables": ["deptho"], + "output_filename": "bathymetry.nc", + }, + "UVdata": { + "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "variables": ["uo", "vo"], + "output_filename": "default_uv.nc", + }, + "Sdata": { + "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "variables": ["so"], + "output_filename": "default_s.nc", + }, + "Tdata": { + "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "variables": ["thetao"], + "output_filename": "default_t.nc", + }, + } + + # Iterate over all datasets and download each based on space_time_region + try: + for dataset in download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.minimum_longitude, + 
maximum_longitude=spatial_range.maximum_longitude, + minimum_latitude=spatial_range.minimum_latitude, + maximum_latitude=spatial_range.maximum_latitude, + start_datetime=start_datetime, + end_datetime=end_datetime, + minimum_depth=abs(spatial_range.minimum_depth), + maximum_depth=abs(spatial_range.maximum_depth), + output_filename=dataset["output_filename"], + output_directory=download_folder, + username=username, + password=password, + force_download=True, + overwrite=True, + ) + except InvalidUsernameOrPassword as e: + shutil.rmtree(download_folder) + raise e + + complete_download(download_folder) + click.echo("Data download based on space-time region completed.") + + +@click.command() @click.argument( "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) +@click.option( + "--username", + prompt=True, + type=str, +) def run(path): - """Entrypoint for the tool.""" + """Run the expedition.""" do_expedition(Path(path)) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index c755d33b..051ef50d 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -11,6 +11,7 @@ ShipConfig, ShipUnderwaterSTConfig, ) +from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint __all__ = [ @@ -22,6 +23,7 @@ "Schedule", "ShipConfig", "ShipUnderwaterSTConfig", + "SpaceTimeRegion", "Waypoint", "do_expedition", "instruments", diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 1efd92f9..1846ea14 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -6,6 +6,7 @@ import pyproj +from virtualship.cli._fetch import get_existing_download, get_space_time_region_hash from virtualship.utils import CHECKPOINT, SCHEDULE, SHIP_CONFIG from .checkpoint import Checkpoint @@ -18,24 +19,18 @@ from .verify_schedule import verify_schedule -def 
do_expedition(expedition_dir: str | Path) -> None: +def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> None: """ Perform an expedition, providing terminal feedback and file output. :param expedition_dir: The base directory for the expedition. + :param input_data: Input data folder (override used for testing). """ if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) - # load ship configuration ship_config = _get_ship_config(expedition_dir) - if ship_config is None: - return - - # load schedule schedule = _get_schedule(expedition_dir) - if schedule is None: - return # load last checkpoint checkpoint = _load_checkpoint(expedition_dir) @@ -57,7 +52,10 @@ def do_expedition(expedition_dir: str | Path) -> None: # load fieldsets input_data = _load_input_data( - expedition_dir=expedition_dir, ship_config=ship_config + expedition_dir=expedition_dir, + schedule=schedule, + ship_config=ship_config, + input_data=input_data, ) # verify schedule makes sense @@ -114,14 +112,38 @@ def _get_ship_config(expedition_dir: Path) -> ShipConfig | None: file_path = expedition_dir.joinpath(SHIP_CONFIG) try: return ShipConfig.from_yaml(file_path) - except FileNotFoundError: - print(f'Schedule not found. Save it to "{file_path}".') - return None - + except FileNotFoundError as e: + raise FileNotFoundError( + f'Ship config not found. Save it to "{file_path}".' + ) from e + + +def _load_input_data( + expedition_dir: Path, + schedule: Schedule, + ship_config: ShipConfig, + input_data: Path | None, +) -> InputData: + """ + Load the input data. + + :param expedition_dir: Directory of the expedition. + :type expedition_dir: Path + :param schedule: Schedule object. + :type schedule: Schedule + :param ship_config: Ship configuration. + :type ship_config: ShipConfig + :param input_data: Folder containing input data. + :type input_data: Path | None + :return: InputData object.
+ :rtype: InputData + """ + if input_data is None: + space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) + input_data = get_existing_download(expedition_dir, space_time_region_hash) -def _load_input_data(expedition_dir: Path, ship_config: ShipConfig) -> InputData: return InputData.load( - directory=expedition_dir.joinpath("input_data"), + directory=input_data, load_adcp=ship_config.adcp_config is not None, load_argo_float=ship_config.argo_float_config is not None, load_ctd=ship_config.ctd_config is not None, @@ -130,13 +152,13 @@ def _load_input_data(expedition_dir: Path, ship_config: ShipConfig) -> InputData ) -def _get_schedule(expedition_dir: Path) -> Schedule | None: +def _get_schedule(expedition_dir: Path) -> Schedule: + """Load Schedule object from yaml config file in `expedition_dir`.""" file_path = expedition_dir.joinpath(SCHEDULE) try: return Schedule.from_yaml(file_path) - except FileNotFoundError: - print(f'Schedule not found. Save it to "{file_path}".') - return None + except FileNotFoundError as e: + raise FileNotFoundError(f'Schedule not found. Save it to "{file_path}".') from e def _load_checkpoint(expedition_dir: Path) -> Checkpoint | None: diff --git a/src/virtualship/expedition/input_data.py b/src/virtualship/expedition/input_data.py index 7af9ef72..de56642c 100644 --- a/src/virtualship/expedition/input_data.py +++ b/src/virtualship/expedition/input_data.py @@ -33,7 +33,7 @@ def load( For now this function makes a lot of assumption about file location and contents. - :param directory: Base directory of the expedition. + :param directory: Input data directory. :param load_adcp: Whether to load the ADCP fieldset. :param load_argo_float: Whether to load the argo float fieldset. :param load_ctd: Whether to load the CTD fieldset. 
@@ -73,7 +73,7 @@ def load( ) @classmethod - def _load_default_fieldset(cls, directory: str | Path) -> FieldSet: + def _load_default_fieldset(cls, directory: Path) -> FieldSet: filenames = { "U": directory.joinpath("default_uv.nc"), "V": directory.joinpath("default_uv.nc"), @@ -116,7 +116,7 @@ def _load_default_fieldset(cls, directory: str | Path) -> FieldSet: return fieldset @classmethod - def _load_drifter_fieldset(cls, directory: str | Path) -> FieldSet: + def _load_drifter_fieldset(cls, directory: Path) -> FieldSet: filenames = { "U": directory.joinpath("drifter_uv.nc"), "V": directory.joinpath("drifter_uv.nc"), diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index d3865483..5e41e00e 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -7,6 +7,7 @@ import pydantic import yaml +from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint @@ -14,6 +15,7 @@ class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] + space_time_region: SpaceTimeRegion | None = None model_config = pydantic.ConfigDict(extra="forbid") diff --git a/src/virtualship/expedition/space_time_region.py b/src/virtualship/expedition/space_time_region.py new file mode 100644 index 00000000..37aaee08 --- /dev/null +++ b/src/virtualship/expedition/space_time_region.py @@ -0,0 +1,59 @@ +"""SpaceTimeRegion class.""" + +from datetime import datetime +from typing import Annotated + +from pydantic import BaseModel, Field, model_validator +from typing_extensions import Self + +Longitude = Annotated[float, Field(..., ge=-180, le=180)] +Latitude = Annotated[float, Field(..., ge=-90, le=90)] +Depth = float # TODO: insert a minimum depth here? 
e.g., `Annotated[float, Field(..., ge=0)]` + + +class SpatialRange(BaseModel): + """Defines geographic boundaries.""" + + minimum_longitude: Longitude + maximum_longitude: Longitude + minimum_latitude: Latitude + maximum_latitude: Latitude + minimum_depth: Depth | None = None + maximum_depth: Depth | None = None + + @model_validator(mode="after") + def _check_lon_lat_domain(self) -> Self: + if not self.minimum_longitude < self.maximum_longitude: + raise ValueError("minimum_longitude must be less than maximum_longitude") + if not self.minimum_latitude < self.maximum_latitude: + raise ValueError("minimum_latitude must be less than maximum_latitude") + + if sum([self.minimum_depth is None, self.maximum_depth is None]) == 1: + raise ValueError("Both minimum_depth and maximum_depth must be provided.") + + if self.minimum_depth is None: + return self + + if not self.minimum_depth < self.maximum_depth: + raise ValueError("minimum_depth must be less than maximum_depth") + return self + + +class TimeRange(BaseModel): + """Defines the temporal boundaries for a space-time region.""" + + start_time: datetime + end_time: datetime + + @model_validator(mode="after") + def _check_time_range(self) -> Self: + if not self.start_time < self.end_time: + raise ValueError("start_time must be before end_time") + return self + + +class SpaceTimeRegion(BaseModel): + """A space-time region with spatial and temporal boundaries.""" + + spatial_range: SpatialRange + time_range: TimeRange diff --git a/src/virtualship/instruments/drifter.py b/src/virtualship/instruments/drifter.py index b4db3b9a..8dc1e1b0 100644 --- a/src/virtualship/instruments/drifter.py +++ b/src/virtualship/instruments/drifter.py @@ -46,7 +46,7 @@ def simulate_drifters( drifters: list[Drifter], outputdt: timedelta, dt: timedelta, - endtime: datetime | None, + endtime: datetime | None = None, ) -> None: """ Use Parcels to simulate a set of drifters in a fieldset.
diff --git a/src/virtualship/static/schedule.yaml b/src/virtualship/static/schedule.yaml index 0db1d2af..de17a7cc 100644 --- a/src/virtualship/static/schedule.yaml +++ b/src/virtualship/static/schedule.yaml @@ -1,3 +1,14 @@ +space_time_region: + spatial_range: + minimum_longitude: -5 + maximum_longitude: 5 + minimum_latitude: -5 + maximum_latitude: 5 + minimum_depth: 0 + maximum_depth: 1500 + time_range: + start_time: 2023-01-01 00:00:00 + end_time: 2023-02-01 00:00:00 waypoints: - instrument: CTD location: diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index a8579bd5..95d47d31 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,5 +1,9 @@ from functools import lru_cache from importlib.resources import files +from typing import TextIO + +import yaml +from pydantic import BaseModel SCHEDULE = "schedule.yaml" SHIP_CONFIG = "ship_config.yaml" @@ -21,3 +25,15 @@ def get_example_config() -> str: def get_example_schedule() -> str: """Get the example schedule file.""" return load_static_file(SCHEDULE) + + +def _dump_yaml(model: BaseModel, stream: TextIO) -> str | None: + """Dump a pydantic model to a yaml string.""" + return yaml.safe_dump( + model.model_dump(by_alias=True), stream, default_flow_style=False + ) + + +def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel: + """Load a yaml string into a pydantic model.""" + return model.model_validate(yaml.safe_load(data)) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py new file mode 100644 index 00000000..05a5fc72 --- /dev/null +++ b/tests/cli/test_cli.py @@ -0,0 +1,84 @@ +from pathlib import Path + +import pytest +from click.testing import CliRunner + +from virtualship.cli.commands import fetch, init +from virtualship.utils import SCHEDULE, SHIP_CONFIG + + +@pytest.fixture +def copernicus_subset_no_download(monkeypatch): + """Mock the download function.""" + + def fake_download(output_filename, output_directory, **_): + 
Path(output_directory).joinpath(output_filename).touch() + + monkeypatch.setattr( + "virtualship.cli.commands.copernicusmarine.subset", fake_download + ) + yield + + +@pytest.fixture +def runner(): + """An example expedition.""" + runner = CliRunner() + with runner.isolated_filesystem(): + runner.invoke(init, ["."]) + yield runner + + +def test_init(): + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(init, ["."]) + assert result.exit_code == 0 + config = Path(SHIP_CONFIG) + schedule = Path(SCHEDULE) + + assert config.exists() + assert schedule.exists() + + +def test_init_existing_config(): + runner = CliRunner() + with runner.isolated_filesystem(): + config = Path(SHIP_CONFIG) + config.write_text("test") + + with pytest.raises(FileExistsError): + result = runner.invoke(init, ["."]) + raise result.exception + + +def test_init_existing_schedule(): + runner = CliRunner() + with runner.isolated_filesystem(): + schedule = Path(SCHEDULE) + schedule.write_text("test") + + with pytest.raises(FileExistsError): + result = runner.invoke(init, ["."]) + raise result.exception + + +@pytest.mark.parametrize( + "fetch_args", + [ + [".", "--username", "test"], + [".", "--password", "test"], + ], +) +@pytest.mark.usefixtures("copernicus_subset_no_download") +def test_fetch_both_creds_via_cli(runner, fetch_args): + result = runner.invoke(fetch, fetch_args) + assert result.exit_code == 1 + assert "Both username and password" in result.exc_info[1].args[0] + + +@pytest.mark.usefixtures("copernicus_subset_no_download") +def test_fetch(runner): + """Test the fetch command, but mock the download.""" + result = runner.invoke(fetch, [".", "--username", "test", "--password", "test"]) + assert result.exit_code == 0 diff --git a/tests/cli/test_creds.py b/tests/cli/test_creds.py new file mode 100644 index 00000000..17ef2023 --- /dev/null +++ b/tests/cli/test_creds.py @@ -0,0 +1,66 @@ +import pydantic +import pytest + +from virtualship.cli._creds import 
CredentialFileError, Credentials + + +def test_load_credentials(tmp_file): + tmp_file.write_text( + """ + COPERNICUS_USERNAME: test_user + COPERNICUS_PASSWORD: test_password + """ + ) + + creds = Credentials.from_yaml(tmp_file) + assert creds.COPERNICUS_USERNAME == "test_user" + assert creds.COPERNICUS_PASSWORD == "test_password" + + +# parameterize with the contents of the file +@pytest.mark.parametrize( + "contents", + [ + pytest.param( + """ + INVALID_KEY: some_value + """, + id="invalid-key", + ), + pytest.param( + """ + # number not allowed, should be string (or quoted number) + USERNAME: 123 + """, + id="number-not-allowed", + ), + ], +) +def test_invalid_credentials(tmp_file, contents): + tmp_file.write_text(contents) + + with pytest.raises(pydantic.ValidationError): + Credentials.from_yaml(tmp_file) + + +def test_credentials_invalid_format(tmp_file): + tmp_file.write_text( + """ + INVALID_FORMAT_BUT_VALID_YAML + """ + ) + + with pytest.raises(CredentialFileError): + Credentials.from_yaml(tmp_file) + + +def test_rt_credentials(tmp_file): + """Test round-trip for credentials using Credentials.from_yaml() and Credentials.dump().""" + creds = Credentials( + COPERNICUS_USERNAME="test_user", COPERNICUS_PASSWORD="test_password" + ) + + creds.to_yaml(tmp_file) + creds_loaded = Credentials.from_yaml(tmp_file) + + assert creds == creds_loaded diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py new file mode 100644 index 00000000..53801e5b --- /dev/null +++ b/tests/cli/test_fetch.py @@ -0,0 +1,88 @@ +from pathlib import Path + +import pytest +from pydantic import BaseModel + +from virtualship.cli._fetch import ( + DOWNLOAD_METADATA, + DownloadMetadata, + IncompleteDownloadError, + assert_complete_download, + complete_download, + create_hash, + filename_to_hash, + get_existing_download, + hash_model, + hash_to_filename, +) + + +def test_create_hash(): + assert len(create_hash("correct-length")) == 8 + assert create_hash("same") == create_hash("same") + 
assert create_hash("unique1") != create_hash("unique2") + + +def test_hash_filename_roundtrip(): + hash_ = create_hash("test") + assert filename_to_hash(hash_to_filename(hash_)) == hash_ + + +def test_hash_model(): + class TestModel(BaseModel): + a: int + b: str + + hash_model(TestModel(a=0, b="b")) + + +def test_complete_download(tmp_path): + # Setup + DownloadMetadata(download_complete=False).to_yaml(tmp_path / DOWNLOAD_METADATA) + + complete_download(tmp_path) + + assert_complete_download(tmp_path) + + +def test_assert_complete_download_complete(tmp_path): + # Setup + DownloadMetadata(download_complete=True).to_yaml(tmp_path / DOWNLOAD_METADATA) + + assert_complete_download(tmp_path) + + +def test_assert_complete_download_incomplete(tmp_path): + # Setup + DownloadMetadata(download_complete=False).to_yaml(tmp_path / DOWNLOAD_METADATA) + + with pytest.raises(IncompleteDownloadError): + assert_complete_download(tmp_path) + + +def test_assert_complete_download_missing(tmp_path): + with pytest.raises(IncompleteDownloadError): + assert_complete_download(tmp_path) + + +@pytest.fixture +def existing_data_folder(tmp_path, monkeypatch): + # Setup + folders = [ + "YYYYMMDD_HHMMSS_hash", + "YYYYMMDD_HHMMSS_hash2", + "some-invalid-data-folder", + "YYYYMMDD_HHMMSS_hash3", + ] + data_folder = tmp_path + monkeypatch.setattr( + "virtualship.cli._fetch.assert_complete_download", lambda x: None + ) + for f in folders: + (data_folder / f).mkdir() + yield data_folder + + +def test_get_existing_download(existing_data_folder): + assert isinstance(get_existing_download(existing_data_folder, "hash"), Path) + assert get_existing_download(existing_data_folder, "missing-hash") is None diff --git a/tests/conftest.py b/tests/conftest.py index 1159768d..1b7a1de0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,13 @@ import pytest +@pytest.fixture +def tmp_file(tmp_path): + file = tmp_path / "test.txt" + file.touch() + return file + + @pytest.fixture(autouse=True) def 
test_in_working_dir(request, monkeypatch): """ diff --git a/tests/expedition/test_do_expedition.py b/tests/expedition/test_do_expedition.py index 055764af..143249ca 100644 --- a/tests/expedition/test_do_expedition.py +++ b/tests/expedition/test_do_expedition.py @@ -1,9 +1,11 @@ +from pathlib import Path + from pytest import CaptureFixture from virtualship.expedition import do_expedition def test_do_expedition(capfd: CaptureFixture) -> None: - do_expedition("expedition_dir") + do_expedition("expedition_dir", input_data=Path("expedition_dir/input_data")) out, _ = capfd.readouterr() assert "This expedition took" in out, "Expedition did not complete successfully." diff --git a/tests/instruments/test_drifter.py b/tests/instruments/test_drifter.py index b49b510b..86e313e5 100644 --- a/tests/instruments/test_drifter.py +++ b/tests/instruments/test_drifter.py @@ -1,7 +1,6 @@ """Test the simulation of drifters.""" import datetime -from datetime import timedelta import numpy as np import xarray as xr @@ -60,8 +59,8 @@ def test_simulate_drifters(tmpdir) -> None: fieldset=fieldset, out_path=out_path, drifters=drifters, - outputdt=timedelta(hours=1), - dt=timedelta(minutes=5), + outputdt=datetime.timedelta(hours=1), + dt=datetime.timedelta(minutes=5), endtime=None, ) diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index ef39ba8d..00000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,41 +0,0 @@ -from pathlib import Path - -import pytest -from click.testing import CliRunner - -from virtualship.cli.commands import init -from virtualship.utils import SCHEDULE, SHIP_CONFIG - - -def test_init(): - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(init, ["."]) - assert result.exit_code == 0 - config = Path(SHIP_CONFIG) - schedule = Path(SCHEDULE) - - assert config.exists() - assert schedule.exists() - - -def test_init_existing_config(): - runner = CliRunner() - with runner.isolated_filesystem(): - config = Path(SHIP_CONFIG) - 
config.write_text("test") - - with pytest.raises(FileExistsError): - result = runner.invoke(init, ["."]) - raise result.exception - - -def test_init_existing_schedule(): - runner = CliRunner() - with runner.isolated_filesystem(): - schedule = Path(SCHEDULE) - schedule.write_text("test") - - with pytest.raises(FileExistsError): - result = runner.invoke(init, ["."]) - raise result.exception