From 7ac5d4d7ca2f8578529082655e0dd0cf5546daec Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:00:41 -0500 Subject: [PATCH 01/66] add bbox and time range --- src/virtualship/static/schedule.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/virtualship/static/schedule.yaml b/src/virtualship/static/schedule.yaml index 0db1d2af..d82849c7 100644 --- a/src/virtualship/static/schedule.yaml +++ b/src/virtualship/static/schedule.yaml @@ -1,3 +1,12 @@ +area_of_interest: + spatial_range: + minimum_longitude: -5 + maximum_longitude: 5 + minimum_latitude: -5 + maximum_latitude: 5 + time_range: + start_time: 2023-01-01 00:00:00 + end_time: 2023-02-01 00:00:00 waypoints: - instrument: CTD location: From 8c641cceea764975c9628ed8f7114bf184543ac3 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:02:18 -0500 Subject: [PATCH 02/66] add AreaOfInterest class --- .../expedition/area_of_interest.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/virtualship/expedition/area_of_interest.py diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py new file mode 100644 index 00000000..f7c5c644 --- /dev/null +++ b/src/virtualship/expedition/area_of_interest.py @@ -0,0 +1,27 @@ +"""AreaOfInterest class.""" + +from dataclasses import dataclass +from datetime import datetime + +@dataclass +class SpatialRange: + """Defines the geographic boundaries for an area of interest.""" + + minimum_longitude: float + maximum_longitude: float + minimum_latitude: float + maximum_latitude: float + +@dataclass +class TimeRange: + """Defines the temporal boundaries for an area of interest.""" + + start_time: datetime + end_time: datetime + +@dataclass +class AreaOfInterest: + """An area of interest with spatial and temporal boundaries.""" + + spatial_range: SpatialRange + time_range: TimeRange \ No newline at end of file From 6b6b874abc2610907ded4def02f430ad711c7720 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:02:32 -0500 Subject: [PATCH 03/66] export AreaOfInterest --- src/virtualship/expedition/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index c755d33b..8c4f4cff 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -12,6 +12,7 @@ ShipUnderwaterSTConfig, ) from .waypoint import Waypoint +from .area_of_interest import AreaOfInterest __all__ = [ "ADCPConfig", @@ -23,6 +24,7 @@ "ShipConfig", "ShipUnderwaterSTConfig", "Waypoint", + "AreaOfInterest" "do_expedition", "instruments", ] From 6743f1f168e18a8fb1fb1d279438aaa9b113908d Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:03:03 -0500 Subject: [PATCH 04/66] add AreaOfInterest to Schedule class --- src/virtualship/expedition/schedule.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index d3865483..f74a31af 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -1,19 +1,18 @@ """Schedule class.""" from __future__ import annotations - from pathlib import Path - import pydantic import yaml from .waypoint import Waypoint - +from .area_of_interest import AreaOfInterest class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] + area_of_interest: AreaOfInterest model_config = pydantic.ConfigDict(extra="forbid") From 40d4d0d33bdb1bf199118647992ed9e0189751e8 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:04:04 -0500 Subject: [PATCH 05/66] fetch function for downloading data based on the area of interest --- src/virtualship/cli/commands.py | 74 +++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 9f218e6f..cf2243d9 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -3,7 +3,7 @@ import click from virtualship import utils -from virtualship.expedition.do_expedition import do_expedition +from virtualship.expedition.do_expedition import do_expedition, _get_schedule from virtualship.utils import SCHEDULE, SHIP_CONFIG @@ -45,9 +45,75 @@ def init(path): "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) -def fetch(path): - """Entrypoint for the tool.""" - raise NotImplementedError("Not implemented yet.") +def fetch(expedition_dir: str | Path) -> None: + """Entrypoint for the tool to download data based on area of interest.""" + + if isinstance(expedition_dir, str): + expedition_dir = Path(expedition_dir) + + # Load schedule + schedule = _get_schedule(expedition_dir) + if schedule is None: + print("Error: Schedule file not found.") + return + + # Extract area_of_interest details from the schedule + spatial_range = schedule.area_of_interest.spatial_range + time_range = schedule.area_of_interest.time_range + start_datetime = datetime.datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") + end_datetime = datetime.datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") + + # Prompt for user credentials + username = input("username: ") + password = input("password: ") + + # Define all datasets to download, including bathymetry + download_dict = { + "Bathymetry": { + "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", + "variables": ["deptho"], + "output_filename": "bathymetry.nc", + "force_dataset_part": "bathy" + }, + "UVdata": { + "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "variables": ["uo", "vo"], + "output_filename": "default_uv.nc", + }, + "Sdata": { + "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "variables": ["so"], + "output_filename": "default_s.nc", + }, + "Tdata": { + "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "variables": ["thetao"], + "output_filename": "default_t.nc", + }, + } + + # Iterate over all datasets and download each based on area_of_interest + for dataset in download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.min_longitude, + maximum_longitude=spatial_range.max_longitude, + minimum_latitude=spatial_range.min_latitude, + maximum_latitude=spatial_range.max_latitude, + start_datetime=start_datetime, + end_datetime=end_datetime, + minimum_depth=0.49402499198913574, + maximum_depth=5727.9169921875, + output_filename=dataset["output_filename"], + output_directory=expedition_dir, + username=username, + password=password, + force_download=True, + force_dataset_part=dataset.get("force_dataset_part") # Only used if specified in dataset + ) + + click.echo("Data download based on area of interest completed.") @click.command(help="Do the expedition.") From 3194056fc349133df60c6462b7fb377c278adeb6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 04:10:53 +0000 Subject: [PATCH 06/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/cli/commands.py | 13 ++++++++----- src/virtualship/expedition/__init__.py | 5 ++--- src/virtualship/expedition/area_of_interest.py | 5 ++++- src/virtualship/expedition/schedule.py | 5 ++++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index cf2243d9..9f451b5d 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -3,7 +3,7 @@ import click from virtualship import utils -from virtualship.expedition.do_expedition import do_expedition, _get_schedule +from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG @@ -47,7 +47,6 @@ def init(path): ) def fetch(expedition_dir: str | Path) -> None: """Entrypoint for the tool to download data based on area of interest.""" - if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) @@ -60,7 +59,9 @@ def fetch(expedition_dir: str | Path) -> None: # Extract area_of_interest details from the schedule spatial_range = schedule.area_of_interest.spatial_range time_range = schedule.area_of_interest.time_range - start_datetime = datetime.datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") + start_datetime = datetime.datetime.strptime( + time_range.start_time, "%Y-%m-%d %H:%M:%S" + ) end_datetime = datetime.datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") # Prompt for user credentials @@ -73,7 +74,7 @@ def fetch(expedition_dir: str | Path) -> None: "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", "variables": ["deptho"], "output_filename": "bathymetry.nc", - "force_dataset_part": "bathy" + "force_dataset_part": "bathy", }, "UVdata": { "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", @@ -110,7 +111,9 @@ def fetch(expedition_dir: str | Path) -> None: username=username, password=password, force_download=True, - force_dataset_part=dataset.get("force_dataset_part") # Only used if specified in dataset + force_dataset_part=dataset.get( + "force_dataset_part" + ), # Only used if specified in dataset ) click.echo("Data download based on area of interest completed.") diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 8c4f4cff..98ab941a 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -1,5 +1,6 @@ """Everything for simulating an expedition.""" +from .area_of_interest import AreaOfInterest from .do_expedition import do_expedition from .instrument_type import InstrumentType from .schedule import Schedule @@ -12,7 +13,6 @@ ShipUnderwaterSTConfig, ) from .waypoint import Waypoint -from .area_of_interest import AreaOfInterest __all__ = [ "ADCPConfig", @@ -24,7 +24,6 @@ "ShipConfig", "ShipUnderwaterSTConfig", "Waypoint", - "AreaOfInterest" - "do_expedition", + "AreaOfInterest" "do_expedition", "instruments", ] diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py index f7c5c644..dd22ad6c 100644 --- a/src/virtualship/expedition/area_of_interest.py +++ b/src/virtualship/expedition/area_of_interest.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from datetime import datetime + @dataclass class SpatialRange: """Defines the geographic boundaries for an area of interest.""" @@ -12,6 +13,7 @@ class SpatialRange: minimum_latitude: float maximum_latitude: float + @dataclass class TimeRange: """Defines the temporal boundaries for an area of interest.""" @@ -19,9 +21,10 @@ class TimeRange: start_time: datetime end_time: datetime + @dataclass class AreaOfInterest: """An area of interest with spatial and temporal boundaries.""" spatial_range: SpatialRange - time_range: TimeRange \ No newline at end of file + time_range: TimeRange diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index f74a31af..277a178e 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -1,12 +1,15 @@ """Schedule class.""" from __future__ import annotations + from pathlib import Path + import pydantic import yaml -from .waypoint import Waypoint from .area_of_interest import AreaOfInterest +from .waypoint import Waypoint + class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" From aacbb38d6dc4d51b04f6d690f0403d77770da985 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:11:52 -0500 Subject: [PATCH 07/66] import copernicusmarine --- src/virtualship/cli/commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index cf2243d9..0f62fe3d 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -6,6 +6,7 @@ from virtualship.expedition.do_expedition import do_expedition, _get_schedule from virtualship.utils import SCHEDULE, SHIP_CONFIG +import copernicusmarine @click.command( help="Initialize a directory for a new expedition, with an example configuration." From 5988c9017a4aac08286f4c5d1f2c3ede21828920 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:13:55 -0500 Subject: [PATCH 08/66] import datetime --- src/virtualship/cli/commands.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 0f62fe3d..dbd0f124 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -6,6 +6,7 @@ from virtualship.expedition.do_expedition import do_expedition, _get_schedule from virtualship.utils import SCHEDULE, SHIP_CONFIG +from datetime import datetime import copernicusmarine @click.command( @@ -61,8 +62,8 @@ def fetch(expedition_dir: str | Path) -> None: # Extract area_of_interest details from the schedule spatial_range = schedule.area_of_interest.spatial_range time_range = schedule.area_of_interest.time_range - start_datetime = datetime.datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") - end_datetime = datetime.datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") + start_datetime = datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") + end_datetime = datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") # Prompt for user credentials username = input("username: ") From 3d7697e390946cc109aca2bf5e4187d52865450f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 04:16:06 +0000 Subject: [PATCH 09/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/cli/commands.py | 4 ++-- src/virtualship/expedition/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 5019fcb3..f533b968 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,13 +1,13 @@ +from datetime import datetime from pathlib import Path import click +import copernicusmarine from virtualship import utils from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG -from datetime import datetime -import copernicusmarine @click.command( help="Initialize a directory for a new expedition, with an example configuration." diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 98ab941a..7364c6d1 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -24,6 +24,6 @@ "ShipConfig", "ShipUnderwaterSTConfig", "Waypoint", - "AreaOfInterest" "do_expedition", + "AreaOfInterestdo_expedition", "instruments", ] From fafc74f879b05bdb1ae6ab49d15e3424c1d128eb Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Tue, 12 Nov 2024 23:18:38 -0500 Subject: [PATCH 10/66] fix typo --- src/virtualship/expedition/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 98ab941a..ec044b42 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -13,6 +13,7 @@ ShipUnderwaterSTConfig, ) from .waypoint import Waypoint +from .area_of_interest import AreaOfInterest __all__ = [ "ADCPConfig", @@ -24,6 +25,7 @@ "ShipConfig", "ShipUnderwaterSTConfig", "Waypoint", - "AreaOfInterest" "do_expedition", + "AreaOfInterest", + "do_expedition", "instruments", ] From 46af584be337752cfd0acd10fca7fbb245870606 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 04:19:54 +0000 Subject: [PATCH 11/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/expedition/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index ec044b42..2f0fbd04 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -13,7 +13,6 @@ ShipUnderwaterSTConfig, ) from .waypoint import Waypoint -from .area_of_interest import AreaOfInterest __all__ = [ "ADCPConfig", From e09265810d96a21a67414ae9ccda8272f535abbf Mon Sep 17 00:00:00 2001 From: Iury Simoes-Sousa Date: Fri, 15 Nov 2024 08:52:10 -0500 Subject: [PATCH 12/66] Update src/virtualship/cli/commands.py Co-authored-by: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> --- src/virtualship/cli/commands.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f533b968..f36105ed 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -52,7 +52,6 @@ def fetch(expedition_dir: str | Path) -> None: if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) - # Load schedule schedule = _get_schedule(expedition_dir) if schedule is None: print("Error: Schedule file not found.") From bb776a6295621f1a9b5402152b9a0c9d5a384eee Mon Sep 17 00:00:00 2001 From: Iury Simoes-Sousa Date: Fri, 15 Nov 2024 08:52:34 -0500 Subject: [PATCH 13/66] Update src/virtualship/cli/commands.py Co-authored-by: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> --- src/virtualship/cli/commands.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f36105ed..429246b0 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -49,8 +49,7 @@ def init(path): ) def fetch(expedition_dir: str | Path) -> None: """Entrypoint for the tool to download data based on area of interest.""" - if isinstance(expedition_dir, str): - expedition_dir = Path(expedition_dir) + expedition_dir = Path(expedition_dir) schedule = _get_schedule(expedition_dir) if schedule is None: From 870a2c2b77cf8fdc26ccd5218df33460aca7aa36 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:08:00 +0800 Subject: [PATCH 14/66] patch `path` varname --- src/virtualship/cli/commands.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 429246b0..e9ecd30b 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -47,11 +47,11 @@ def init(path): "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) -def fetch(expedition_dir: str | Path) -> None: +def fetch(path: str | Path) -> None: """Entrypoint for the tool to download data based on area of interest.""" - expedition_dir = Path(expedition_dir) + path = Path(path) - schedule = _get_schedule(expedition_dir) + schedule = _get_schedule(path) if schedule is None: print("Error: Schedule file not found.") return @@ -105,7 +105,7 @@ def fetch(expedition_dir: str | Path) -> None: minimum_depth=0.49402499198913574, maximum_depth=5727.9169921875, output_filename=dataset["output_filename"], - output_directory=expedition_dir, + output_directory=path, username=username, password=password, force_download=True, From 8df718a4814d6f57c57608f3397fd56693916a61 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:13:04 +0800 Subject: [PATCH 15/66] update _get_ship_config and _get_schedule --- src/virtualship/cli/commands.py | 3 --- src/virtualship/expedition/do_expedition.py | 22 ++++++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index e9ecd30b..a8486b5f 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -52,9 +52,6 @@ def fetch(path: str | Path) -> None: path = Path(path) schedule = _get_schedule(path) - if schedule is None: - print("Error: Schedule file not found.") - return # Extract area_of_interest details from the schedule spatial_range = schedule.area_of_interest.spatial_range diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 1efd92f9..7a48b9aa 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -27,15 +27,8 @@ def do_expedition(expedition_dir: str | Path) -> None: if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) - # load ship configuration ship_config = _get_ship_config(expedition_dir) - if ship_config is None: - return - - # load schedule schedule = _get_schedule(expedition_dir) - if schedule is None: - return # load last checkpoint checkpoint = _load_checkpoint(expedition_dir) @@ -114,9 +107,10 @@ def _get_ship_config(expedition_dir: Path) -> ShipConfig | None: file_path = expedition_dir.joinpath(SHIP_CONFIG) try: return ShipConfig.from_yaml(file_path) - except FileNotFoundError: - print(f'Schedule not found. Save it to "{file_path}".') - return None + except FileNotFoundError as e: + raise FileNotFoundError( + f'Ship config not found. Save it to "{file_path}".' + ) from e def _load_input_data(expedition_dir: Path, ship_config: ShipConfig) -> InputData: @@ -130,13 +124,13 @@ def _load_input_data(expedition_dir: Path, ship_config: ShipConfig) -> InputData ) -def _get_schedule(expedition_dir: Path) -> Schedule | None: +def _get_schedule(expedition_dir: Path) -> Schedule: + """Load Schedule object from yaml config file in `expedition_dir`.""" file_path = expedition_dir.joinpath(SCHEDULE) try: return Schedule.from_yaml(file_path) - except FileNotFoundError: - print(f'Schedule not found. Save it to "{file_path}".') - return None + except FileNotFoundError as e: + raise FileNotFoundError(f'Schedule not found. Save it to "{file_path}".') from e def _load_checkpoint(expedition_dir: Path) -> Checkpoint | None: From c691a478fea5443d3680152c5c1e0c1e9bc6f14c Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 21:52:46 +0800 Subject: [PATCH 16/66] Add credential config --- src/virtualship/cli/_creds.py | 106 ++++++++++++++++++++++++++++++++ src/virtualship/cli/commands.py | 28 +++++++-- tests/{ => cli}/test_cli.py | 0 tests/cli/test_creds.py | 66 ++++++++++++++++++++ tests/conftest.py | 7 +++ 5 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 src/virtualship/cli/_creds.py rename tests/{ => cli}/test_cli.py (100%) create mode 100644 tests/cli/test_creds.py diff --git a/src/virtualship/cli/_creds.py b/src/virtualship/cli/_creds.py new file mode 100644 index 00000000..1165cd8f --- /dev/null +++ b/src/virtualship/cli/_creds.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from pathlib import Path + +import click +import pydantic +import yaml + +CREDENTIALS_FILE = "credentials.yaml" + + +class CredentialFileError(Exception): + """Exception raised for errors in the input file format.""" + + pass + + +class Credentials(pydantic.BaseModel): + """Credentials to be used in `virtualship fetch` command.""" + + COPERNICUS_USERNAME: str + COPERNICUS_PASSWORD: str + + @classmethod + def from_yaml(cls, path: str | Path) -> Credentials: + """ + Load credentials from a yaml file. + + :param path: Path to the file to load from. + :returns Credentials: The credentials. + """ + with open(path) as file: + data = yaml.safe_load(file) + + if not isinstance(data, dict): + raise CredentialFileError("Credential file is of an invalid format.") + + return cls(**data) + + def dump(self) -> str: + """ + Dump credentials to a yaml string. + + :param creds: The credentials to dump. + :returns str: The yaml string. + """ + return yaml.safe_dump(self.model_dump()) + + def to_yaml(self, path: str | Path) -> None: + """ + Write credentials to a yaml file. + + :param path: Path to the file to write to. + """ + with open(path, "w") as file: + file.write(self.dump()) + + +def get_dummy_credentials_yaml() -> str: + return ( + Credentials( + COPERNICUS_USERNAME="my_username", COPERNICUS_PASSWORD="my_password" + ) + .dump() + .strip() + ) + + +def get_credentials_flow( + username: str | None, password: str | None, creds_path: Path +) -> tuple[str, str]: + """ + Execute flow of getting credentials for use in the `fetch` command. + + - If username and password are provided via CLI, use them (ignore the credentials file if exists). + - If username and password are not provided, try to load them from the credentials file. + - If no credentials are provided, print a message on how to make credentials file and prompt for credentials. + + :param username: The username provided via CLI. + :param password: The password provided via CLI. + :param creds_path: The path to the credentials file. + """ + if username and password: + if creds_path.exists(): + click.echo( + f"Credentials file exists at {creds_path}, but username and password are already provided.\nIgnoring credentials file." + ) + return username, password + + try: + creds = Credentials.from_yaml(creds_path) + return creds.COPERNICUS_USERNAME, creds.COPERNICUS_PASSWORD + except FileNotFoundError: + msg = f"""Credentials not provided. Either pass in via `--username` and `--password` arguments, or via config file at '{creds_path}'. Config file should be YAML along following format: +### {creds_path} + +{get_dummy_credentials_yaml().strip()} + +### + +Prompting for credentials instead... +""" + click.echo(msg) + username = click.prompt("username") + password = click.prompt("password", hide_input=True) + return username, password diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index a8486b5f..a30a931c 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -4,6 +4,7 @@ import click import copernicusmarine +import virtualship.cli._creds as creds from virtualship import utils from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG @@ -47,22 +48,34 @@ def init(path): "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) -def fetch(path: str | Path) -> None: +@click.option( + "--username", + type=str, + default=None, +) +@click.option( + "--password", + type=str, + default=None, +) +def fetch(path: str | Path, username: str | None, password: str | None) -> None: """Entrypoint for the tool to download data based on area of interest.""" + if sum([username is None, password is None]) == 1: + raise ValueError("Both username and password must be provided.") + path = Path(path) schedule = _get_schedule(path) + creds_path = path / creds.CREDENTIALS_FILE + username, password = creds.get_credentials_flow(username, password, creds_path) + # Extract area_of_interest details from the schedule spatial_range = schedule.area_of_interest.spatial_range time_range = schedule.area_of_interest.time_range start_datetime = datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") end_datetime = datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") - # Prompt for user credentials - username = input("username: ") - password = input("password: ") - # Define all datasets to download, including bathymetry download_dict = { "Bathymetry": { @@ -119,6 +132,11 @@ def fetch(path: str | Path) -> None: "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), ) +@click.option( + "--username", + prompt=True, + type=str, +) def run(path): """Entrypoint for the tool.""" do_expedition(Path(path)) diff --git a/tests/test_cli.py b/tests/cli/test_cli.py similarity index 100% rename from tests/test_cli.py rename to tests/cli/test_cli.py diff --git a/tests/cli/test_creds.py b/tests/cli/test_creds.py new file mode 100644 index 00000000..17ef2023 --- /dev/null +++ b/tests/cli/test_creds.py @@ -0,0 +1,66 @@ +import pydantic +import pytest + +from virtualship.cli._creds import CredentialFileError, Credentials + + +def test_load_credentials(tmp_file): + tmp_file.write_text( + """ + COPERNICUS_USERNAME: test_user + COPERNICUS_PASSWORD: test_password + """ + ) + + creds = Credentials.from_yaml(tmp_file) + assert creds.COPERNICUS_USERNAME == "test_user" + assert creds.COPERNICUS_PASSWORD == "test_password" + + +# parameterize with the contents of the file +@pytest.mark.parametrize( + "contents", + [ + pytest.param( + """ + INVALID_KEY: some_value + """, + id="invalid-key", + ), + pytest.param( + """ + # number not allowed, should be string (or quoted number) + USERNAME: 123 + """, + id="number-not-allowed", + ), + ], +) +def test_invalid_credentials(tmp_file, contents): + tmp_file.write_text(contents) + + with pytest.raises(pydantic.ValidationError): + Credentials.from_yaml(tmp_file) + + +def test_credentials_invalid_format(tmp_file): + tmp_file.write_text( + """ + INVALID_FORMAT_BUT_VALID_YAML + """ + ) + + with pytest.raises(CredentialFileError): + Credentials.from_yaml(tmp_file) + + +def test_rt_credentials(tmp_file): + """Test round-trip for credentials using Credentials.from_yaml() and Credentials.dump().""" + creds = Credentials( + COPERNICUS_USERNAME="test_user", COPERNICUS_PASSWORD="test_password" + ) + + creds.to_yaml(tmp_file) + creds_loaded = Credentials.from_yaml(tmp_file) + + assert creds == creds_loaded diff --git a/tests/conftest.py b/tests/conftest.py index 1159768d..1b7a1de0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,13 @@ import pytest +@pytest.fixture +def tmp_file(tmp_path): + file = tmp_path / "test.txt" + file.touch() + return file + + @pytest.fixture(autouse=True) def test_in_working_dir(request, monkeypatch): """ From 5fbf8125a8dacf3c01950a3be39b53b8c6976eac Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:05:01 +0800 Subject: [PATCH 17/66] Add note to obtain credentials --- src/virtualship/cli/_creds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtualship/cli/_creds.py b/src/virtualship/cli/_creds.py index 1165cd8f..bdf4782c 100644 --- a/src/virtualship/cli/_creds.py +++ b/src/virtualship/cli/_creds.py @@ -91,7 +91,7 @@ def get_credentials_flow( creds = Credentials.from_yaml(creds_path) return creds.COPERNICUS_USERNAME, creds.COPERNICUS_PASSWORD except FileNotFoundError: - msg = f"""Credentials not provided. Either pass in via `--username` and `--password` arguments, or via config file at '{creds_path}'. Config file should be YAML along following format: + msg = f"""Credentials not provided. Credentials can be obtained from https://data.marine.copernicus.eu/register. Either pass in via `--username` and `--password` arguments, or via config file at '{creds_path}'. Config file should be YAML along following format: ### {creds_path} {get_dummy_credentials_yaml().strip()} From 391f5ef161a303c6e0e98f2d6fc6f0d3bc05950c Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:07:28 +0800 Subject: [PATCH 18/66] Add copernicusmarine dep --- environment.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 994ab6cf..99fc2a0d 100644 --- a/environment.yml +++ b/environment.yml @@ -11,6 +11,7 @@ dependencies: - pydantic >=2, <3 - pip - pyyaml + - copernicusmarine # linting - pre-commit diff --git a/pyproject.toml b/pyproject.toml index f270fb35..9085b586 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "numpy >=1, < 2", "pydantic >=2, <3", "PyYAML", + "copernicusmarine", ] [project.urls] From 17fef2be346cd58b2d55ef18e31d684bc35d782e Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:12:40 +0800 Subject: [PATCH 19/66] Patch types start_time and end_time already datetime objects --- src/virtualship/cli/commands.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index a30a931c..3c5212a3 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,4 +1,3 @@ -from datetime import datetime from pathlib import Path import click @@ -73,8 +72,8 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: # Extract area_of_interest details from the schedule spatial_range = schedule.area_of_interest.spatial_range time_range = schedule.area_of_interest.time_range - start_datetime = datetime.strptime(time_range.start_time, "%Y-%m-%d %H:%M:%S") - end_datetime = datetime.strptime(time_range.end_time, "%Y-%m-%d %H:%M:%S") + start_datetime = time_range.start_time + end_datetime = time_range.end_time # Define all datasets to download, including bathymetry download_dict = { From 672fa6a504ad950bba42a66687f1d3c345d61853 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:18:00 +0800 Subject: [PATCH 20/66] Update user messages in `virtualship fetch` --- src/virtualship/cli/_creds.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/virtualship/cli/_creds.py b/src/virtualship/cli/_creds.py index bdf4782c..cee1c69f 100644 --- a/src/virtualship/cli/_creds.py +++ b/src/virtualship/cli/_creds.py @@ -83,12 +83,13 @@ def get_credentials_flow( if username and password: if creds_path.exists(): click.echo( - f"Credentials file exists at {creds_path}, but username and password are already provided.\nIgnoring credentials file." + f"Credentials file exists at '{creds_path}', but username and password are already provided. Ignoring credentials file." ) return username, password try: creds = Credentials.from_yaml(creds_path) + click.echo(f"Loaded credentials from '{creds_path}'.") return creds.COPERNICUS_USERNAME, creds.COPERNICUS_PASSWORD except FileNotFoundError: msg = f"""Credentials not provided. Credentials can be obtained from https://data.marine.copernicus.eu/register. Either pass in via `--username` and `--password` arguments, or via config file at '{creds_path}'. Config file should be YAML along following format: From 51451cfbf6c9610f5bad0baf9f5bcc2bce9b2cfb Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 22 Nov 2024 09:26:21 +0100 Subject: [PATCH 21/66] Adding minimum and maximum depth to area_of_interest --- src/virtualship/cli/commands.py | 12 ++++++------ src/virtualship/expedition/area_of_interest.py | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 3c5212a3..4d93ce08 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -105,14 +105,14 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: copernicusmarine.subset( dataset_id=dataset["dataset_id"], variables=dataset["variables"], - minimum_longitude=spatial_range.min_longitude, - maximum_longitude=spatial_range.max_longitude, - minimum_latitude=spatial_range.min_latitude, - maximum_latitude=spatial_range.max_latitude, + minimum_longitude=spatial_range.minimum_longitude, + maximum_longitude=spatial_range.maximum_longitude, + minimum_latitude=spatial_range.minimum_latitude, + maximum_latitude=spatial_range.maximum_latitude, start_datetime=start_datetime, end_datetime=end_datetime, - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, + minimum_depth=abs(spatial_range.minimum_depth), + maximum_depth=abs(spatial_range.maximum_depth), output_filename=dataset["output_filename"], output_directory=path, username=username, diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py index dd22ad6c..3b268b6a 100644 --- a/src/virtualship/expedition/area_of_interest.py +++ b/src/virtualship/expedition/area_of_interest.py @@ -12,6 +12,8 @@ class SpatialRange: maximum_longitude: float minimum_latitude: float maximum_latitude: float + minimum_depth: float + maximum_depth: float @dataclass From bc883606d2cbd74888e5d3403491d10ef3252451 Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 22 Nov 2024 09:26:46 +0100 Subject: [PATCH 22/66] Setting None as default option for drifter endtime --- src/virtualship/instruments/drifter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtualship/instruments/drifter.py b/src/virtualship/instruments/drifter.py index b4db3b9a..8dc1e1b0 100644 --- a/src/virtualship/instruments/drifter.py +++ b/src/virtualship/instruments/drifter.py @@ -46,7 +46,7 @@ def simulate_drifters( drifters: list[Drifter], outputdt: timedelta, dt: timedelta, - endtime: datetime | None, + endtime: datetime | None = None, ) -> None: """ Use Parcels to simulate a set of drifters in a fieldset. From f3a0dc9fe1154e5d888a740b22a9efcefebb638a Mon Sep 17 00:00:00 2001 From: Erik van Sebille Date: Fri, 22 Nov 2024 09:27:03 +0100 Subject: [PATCH 23/66] Removing redundant import in text_drifter --- tests/instruments/test_drifter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/instruments/test_drifter.py b/tests/instruments/test_drifter.py index b49b510b..86e313e5 100644 --- a/tests/instruments/test_drifter.py +++ b/tests/instruments/test_drifter.py @@ -1,7 +1,6 @@ """Test the simulation of drifters.""" import datetime -from datetime import timedelta import numpy as np import xarray as xr @@ -60,8 +59,8 @@ def test_simulate_drifters(tmpdir) -> None: fieldset=fieldset, out_path=out_path, drifters=drifters, - outputdt=timedelta(hours=1), - dt=timedelta(minutes=5), + outputdt=datetime.timedelta(hours=1), + dt=datetime.timedelta(minutes=5), endtime=None, ) From 3b5edc69f6bde38f5c2e195ffa72e4f2f39fc57f Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 22 Nov 2024 23:29:43 +0800 Subject: [PATCH 24/66] Pin copernicusmarine < 2 --- environment.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 99fc2a0d..382100a0 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - pydantic >=2, <3 - pip - pyyaml - - copernicusmarine + - copernicusmarine < 2 # linting - pre-commit diff --git a/pyproject.toml b/pyproject.toml index 9085b586..6179ce5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numpy >=1, < 2", "pydantic >=2, <3", "PyYAML", - "copernicusmarine", + "copernicusmarine < 2", ] [project.urls] From fb1effd9403b8134d64827d9a61ab70661effa7b Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:59:01 +0100 Subject: [PATCH 25/66] Validate domains for area_of_interest --- .../expedition/area_of_interest.py | 48 +++++++++++++------ 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py index 3b268b6a..271dac6c 100644 --- a/src/virtualship/expedition/area_of_interest.py +++ b/src/virtualship/expedition/area_of_interest.py @@ -1,31 +1,51 @@ """AreaOfInterest class.""" -from dataclasses import dataclass from datetime import datetime +from typing import Annotated +from pydantic import BaseModel, Field, model_validator +from typing_extensions import Self -@dataclass -class SpatialRange: - """Defines the geographic boundaries for an area of interest.""" +Longitude = Annotated[float, Field(..., ge=-180, le=180)] +Latitude = Annotated[float, Field(..., ge=-90, le=90)] +Depth = float # TODO: insert a minimum depth here? e.g., `Annotated[float, Field(..., ge=0)]` - minimum_longitude: float - maximum_longitude: float - minimum_latitude: float - maximum_latitude: float - minimum_depth: float - maximum_depth: float +class SpatialRange(BaseModel): + """Defines the geographic boundaries for an area of interest.""" -@dataclass -class TimeRange: + minimum_longitude: Longitude + maximum_longitude: Longitude + minimum_latitude: Latitude + maximum_latitude: Latitude + minimum_depth: Depth + maximum_depth: Depth + + @model_validator(mode="after") + def _check_spatial_domain(self) -> Self: + if not self.minimum_longitude < self.maximum_longitude: + raise ValueError("minimum_longitude must be less than maximum_longitude") + if not self.minimum_latitude < self.maximum_latitude: + raise ValueError("minimum_latitude must be less than maximum_latitude") + if not self.minimum_depth < self.maximum_depth: + raise ValueError("minimum_depth must be less than maximum_depth") + return self + + +class TimeRange(BaseModel): """Defines the temporal boundaries for an area of interest.""" start_time: datetime end_time: datetime + @model_validator(mode="after") + def _check_time_range(self) -> Self: + if not self.start_time < self.end_time: + raise ValueError("start_time must be before end_time") + return self + -@dataclass -class AreaOfInterest: +class AreaOfInterest(BaseModel): """An area of interest with spatial and temporal boundaries.""" spatial_range: SpatialRange From 97b4d8d74740ca16578d60e1a2062f2764db9921 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:00:09 +0000 Subject: [PATCH 26/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/expedition/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 2f0fbd04..98c65eba 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -16,6 +16,7 @@ __all__ = [ "ADCPConfig", + "AreaOfInterest", "ArgoFloatConfig", "CTDConfig", "DrifterConfig", @@ -24,7 +25,6 @@ "ShipConfig", "ShipUnderwaterSTConfig", "Waypoint", - "AreaOfInterest", "do_expedition", "instruments", ] From bb8bacd0c708d8f622200672314eac7b334a86f0 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:19:44 +0100 Subject: [PATCH 27/66] Make depth optional in AreaOfInterest --- src/virtualship/expedition/area_of_interest.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py index 271dac6c..74211bff 100644 --- a/src/virtualship/expedition/area_of_interest.py +++ b/src/virtualship/expedition/area_of_interest.py @@ -18,15 +18,22 @@ class SpatialRange(BaseModel): maximum_longitude: Longitude minimum_latitude: Latitude maximum_latitude: Latitude - minimum_depth: Depth - maximum_depth: Depth + minimum_depth: Depth | None = None + maximum_depth: Depth | None = None @model_validator(mode="after") - def _check_spatial_domain(self) -> Self: + def _check_lon_lat_domain(self) -> Self: if not self.minimum_longitude < self.maximum_longitude: raise ValueError("minimum_longitude must be less than maximum_longitude") if not self.minimum_latitude < self.maximum_latitude: raise ValueError("minimum_latitude must be less than maximum_latitude") + + if sum([self.minimum_depth is None, self.maximum_depth is None]) == 1: + raise ValueError("Both minimum_depth and maximum_depth must be provided.") + + if self.minimum_depth is None: + return self + if not self.minimum_depth < self.maximum_depth: raise ValueError("minimum_depth must be less than maximum_depth") return self From 43f33293974a60e4a4bfdb99d3b213709b020d57 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:20:07 +0100 Subject: [PATCH 28/66] Make AreaOfInterest optional in Schedule --- src/virtualship/expedition/schedule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 277a178e..cd282e28 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -15,7 +15,7 @@ class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] - area_of_interest: AreaOfInterest + area_of_interest: AreaOfInterest | None = None model_config = pydantic.ConfigDict(extra="forbid") From 368bd646d4c47b6f7b7bc50ed573ed981dfb9649 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:23:16 +0100 Subject: [PATCH 29/66] Rename AreaOfInterest to SpaceTimeRegion --- src/virtualship/expedition/__init__.py | 4 ++-- src/virtualship/expedition/area_of_interest.py | 4 ++-- src/virtualship/expedition/schedule.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 98c65eba..5cc6c938 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -1,6 +1,6 @@ """Everything for simulating an expedition.""" -from .area_of_interest import AreaOfInterest +from .area_of_interest import SpaceTimeRegion from .do_expedition import do_expedition from .instrument_type import InstrumentType from .schedule import Schedule @@ -16,7 +16,7 @@ __all__ = [ "ADCPConfig", - "AreaOfInterest", + "SpaceTimeRegion", "ArgoFloatConfig", "CTDConfig", "DrifterConfig", diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/area_of_interest.py index 74211bff..35373731 100644 --- a/src/virtualship/expedition/area_of_interest.py +++ b/src/virtualship/expedition/area_of_interest.py @@ -1,4 +1,4 @@ -"""AreaOfInterest class.""" +"""SpaceTimeRegion class.""" from datetime import datetime from typing import Annotated @@ -52,7 +52,7 @@ def _check_time_range(self) -> Self: return self -class AreaOfInterest(BaseModel): +class SpaceTimeRegion(BaseModel): """An area of interest with spatial and temporal boundaries.""" spatial_range: SpatialRange diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index cd282e28..0bb6dee8 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -7,7 +7,7 @@ import pydantic import yaml -from .area_of_interest import AreaOfInterest +from .area_of_interest import SpaceTimeRegion from .waypoint import Waypoint @@ -15,7 +15,7 @@ class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] - area_of_interest: AreaOfInterest | None = None + space_time_region: SpaceTimeRegion | None = None model_config = pydantic.ConfigDict(extra="forbid") From 123ad3d12248da8599f8eed1a4aa1553330f1b18 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:24:38 +0100 Subject: [PATCH 30/66] Rename area_of_interest.py file --- src/virtualship/expedition/__init__.py | 2 +- src/virtualship/expedition/schedule.py | 2 +- .../expedition/{area_of_interest.py => space_time_region.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/virtualship/expedition/{area_of_interest.py => space_time_region.py} (100%) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 5cc6c938..364f7a7c 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -1,6 +1,5 @@ """Everything for simulating an expedition.""" -from .area_of_interest import SpaceTimeRegion from .do_expedition import do_expedition from .instrument_type import InstrumentType from .schedule import Schedule @@ -12,6 +11,7 @@ ShipConfig, ShipUnderwaterSTConfig, ) +from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint __all__ = [ diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 0bb6dee8..5e41e00e 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -7,7 +7,7 @@ import pydantic import yaml -from .area_of_interest import SpaceTimeRegion +from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint diff --git a/src/virtualship/expedition/area_of_interest.py b/src/virtualship/expedition/space_time_region.py similarity index 100% rename from src/virtualship/expedition/area_of_interest.py rename to src/virtualship/expedition/space_time_region.py From 471d43a4fce6692bb477516adb41efb85a1611e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:27:36 +0000 Subject: [PATCH 31/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/expedition/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 364f7a7c..051ef50d 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -16,7 +16,6 @@ __all__ = [ "ADCPConfig", - "SpaceTimeRegion", "ArgoFloatConfig", "CTDConfig", "DrifterConfig", @@ -24,6 +23,7 @@ "Schedule", "ShipConfig", "ShipUnderwaterSTConfig", + "SpaceTimeRegion", "Waypoint", "do_expedition", "instruments", From ef1e6bcfe6c1e32c9696853788eb0af4b18803a9 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:15:22 +0100 Subject: [PATCH 32/66] Add fetch utility functions and tests --- src/virtualship/cli/_fetch.py | 37 +++++++++++++++++++++++++++++++++++ tests/cli/test_fetch.py | 27 +++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/virtualship/cli/_fetch.py create mode 100644 tests/cli/test_fetch.py diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py new file mode 100644 index 00000000..37f926a8 --- /dev/null +++ b/src/virtualship/cli/_fetch.py @@ -0,0 +1,37 @@ +import hashlib +from datetime import datetime + +from pydantic import BaseModel + + +def _hash(s: str, *, length: int) -> str: + """Create a hash of a string.""" + assert length % 2 == 0, "Length must be even." + half_length = length // 2 + + return hashlib.shake_128(s.encode("utf-8")).hexdigest(half_length) + + +def create_hash(s: str) -> str: + """Create an 8 digit hash of a string.""" + return _hash(s, length=8) + + +def hash_model(model: BaseModel) -> str: + """ + Hash a Pydantic model. + + :param region: The region to hash. + :returns: The hash. + """ + return create_hash(model.model_dump_json()) + + +def filename_to_hash(filename: str) -> str: + """Extract hash from filename of the format YYYYMMDD_HHMMSS_{hash}.""" + return filename.split("_")[-1] + + +def hash_to_filename(hash: str) -> str: + """Return a filename of the format YYYYMMDD_HHMMSS_{hash}.""" + return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash}" diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py new file mode 100644 index 00000000..2712ba69 --- /dev/null +++ b/tests/cli/test_fetch.py @@ -0,0 +1,27 @@ +from pydantic import BaseModel + +from virtualship.cli._fetch import ( + create_hash, + filename_to_hash, + hash_model, + hash_to_filename, +) + + +def test_create_hash(): + assert len(create_hash("correct-length")) == 8 + assert create_hash("same") == create_hash("same") + assert create_hash("unique1") != create_hash("unique2") + + +def test_hash_filename_roundtrip(): + hash_ = create_hash("test") + assert filename_to_hash(hash_to_filename(hash_)) == hash_ + + +def test_hash_model(): + class TestModel(BaseModel): + a: int + b: str + + hash_model(TestModel(a=0, b="b")) From 3bb4e6aefc949e29f7221262d930a6c534dd7afa Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:25:07 +0100 Subject: [PATCH 33/66] Patch attr name --- src/virtualship/cli/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 4d93ce08..53d997dd 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -70,8 +70,8 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: username, password = creds.get_credentials_flow(username, password, creds_path) # Extract area_of_interest details from the schedule - spatial_range = schedule.area_of_interest.spatial_range - time_range = schedule.area_of_interest.time_range + spatial_range = schedule.space_time_region.spatial_range + time_range = schedule.space_time_region.time_range start_datetime = time_range.start_time end_datetime = time_range.end_time From 737e34f787a6398409f7d50733c276b637310b8a Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 12:29:16 -0500 Subject: [PATCH 34/66] pointing the right function to make it work --- src/virtualship/cli/commands.py | 4 ++-- src/virtualship/expedition/schedule.py | 17 ++++++++++++++-- src/virtualship/expedition/schedule.yaml | 25 ++++++++++++++++++++++++ src/virtualship/static/schedule.yaml | 2 ++ 4 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 src/virtualship/expedition/schedule.yaml diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 53d997dd..4d93ce08 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -70,8 +70,8 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: username, password = creds.get_credentials_flow(username, password, creds_path) # Extract area_of_interest details from the schedule - spatial_range = schedule.space_time_region.spatial_range - time_range = schedule.space_time_region.time_range + spatial_range = schedule.area_of_interest.spatial_range + time_range = schedule.area_of_interest.time_range start_datetime = time_range.start_time end_datetime = time_range.end_time diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 5e41e00e..471836f2 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -10,12 +10,17 @@ from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint +# import sys, os +# sys.path.append(os.path.abspath(__file__)) +# from space_time_region import SpaceTimeRegion +# from waypoint import Waypoint -class Schedule(pydantic.BaseModel): + +class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] - space_time_region: SpaceTimeRegion | None = None + area_of_interest: SpaceTimeRegion | None = None model_config = pydantic.ConfigDict(extra="forbid") @@ -44,3 +49,11 @@ def from_yaml(cls, file_path: str | Path) -> Schedule: with open(file_path) as file: data = yaml.safe_load(file) return Schedule(**data) + +if __name__=="__main__": + + import sys + sys.path.append('/home/iury/projects/active/virtualship/src/virtualship/src') + from virtualship.expedition.schedule import Schedule + + diff --git a/src/virtualship/expedition/schedule.yaml b/src/virtualship/expedition/schedule.yaml new file mode 100644 index 00000000..d82849c7 --- /dev/null +++ b/src/virtualship/expedition/schedule.yaml @@ -0,0 +1,25 @@ +area_of_interest: + spatial_range: + minimum_longitude: -5 + maximum_longitude: 5 + minimum_latitude: -5 + maximum_latitude: 5 + time_range: + start_time: 2023-01-01 00:00:00 + end_time: 2023-02-01 00:00:00 +waypoints: + - instrument: CTD + location: + latitude: 0 + longitude: 0 + time: 2023-01-01 00:00:00 + - instrument: DRIFTER + location: + latitude: 0.01 + longitude: 0.01 + time: 2023-01-01 01:00:00 + - instrument: ARGO_FLOAT + location: + latitude: 0.02 + longitude: 0.02 + time: 2023-01-01 02:00:00 diff --git a/src/virtualship/static/schedule.yaml b/src/virtualship/static/schedule.yaml index d82849c7..4cf824a6 100644 --- a/src/virtualship/static/schedule.yaml +++ b/src/virtualship/static/schedule.yaml @@ -4,6 +4,8 @@ area_of_interest: maximum_longitude: 5 minimum_latitude: -5 maximum_latitude: 5 + minimum_depth: 0 + maximum_depth: 1500 time_range: start_time: 2023-01-01 00:00:00 end_time: 2023-02-01 00:00:00 From 6b2eb20c7cfced3c5ea54c02c913b8a82821ada3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 17:29:47 +0000 Subject: [PATCH 35/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/expedition/schedule.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 471836f2..5f840b28 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -16,7 +16,7 @@ # from waypoint import Waypoint -class Schedule(pydantic.BaseModel): +class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] @@ -50,10 +50,9 @@ def from_yaml(cls, file_path: str | Path) -> Schedule: data = yaml.safe_load(file) return Schedule(**data) -if __name__=="__main__": +if __name__ == "__main__": import sys - sys.path.append('/home/iury/projects/active/virtualship/src/virtualship/src') - from virtualship.expedition.schedule import Schedule - + sys.path.append("/home/iury/projects/active/virtualship/src/virtualship/src") + from virtualship.expedition.schedule import Schedule From 60952d8221c458374006c02b8749d42d17d9d03d Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 12:38:52 -0500 Subject: [PATCH 36/66] remove temporary code --- src/virtualship/expedition/schedule.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 471836f2..b81f0b45 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -10,11 +10,6 @@ from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint -# import sys, os -# sys.path.append(os.path.abspath(__file__)) -# from space_time_region import SpaceTimeRegion -# from waypoint import Waypoint - class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" @@ -50,10 +45,4 @@ def from_yaml(cls, file_path: str | Path) -> Schedule: data = yaml.safe_load(file) return Schedule(**data) -if __name__=="__main__": - - import sys - sys.path.append('/home/iury/projects/active/virtualship/src/virtualship/src') - from virtualship.expedition.schedule import Schedule - From 77d40ab2bddf4146cb355b3c0305130c1df75967 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 17:41:27 +0000 Subject: [PATCH 37/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/expedition/schedule.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 7dafc638..7e435a67 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -44,4 +44,3 @@ def from_yaml(cls, file_path: str | Path) -> Schedule: with open(file_path) as file: data = yaml.safe_load(file) return Schedule(**data) - From ac61cf16ca24c3aaae81cda67a1634633442bd31 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 16:46:58 -0500 Subject: [PATCH 38/66] ask for username and password if not provided --- src/virtualship/cli/commands.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 4d93ce08..f9d4a7bf 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -8,6 +8,7 @@ from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG +from getpass import getpass @click.command( help="Initialize a directory for a new expedition, with an example configuration." @@ -59,8 +60,11 @@ def init(path): ) def fetch(path: str | Path, username: str | None, password: str | None) -> None: """Entrypoint for the tool to download data based on area of interest.""" - if sum([username is None, password is None]) == 1: - raise ValueError("Both username and password must be provided.") + if username is None: + username = str(input("Username:")) + + if (password is None): + password = getpass("Password:") path = Path(path) From 61ab17753e1eea6d0399710080503644822640eb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 21:48:19 +0000 Subject: [PATCH 39/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/cli/commands.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f9d4a7bf..7d209c2e 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,3 +1,4 @@ +from getpass import getpass from pathlib import Path import click @@ -8,7 +9,6 @@ from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG -from getpass import getpass @click.command( help="Initialize a directory for a new expedition, with an example configuration." @@ -62,8 +62,8 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: """Entrypoint for the tool to download data based on area of interest.""" if username is None: username = str(input("Username:")) - - if (password is None): + + if password is None: password = getpass("Password:") path = Path(path) From 34bea1636d758facce61b037816770c933cae188 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 17:15:45 -0500 Subject: [PATCH 40/66] creates function that makes a sha256 from a dictionary --- src/virtualship/utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index a8579bd5..1c3fb065 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,5 +1,8 @@ from functools import lru_cache from importlib.resources import files +import json +import hashlib +from datetime import datetime SCHEDULE = "schedule.yaml" SHIP_CONFIG = "ship_config.yaml" @@ -21,3 +24,25 @@ def get_example_config() -> str: def get_example_schedule() -> str: """Get the example schedule file.""" return load_static_file(SCHEDULE) + + +def create_string_hash(data): + """ + Creates a hash string from a nested dictionary or any data. + :param data: Dictionary or other serializable object. + :return: A string hash (e.g., SHA256). + """ + # Custom serialization function for non-serializable types + def custom_serializer(obj): + if isinstance(obj, datetime): + return obj.isoformat() # Convert datetime to ISO 8601 string + raise TypeError(f"Type {type(obj)} not serializable") + + # Convert the dictionary to a sorted JSON string + data_str = json.dumps(data, sort_keys=True, default=custom_serializer) + + # Create a hash using SHA256 + hash_obj = hashlib.sha256(data_str.encode()) + + # Return the hash as a string of letters (hexadecimal) + return hash_obj.hexdigest() \ No newline at end of file From a125a11750dca7814c881e559b5e6d77c6bfccff Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 17:16:38 -0500 Subject: [PATCH 41/66] create a data folder based on the hash from area_of_interest --- src/virtualship/cli/commands.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f9d4a7bf..1ac42474 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -8,6 +8,7 @@ from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG +import os from getpass import getpass @click.command( @@ -68,8 +69,14 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: path = Path(path) + path.joinpath("data/").mkdir(exist_ok=True) + schedule = _get_schedule(path) + aoi_hash = utils.create_string_hash(schedule.dict()["area_of_interest"]) + + path.joinpath(f"data/{aoi_hash}/").mkdir(exist_ok=True) + creds_path = path / creds.CREDENTIALS_FILE username, password = creds.get_credentials_flow(username, password, creds_path) @@ -118,10 +125,11 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: minimum_depth=abs(spatial_range.minimum_depth), maximum_depth=abs(spatial_range.maximum_depth), output_filename=dataset["output_filename"], - output_directory=path, + output_directory=path.joinpath(f"data/{aoi_hash}/"), username=username, password=password, force_download=True, + overwrite_output_data=True, force_dataset_part=dataset.get( "force_dataset_part" ), # Only used if specified in dataset From a2a61c8dd31443a4eae5ac815f5fe0943b78f4c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 22:19:40 +0000 Subject: [PATCH 42/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/cli/commands.py | 9 +++------ src/virtualship/utils.py | 11 ++++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 5d7e63df..af9bc89f 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -9,9 +9,6 @@ from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG -import os -from getpass import getpass - @click.command( help="Initialize a directory for a new expedition, with an example configuration." @@ -72,13 +69,13 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: path = Path(path) path.joinpath("data/").mkdir(exist_ok=True) - + schedule = _get_schedule(path) aoi_hash = utils.create_string_hash(schedule.dict()["area_of_interest"]) - + path.joinpath(f"data/{aoi_hash}/").mkdir(exist_ok=True) - + creds_path = path / creds.CREDENTIALS_FILE username, password = creds.get_credentials_flow(username, password, creds_path) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index 1c3fb065..201b013c 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,8 +1,8 @@ -from functools import lru_cache -from importlib.resources import files -import json import hashlib +import json from datetime import datetime +from functools import lru_cache +from importlib.resources import files SCHEDULE = "schedule.yaml" SHIP_CONFIG = "ship_config.yaml" @@ -32,6 +32,7 @@ def create_string_hash(data): :param data: Dictionary or other serializable object. :return: A string hash (e.g., SHA256). """ + # Custom serialization function for non-serializable types def custom_serializer(obj): if isinstance(obj, datetime): @@ -43,6 +44,6 @@ def custom_serializer(obj): # Create a hash using SHA256 hash_obj = hashlib.sha256(data_str.encode()) - + # Return the hash as a string of letters (hexadecimal) - return hash_obj.hexdigest() \ No newline at end of file + return hash_obj.hexdigest() From 777f505c4037cf07437268da52deef3e9477b609 Mon Sep 17 00:00:00 2001 From: iury simoes-sousa Date: Sat, 21 Dec 2024 17:21:06 -0500 Subject: [PATCH 43/66] insert blank lines --- src/virtualship/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index 201b013c..739c7549 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -28,9 +28,11 @@ def get_example_schedule() -> str: def create_string_hash(data): """ + Creates a hash string from a nested dictionary or any data. :param data: Dictionary or other serializable object. :return: A string hash (e.g., SHA256). + """ # Custom serialization function for non-serializable types From f6908df12f50a4e1069f5830aa367734aa2c507a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Dec 2024 22:21:15 +0000 Subject: [PATCH 44/66] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/virtualship/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index 739c7549..7db66950 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -28,11 +28,11 @@ def get_example_schedule() -> str: def create_string_hash(data): """ - + Creates a hash string from a nested dictionary or any data. :param data: Dictionary or other serializable object. :return: A string hash (e.g., SHA256). - + """ # Custom serialization function for non-serializable types From 2a3c9ab4f8b64f2f03ffd4e66f1ff35fb52ccca4 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:15:41 +0100 Subject: [PATCH 45/66] Update hash to use _fetch utils --- src/virtualship/cli/commands.py | 13 ++++++++++--- src/virtualship/utils.py | 28 ---------------------------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index af9bc89f..533fbac6 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -6,6 +6,11 @@ import virtualship.cli._creds as creds from virtualship import utils +from virtualship.cli._fetch import ( # noqa: F401 + filename_to_hash, + hash_model, + hash_to_filename, +) from virtualship.expedition.do_expedition import _get_schedule, do_expedition from virtualship.utils import SCHEDULE, SHIP_CONFIG @@ -72,9 +77,11 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: schedule = _get_schedule(path) - aoi_hash = utils.create_string_hash(schedule.dict()["area_of_interest"]) - - path.joinpath(f"data/{aoi_hash}/").mkdir(exist_ok=True) + aoi_hash = hash_model(schedule.area_of_interest) + data_folder = path / "data" + data_folder.mkdir(exist_ok=True) + download_folder = data_folder / hash_to_filename(aoi_hash) + download_folder.mkdir() creds_path = path / creds.CREDENTIALS_FILE username, password = creds.get_credentials_flow(username, password, creds_path) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index 7db66950..a8579bd5 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,6 +1,3 @@ -import hashlib -import json -from datetime import datetime from functools import lru_cache from importlib.resources import files @@ -24,28 +21,3 @@ def get_example_config() -> str: def get_example_schedule() -> str: """Get the example schedule file.""" return load_static_file(SCHEDULE) - - -def create_string_hash(data): - """ - - Creates a hash string from a nested dictionary or any data. - :param data: Dictionary or other serializable object. - :return: A string hash (e.g., SHA256). - - """ - - # Custom serialization function for non-serializable types - def custom_serializer(obj): - if isinstance(obj, datetime): - return obj.isoformat() # Convert datetime to ISO 8601 string - raise TypeError(f"Type {type(obj)} not serializable") - - # Convert the dictionary to a sorted JSON string - data_str = json.dumps(data, sort_keys=True, default=custom_serializer) - - # Create a hash using SHA256 - hash_obj = hashlib.sha256(data_str.encode()) - - # Return the hash as a string of letters (hexadecimal) - return hash_obj.hexdigest() From 66a1bb9e622eda3dc0d894a01c9b2a669ea429c5 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:17:20 +0100 Subject: [PATCH 46/66] Avoid mixing matching creds --- src/virtualship/cli/commands.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 533fbac6..8ca73e07 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,4 +1,3 @@ -from getpass import getpass from pathlib import Path import click @@ -65,11 +64,8 @@ def init(path): ) def fetch(path: str | Path, username: str | None, password: str | None) -> None: """Entrypoint for the tool to download data based on area of interest.""" - if username is None: - username = str(input("Username:")) - - if password is None: - password = getpass("Password:") + if sum([username is None, password is None]) == 1: + raise ValueError("Both username and password must be provided when using CLI.") path = Path(path) From 3012812e51005770000fa71842701bafa6716130 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:05:39 +0100 Subject: [PATCH 47/66] Download caching feature --- src/virtualship/cli/_fetch.py | 69 +++++++++++++++++++++++++++++++++ src/virtualship/cli/commands.py | 37 ++++++++++++------ src/virtualship/utils.py | 16 ++++++++ 3 files changed, 111 insertions(+), 11 deletions(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 37f926a8..39007ea1 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -1,8 +1,16 @@ +from __future__ import annotations + import hashlib from datetime import datetime +from pathlib import Path +import click from pydantic import BaseModel +from virtualship.utils import _dump_yaml, _generic_load_yaml + +DOWNLOAD_METADATA = "download_metadata.yaml" + def _hash(s: str, *, length: int) -> str: """Create a hash of a string.""" @@ -35,3 +43,64 @@ def filename_to_hash(filename: str) -> str: def hash_to_filename(hash: str) -> str: """Return a filename of the format YYYYMMDD_HHMMSS_{hash}.""" return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash}" + + +class IncompleteDownloadError(Exception): + """Exception raised for incomplete downloads.""" + + pass + + +class DownloadMetadata(BaseModel): + """Metadata for a data download.""" + + download_complete: bool + download_date: datetime | None = None + + def to_yaml(self, file_path: str | Path) -> None: + with open(file_path, "w") as file: + _dump_yaml(self, file) + + @classmethod + def from_yaml(cls, file_path: str | Path) -> DownloadMetadata: + return _generic_load_yaml(file_path, cls) + + +def get_existing_download(data_folder: Path, aoi_hash: str) -> Path | None: + """Check if a download has already been completed. If so, return the path for existing download.""" + for download_path in data_folder.iterdir(): + try: + hash = filename_to_hash(download_path.name) + except ValueError: + click.echo( + f"Skipping {download_path.name} as it is not a valid download folder name." + ) + continue + + if hash == aoi_hash: + check_complete_download(download_path) + return download_path + + return None + + +def check_complete_download(download_path: Path) -> bool: + """Check if a download is complete.""" + download_metadata = download_path / DOWNLOAD_METADATA + try: + with open(download_metadata) as file: + assert DownloadMetadata.from_yaml(file).download_complete + except (FileNotFoundError, AssertionError) as e: + raise IncompleteDownloadError( + f"Download at {download_path} was found, but looks to be incomplete " + f"(likely due to interupting it mid-download). Please delete this and retry." + ) from e + return True + + +def complete_download(download_path: Path) -> None: + """Mark a download as complete.""" + download_metadata = download_path / DOWNLOAD_METADATA + metadata = DownloadMetadata(download_complete=True, download_date=datetime.now()) + metadata.to_yaml(download_metadata) + return diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 8ca73e07..f21967b5 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -5,8 +5,11 @@ import virtualship.cli._creds as creds from virtualship import utils -from virtualship.cli._fetch import ( # noqa: F401 - filename_to_hash, +from virtualship.cli._fetch import ( + DOWNLOAD_METADATA, + DownloadMetadata, + complete_download, + get_existing_download, hash_model, hash_to_filename, ) @@ -69,15 +72,19 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: path = Path(path) - path.joinpath("data/").mkdir(exist_ok=True) + data_folder = path / "data" + data_folder.mkdir(exist_ok=True) schedule = _get_schedule(path) aoi_hash = hash_model(schedule.area_of_interest) - data_folder = path / "data" - data_folder.mkdir(exist_ok=True) - download_folder = data_folder / hash_to_filename(aoi_hash) - download_folder.mkdir() + + existing_download = get_existing_download(data_folder, aoi_hash) + if existing_download is not None: + click.echo( + f"Data download based on area of interest already completed at {existing_download}." + ) + return creds_path = path / creds.CREDENTIALS_FILE username, password = creds.get_credentials_flow(username, password, creds_path) @@ -88,28 +95,35 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: start_datetime = time_range.start_time end_datetime = time_range.end_time + # Create download folder and set download metadata + download_folder = data_folder / hash_to_filename(aoi_hash) + download_folder.mkdir() + DownloadMetadata(download_complete=False).to_yaml( + download_folder / DOWNLOAD_METADATA + ) + # Define all datasets to download, including bathymetry download_dict = { "Bathymetry": { "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", "variables": ["deptho"], - "output_filename": "bathymetry.nc", + "output_filename": str(download_folder / "bathymetry.nc"), "force_dataset_part": "bathy", }, "UVdata": { "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", "variables": ["uo", "vo"], - "output_filename": "default_uv.nc", + "output_filename": str(download_folder / "default_uv.nc"), }, "Sdata": { "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", "variables": ["so"], - "output_filename": "default_s.nc", + "output_filename": str(download_folder / "default_s.nc"), }, "Tdata": { "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", "variables": ["thetao"], - "output_filename": "default_t.nc", + "output_filename": str(download_folder / "default_t.nc"), }, } @@ -137,6 +151,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: ), # Only used if specified in dataset ) + complete_download() click.echo("Data download based on area of interest completed.") diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index a8579bd5..95d47d31 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,5 +1,9 @@ from functools import lru_cache from importlib.resources import files +from typing import TextIO + +import yaml +from pydantic import BaseModel SCHEDULE = "schedule.yaml" SHIP_CONFIG = "ship_config.yaml" @@ -21,3 +25,15 @@ def get_example_config() -> str: def get_example_schedule() -> str: """Get the example schedule file.""" return load_static_file(SCHEDULE) + + +def _dump_yaml(model: BaseModel, stream: TextIO) -> str | None: + """Dump a pydantic model to a yaml string.""" + return yaml.safe_dump( + model.model_dump(by_alias=True), stream, default_flow_style=False + ) + + +def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel: + """Load a yaml string into a pydantic model.""" + return model.model_validate(yaml.safe_load(data)) From 416043708bd6c528c0f2cae229ff021d13849b53 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:23:18 +0100 Subject: [PATCH 48/66] Update input data loading with new data folder structure --- src/virtualship/expedition/do_expedition.py | 12 +++++++++--- src/virtualship/expedition/input_data.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 7a48b9aa..a2fcc4d1 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -6,6 +6,7 @@ import pyproj +from virtualship.cli._fetch import get_existing_download, hash_model from virtualship.utils import CHECKPOINT, SCHEDULE, SHIP_CONFIG from .checkpoint import Checkpoint @@ -50,7 +51,7 @@ def do_expedition(expedition_dir: str | Path) -> None: # load fieldsets input_data = _load_input_data( - expedition_dir=expedition_dir, ship_config=ship_config + expedition_dir=expedition_dir, schedule=schedule, ship_config=ship_config ) # verify schedule makes sense @@ -113,9 +114,14 @@ def _get_ship_config(expedition_dir: Path) -> ShipConfig | None: ) from e -def _load_input_data(expedition_dir: Path, ship_config: ShipConfig) -> InputData: +def _load_input_data( + expedition_dir: Path, schedule: Schedule, ship_config: ShipConfig +) -> InputData: + aoi_hash = hash_model(schedule.area_of_interest) + download_directory = get_existing_download(expedition_dir, aoi_hash) + return InputData.load( - directory=expedition_dir.joinpath("input_data"), + directory=download_directory, load_adcp=ship_config.adcp_config is not None, load_argo_float=ship_config.argo_float_config is not None, load_ctd=ship_config.ctd_config is not None, diff --git a/src/virtualship/expedition/input_data.py b/src/virtualship/expedition/input_data.py index 7af9ef72..de56642c 100644 --- a/src/virtualship/expedition/input_data.py +++ b/src/virtualship/expedition/input_data.py @@ -33,7 +33,7 @@ def load( For now this function makes a lot of assumption about file location and contents. - :param directory: Base directory of the expedition. + :param directory: Input data directory. :param load_adcp: Whether to load the ADCP fieldset. :param load_argo_float: Whether to load the argo float fieldset. :param load_ctd: Whether to load the CTD fieldset. @@ -73,7 +73,7 @@ def load( ) @classmethod - def _load_default_fieldset(cls, directory: str | Path) -> FieldSet: + def _load_default_fieldset(cls, directory: Path) -> FieldSet: filenames = { "U": directory.joinpath("default_uv.nc"), "V": directory.joinpath("default_uv.nc"), @@ -116,7 +116,7 @@ def _load_default_fieldset(cls, directory: str | Path) -> FieldSet: return fieldset @classmethod - def _load_drifter_fieldset(cls, directory: str | Path) -> FieldSet: + def _load_drifter_fieldset(cls, directory: Path) -> FieldSet: filenames = { "U": directory.joinpath("drifter_uv.nc"), "V": directory.joinpath("drifter_uv.nc"), From fd060d723b768a501d9f44e0dbe0f87f70e30797 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:00:03 +0100 Subject: [PATCH 49/66] Handle download_cleanup on wrong credentials --- src/virtualship/cli/commands.py | 50 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f21967b5..725fc23b 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,7 +1,9 @@ +import shutil from pathlib import Path import click import copernicusmarine +from copernicusmarine.core_functions.credentials_utils import InvalidUsernameOrPassword import virtualship.cli._creds as creds from virtualship import utils @@ -128,28 +130,32 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: } # Iterate over all datasets and download each based on area_of_interest - for dataset in download_dict.values(): - copernicusmarine.subset( - dataset_id=dataset["dataset_id"], - variables=dataset["variables"], - minimum_longitude=spatial_range.minimum_longitude, - maximum_longitude=spatial_range.maximum_longitude, - minimum_latitude=spatial_range.minimum_latitude, - maximum_latitude=spatial_range.maximum_latitude, - start_datetime=start_datetime, - end_datetime=end_datetime, - minimum_depth=abs(spatial_range.minimum_depth), - maximum_depth=abs(spatial_range.maximum_depth), - output_filename=dataset["output_filename"], - output_directory=path.joinpath(f"data/{aoi_hash}/"), - username=username, - password=password, - force_download=True, - overwrite_output_data=True, - force_dataset_part=dataset.get( - "force_dataset_part" - ), # Only used if specified in dataset - ) + try: + for dataset in download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.minimum_longitude, + maximum_longitude=spatial_range.maximum_longitude, + minimum_latitude=spatial_range.minimum_latitude, + maximum_latitude=spatial_range.maximum_latitude, + start_datetime=start_datetime, + end_datetime=end_datetime, + minimum_depth=abs(spatial_range.minimum_depth), + maximum_depth=abs(spatial_range.maximum_depth), + output_filename=dataset["output_filename"], + output_directory=path.joinpath(f"data/{aoi_hash}/"), + username=username, + password=password, + force_download=True, + overwrite_output_data=True, + force_dataset_part=dataset.get( + "force_dataset_part" + ), # Only used if specified in dataset + ) + except InvalidUsernameOrPassword as e: + shutil.rmtree(download_folder) + raise e complete_download() click.echo("Data download based on area of interest completed.") From d2a7bc8dea0b09f8e0d59d17ac7937daa525de51 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:03:49 +0100 Subject: [PATCH 50/66] Copy schedule to download folde --- src/virtualship/cli/commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 725fc23b..02f96f06 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -103,6 +103,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: DownloadMetadata(download_complete=False).to_yaml( download_folder / DOWNLOAD_METADATA ) + shutil.copyfile(path / SCHEDULE, download_folder / SCHEDULE) # Define all datasets to download, including bathymetry download_dict = { From 18b86ec7b7a2203e5210909135f729392f25efe0 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:28:17 +0100 Subject: [PATCH 51/66] Patch download command --- src/virtualship/cli/_fetch.py | 2 +- src/virtualship/cli/commands.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 39007ea1..41f79d0e 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -93,7 +93,7 @@ def check_complete_download(download_path: Path) -> bool: except (FileNotFoundError, AssertionError) as e: raise IncompleteDownloadError( f"Download at {download_path} was found, but looks to be incomplete " - f"(likely due to interupting it mid-download). Please delete this and retry." + f"(likely due to interupting it mid-download). Please delete this folder and retry." ) from e return True diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 02f96f06..e67b4100 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -84,7 +84,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: existing_download = get_existing_download(data_folder, aoi_hash) if existing_download is not None: click.echo( - f"Data download based on area of interest already completed at {existing_download}." + f"Data download for area of interest already completed ('{existing_download}')." ) return @@ -110,23 +110,23 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: "Bathymetry": { "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", "variables": ["deptho"], - "output_filename": str(download_folder / "bathymetry.nc"), + "output_filename": "bathymetry.nc", "force_dataset_part": "bathy", }, "UVdata": { "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", "variables": ["uo", "vo"], - "output_filename": str(download_folder / "default_uv.nc"), + "output_filename": "default_uv.nc", }, "Sdata": { "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", "variables": ["so"], - "output_filename": str(download_folder / "default_s.nc"), + "output_filename": "default_s.nc", }, "Tdata": { "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", "variables": ["thetao"], - "output_filename": str(download_folder / "default_t.nc"), + "output_filename": "default_t.nc", }, } @@ -145,7 +145,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: minimum_depth=abs(spatial_range.minimum_depth), maximum_depth=abs(spatial_range.maximum_depth), output_filename=dataset["output_filename"], - output_directory=path.joinpath(f"data/{aoi_hash}/"), + output_directory=download_folder, username=username, password=password, force_download=True, @@ -158,7 +158,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: shutil.rmtree(download_folder) raise e - complete_download() + complete_download(download_folder) click.echo("Data download based on area of interest completed.") From 524c650eff344731976adf7d948e83c38dec5cd7 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:39:58 +0100 Subject: [PATCH 52/66] Add `input_data` param for testing --- src/virtualship/expedition/do_expedition.py | 34 +++++++++++++++++---- tests/expedition/test_do_expedition.py | 4 ++- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index a2fcc4d1..34087733 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -19,11 +19,12 @@ from .verify_schedule import verify_schedule -def do_expedition(expedition_dir: str | Path) -> None: +def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> None: """ Perform an expedition, providing terminal feedback and file output. :param expedition_dir: The base directory for the expedition. + :param input_data: Input data folder folder (override used for testing). """ if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) @@ -51,7 +52,10 @@ def do_expedition(expedition_dir: str | Path) -> None: # load fieldsets input_data = _load_input_data( - expedition_dir=expedition_dir, schedule=schedule, ship_config=ship_config + expedition_dir=expedition_dir, + schedule=schedule, + ship_config=ship_config, + input_data=input_data, ) # verify schedule makes sense @@ -115,13 +119,31 @@ def _get_ship_config(expedition_dir: Path) -> ShipConfig | None: def _load_input_data( - expedition_dir: Path, schedule: Schedule, ship_config: ShipConfig + expedition_dir: Path, + schedule: Schedule, + ship_config: ShipConfig, + input_data: Path | None, ) -> InputData: - aoi_hash = hash_model(schedule.area_of_interest) - download_directory = get_existing_download(expedition_dir, aoi_hash) + """ + Load the input data. + + :param expedition_dir: Directory of the expedition. + :type expedition_dir: Path + :param schedule: Schedule object. + :type schedule: Schedule + :param ship_config: Ship configuration. + :type ship_config: ShipConfig + :param input_data: Folder containing input data. + :type input_data: Path | None + :return: InputData object. + :rtype: InputData + """ + if input_data is None: + aoi_hash = hash_model(schedule.area_of_interest) + input_data = get_existing_download(expedition_dir, aoi_hash) return InputData.load( - directory=download_directory, + directory=input_data, load_adcp=ship_config.adcp_config is not None, load_argo_float=ship_config.argo_float_config is not None, load_ctd=ship_config.ctd_config is not None, diff --git a/tests/expedition/test_do_expedition.py b/tests/expedition/test_do_expedition.py index 055764af..143249ca 100644 --- a/tests/expedition/test_do_expedition.py +++ b/tests/expedition/test_do_expedition.py @@ -1,9 +1,11 @@ +from pathlib import Path + from pytest import CaptureFixture from virtualship.expedition import do_expedition def test_do_expedition(capfd: CaptureFixture) -> None: - do_expedition("expedition_dir") + do_expedition("expedition_dir", input_data=Path("expedition_dir/input_data")) out, _ = capfd.readouterr() assert "This expedition took" in out, "Expedition did not complete successfully." From ad995c3807132e5d6285b41cdfb4d82661e6899c Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:31:22 +0100 Subject: [PATCH 53/66] Add tests --- pyproject.toml | 2 ++ tests/cli/test_cli.py | 24 +++++++++++++++- tests/cli/test_fetch.py | 61 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6179ce5a..71459b47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,8 @@ ignore = [ "D212", # one-blank-line-before-class "D203", + # First line of docstring should be in imperative mood + "D401", # TODO: Remove later "D100", "D103" diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index ef39ba8d..c8f684a9 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -3,10 +3,19 @@ import pytest from click.testing import CliRunner -from virtualship.cli.commands import init +from virtualship.cli.commands import fetch, init from virtualship.utils import SCHEDULE, SHIP_CONFIG +@pytest.fixture +def runner(): + """An example expedition.""" + runner = CliRunner() + with runner.isolated_filesystem(): + runner.invoke(init, ["."]) + yield runner + + def test_init(): runner = CliRunner() with runner.isolated_filesystem(): @@ -39,3 +48,16 @@ def test_init_existing_schedule(): with pytest.raises(FileExistsError): result = runner.invoke(init, ["."]) raise result.exception + + +@pytest.mark.parametrize( + "fetch_args", + [ + [".", "--username", "test"], + [".", "--password", "test"], + ], +) +def test_fetch_both_creds_via_cli(runner, fetch_args): + result = runner.invoke(fetch, fetch_args) + assert result.exit_code == 1 + assert "Both username and password" in result.exc_info[1].args[0] diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py index 2712ba69..4f624d8b 100644 --- a/tests/cli/test_fetch.py +++ b/tests/cli/test_fetch.py @@ -1,8 +1,17 @@ +from pathlib import Path + +import pytest from pydantic import BaseModel from virtualship.cli._fetch import ( + DOWNLOAD_METADATA, + DownloadMetadata, + IncompleteDownloadError, + check_complete_download, + complete_download, create_hash, filename_to_hash, + get_existing_download, hash_model, hash_to_filename, ) @@ -25,3 +34,55 @@ class TestModel(BaseModel): b: str hash_model(TestModel(a=0, b="b")) + + +def test_complete_download(tmp_path): + # Setup + DownloadMetadata(download_complete=False).to_yaml(tmp_path / DOWNLOAD_METADATA) + + complete_download(tmp_path) + + assert check_complete_download(tmp_path) + + +def test_check_complete_download_complete(tmp_path): + # Setup + DownloadMetadata(download_complete=True).to_yaml(tmp_path / DOWNLOAD_METADATA) + + assert check_complete_download(tmp_path) + + +def test_check_complete_download_incomplete(tmp_path): + # Setup + DownloadMetadata(download_complete=False).to_yaml(tmp_path / DOWNLOAD_METADATA) + + with pytest.raises(IncompleteDownloadError): + check_complete_download(tmp_path) + + +def test_check_complete_download_missing(tmp_path): + with pytest.raises(IncompleteDownloadError): + assert not check_complete_download(tmp_path) + + +@pytest.fixture +def existing_data_folder(tmp_path, monkeypatch): + # Setup + folders = [ + "YYYYMMDD_HHMMSS_hash", + "YYYYMMDD_HHMMSS_hash2", + "some-invalid-data-folder", + "YYYYMMDD_HHMMSS_hash3", + ] + data_folder = tmp_path + monkeypatch.setattr( + "virtualship.cli._fetch.check_complete_download", lambda x: True + ) + for f in folders: + (data_folder / f).mkdir() + yield data_folder + + +def test_get_existing_download(existing_data_folder): + assert isinstance(get_existing_download(existing_data_folder, "hash"), Path) + assert get_existing_download(existing_data_folder, "missing-hash") is None From afe6112e699451911a41e5154279f3ad585ca619 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:41:19 +0100 Subject: [PATCH 54/66] Add fetch test --- tests/cli/test_cli.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index c8f684a9..05a5fc72 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -7,6 +7,19 @@ from virtualship.utils import SCHEDULE, SHIP_CONFIG +@pytest.fixture +def copernicus_subset_no_download(monkeypatch): + """Mock the download function.""" + + def fake_download(output_filename, output_directory, **_): + Path(output_directory).joinpath(output_filename).touch() + + monkeypatch.setattr( + "virtualship.cli.commands.copernicusmarine.subset", fake_download + ) + yield + + @pytest.fixture def runner(): """An example expedition.""" @@ -57,7 +70,15 @@ def test_init_existing_schedule(): [".", "--password", "test"], ], ) +@pytest.mark.usefixtures("copernicus_subset_no_download") def test_fetch_both_creds_via_cli(runner, fetch_args): result = runner.invoke(fetch, fetch_args) assert result.exit_code == 1 assert "Both username and password" in result.exc_info[1].args[0] + + +@pytest.mark.usefixtures("copernicus_subset_no_download") +def test_fetch(runner): + """Test the fetch command, but mock the download.""" + result = runner.invoke(fetch, [".", "--username", "test", "--password", "test"]) + assert result.exit_code == 0 From 5669e418e86eb781badeab517849ac91f22ec60d Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:51:56 +0100 Subject: [PATCH 55/66] Pin copernicusmarine >= 2 Fixes #90 --- environment.yml | 2 +- src/virtualship/cli/commands.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/environment.yml b/environment.yml index 382100a0..4a94c84d 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,7 @@ dependencies: - pydantic >=2, <3 - pip - pyyaml - - copernicusmarine < 2 + - copernicusmarine >= 2 # linting - pre-commit diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index e67b4100..f013717e 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -111,7 +111,6 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", "variables": ["deptho"], "output_filename": "bathymetry.nc", - "force_dataset_part": "bathy", }, "UVdata": { "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", @@ -149,10 +148,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: username=username, password=password, force_download=True, - overwrite_output_data=True, - force_dataset_part=dataset.get( - "force_dataset_part" - ), # Only used if specified in dataset + overwrite=True, ) except InvalidUsernameOrPassword as e: shutil.rmtree(download_folder) From bcfe01e35c70ec5fd7b89496792a23dbd0488f5a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 17:22:23 +0100 Subject: [PATCH 56/66] RTD explicit config key https://about.readthedocs.com/blog/2024/12/deprecate-config-files-without-sphinx-or-mkdocs-config/ --- .readthedocs.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 01f7df4e..1c13b28a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -2,7 +2,8 @@ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 - +sphinx: + configuration: docs/conf.py build: os: ubuntu-22.04 tools: From 0ef7cf0d5d0fe209e55263807a8d31e07c8b3404 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 17:25:13 +0100 Subject: [PATCH 57/66] Delete download_data script --- scripts/download_data.py | 137 --------------------------------------- 1 file changed, 137 deletions(-) delete mode 100644 scripts/download_data.py diff --git a/scripts/download_data.py b/scripts/download_data.py deleted file mode 100644 index d3034ee7..00000000 --- a/scripts/download_data.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Download data required to run expeditions. - -This is a very crude script, here just as long as we do not properly incorporate it into the library. -""" - -import datetime - -import copernicusmarine - -if __name__ == "__main__": - datadir = "input_data" - username = input("username: ") - password = input("password: ") - - copernicusmarine.subset( - dataset_id="cmems_mod_glo_phy_my_0.083deg_static", - force_dataset_part="bathy", - variables=["deptho"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename="bathymetry.nc", - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "default_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "default_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "default_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "drifter_uv.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "drifter_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=0.49402499198913574, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) - - download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "argo_float_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "argo_float_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "argo_float_t.nc", - }, - } - - for dataset in download_dict: - copernicusmarine.subset( - dataset_id=download_dict[dataset]["dataset_id"], - variables=download_dict[dataset]["variables"], - minimum_longitude=-0.01, - maximum_longitude=0.01, - minimum_latitude=-0.01, - maximum_latitude=0.01, - start_datetime=datetime.datetime.strptime("2023-01-01", "%Y-%m-%d"), - end_datetime=datetime.datetime.strptime("2023-01-02", "%Y-%m-%d"), - minimum_depth=0.49402499198913574, - maximum_depth=5727.9169921875, - output_filename=download_dict[dataset]["output_filename"], - output_directory=datadir, - username=username, - password=password, - force_download=True, - ) From b830393b3dc597bbdb6edd5c7cb76a8491c643c5 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 17:28:09 +0100 Subject: [PATCH 58/66] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 71459b47..b80b42ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "numpy >=1, < 2", "pydantic >=2, <3", "PyYAML", - "copernicusmarine < 2", + "copernicusmarine >= 2", ] [project.urls] From 8d35d4b5ff6cc01715a2596d4e7d1be7aed4f9e0 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 17:38:10 +0100 Subject: [PATCH 59/66] Improve filename to hash conversion --- src/virtualship/cli/_fetch.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 41f79d0e..cd7365a4 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -37,11 +37,18 @@ def hash_model(model: BaseModel) -> str: def filename_to_hash(filename: str) -> str: """Extract hash from filename of the format YYYYMMDD_HHMMSS_{hash}.""" - return filename.split("_")[-1] + parts = filename.split("_") + if len(parts) != 3: + raise ValueError( + f"Filename '{filename}' must have 3 parts delimited with underscores." + ) + return parts[-1] def hash_to_filename(hash: str) -> str: """Return a filename of the format YYYYMMDD_HHMMSS_{hash}.""" + if "_" in hash: + raise ValueError("Hash cannot contain underscores.") return f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash}" From 9ffd8de214de9becc6025728a9d6bbab434e70dd Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 13 Jan 2025 17:38:20 +0100 Subject: [PATCH 60/66] Rename to assert_complete_download --- src/virtualship/cli/_fetch.py | 7 +++---- tests/cli/test_fetch.py | 18 +++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index cd7365a4..ba5572db 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -85,14 +85,13 @@ def get_existing_download(data_folder: Path, aoi_hash: str) -> Path | None: continue if hash == aoi_hash: - check_complete_download(download_path) + assert_complete_download(download_path) return download_path return None -def check_complete_download(download_path: Path) -> bool: - """Check if a download is complete.""" +def assert_complete_download(download_path: Path) -> None: download_metadata = download_path / DOWNLOAD_METADATA try: with open(download_metadata) as file: @@ -102,7 +101,7 @@ def check_complete_download(download_path: Path) -> bool: f"Download at {download_path} was found, but looks to be incomplete " f"(likely due to interupting it mid-download). Please delete this folder and retry." ) from e - return True + return def complete_download(download_path: Path) -> None: diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py index 4f624d8b..53801e5b 100644 --- a/tests/cli/test_fetch.py +++ b/tests/cli/test_fetch.py @@ -7,7 +7,7 @@ DOWNLOAD_METADATA, DownloadMetadata, IncompleteDownloadError, - check_complete_download, + assert_complete_download, complete_download, create_hash, filename_to_hash, @@ -42,27 +42,27 @@ def test_complete_download(tmp_path): complete_download(tmp_path) - assert check_complete_download(tmp_path) + assert_complete_download(tmp_path) -def test_check_complete_download_complete(tmp_path): +def test_assert_complete_download_complete(tmp_path): # Setup DownloadMetadata(download_complete=True).to_yaml(tmp_path / DOWNLOAD_METADATA) - assert check_complete_download(tmp_path) + assert_complete_download(tmp_path) -def test_check_complete_download_incomplete(tmp_path): +def test_assert_complete_download_incomplete(tmp_path): # Setup DownloadMetadata(download_complete=False).to_yaml(tmp_path / DOWNLOAD_METADATA) with pytest.raises(IncompleteDownloadError): - check_complete_download(tmp_path) + assert_complete_download(tmp_path) -def test_check_complete_download_missing(tmp_path): +def test_assert_complete_download_missing(tmp_path): with pytest.raises(IncompleteDownloadError): - assert not check_complete_download(tmp_path) + assert_complete_download(tmp_path) @pytest.fixture @@ -76,7 +76,7 @@ def existing_data_folder(tmp_path, monkeypatch): ] data_folder = tmp_path monkeypatch.setattr( - "virtualship.cli._fetch.check_complete_download", lambda x: True + "virtualship.cli._fetch.assert_complete_download", lambda x: None ) for f in folders: (data_folder / f).mkdir() From cfc50cc5adedf6e82bdf804e9517a9c8642edf18 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:03:17 +0100 Subject: [PATCH 61/66] Error message when area of interest isn't defined --- src/virtualship/cli/commands.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index f013717e..3be54456 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -79,6 +79,11 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: schedule = _get_schedule(path) + if schedule.area_of_interest is None: + raise ValueError( + "Area of interest not found in schedule, please define it to fetch the data." + ) + aoi_hash = hash_model(schedule.area_of_interest) existing_download = get_existing_download(data_folder, aoi_hash) From 7132766710e5d99d1110159a3c39520d545537c7 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:40:26 +0100 Subject: [PATCH 62/66] Add area of interest hash salting --- src/virtualship/cli/_fetch.py | 14 ++++++++++++-- src/virtualship/cli/commands.py | 4 ++-- src/virtualship/expedition/do_expedition.py | 4 ++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index ba5572db..55584187 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -7,6 +7,7 @@ import click from pydantic import BaseModel +from virtualship.expedition.space_time_region import SpaceTimeRegion from virtualship.utils import _dump_yaml, _generic_load_yaml DOWNLOAD_METADATA = "download_metadata.yaml" @@ -25,14 +26,23 @@ def create_hash(s: str) -> str: return _hash(s, length=8) -def hash_model(model: BaseModel) -> str: +def hash_model(model: BaseModel, salt: int = 0) -> str: """ Hash a Pydantic model. :param region: The region to hash. + :param salt: Salt to add to the hash. :returns: The hash. """ - return create_hash(model.model_dump_json()) + return create_hash(model.model_dump_json() + str(salt)) + + +def get_area_of_interest_hash(area_of_interest: SpaceTimeRegion) -> str: + """Get the hash of the area of interest.""" + # Increment salt in the event of breaking data fetching changes with prior versions + # of virtualship where you want to force new hashes (i.e., new data downloads) + salt = 0 + return hash_model(area_of_interest, salt=salt) def filename_to_hash(filename: str) -> str: diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 3be54456..9cc9c6b1 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -11,8 +11,8 @@ DOWNLOAD_METADATA, DownloadMetadata, complete_download, + get_area_of_interest_hash, get_existing_download, - hash_model, hash_to_filename, ) from virtualship.expedition.do_expedition import _get_schedule, do_expedition @@ -84,7 +84,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: "Area of interest not found in schedule, please define it to fetch the data." ) - aoi_hash = hash_model(schedule.area_of_interest) + aoi_hash = get_area_of_interest_hash(schedule.area_of_interest) existing_download = get_existing_download(data_folder, aoi_hash) if existing_download is not None: diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 34087733..9f04b052 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -6,7 +6,7 @@ import pyproj -from virtualship.cli._fetch import get_existing_download, hash_model +from virtualship.cli._fetch import get_area_of_interest_hash, get_existing_download from virtualship.utils import CHECKPOINT, SCHEDULE, SHIP_CONFIG from .checkpoint import Checkpoint @@ -139,7 +139,7 @@ def _load_input_data( :rtype: InputData """ if input_data is None: - aoi_hash = hash_model(schedule.area_of_interest) + aoi_hash = get_area_of_interest_hash(schedule.area_of_interest) input_data = get_existing_download(expedition_dir, aoi_hash) return InputData.load( From 8698d1338911918c7e1c782aeb54a20bc112e3fe Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:49:13 +0100 Subject: [PATCH 63/66] Update 'area of interest' to 'space-time region' throughout --- src/virtualship/cli/_fetch.py | 11 ++++---- src/virtualship/cli/commands.py | 26 +++++++++---------- src/virtualship/expedition/do_expedition.py | 6 ++--- src/virtualship/expedition/schedule.py | 2 +- .../expedition/space_time_region.py | 6 ++--- src/virtualship/static/schedule.yaml | 2 +- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 55584187..67ddfbf1 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -37,12 +37,11 @@ def hash_model(model: BaseModel, salt: int = 0) -> str: return create_hash(model.model_dump_json() + str(salt)) -def get_area_of_interest_hash(area_of_interest: SpaceTimeRegion) -> str: - """Get the hash of the area of interest.""" +def get_space_time_region_hash(space_time_region: SpaceTimeRegion) -> str: # Increment salt in the event of breaking data fetching changes with prior versions # of virtualship where you want to force new hashes (i.e., new data downloads) salt = 0 - return hash_model(area_of_interest, salt=salt) + return hash_model(space_time_region, salt=salt) def filename_to_hash(filename: str) -> str: @@ -83,7 +82,9 @@ def from_yaml(cls, file_path: str | Path) -> DownloadMetadata: return _generic_load_yaml(file_path, cls) -def get_existing_download(data_folder: Path, aoi_hash: str) -> Path | None: +def get_existing_download( + data_folder: Path, space_time_region_hash: str +) -> Path | None: """Check if a download has already been completed. If so, return the path for existing download.""" for download_path in data_folder.iterdir(): try: @@ -94,7 +95,7 @@ def get_existing_download(data_folder: Path, aoi_hash: str) -> Path | None: ) continue - if hash == aoi_hash: + if hash == space_time_region_hash: assert_complete_download(download_path) return download_path diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 9cc9c6b1..8a611e33 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -11,8 +11,8 @@ DOWNLOAD_METADATA, DownloadMetadata, complete_download, - get_area_of_interest_hash, get_existing_download, + get_space_time_region_hash, hash_to_filename, ) from virtualship.expedition.do_expedition import _get_schedule, do_expedition @@ -68,7 +68,7 @@ def init(path): default=None, ) def fetch(path: str | Path, username: str | None, password: str | None) -> None: - """Entrypoint for the tool to download data based on area of interest.""" + """Entrypoint for the tool to download data based on space-time region.""" if sum([username is None, password is None]) == 1: raise ValueError("Both username and password must be provided when using CLI.") @@ -79,31 +79,31 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: schedule = _get_schedule(path) - if schedule.area_of_interest is None: + if schedule.space_time_region is None: raise ValueError( - "Area of interest not found in schedule, please define it to fetch the data." + "space_time_region not found in schedule, please define it to fetch the data." ) - aoi_hash = get_area_of_interest_hash(schedule.area_of_interest) + space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) - existing_download = get_existing_download(data_folder, aoi_hash) + existing_download = get_existing_download(data_folder, space_time_region_hash) if existing_download is not None: click.echo( - f"Data download for area of interest already completed ('{existing_download}')." + f"Data download for space-time region already completed ('{existing_download}')." ) return creds_path = path / creds.CREDENTIALS_FILE username, password = creds.get_credentials_flow(username, password, creds_path) - # Extract area_of_interest details from the schedule - spatial_range = schedule.area_of_interest.spatial_range - time_range = schedule.area_of_interest.time_range + # Extract space_time_region details from the schedule + spatial_range = schedule.space_time_region.spatial_range + time_range = schedule.space_time_region.time_range start_datetime = time_range.start_time end_datetime = time_range.end_time # Create download folder and set download metadata - download_folder = data_folder / hash_to_filename(aoi_hash) + download_folder = data_folder / hash_to_filename(space_time_region_hash) download_folder.mkdir() DownloadMetadata(download_complete=False).to_yaml( download_folder / DOWNLOAD_METADATA @@ -134,7 +134,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: }, } - # Iterate over all datasets and download each based on area_of_interest + # Iterate over all datasets and download each based on space_time_region try: for dataset in download_dict.values(): copernicusmarine.subset( @@ -160,7 +160,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: raise e complete_download(download_folder) - click.echo("Data download based on area of interest completed.") + click.echo("Data download based on space-time region completed.") @click.command(help="Do the expedition.") diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 9f04b052..1846ea14 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -6,7 +6,7 @@ import pyproj -from virtualship.cli._fetch import get_area_of_interest_hash, get_existing_download +from virtualship.cli._fetch import get_existing_download, get_space_time_region_hash from virtualship.utils import CHECKPOINT, SCHEDULE, SHIP_CONFIG from .checkpoint import Checkpoint @@ -139,8 +139,8 @@ def _load_input_data( :rtype: InputData """ if input_data is None: - aoi_hash = get_area_of_interest_hash(schedule.area_of_interest) - input_data = get_existing_download(expedition_dir, aoi_hash) + space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) + input_data = get_existing_download(expedition_dir, space_time_region_hash) return InputData.load( directory=input_data, diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 7e435a67..5e41e00e 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -15,7 +15,7 @@ class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" waypoints: list[Waypoint] - area_of_interest: SpaceTimeRegion | None = None + space_time_region: SpaceTimeRegion | None = None model_config = pydantic.ConfigDict(extra="forbid") diff --git a/src/virtualship/expedition/space_time_region.py b/src/virtualship/expedition/space_time_region.py index 35373731..37aaee08 100644 --- a/src/virtualship/expedition/space_time_region.py +++ b/src/virtualship/expedition/space_time_region.py @@ -12,7 +12,7 @@ class SpatialRange(BaseModel): - """Defines the geographic boundaries for an area of interest.""" + """Defines geographic boundaries.""" minimum_longitude: Longitude maximum_longitude: Longitude @@ -40,7 +40,7 @@ def _check_lon_lat_domain(self) -> Self: class TimeRange(BaseModel): - """Defines the temporal boundaries for an area of interest.""" + """Defines the temporal boundaries for a space-time region.""" start_time: datetime end_time: datetime @@ -53,7 +53,7 @@ def _check_time_range(self) -> Self: class SpaceTimeRegion(BaseModel): - """An area of interest with spatial and temporal boundaries.""" + """An space-time region with spatial and temporal boundaries.""" spatial_range: SpatialRange time_range: TimeRange diff --git a/src/virtualship/static/schedule.yaml b/src/virtualship/static/schedule.yaml index 4cf824a6..de17a7cc 100644 --- a/src/virtualship/static/schedule.yaml +++ b/src/virtualship/static/schedule.yaml @@ -1,4 +1,4 @@ -area_of_interest: +space_time_region: spatial_range: minimum_longitude: -5 maximum_longitude: 5 From 04d7c13677dff909248e88aac0e4f82fbd78bbcb Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:50:29 +0100 Subject: [PATCH 64/66] Avoid circular import --- src/virtualship/cli/_fetch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 67ddfbf1..cfa52a61 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -3,13 +3,16 @@ import hashlib from datetime import datetime from pathlib import Path +from typing import TYPE_CHECKING import click from pydantic import BaseModel -from virtualship.expedition.space_time_region import SpaceTimeRegion from virtualship.utils import _dump_yaml, _generic_load_yaml +if TYPE_CHECKING: + from virtualship.expedition.space_time_region import SpaceTimeRegion + DOWNLOAD_METADATA = "download_metadata.yaml" From 93576c8c14fbeda3a1850f0793fa2759eb133c11 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:00:36 +0100 Subject: [PATCH 65/66] virtualship help documentation --- src/virtualship/cli/commands.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 8a611e33..b296a03a 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -61,14 +61,24 @@ def init(path): "--username", type=str, default=None, + help="Copernicus Marine username.", ) @click.option( "--password", type=str, default=None, + help="Copernicus Marine password.", ) def fetch(path: str | Path, username: str | None, password: str | None) -> None: - """Entrypoint for the tool to download data based on space-time region.""" + """ + Download input data for an expedition. + + Entrypoint for the tool to download data based on space-time region provided in the + schedule file. Data is downloaded from Copernicus Marine, credentials for which can be + obtained via registration: https://data.marine.copernicus.eu/register . Credentials can + be provided on prompt, via command line arguments, or via a YAML config file. Run + `virtualship fetch` on a expedition for more info. + """ if sum([username is None, password is None]) == 1: raise ValueError("Both username and password must be provided when using CLI.") From a2290b65b876634e41eb37865b5cd7a58b901a61 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:53:00 +0100 Subject: [PATCH 66/66] Update help messages --- src/virtualship/cli/commands.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index b296a03a..09cb267a 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -19,15 +19,13 @@ from virtualship.utils import SCHEDULE, SHIP_CONFIG -@click.command( - help="Initialize a directory for a new expedition, with an example configuration." -) +@click.command() @click.argument( "path", type=click.Path(exists=False, file_okay=False, dir_okay=True), ) def init(path): - """Entrypoint for the tool.""" + """Initialize a directory for a new expedition, with an example schedule and ship config files.""" path = Path(path) path.mkdir(exist_ok=True) @@ -50,9 +48,7 @@ def init(path): click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.") -@click.command( - help="Download the relevant data specified in an expedition directory (i.e., by the expedition config)." -) +@click.command() @click.argument( "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), @@ -173,7 +169,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: click.echo("Data download based on space-time region completed.") -@click.command(help="Do the expedition.") +@click.command() @click.argument( "path", type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True), @@ -184,5 +180,5 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: type=str, ) def run(path): - """Entrypoint for the tool.""" + """Run the expedition.""" do_expedition(Path(path))