From 157e7b01443a43b7aa6b8934f5c64f5ba832aefe Mon Sep 17 00:00:00 2001
From: Carlos Rueda
Date: Thu, 8 Feb 2024 16:10:26 -0800
Subject: [PATCH] add NRS11 notebook

---
 notebooks/PBP-NRS11-batch.ipynb | 670 ++++++++++++++++++++++++++++++++
 1 file changed, 670 insertions(+)
 create mode 100644 notebooks/PBP-NRS11-batch.ipynb

diff --git a/notebooks/PBP-NRS11-batch.ipynb b/notebooks/PBP-NRS11-batch.ipynb
new file mode 100644
index 0000000..dc53a52
--- /dev/null
+++ b/notebooks/PBP-NRS11-batch.ipynb
@@ -0,0 +1,670 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d087f632",
+   "metadata": {},
+   "source": [
+    "## Exercising PBP/PyPAM on NRS11 data\n",
+    "\n",
+    "The main steps in this notebook are:\n",
+    "\n",
+    "- Set up the working space for downloaded and generated files\n",
+    "- Generate HMB for a single day\n",
+    "- Generate HMB for multiple days in parallel using Dask"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "879b81f4-90c6-4910-8f63-f66ae0013afc",
+   "metadata": {},
+   "source": [
+    "## Preparations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c1afecf4-488f-4d0d-853e-6f59214ebe2f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/opt/pbp/pypam-based-processing\n"
+     ]
+    }
+   ],
+   "source": [
+    "## We need to be located here, mainly because some inputs are already in place:\n",
+    "%cd /opt/pbp/pypam-based-processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6fe8dc4d-97c2-4e40-a4f7-845d7e716855",
+   "metadata": {},
+   "source": [
+    "## Some parameters for PBP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d9f38450-5afd-44e8-a55e-9a3861c61fce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "json_base_dir = 'NRS11/noaa-passive-bioacoustic_nrs_11_2019-2021'\n",
+    "download_dir = 'NRS11/DOWNLOADS'\n",
+    "output_dir = 'NRS11/OUTPUT'\n",
+    "output_prefix = 'NRS11_'\n",
+    "\n",
+    "global_attrs_uri = 'NRS11/globalAttributes_NRS11.yaml'\n",
+    "variable_attrs_uri = 'NRS11/variableAttributes_NRS11.yaml'\n",
+    "\n",
+    "voltage_multiplier = 2.5\n",
+    "sensitivity_uri = 'NRS11/NRS11_H5R6_sensitivity_hms5kHz.nc'\n",
+    "subset_to = (10, 2_000)"
+   ]
+  },
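+  {
+   "cell_type": "markdown",
+   "id": "a0prep01",
+   "metadata": {},
+   "source": [
+    "The download and output directories are assumed to exist. As a precaution we can create them up front; a minimal sketch using only the standard library (PBP itself may already handle this):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0prep02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "## Ensure the download and output locations exist\n",
+    "## (a no-op if they are already in place):\n",
+    "for d in (download_dir, output_dir):\n",
+    "    os.makedirs(d, exist_ok=True)"
+   ]
+  },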
+  {
+   "cell_type": "markdown",
+   "id": "9fb10e07-b1dd-4a18-979c-1289730746fa",
+   "metadata": {},
+   "source": [
+    "## Code imports and preparations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "27c1c775-5d74-45f7-be76-e82c1d35286e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Some base imports:\n",
+    "import logging\n",
+    "import xarray as xr\n",
+    "import dask\n",
+    "import pandas as pd\n",
+    "import time\n",
+    "from google.cloud.storage import Client as GsClient  # To handle download of `gs:` resources"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "7db17f66-c1a3-4b1a-a637-7af37eefe32a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Some PBP imports:\n",
+    "import sys\n",
+    "sys.path = ['.'] + sys.path\n",
+    "from src.process_helper import ProcessHelper\n",
+    "from src.file_helper import FileHelper\n",
+    "from src.logging_helper import create_logger"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "421e4acf-b060-40d3-9009-f6c94d1e0d84",
+   "metadata": {},
+   "source": [
+    "## A function to process a given day\n",
+    "\n",
+    "PBP includes these two main modules that we will be using below:\n",
+    "\n",
+    "- `FileHelper`, which facilitates input file reading (including from gs and s3)\n",
+    "- `ProcessHelper`, which is the main processing module\n",
+    "\n",
+    "We will first define a high-level function that takes care of HMB generation for a given date.\n",
+    "\n",
+    "Building on that function, we will then define another function to dispatch multiple dates in parallel.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "9d440360",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "## Supported by those PBP modules, we define a function that\n",
+    "## takes care of processing a given day:\n",
+    "\n",
+    "def process_date(date: str, gen_netcdf: bool = True):\n",
+    "    \"\"\"\n",
+    "    Main function to generate the HMB product for a given day.\n",
+    "\n",
+    "    It relies on PBP's supporting elements for logging,\n",
+    "    file handling, and PyPAM-based HMB generation.\n",
+    "\n",
+    "    :param date: Date to process, in YYYYMMDD format.\n",
+    "\n",
+    "    :param gen_netcdf: Allows caller to skip the `.nc` creation here\n",
+    "        and instead save the datasets after all days have been generated\n",
+    "        (see parallel execution below).\n",
+    "\n",
+    "    :return: the generated xarray dataset.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    log_filename = f\"{output_dir}/{output_prefix}{date}.log\"\n",
+    "\n",
+    "    logger = create_logger(\n",
+    "        log_filename_and_level=(log_filename, logging.INFO),\n",
+    "        console_level=None,\n",
+    "    )\n",
+    "\n",
+    "    # we are only downloading publicly accessible datasets:\n",
+    "    gs_client = GsClient.create_anonymous_client()\n",
+    "\n",
+    "    file_helper = FileHelper(\n",
+    "        logger=logger,\n",
+    "        json_base_dir=json_base_dir,\n",
+    "        gs_client=gs_client,\n",
+    "        download_dir=download_dir,\n",
+    "        assume_downloaded_files=True,\n",
+    "        retain_downloaded_files=True,\n",
+    "    )\n",
+    "\n",
+    "    process_helper = ProcessHelper(\n",
+    "        logger=logger,\n",
+    "        file_helper=file_helper,\n",
+    "        output_dir=output_dir,\n",
+    "        output_prefix=output_prefix,\n",
+    "        global_attrs_uri=global_attrs_uri,\n",
+    "        variable_attrs_uri=variable_attrs_uri,\n",
+    "        voltage_multiplier=voltage_multiplier,\n",
+    "        sensitivity_uri=sensitivity_uri,\n",
+    "        subset_to=subset_to,\n",
+    "    )\n",
+    "\n",
+    "    ## now, get the HMB result:\n",
+    "    print(f'::: Started processing {date=} {log_filename=}')\n",
+    "    result = process_helper.process_day(date)\n",
+    "\n",
+    "    if gen_netcdf:\n",
+    "        nc_filename = f\"{output_dir}/{output_prefix}{date}.nc\"\n",
+    "        print(f'::: Ended processing {date=} => {nc_filename=}')\n",
+    "    else:\n",
+    "        print(f'::: Ended processing {date=} => (dataset generated in memory)')\n",
+    "\n",
+    "    if result is not None:\n",
+    "        return result.dataset\n",
+    "    else:\n",
+    "        print(f'::: UNEXPECTED: no segments were processed for {date=}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71cee960-d5e4-4574-8dd3-f5f30d772869",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Generating the HMB products"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e8c33ce9-61ed-4169-a976-cb721d472565",
+   "metadata": {},
+   "source": [
+    "### Processing a day\n",
+    "\n",
+    "We can call the `process_date` function defined above directly as follows\n",
+    "(the cell below is left commented out; uncomment it to process a single day):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "99d7252d-45e2-46e8-9379-f072d5d674c5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# start_time = time.time()\n",
+    "\n",
+    "# generated_dataset = process_date('20200101')\n",
+    "\n",
+    "# elapsed_time = time.time() - start_time\n",
+    "# print(f'===> date completed. Elapsed time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} mins)')\n",
+    "\n",
+    "# generated_dataset"
+   ]
+  },
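+  {
+   "cell_type": "markdown",
+   "id": "b0insp01",
+   "metadata": {},
+   "source": [
+    "Once a day has been processed with `gen_netcdf=True`, we can take a quick look at the resulting product; a minimal sketch, assuming `NRS11_20200101.nc` has already been generated under the output directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0insp02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Open a previously generated HMB product for a quick inspection\n",
+    "## (assumes NRS11/OUTPUT/NRS11_20200101.nc exists):\n",
+    "with xr.open_dataset(f'{output_dir}/{output_prefix}20200101.nc') as hmb_ds:\n",
+    "    print(hmb_ds)"
+   ]
+  },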
+  {
+   "cell_type": "markdown",
+   "id": "46ac16db-da76-4478-a89c-7d89ee900dc4",
+   "metadata": {},
+   "source": [
+    "## Prepare `process_date` for parallel execution\n",
+    "\n",
+    "We will use [Dask](https://examples.dask.org/delayed.html) to dispatch multiple instances of `process_date` in parallel."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "9f820f99-e302-475d-b858-bb5dc806c8c1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def process_multiple_dates(dates: list[str], gen_netcdf: bool = False) -> list[xr.Dataset]:\n",
+    "    \"\"\"\n",
+    "    Generates HMB for multiple days in parallel using Dask.\n",
+    "    Returns the resulting HMB datasets.\n",
+    "\n",
+    "    :param dates: The dates to process, each in YYYYMMDD format.\n",
+    "\n",
+    "    :param gen_netcdf: Allows caller to skip the `.nc` creation here\n",
+    "        and instead save the datasets after all days have been generated.\n",
+    "\n",
+    "    :return: the list of generated datasets.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    @dask.delayed\n",
+    "    def delayed_process_date(date: str):\n",
+    "        return process_date(date, gen_netcdf=gen_netcdf)\n",
+    "\n",
+    "    ## To display total elapsed time at the end of the processing:\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    ## This will be called by Dask when all dates have completed processing:\n",
+    "    def aggregate(*datasets) -> list[xr.Dataset]:\n",
+    "        elapsed_time = time.time() - start_time\n",
+    "        print(f'===> All {len(datasets)} dates completed. Elapsed time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} mins)')\n",
+    "        return datasets\n",
+    "\n",
+    "    ## Prepare the processes:\n",
+    "    delayed_processes = [delayed_process_date(date) for date in dates]\n",
+    "    aggregation = dask.delayed(aggregate)(*delayed_processes)\n",
+    "\n",
+    "    ## And launch them:\n",
+    "    return aggregation.compute()"
+   ]
+  },
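+  {
+   "cell_type": "markdown",
+   "id": "c0dask01",
+   "metadata": {},
+   "source": [
+    "By default, `dask.delayed` graphs are computed with the threaded scheduler. If we need more control, the scheduler and worker count can be set explicitly; a sketch (the values shown are just illustrative):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0dask02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Optionally control how Dask runs the delayed tasks;\n",
+    "## 'threads' is already the default for dask.delayed, and\n",
+    "## num_workers=8 is illustrative, not a recommendation:\n",
+    "dask.config.set(scheduler='threads', num_workers=8)"
+   ]
+  },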
+  {
+   "cell_type": "markdown",
+   "id": "567e2ffd-6f67-425c-bd2f-dfbd5dd702e9",
+   "metadata": {},
+   "source": [
+    "### Processing multiple days\n",
+    "\n",
+    "We use the `process_multiple_dates` function defined above to launch the generation of multiple HMB datasets in parallel."
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cd9d6c50-687d-4b5f-9987-7abb0dd8c848", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20200101',\n", + " '20200102',\n", + " '20200103',\n", + " '20200104',\n", + " '20200105',\n", + " '20200106',\n", + " '20200107',\n", + " '20200108',\n", + " '20200109',\n", + " '20200110',\n", + " '20200111',\n", + " '20200112',\n", + " '20200113',\n", + " '20200114',\n", + " '20200115',\n", + " '20200116',\n", + " '20200117',\n", + " '20200118',\n", + " '20200119',\n", + " '20200120',\n", + " '20200121',\n", + " '20200122',\n", + " '20200123',\n", + " '20200124',\n", + " '20200125',\n", + " '20200126',\n", + " '20200127',\n", + " '20200128',\n", + " '20200129',\n", + " '20200130',\n", + " '20200131']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## Set `dates` as the list of 'YYYYMMDD' dates to process in this demo:\n", + "\n", + "## (note: the included JSONs in the current PBP image only cover Jan 01–31, 2020)\n", + "\n", + "## We can use pandas to help us generate the list of desired dates:\n", + "date_range = pd.date_range(start='2020-01-01', end='2020-01-31')\n", + "dates = date_range.strftime('%Y%m%d').tolist()\n", + "dates" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c85cf420", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching HMB generation for 31 dates=['20200101', '20200102', '20200103', '20200104', '20200105', '20200106', '20200107', '20200108', '20200109', '20200110', '20200111', '20200112', '20200113', '20200114', '20200115', '20200116', '20200117', '20200118', '20200119', '20200120', '20200121', '20200122', '20200123', '20200124', '20200125', '20200126', '20200127', '20200128', '20200129', '20200130', '20200131']\n", + "::: Started processing date='20200103' log_filename='NRS11/OUTPUT/NRS11_20200103.log'\n", + "::: Started processing date='20200102' log_filename='NRS11/OUTPUT/NRS11_20200102.log'\n", + "::: Started processing date='20200109' log_filename='NRS11/OUTPUT/NRS11_20200109.log'\n", + "::: Started processing date='20200106' log_filename='NRS11/OUTPUT/NRS11_20200106.log'\n", + "::: Started processing date='20200117' log_filename='NRS11/OUTPUT/NRS11_20200117.log'\n", + "::: Started processing date='20200108' log_filename='NRS11/OUTPUT/NRS11_20200108.log'\n", + "::: Started processing date='20200104' log_filename='NRS11/OUTPUT/NRS11_20200104.log'\n", + "::: Started processing date='20200116' log_filename='NRS11/OUTPUT/NRS11_20200116.log'\n", + "::: Started processing date='20200124' log_filename='NRS11/OUTPUT/NRS11_20200124.log'\n", + "::: Started processing date='20200121' log_filename='NRS11/OUTPUT/NRS11_20200121.log'\n", + "::: Started processing date='20200120' log_filename='NRS11/OUTPUT/NRS11_20200120.log'\n", + "::: Started processing date='20200130' log_filename='NRS11/OUTPUT/NRS11_20200130.log'\n", + "::: Started processing date='20200107' log_filename='NRS11/OUTPUT/NRS11_20200107.log'\n", + "::: Started processing date='20200119' log_filename='NRS11/OUTPUT/NRS11_20200119.log'\n", + "::: Started processing date='20200129' log_filename='NRS11/OUTPUT/NRS11_20200129.log'\n", + "::: Started processing date='20200113' log_filename='NRS11/OUTPUT/NRS11_20200113.log'\n", + "::: Started processing date='20200126' log_filename='NRS11/OUTPUT/NRS11_20200126.log'\n", + "::: Started processing date='20200125' 
log_filename='NRS11/OUTPUT/NRS11_20200125.log'\n", + "::: Started processing date='20200101' log_filename='NRS11/OUTPUT/NRS11_20200101.log'\n", + "::: Started processing date='20200105' log_filename='NRS11/OUTPUT/NRS11_20200105.log'\n", + "::: Started processing date='20200122' log_filename='NRS11/OUTPUT/NRS11_20200122.log'\n", + "::: Started processing date='20200115' log_filename='NRS11/OUTPUT/NRS11_20200115.log'\n", + "::: Started processing date='20200128' log_filename='NRS11/OUTPUT/NRS11_20200128.log'\n", + "::: Started processing date='20200131' log_filename='NRS11/OUTPUT/NRS11_20200131.log'\n", + "::: Started processing date='20200123' log_filename='NRS11/OUTPUT/NRS11_20200123.log'\n", + "::: Started processing date='20200114' log_filename='NRS11/OUTPUT/NRS11_20200114.log'\n", + "::: Started processing date='20200118' log_filename='NRS11/OUTPUT/NRS11_20200118.log'\n", + "::: Started processing date='20200127' log_filename='NRS11/OUTPUT/NRS11_20200127.log'\n", + "::: Started processing date='20200112' log_filename='NRS11/OUTPUT/NRS11_20200112.log'\n", + "::: Started processing date='20200110' log_filename='NRS11/OUTPUT/NRS11_20200110.log'\n", + "::: Started processing date='20200111' log_filename='NRS11/OUTPUT/NRS11_20200111.log'\n", + "::: Ended processing date='20200117' => (dataset generated in memory)\n", + "::: Ended processing date='20200130' => (dataset generated in memory)\n", + "::: Ended processing date='20200103' => (dataset generated in memory)\n", + "::: Ended processing date='20200102' => (dataset generated in memory)\n", + "::: Ended processing date='20200116' => (dataset generated in memory)\n", + "::: Ended processing date='20200110' => (dataset generated in memory)\n", + "::: Ended processing date='20200107' => (dataset generated in memory)\n", + "::: Ended processing date='20200118' => (dataset generated in memory)\n", + "::: Ended processing date='20200123' => (dataset generated in memory)\n", + "::: Ended processing date='20200127' => (dataset generated in memory)\n", + "::: Ended processing date='20200105' => (dataset generated in memory)\n", + "::: Ended processing date='20200113' => (dataset generated in memory)\n", + "::: Ended processing date='20200115' => (dataset generated in memory)\n", + "::: Ended processing date='20200109' => (dataset generated in memory)\n", + "::: Ended processing date='20200104' => (dataset generated in memory)\n", + "::: Ended processing date='20200119' => (dataset generated in memory)\n", + "::: Ended processing date='20200129' => (dataset generated in memory)\n", + "::: Ended processing date='20200124' => (dataset generated in memory)\n", + "::: Ended processing date='20200114' => (dataset generated in memory)\n", + "::: Ended processing date='20200112' => (dataset generated in memory)\n", + "::: Ended processing date='20200121' => (dataset generated in memory)\n", + "::: Ended processing date='20200108' => (dataset generated in memory)\n", + "::: Ended processing date='20200122' => (dataset generated in memory)\n", + "::: Ended processing date='20200128' => (dataset generated in memory)\n", + "::: Ended processing date='20200111' => (dataset generated in memory)\n", + "::: Ended processing date='20200101' => (dataset generated in memory)\n", + "::: Ended processing date='20200126' => (dataset generated in memory)\n", + "::: Ended processing date='20200106' => (dataset generated in memory)\n", + "::: Ended processing date='20200125' => (dataset generated in memory)\n", + "::: Ended processing date='20200131' => (dataset generated 
in memory)\n",
+      "::: Ended processing date='20200120' => (dataset generated in memory)\n",
+      "===> All 31 dates completed. Elapsed time: 689.1 seconds (11.5 mins)\n",
+      "Generated datasets: 31\n",
+      "\n",
+      "Saving generated datasets...\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200101.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200102.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200103.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200104.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200105.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200106.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200107.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200108.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200109.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200110.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200111.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200112.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200113.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200114.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200115.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200116.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200117.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200118.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200119.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200120.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200121.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200122.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200123.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200124.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200125.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200126.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200127.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200128.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200129.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200130.nc'\n",
+      " Saving nc_filename='NRS11/OUTPUT/NRS11_20200131.nc'\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Now, launch the generation:\n",
+    "\n",
+    "print(f\"Launching HMB generation for {len(dates)} {dates=}\")\n",
+    "\n",
+    "## NOTE: due to issues observed when concurrently saving the resulting netCDF files,\n",
+    "## this flag lets us postpone the saving until after all datasets have been generated:\n",
+    "gen_netcdf = False\n",
+    "\n",
+    "## Get all HMB datasets:\n",
+    "generated_datasets = process_multiple_dates(dates, gen_netcdf=gen_netcdf)\n",
+    "\n",
+    "print(f'Generated datasets: {len(generated_datasets)}\\n')\n",
+    "\n",
+    "if not gen_netcdf:\n",
+    "    # so, we now do the file saving here:\n",
+    "    print('Saving generated datasets...')\n",
+    "    for date, ds in zip(dates, generated_datasets):\n",
+    "        nc_filename = f'{output_dir}/{output_prefix}{date}.nc'\n",
+    "        print(f' Saving {nc_filename=}')\n",
+    "        try:\n",
+    "            ds.to_netcdf(nc_filename,\n",
+    "                         engine=\"netcdf4\",\n",
+    "                         encoding={\n",
+    "                             \"effort\": {\"_FillValue\": None},\n",
+    "                             \"frequency\": {\"_FillValue\": None},\n",
+    "                             \"sensitivity\": {\"_FillValue\": None},\n",
+    "                         },\n",
+    "                         )\n",
+    "        except Exception as e:  # pylint: disable=broad-exception-caught\n",
+    "            print(f\"Unable to save {nc_filename}: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "824e943e-253d-4e5a-b1ac-4a673998dc98",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
"Listing *.nc in OUTPUT folder:\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200101.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200102.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200103.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200104.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200105.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200106.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200107.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200108.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200109.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200110.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200111.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200112.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200113.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200114.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200115.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200116.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200117.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200118.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200119.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200120.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200121.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200122.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200123.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200124.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200125.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200126.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200127.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200128.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200129.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200130.nc\n", + "-rw-r--r-- 1 jovyan users 6316372 Feb 8 19:07 NRS11/OUTPUT/NRS11_20200131.nc\n" + ] + } + ], + "source": [ + "print('\\nListing *.nc in OUTPUT folder:')\n", + "!ls -l NRS11/OUTPUT/*.nc" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}