diff --git a/notebooks/demo_individual/demo_parameters.ipynb b/notebooks/demo_individual/demo_parameters.ipynb index d51372b..73e8355 100644 --- a/notebooks/demo_individual/demo_parameters.ipynb +++ b/notebooks/demo_individual/demo_parameters.ipynb @@ -9,7 +9,10 @@ "from birdy import WPSClient\n", "from pkg_resources import resource_filename\n", "\n", - "import requests" + "import requests\n", + "\n", + "# For mixed demo\n", + "from tempfile import NamedTemporaryFile" ] }, { @@ -43,7 +46,7 @@ "data": { "text/plain": [ "\u001b[0;31mType:\u001b[0m WPSClient\n", - "\u001b[0;31mString form:\u001b[0m \n", + "\u001b[0;31mString form:\u001b[0m \n", "\u001b[0;31mFile:\u001b[0m ~/osprey-venv/lib/python3.6/site-packages/birdy/client/base.py\n", "\u001b[0;31mDocstring:\u001b[0m \n", "A Web Processing Service for Climate Data Analysis.\n", @@ -145,8 +148,8 @@ } ], "source": [ - "cfg_file = resource_filename(\"tests\", \"data/samples/sample_parameter_config.cfg\")\n", - "print(cfg_file)" + "cfg_file_local = resource_filename(\"tests\", \"data/samples/sample_parameter_config.cfg\")\n", + "print(cfg_file_local)" ] }, { @@ -157,7 +160,7 @@ "source": [ "# run parameters\n", "output = osprey.parameters(\n", - " config = cfg_file\n", + " config = cfg_file_local\n", ")" ] }, @@ -170,7 +173,7 @@ "data": { "text/plain": [ "parametersResponse(\n", - " output='http://localhost:5002/outputs/22f2799c-dc02-11ea-a870-c86000e3f2fd/sample.rvic.prm.COLUMBIA.20200811.nc'\n", + " output='http://localhost:5002/outputs/c70edd66-e893-11ea-a870-c86000e3f2fd/sample.rvic.prm.COLUMBIA.20200827.nc'\n", ")" ] }, @@ -182,6 +185,100 @@ "source": [ "output.get()" ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/slim/osprey/tests/configs/parameter_mixed.cfg\n" + ] + } + ], + "source": [ + "# FILE_PATHS are a mix of local paths and https urls\n", + "cfg_file_mixed = resource_filename(\"tests\", 
\"configs/parameter_mixed.cfg\")\n", + "print(cfg_file_mixed)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "config_name = os.path.splitext(cfg_file_mixed)[0] # Remove .cfg extension\n", + "with NamedTemporaryFile(\n", + " suffix=\".cfg\", prefix=os.path.basename(config_name), mode=\"w+t\"\n", + ") as temp_config: # Avoid permanent replacement of https URLs\n", + " read_config = open(cfg_file_mixed, \"r\")\n", + " temp_config.writelines(read_config.read())\n", + " temp_config.read()\n", + " output = osprey.parameters(\n", + " config = temp_config.name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "parametersResponse(\n", + " output='http://localhost:5002/outputs/ce1ee182-e893-11ea-a870-c86000e3f2fd/sample.rvic.prm.COLUMBIA.20200827.nc'\n", + ")" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output.get()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/osprey/processes/wps_parameters.py b/osprey/processes/wps_parameters.py index bd2cf77..dac5ce0 100644 --- a/osprey/processes/wps_parameters.py +++ b/osprey/processes/wps_parameters.py @@ -17,12 +17,13 @@ log_level, nc_output, ) -from osprey.utils import logger +from osprey.utils import logger, replace_urls # Library imports import os import json from datetime import 
def make_mock_urls(config, requests_mock):
    """Create mock get requests for urls in
    config file

    Since it is possible for a test file to not
    exist on THREDDS, requests_mock is used to
    get these urls, and their content is the corresponding
    data coming from the same file in local storage.

    Only non-comment lines are considered: a line whose first
    non-blank character is "#" or ";" is skipped, so a URL that is
    merely mentioned in config commentary is not treated as a data file.
    The URL is the first "https://..." token on the line, wherever it
    appears, which also tolerates trailing whitespace.

    Parameters:
        config (str): Path to config file
        requests_mock: requests_mock fixture

    Raises:
        OSError: If the config file, or the local sample file matching
            a URL's basename, cannot be opened.
    """
    import re  # local import keeps this helper self-contained

    with open(config, "r") as read_config:
        config_data = read_config.readlines()

    for line in config_data:
        if line.lstrip().startswith(("#", ";")):
            continue  # commentary, not a FILE_NAME entry
        match = re.search(r"https://\S+", line)
        if match is None:
            continue
        url = match.group(0)
        filename = url.split("/")[-1]  # basename doubles as local sample name
        sample_path = resource_filename("tests", f"data/samples/{filename}")
        with open(sample_path, "rb") as sample:
            filedata = sample.read()
        requests_mock.get(url, content=filedata)
def replace_urls(config, outdir):
    """
    Copy https URLs to local storage and replace URLs
    with local paths in config file.

    The config file is rewritten in place. Each non-comment line that
    contains an "https://..." token has that resource downloaded into
    ``outdir`` and the URL replaced by the path of the downloaded copy.
    Lines whose first non-blank character is "#" or ";" are left
    untouched, so URLs mentioned in commentary are never fetched.

    Parameters:
        config (str): Config file
        outdir (str): Output directory

    Raises:
        requests.HTTPError: If a URL responds with an error status, so
            an error page is never stored as input data.
        OSError: If the config file cannot be read or written, or the
            local copy cannot be created in ``outdir``.
    """
    import re  # local import: only needed for URL extraction here

    with open(config, "r") as read_config:
        filedata = read_config.readlines()

    for i, line in enumerate(filedata):
        if line.lstrip().startswith(("#", ";")):
            continue  # commentary, not a path entry
        match = re.search(r"https://\S+", line)
        if match is None:
            continue
        url = match.group(0)

        r = requests.get(url)
        r.raise_for_status()

        filename = url.split("/")[-1]
        # splitext copes with names that have no dot or several dots,
        # where unpacking filename.split(".") would raise ValueError.
        prefix, suffix = os.path.splitext(filename)

        # Close the file before its path is handed on so the downloaded
        # bytes are flushed to disk; delete=False keeps it for the process.
        with NamedTemporaryFile(
            suffix=suffix, prefix=prefix, dir=outdir, delete=False
        ) as local_file:
            local_file.write(r.content)
        filedata[i] = line.replace(url, local_file.name)

    with open(config, "w") as write_config:
        write_config.writelines(filedata)
diff --git a/requirements.txt b/requirements.txt index 6ec7f0e..f7eeca9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ pywps>=4.2 jinja2 click psutil -rvic==1.1.0post1 +rvic==1.1.1 nchelpers==5.5.7 wps-tools==0.1.2 diff --git a/requirements_dev.txt b/requirements_dev.txt index 6559b05..c8d4ce1 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,7 @@ pytest flake8 pytest-flake8 +requests-mock ipython pytest-notebook nbsphinx diff --git a/tests/configs/parameter_opendap.cfg b/tests/configs/parameter_https.cfg similarity index 95% rename from tests/configs/parameter_opendap.cfg rename to tests/configs/parameter_https.cfg index 06d1df0..039aca6 100644 --- a/tests/configs/parameter_opendap.cfg +++ b/tests/configs/parameter_https.cfg @@ -73,7 +73,7 @@ SEARCH_FOR_CHANNEL: False #-- Path to Pour Points File (char) --# # A comma separated file of outlets to route to [lons, lats] - one coordinate pair per line (order not important) # May optionally include a column [names] - which will (if not aggregating) be included in param file -FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_pour.txt +FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_pour.txt #-- ====================================== --# @@ -83,7 +83,7 @@ FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/da # This defines the unit hydrograph to rout flow to the edge of each grid cell. # A comma separated file of [time in seconds, unit hydrograph ordinate] - one timestep per line # The timestep should be 1hr (3600 sec) or less. 
-FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/uhbox.csv +FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/uhbox.csv #-- Number of Header lines to ignore in [UH_BOX]FILE_NAME (INT) --# HEADER_LINES = 1 @@ -92,7 +92,7 @@ HEADER_LINES = 1 [ROUTING] #-- ====================================== --# #-- Path to routing inputs netcdf (char) --# -FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_flow_parameters.nc +FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_flow_parameters.nc #-- netCDF Variable Names --# LONGITUDE_VAR: lon @@ -124,7 +124,7 @@ CELL_FLOWDAYS: 4 [DOMAIN] #-- ====================================== --# #-- Path to cesm compliant domain file (char) --# -FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_routing_domain.nc +FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_routing_domain.nc #-- netCDF Variable Names --# LONGITUDE_VAR: lon diff --git a/tests/configs/parameter_mixed.cfg b/tests/configs/parameter_mixed.cfg new file mode 100644 index 0000000..5791cb1 --- /dev/null +++ b/tests/configs/parameter_mixed.cfg @@ -0,0 +1,135 @@ +#-- ========================================================================== --# +#-- --# +#-- This RVIC namelist contains options and paths for the --# +#-- development of the RVIC parameter file. --# +#-- --# +#-- --# +#-- ========================================================================== --# + +# Note: namelist is parsed by the python ConfigParser module. %(Interpolation) is +# supported inside [sections] only. 
+ +[OPTIONS] +#-- ====================================== --# +#--Level to log output at (char) --# +# valid values: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL: DEBUG + +#--Print output to console in addition to the log file (bool) --# +# valid values: True, False +VERBOSE: True + +#--Delete temporary files, only used if REMAP=True (bool) --# +# valid values: True, False +CLEAN: False + +#--case description (char) --# +CASEID: sample + +#--routing domain grid shortname (char) --# +GRIDID: COLUMBIA + +#--case run directory (char) --# +CASE_DIR: /tmp/%(CASEID)s + +#--Directory to use for temporary read/write operations (char) --# +TEMP_DIR: %(CASE_DIR)s/temp/ + +#--Remap Unit Hydrographs from [ROUTING] grid to [DOMAIN] grid (bool) --# +# valid values: True, False +REMAP: False + +#--Aggregate all [POUR_POINTS] inside each [DOMAIN] grid cell (bool) --# +# This should only be used when routing to coastal grid cells for CESM +AGGREGATE: False + +#--Size of pad to add to aggregated files prior to remapping (int) --# +AGG_PAD: 25 + +#-- Output parameter file format (char) --# +# Valid Values: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, and NETCDF4 +# For use with CESM, NETCDF3_CLASSIC is recommended. 
+NETCDF_FORMAT: NETCDF4 + +#-- Output parameter file compression options --# +# Descriptions of these options can be found in +NETCDF_ZLIB: False +NETCDF_COMPLEVEL: 4 +NETCDF_SIGFIGS: None + +#-- Length of unit hydrograph subset in days (int) --# +SUBSET_DAYS: + +#-- Constrain the final unit hydrographs sum to be less than or equal to the domain fractions --# +# True when routing to coastal grid cells, else False +CONSTRAIN_FRACTIONS: False + +SEARCH_FOR_CHANNEL: False + +#-- ====================================== --# + +[POUR_POINTS] +#-- ====================================== --# +#-- Path to Pour Points File (char) --# +# A comma separated file of outlets to route to [lons, lats] - one coordinate pair per line (order not important) +# May optionally include a column [names] - which will (if not aggregating) be included in param file +FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_pour.txt + +#-- ====================================== --# + +[UH_BOX] +#-- ====================================== --# +#-- Path to UH Box File (char) --# +# This defines the unit hydrograph to rout flow to the edge of each grid cell. +# A comma separated file of [time in seconds, unit hydrograph ordinate] - one timestep per line +# The timestep should be 1hr (3600 sec) or less. 
@pytest.fixture
def conftest_make_mock_urls(config, requests_mock):
    """Fixture wrapper that calls make_mock_urls for the test's config.

    Parameters:
        config: Passed straight through to make_mock_urls; the test that
            requests this fixture is expected to supply it
            (presumably via parametrization -- confirm against the tests).
        requests_mock: requests_mock fixture, passed straight through.

    Returns:
        Whatever make_mock_urls returns.
    """
    return make_mock_urls(config, requests_mock)
@pytest.mark.online
@pytest.mark.parametrize(
    ("config"), [resource_filename(__name__, "configs/parameter_https.cfg")],
)
def test_parameters_https(config, conftest_make_mock_urls):
    """Run the Parameters process on a config whose FILE_NAMEs are https URLs.

    The process is given a temporary copy of the config rather than the
    original file, so the checked-in config is not the file handed to it.
    """
    config_name = os.path.splitext(config)[0]  # Remove .cfg extension
    with NamedTemporaryFile(
        suffix=".cfg", prefix=os.path.basename(config_name), mode="w+t"
    ) as temp_config:  # Avoid permanent replacement of https URLs
        with open(config, "r") as read_config:
            temp_config.write(read_config.read())
        # Push the copy to disk before the process opens it by name.
        temp_config.flush()
        params = f"config={temp_config.name};"
        run_wps_process(Parameters(), params)