diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..bcadfed
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,34 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]  # dev extras provide pytest
+
+      - name: Run tests
+        run: |
+          pytest
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 0000000..b7a704b
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+      - name: Build package
+        run: python -m build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/README.md b/README.md
index 627f406..e3417da 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
-# global-forecasting-system
-Global Forecasting System (GFS) Search & Download Utility
+# Global Forecasting System (GFS) Search & Download Utility
+
+[Gregory H. Halverson](https://github.com/gregory-halverson-jpl) (they/them)
+[gregory.h.halverson@jpl.nasa.gov](mailto:gregory.h.halverson@jpl.nasa.gov)
+NASA Jet Propulsion Laboratory 329G
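+
+## Usage
+
+A minimal sketch of retrieving an interpolated forecast (the target time is
+illustrative and must fall within the range of currently archived forecasts;
+each underlying GRIB2 file is a large download):
+
+```python
+from datetime import datetime, timedelta
+
+import global_forecasting_system as gfs
+
+# near-surface air temperature in Celsius, 24 hours from now,
+# interpolated in time on the native global 0.5-degree grid
+Ta_C = gfs.forecast_Ta_C(time_UTC=datetime.utcnow() + timedelta(hours=24))
+```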
diff --git a/global_forecasting_system/__init__.py b/global_forecasting_system/__init__.py
new file mode 100644
index 0000000..aa8ade6
--- /dev/null
+++ b/global_forecasting_system/__init__.py
@@ -0,0 +1,3 @@
+from .global_forecasting_system import *
+
+__version__ = "1.0.1"
diff --git a/global_forecasting_system/global_forecasting_system.py b/global_forecasting_system/global_forecasting_system.py
new file mode 100644
index 0000000..0eff882
--- /dev/null
+++ b/global_forecasting_system/global_forecasting_system.py
@@ -0,0 +1,310 @@
+from fnmatch import fnmatch
+import os
+from os import makedirs, system
+from typing import List
+from matplotlib.colors import LinearSegmentedColormap
+import requests
+from bs4 import BeautifulSoup
+import posixpath
+import re
+import pandas as pd
+from os.path import exists
+from os.path import splitext
+from os.path import join
+from os.path import expanduser
+import pygrib
+from shutil import move
+from datetime import datetime
+from datetime import timedelta
+from datetime import date
+from dateutil import parser
+import rasters as rt
+import numpy as np
+import logging
+import colored_logging
+
+logger = logging.getLogger(__name__)
+
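+# 1-based GRIB message numbers locating each variable of interest within a
+# grid-004 GRIB2 file: soil moisture, air temperature, relative humidity,
+# U/V wind components, and incoming shortwave radiation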
+GFS_SM_MESSAGE = 565
+GFS_TA_MESSAGE = 581
+GFS_RH_MESSAGE = 584
+GFS_U_WIND_MESSAGE = 588
+GFS_V_WIND_MESSAGE = 589
+GFS_SWIN_MESSAGE = 653
+
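+# NCEI archive of GFS 0.5-degree (grid 004) forecast GRIB2 files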
+REMOTE = "https://www.ncei.noaa.gov/data/global-forecast-system/access/grid-004-0.5-degree/forecast"
+
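+# soil-moisture colormap ramping from dry (tan) to saturated (blue)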
+SM_CMAP = LinearSegmentedColormap.from_list("SM", [
+ "#f6e8c3",
+ "#d8b365",
+ "#99894a",
+ "#2d6779",
+ "#6bdfd2",
+ "#1839c5"
+])
+
+def generate_GFS_date_URL(date_UTC: date) -> str:
+    if isinstance(date_UTC, str):
+        date_UTC = parser.parse(date_UTC).date()
+
+    # month directory on the NCEI server containing forecasts for this date
+    URL = posixpath.join(REMOTE, f"{date_UTC:%Y%m}") + "/"
+
+    return URL
+
+def GFS_month_addresses(URL: str = None) -> List[str]:
+ if URL is None:
+ URL = REMOTE
+
+ response = requests.get(URL)
+ soup = BeautifulSoup(response.text, "html.parser")
+    links = [link.get("href") for link in soup.find_all("a") if link.get("href")]
+    directories = [link for link in links if re.compile("[0-9]{6}").match(link)]
+ addresses = sorted([posixpath.join(URL, directory) for directory in directories])
+
+ return addresses
+
+def GFS_most_recent_month_address(year: int = None, month: int = None) -> str:
+ addresses = GFS_month_addresses()
+
+ if year is not None and month is not None:
+ addresses = [address for address in addresses if datetime.strptime(posixpath.basename(address.strip("/")), "%Y%m") <= datetime(year, month, 1)]
+
+ most_recent_address = addresses[-1]
+
+ return most_recent_address
+
+def GFS_date_addresses_from_URL(month_URL: str = None) -> List[str]:
+ if month_URL is None:
+ month_URL = GFS_most_recent_month_address()
+
+ response = requests.get(month_URL)
+ soup = BeautifulSoup(response.text, "html.parser")
+    links = [link.get("href") for link in soup.find_all("a") if link.get("href")]
+    directories = [link for link in links if re.compile("[0-9]{8}").match(link)]
+ addresses = sorted([posixpath.join(month_URL, directory) for directory in directories])
+
+ return addresses
+
+def GFS_date_addresses(year: int = None, month: int = None) -> List[str]:
+ month_URL = GFS_most_recent_month_address(year=year, month=month)
+ addresses = GFS_date_addresses_from_URL(month_URL)
+
+ return addresses
+
+def GFS_most_recent_date_address(date_UTC: date = None) -> str:
+ if date_UTC is None:
+ date_UTC = datetime.utcnow()
+
+ if isinstance(date_UTC, str):
+ date_UTC = parser.parse(date_UTC).date()
+
+ if isinstance(date_UTC, datetime):
+ date_UTC = date_UTC.date()
+
+ addresses = GFS_date_addresses(year=date_UTC.year, month=date_UTC.month)
+ addresses = [address for address in addresses if datetime.strptime(posixpath.basename(address.strip("/")), "%Y%m%d").date() <= date_UTC]
+ address = addresses[-1]
+
+ return address
+
+def GFS_file_addresses(date_URL: str = None, pattern: str = None) -> List[str]:
+ if date_URL is None:
+ date_URL = GFS_most_recent_date_address()
+
+ if pattern is None:
+ pattern = "*.grb2"
+
+ response = requests.get(date_URL)
+ soup = BeautifulSoup(response.text, "html.parser")
+    links = [link.get("href") for link in soup.find_all("a") if link.get("href")]
+    filenames = [link for link in links if fnmatch(link, pattern)]
+ addresses = [posixpath.join(date_URL, filename) for filename in filenames]
+
+ return addresses
+
+def get_GFS_listing(date_URL: str = None) -> pd.DataFrame:
+ if date_URL is None:
+ date_URL = GFS_most_recent_date_address()
+
+ addresses = GFS_file_addresses(date_URL)
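+    # basenames are expected to look like gfs_4_YYYYMMDD_HHMM_FFF.grb2, so the
+    # run date, run hour, and forecast hour below are parsed positionally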
+ address_df = pd.DataFrame({"address": addresses})
+ address_df["basename"] = address_df.address.apply(lambda address: posixpath.basename(address))
+ address_df["source_date_UTC"] = address_df.basename.apply(lambda basename: datetime.strptime(basename.split("_")[2], "%Y%m%d"))
+ address_df["source_hour"] = address_df.basename.apply(lambda basename: int(basename.split("_")[3][:2]))
+ address_df["source_datetime_UTC"] = address_df.apply(lambda row: row.source_date_UTC + timedelta(hours=row.source_hour), axis=1)
+ address_df["forecast_hours"] = address_df.basename.apply(lambda basename: int(splitext(basename)[0].split("_")[-1]))
+ address_df["forecast_time_UTC"] = address_df.apply(lambda row: row.source_datetime_UTC + timedelta(hours=row.forecast_hours), axis=1)
+ address_df.sort_values(by=["forecast_time_UTC", "source_hour"], inplace=True)
+ address_df.drop_duplicates(subset="forecast_time_UTC", keep="last", inplace=True)
+ address_df = address_df[["forecast_time_UTC", "address"]]
+
+ return address_df
+
+def GFS_before_after_addresses(time_UTC: datetime, listing: pd.DataFrame = None) -> pd.DataFrame:
+ if not isinstance(time_UTC, datetime):
+ time_UTC = parser.parse(time_UTC)
+
+ if listing is None:
+ listing = get_GFS_listing()
+
+    if len(listing) == 0:
+        raise ValueError(f"zero-length GFS listing at time {time_UTC} UTC")
+
+    # compare timestamps directly rather than through their string representations
+    time_UTC = pd.Timestamp(time_UTC)
+    before = listing[listing.forecast_time_UTC <= time_UTC].iloc[[-1]]
+    after = listing[listing.forecast_time_UTC > time_UTC].iloc[[0]]
+    before_after = pd.concat([before, after])
+
+ return before_after
+
+def GFS_download(URL: str, filename: str = None, directory: str = None) -> str:
+ if directory is None:
+ directory = "."
+
+ date_UTC = datetime.strptime(posixpath.basename(URL).split("_")[2], "%Y%m%d").date()
+ directory = expanduser(directory)
+
+ target_directory = join(directory, date_UTC.strftime("%Y-%m-%d"))
+ makedirs(target_directory, exist_ok=True)
+
+ if filename is None:
+ filename = join(target_directory, posixpath.basename(URL))
+
+ if exists(filename):
+ logger.info(f"file already downloaded: {colored_logging.file(filename)}")
+ return filename
+
+ logger.info(f"downloading URL: {colored_logging.URL(URL)}")
+
+ partial_filename = filename + ".download"
+
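+    # download into a partial file and rename only on completion, so an
+    # interrupted transfer never leaves a truncated .grb2 behind; wget -c
+    # resumes the transfer if the partial file already exists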
+ command = f'wget -c -O "{partial_filename}" "{URL}"'
+ logger.info(command)
+    system(command)
+
+    if not exists(partial_filename):
+        raise ConnectionError(f"unable to download URL: {URL}")
+
+    move(partial_filename, filename)
+
+ logger.info(f"downloaded file: {colored_logging.file(filename)}")
+
+ return filename
+
+def read_GFS(filename: str, message: int, geometry: rt.RasterGeometry = None, resampling = "cubic") -> rt.Raster:
+ with pygrib.open(filename) as file:
+ data = file.message(message).values
+
+ rows, cols = data.shape
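+    # GFS grids start at longitude 0; roll half the columns so the raster spans -180 to 180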
+    data = np.roll(data, cols // 2, axis=1)
+ grid = rt.RasterGrid.from_bbox(rt.BBox(-180, -90, 180, 90), data.shape)
+ image = rt.Raster(data, geometry=grid)
+
+ if geometry is not None:
+ image = image.to_geometry(geometry, resampling=resampling)
+
+ return image
+
+def GFS_interpolate(
+ message: int,
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ before_after = GFS_before_after_addresses(time_UTC, listing=listing)
+
+ before_address = before_after.iloc[0].address
+ logger.info(f"before URL: {colored_logging.URL(before_address)}")
+ before_time = parser.parse(str(before_after.iloc[0].forecast_time_UTC))
+ before_filename = GFS_download(URL=before_address, directory=directory)
+
+ try:
+ before_image = read_GFS(filename=before_filename, message=message, geometry=geometry, resampling=resampling)
+ except Exception as e:
+ logger.warning(e)
+ os.remove(before_filename)
+
+ before_filename = GFS_download(URL=before_address, directory=directory)
+ before_image = read_GFS(filename=before_filename, message=message, geometry=geometry, resampling=resampling)
+
+ after_address = before_after.iloc[-1].address
+ logger.info(f"after URL: {colored_logging.URL(after_address)}")
+ after_time = parser.parse(str(before_after.iloc[-1].forecast_time_UTC))
+ after_filename = GFS_download(URL=after_address, directory=directory)
+
+ try:
+ after_image = read_GFS(filename=after_filename, message=message, geometry=geometry, resampling=resampling)
+ except Exception as e:
+ logger.warning(e)
+ os.remove(after_filename)
+
+ after_filename = GFS_download(URL=after_address, directory=directory)
+ after_image = read_GFS(filename=after_filename, message=message, geometry=geometry, resampling=resampling)
+
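+    # interpolate linearly in time between the bracketing forecast steps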
+ source_diff = after_image - before_image
+    time_fraction = (parser.parse(str(time_UTC)) - before_time) / (after_time - before_time)
+ interpolated_image = before_image + source_diff * time_fraction
+
+ return interpolated_image
+
+def forecast_Ta_K(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ return GFS_interpolate(message=GFS_TA_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing)
+
+def forecast_Ta_C(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ return forecast_Ta_K(time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing) - 273.15
+
+def forecast_RH(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ return rt.clip(GFS_interpolate(message=GFS_RH_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing) / 100, 0, 1)
+
+def forecast_SM(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ SM = rt.clip(GFS_interpolate(message=GFS_SM_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing) / 10000, 0, 1)
+ SM.cmap = SM_CMAP
+
+ return SM
+
+def forecast_SWin(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ return rt.clip(GFS_interpolate(message=GFS_SWIN_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing), 0, None)
+
+def forecast_wind(
+ time_UTC: datetime,
+ geometry: rt.RasterGeometry = None,
+ resampling: str = "cubic",
+ directory: str = None,
+ listing: pd.DataFrame = None) -> rt.Raster:
+ U = GFS_interpolate(message=GFS_U_WIND_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing)
+ V = GFS_interpolate(message=GFS_V_WIND_MESSAGE, time_UTC=time_UTC, geometry=geometry, resampling=resampling, directory=directory, listing=listing)
+ wind_speed = rt.clip(np.sqrt(U ** 2.0 + V ** 2.0), 0.0, None)
+
+ return wind_speed
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..d123a9e
--- /dev/null
+++ b/makefile
@@ -0,0 +1,59 @@
+PACKAGE_NAME = global-forecasting-system
+ENVIRONMENT_NAME = $(PACKAGE_NAME)
+DOCKER_IMAGE_NAME = $(shell echo $(PACKAGE_NAME) | tr '[:upper:]' '[:lower:]')
+
+# every target here is a command, not a file; without .PHONY, an existing
+# build/ or dist/ directory would make its target appear up to date
+.PHONY: clean test build twine-upload dist install uninstall reinstall environment remove-environment colima-start docker-build docker-build-environment docker-build-installation docker-interactive docker-remove
+
+clean:
+	rm -rf *.o *.out *.log
+	rm -rf build/
+	rm -rf dist/
+	rm -rf *.egg-info
+	rm -rf .pytest_cache
+	find . -type d -name "__pycache__" -exec rm -rf {} +
+
+test:
+	pytest
+
+build:
+	python -m build
+
+twine-upload:
+	twine upload dist/*
+
+dist:
+	make clean
+	make build
+	make twine-upload
+
+install:
+	pip install -e .[dev]
+
+uninstall:
+	pip uninstall -y $(PACKAGE_NAME)
+
+reinstall:
+	make uninstall
+	make install
+
+environment:
+	mamba create -y -n $(ENVIRONMENT_NAME) -c conda-forge python=3.10
+
+remove-environment:
+	mamba env remove -y -n $(ENVIRONMENT_NAME)
+
+colima-start:
+	colima start -m 16 -a x86_64 -d 100
+
+docker-build:
+	docker build -t $(DOCKER_IMAGE_NAME):latest .
+
+docker-build-environment:
+	docker build --target environment -t $(DOCKER_IMAGE_NAME):latest .
+
+docker-build-installation:
+	docker build --target installation -t $(DOCKER_IMAGE_NAME):latest .
+
+docker-interactive:
+	docker run -it $(DOCKER_IMAGE_NAME) fish
+
+docker-remove:
+	docker rmi -f $(DOCKER_IMAGE_NAME)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..c5ccd8c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,48 @@
+[build-system]
+requires = ["setuptools>=60", "setuptools-scm>=8.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "global-forecasting-system"
+version = "1.0.1"
+description = "Global Forecasting System (GFS) Search & Download Utility"
+readme = "README.md"
+authors = [
+ { name = "Gregory H. Halverson", email = "gregory.h.halverson@jpl.nasa.gov" },
+]
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Operating System :: OS Independent",
+]
+dependencies = [
+ "beautifulsoup4",
+ "colored-logging",
+ "matplotlib",
+ "numpy",
+ "pandas",
+ "pygrib",
+ "python-dateutil",
+ "rasters",
+ "requests"
+]
+requires-python = ">=3.10"
+
+[project.optional-dependencies]
+dev = [
+    "build",
+    "jupyter",
+    "pytest>=6.0",
+    "pytest-cov",
+    "twine"
+]
+
+[tool.setuptools.package-data]
+global_forecasting_system = ["*.txt"]
+
+[project.urls]
+Homepage = "https://github.com/ET-Toolbox/global-forecasting-system"
+
+[tool.setuptools.packages.find]
+include = ["global_forecasting_system"]
+exclude = ["calval", "testing"]
diff --git a/tests/test_import_dependencies.py b/tests/test_import_dependencies.py
new file mode 100644
index 0000000..7b1ccb7
--- /dev/null
+++ b/tests/test_import_dependencies.py
@@ -0,0 +1,19 @@
+import pytest
+
+# List of dependencies
+dependencies = [
+ "bs4",
+ "colored_logging",
+ "dateutil",
+ "matplotlib",
+ "numpy",
+ "pandas",
+ "pygrib",
+ "rasters",
+ "requests"
+]
+
+# Generate individual test functions for each dependency
+@pytest.mark.parametrize("dependency", dependencies)
+def test_dependency_import(dependency):
+ __import__(dependency)
diff --git a/tests/test_import_global_forecasting_system.py b/tests/test_import_global_forecasting_system.py
new file mode 100644
index 0000000..4531e2c
--- /dev/null
+++ b/tests/test_import_global_forecasting_system.py
@@ -0,0 +1,2 @@
+def test_import_global_forecasting_system():
+ import global_forecasting_system