Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3/.devcontainer/base.Dockerfile


# [Choice] Ubuntu version (use ubuntu-22.04 or ubuntu-18.04 on local arm64/Apple Silicon): ubuntu-24.04, ubuntu-22.04, ubuntu-20.04, ubuntu-18.04
ARG VARIANT=ubuntu-24.04
FROM mcr.microsoft.com/vscode/devcontainers/base:${VARIANT}

# Postgres & our packages. Currently not customizable via VERSION param.
# NOTE(review): curl, ca-certificates and lsb-release are assumed to be provided
# by the base image; if a variant lacks them, re-enable the install step below.
# RUN apt-get update \
#     && apt-get -y install --no-install-recommends curl ca-certificates gnupg

# Register the PGDG apt repository. The signing key is stored in a dedicated
# keyring file and referenced via "signed-by" instead of the deprecated
# `apt-key add`, so the key is only trusted for this one repository.
# RUN already executes as root at this point, so no sudo is needed.
# curl --fail makes an HTTP error abort the build instead of saving an error page as the key.
RUN install -d /usr/share/postgresql-common/pgdg \
    && curl --fail --silent --show-error --location \
        -o /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc \
    && echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list

# Install the Postgres extensions and client headers, then drop the apt package
# lists in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get -y install --no-install-recommends postgresql-plpython3-14 postgresql-14-postgis-3 libpq-dev \
    && rm -rf /var/lib/apt/lists/*
67 changes: 67 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3
{
"name": "Python3 & Poetry & Postgres",
"build": {
"dockerfile": "Dockerfile",
"args": {
// Update 'VARIANT' to pick the Ubuntu version of the base image: ubuntu-24.04, ubuntu-22.04, ubuntu-20.04
// The value is passed to the Dockerfile as the base image tag.
// Use ubuntu-22.04 or ubuntu-18.04 on local arm64/Apple Silicon.
"VARIANT": "ubuntu-24.04"
}
},
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
"python.linting.enabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
},
// VSCODE ONLY: Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
//"forwardPorts": [48423],
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "bash ./.devcontainer/post-install.sh",
"postStartCommand": "bash ./.devcontainer/post-start.sh",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": "latest",
"ghcr.io/devcontainers/features/git:1": "latest",
// add python to container
"ghcr.io/devcontainers/features/python:1": {
"version": "3.13"
},
// add poetry to container
"ghcr.io/devcontainers-extra/features/poetry:2": {
"version": "2.1.3"
}
},
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"remoteEnv": {
// "PATH": "${containerEnv:PATH}:${containerEnv:HOME}/.local/bin"
},
"runArgs": [
// allow container to be treated with no network isolation
"--network=host",
// give a nicer name to the container
"--name",
"${localEnv:USER}_crmprtd_devcontainer"
]
}
21 changes: 21 additions & 0 deletions .devcontainer/post-install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Post-create hook for the dev container: adds shell aliases, configures
# Poetry for in-container use and installs the project's dependencies.
# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

##
## Create some aliases
##
# Quoted heredoc delimiter: the alias lines are appended literally, with no
# expansion. "$HOME" is quoted so a home path containing whitespace still works.
cat >> "$HOME/.bashrc" <<'EOF'
alias ll="ls -alF"
alias la="ls -A"
alias l="ls -CF"
EOF

# Convenience workspace directory for later use
WORKSPACE_DIR=$(pwd)

# Change some Poetry settings to better deal with working in a container.
# The cache path is quoted so a workspace path containing whitespace is passed
# to Poetry as a single argument.
poetry config cache-dir "${WORKSPACE_DIR}/.cache"
poetry config virtualenvs.in-project true

# Now install all dependencies
poetry install --all-extras

echo "Done!"
8 changes: 8 additions & 0 deletions .devcontainer/post-start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Post-start hook for the dev container. It currently performs no actions
# beyond recording the workspace directory.
# -e: abort on the first failing command; -x: echo each command as it runs.
set -ex

# Convenience workspace directory for later use.
# Only referenced by the disabled git command below.
WORKSPACE_DIR=$(pwd)

# Disabled: set current workspace as safe for git
# git config --global --add safe.directory ${WORKSPACE_DIR}
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,8 @@ docs/_build/

# PyBuilder
target/

#temp data directories
infill*/
.pgpass

40 changes: 40 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: crmprtd_process",
"type": "debugpy",
"module": "crmprtd.process:main",
"request": "launch",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}",
"justMyCode": false,
"env": {},
"args": [
"-N", "ec",
"-c", "postgresql://[email protected]:5432/crmp?passfile=/workspaces/crmprtd/.pgpass",
"-L", "logging.yaml",
"-l", "infill-cache/manual-infill.log",
"-o", "DEBUG",
"-D",
"<", "infill-cache/crmprtd_download_2025-05-12.xml"
]
},
{
"name": "Pytest: Current File",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}",
"justMyCode": false,
"env": {},
"args": [
"${file}"
]
}
],
}
16 changes: 16 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "init-poetry-shell",
"type": "shell",
"command": "eval $(saml2aws script)",
"presentation": {
"reveal": "always",
"panel": "shared",
}
}
]
}
1 change: 1 addition & 0 deletions crmprtd/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- Cons: Significant changes to existing code. Greater complexity. Significant changes
to scripts that use it.
"""

from typing import List
from importlib import import_module
from argparse import ArgumentParser
Expand Down
1 change: 1 addition & 0 deletions crmprtd/more_itertools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Some additional iteration tools
"""

from itertools import islice, cycle


Expand Down
1 change: 1 addition & 0 deletions crmprtd/networks/_test/download.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
A test downloader that does nothing.
"""

import logging
import os
from argparse import ArgumentParser
Expand Down
3 changes: 2 additions & 1 deletion crmprtd/networks/bc_hydro/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Downloads data from BC hyrdo
"""Downloads data from BC Hydro

BC Hydro posts a rolling window (3 months) observing hourly data
once a week. Data is in txt files.
Expand All @@ -8,6 +8,7 @@
for errors. If the script is run less than once every 3 months
you will miss data.
"""

from typing import List
import pysftp
import logging
Expand Down
16 changes: 6 additions & 10 deletions crmprtd/networks/bc_hydro/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from crmprtd import Row
from crmprtd import setup_logging
from crmprtd.swob_ml import get_substitutions

log = logging.getLogger(__name__)

Expand All @@ -28,15 +29,7 @@ def normalize(file_stream):
num_pattern = re.compile(r"-?\d+(\.\d+)?$")

variable_substitutions_path = "networks/bc_hydro/variable_substitutions.yaml"
try:
with (files("crmprtd") / variable_substitutions_path).open("rb") as f:
variable_substitutions = yaml.safe_load(f)
except FileNotFoundError:
log.warning(
f"Cannot open resource file '{variable_substitutions_path}'. "
f"Proceeding with normalization, but there's a risk that variable names will not be recognized."
)
return
variable_substitutions = get_substitutions(variable_substitutions_path)

for line in file_stream:
line = line.decode("utf-8")
Expand Down Expand Up @@ -79,7 +72,10 @@ def normalize(file_stream):
elif num_pattern.match(value):
value = float(value)

if varname in variable_substitutions:
if (
variable_substitutions is not None
and varname in variable_substitutions
):
varname = variable_substitutions[varname]

yield Row(
Expand Down
1 change: 1 addition & 0 deletions crmprtd/networks/crd/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
can be supplied as the username in the authentication file or via the
--username parameter. No password is necessary.
"""

import logging
import sys
from argparse import ArgumentParser
Expand Down
4 changes: 2 additions & 2 deletions crmprtd/networks/ec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def no_ns_element(name):


def makeurl(
freq="daily",
province="BC",
freq = "daily",
province = "BC",
language="e",
time=None,
baseurl="https://dd.weather.gc.ca",
Expand Down
19 changes: 17 additions & 2 deletions crmprtd/networks/ec/normalize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
from crmprtd.swob_ml import normalize as swob_ml_normalize
import logging

from importlib.resources import files
from crmprtd import Row
from crmprtd.swob_ml import (
normalize as swob_ml_normalize,
get_substitutions,
apply_substitutions,
)

log = logging.getLogger(__name__)


def normalize(file_stream):
return swob_ml_normalize(
variable_substitutions_path = "networks/ec/variable_substitutions.yaml"
variable_substitutions = get_substitutions(variable_substitutions_path)

rows = swob_ml_normalize(
file_stream, "EC_raw", station_id_attr="climate_station_number"
)

return apply_substitutions(variable_substitutions, rows)
10 changes: 10 additions & 0 deletions crmprtd/networks/ec/variable_substitutions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Defines a mapping between variable names given to us by EC
# circa 2022 and what the variables were named in the PCDS (a.k.a. their
# "historic" name)
# Values should be of the form: "name_in_near_real_time_feed": "net_var_name-in-pcds"

'air_temperature_yesterday_high': 'air_temperature'
'air_temperature_yesterday_low': 'air_temperature'
'total_precipitation': 'total_precipitation'
'wind_direction': 'wind_from_direction'
'wind_gust_speed': 'wind_speed'
1 change: 1 addition & 0 deletions crmprtd/networks/wamr/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
the last run). If the script is run less than once per month, you will
miss data.
"""

from typing import List
import ftplib
import logging
Expand Down
5 changes: 3 additions & 2 deletions crmprtd/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,9 @@ def gulpy_plus_plus():
help="The network from which the data is coming from. "
"Since gulpy input already identifies the network by way of the provided history_ids, the name will only be used for logging.",
)
parser.add_argument('filenames', metavar='filename', nargs='+',
help='CSV files to process')
parser.add_argument(
"filenames", metavar="filename", nargs="+", help="CSV files to process"
)
args = parser.parse_args()

setup_logging(
Expand Down
40 changes: 40 additions & 0 deletions crmprtd/swob_ml.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Standard module
from typing import Generator, Optional
import pytz
import logging
import yaml
from importlib.resources import files

# Installed libraries
Expand Down Expand Up @@ -127,3 +129,41 @@ def normalize_xml(
lat=lat,
lon=lon,
)


def get_substitutions(variable_substitutions_path) -> Optional[dict[str, str]]:
    """Load a variable-substitution mapping from a packaged YAML resource.

    Args:
        variable_substitutions_path: Path of the YAML resource, relative to
            the root of the ``crmprtd`` package.

    Returns:
        Whatever ``yaml.safe_load`` produces for the resource, or ``None``
        (after logging a warning) when the resource cannot be found.
    """
    try:
        resource = files("crmprtd") / variable_substitutions_path
        with resource.open("rb") as stream:
            substitutions = yaml.safe_load(stream)
    except FileNotFoundError:
        message = (
            f"Cannot open resource file '{variable_substitutions_path}'. "
            f"Proceeding with normalization, but there's a risk that variable names will not be recognized."
        )
        log.warning(message)
        return None
    return substitutions


def apply_substitutions(
    variable_substitutions: Optional[dict[str, str]], rows: Generator[Row, None, None]
):
    """Rename the variables of normalized rows according to a mapping.

    This is a generator: nothing is logged or consumed until it is iterated.

    Args:
        variable_substitutions: Mapping from incoming variable name to the
            name it should be replaced with, or ``None`` when no mapping is
            available.
        rows: Iterable of ``Row`` objects to process.

    Yields:
        Each input row, in order. A row whose ``variable_name`` is a key of
        ``variable_substitutions`` is re-created with the mapped name and all
        other fields copied; every other row is yielded unchanged.
    """
    if variable_substitutions is None:
        log.warning(
            "No variable substitutions provided. Skipping substitution step."
        )
        # Only the substitution step is skipped: the rows themselves must
        # still flow through. A bare ``return`` here would end the generator
        # immediately and silently discard every row.
        yield from rows
        return

    for row in rows:
        if row.variable_name in variable_substitutions:
            yield Row(
                time=row.time,
                val=row.val,
                variable_name=variable_substitutions[row.variable_name],
                unit=row.unit,
                network_name=row.network_name,
                station_id=row.station_id,
                lat=row.lat,
                lon=row.lon,
            )
        else:
            yield row
7,205 changes: 7,195 additions & 10 deletions crmprtd/tests/ec_data.py

Large diffs are not rendered by default.

Loading
Loading