From 7f4df16f655b0f0a51126403e097492d994b2ef3 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Fri, 30 Sep 2022 14:45:49 -0400 Subject: [PATCH 01/10] add secrets manager lib --- poetry.lock | 167 +++++++++++++++++++++++++++++-------------------- pyproject.toml | 1 + 2 files changed, 101 insertions(+), 67 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0912a74..f1ee197 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,6 +1,6 @@ [[package]] name = "aiohttp" -version = "3.8.1" +version = "3.8.3" description = "Async http client/server framework (asyncio)" category = "main" optional = false @@ -37,14 +37,6 @@ category = "main" optional = false python-versions = ">=3.6" -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." -category = "dev" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" - [[package]] name = "attrs" version = "22.1.0" @@ -69,7 +61,7 @@ python-versions = ">=3.7,<4.0" [[package]] name = "black" -version = "22.6.0" +version = "22.8.0" description = "The uncompromising code formatter." category = "dev" optional = false @@ -91,15 +83,15 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "cachetools" -version = "4.2.4" +version = "5.2.0" description = "Extensible memoizing collections and decorators" category = "main" optional = false -python-versions = "~=3.5" +python-versions = "~=3.7" [[package]] name = "certifi" -version = "2022.6.15" +version = "2022.9.24" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false @@ -107,7 +99,7 @@ python-versions = ">=3.6" [[package]] name = "charset-normalizer" -version = "2.1.0" +version = "2.1.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" optional = false @@ -145,7 +137,7 @@ python-versions = ">=3.5" [[package]] name = "exceptiongroup" -version = "1.0.0rc8" +version = "1.0.0rc9" description = "Backport of PEP 654 (exception groups)" category = "dev" optional = false @@ -164,7 +156,7 @@ python-versions = ">=3.7" [[package]] name = "fsspec" -version = "2022.5.0" +version = "2022.8.2" description = "File-system specification" category = "main" optional = false @@ -184,7 +176,7 @@ github = ["requests"] gs = ["gcsfs"] gui = ["panel"] hdfs = ["pyarrow (>=1)"] -http = ["requests", "aiohttp"] +http = ["requests", "aiohttp (!=4.0.0a0,!=4.0.0a1)"] libarchive = ["libarchive-c"] oci = ["ocifs"] s3 = ["s3fs"] @@ -203,16 +195,16 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" [[package]] name = "gcsfs" -version = "2022.5.0" +version = "2022.8.2" description = "Convenient Filesystem interface over GCS" category = "main" optional = false python-versions = ">=3.7" [package.dependencies] -aiohttp = "<4" +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2022.5.0" +fsspec = "2022.8.2" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -224,49 +216,48 @@ gcsfuse = ["fusepy"] [[package]] name = "google-api-core" -version = "1.32.0" +version = "1.33.1" description = "Google API client core library" category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" +python-versions = ">=3.7" [package.dependencies] -google-auth = ">=1.25.0,<2.0dev" -googleapis-common-protos = ">=1.6.0,<2.0dev" -grpcio = {version = ">=1.29.0,<2.0dev", optional = true, markers = "extra == \"grpc\""} -packaging = ">=14.3" -protobuf = {version = ">=3.12.0,<4.0.0dev", markers = "python_version > \"3\""} -pytz = "*" +google-auth = ">=1.25.0,<3.0dev" +googleapis-common-protos = ">=1.56.2,<2.0dev" +grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} +grpcio-status = {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""} +protobuf = ">=3.20.1,<4.0.0dev" requests = ">=2.18.0,<3.0.0dev" -six = ">=1.13.0" [package.extras] -grpc = ["grpcio (>=1.29.0,<2.0dev)"] -grpcgcp = ["grpcio-gcp (>=0.2.2)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio-status (>=1.33.2,<2.0dev)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] [[package]] name = "google-auth" -version = "1.35.0" +version = "2.12.0" description = "Google Authentication Library" category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" [package.dependencies] -cachetools = ">=2.0.0,<5.0" +cachetools = ">=2.0.0,<6.0" pyasn1-modules = ">=0.2.1" rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} six = ">=1.9.0" [package.extras] aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"] +enterprise_cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] pyopenssl = ["pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] [[package]] name = "google-auth-oauthlib" -version = "0.5.2" +version = "0.5.3" description = "Google Authentication Library" category = "main" optional = false @@ -345,6 +336,23 @@ google-auth = ">=1.25.0,<3.0dev" [package.extras] grpc = ["grpcio (>=1.38.0,<2.0dev)"] +[[package]] +name = "google-cloud-secret-manager" +version = "2.12.4" +description = "Secret Manager API API client library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +google-api-core = {version = ">=1.32.0,<2.0.0 || >=2.8.0,<3.0.0dev", extras = ["grpc"]} +grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" +proto-plus = ">=1.22.0,<2.0.0dev" +protobuf = ">=3.19.0,<5.0.0dev" + +[package.extras] +libcst = ["libcst (>=0.2.5)"] + [[package]] name = "google-cloud-storage" version = "2.5.0" @@ -365,11 +373,11 @@ protobuf = ["protobuf (<5.0.0dev)"] [[package]] name = "google-crc32c" -version = "1.3.0" +version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.extras] testing = ["pytest"] @@ -398,6 +406,7 @@ optional = false python-versions = ">=3.7" [package.dependencies] +grpcio = {version = ">=1.0.0,<2.0.0dev", optional = true, markers = "extra == \"grpc\""} protobuf = ">=3.15.0,<5.0.0dev" [package.extras] @@ -405,7 +414,7 @@ grpc = ["grpcio (>=1.0.0,<2.0.0dev)"] [[package]] name = "greenlet" -version = "1.1.2" +version = "1.1.3" description = "Lightweight in-process concurrent programming" category = "main" optional = false @@ -414,19 +423,44 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" [package.extras] docs = ["sphinx"] +[[package]] +name = "grpc-google-iam-v1" +version = "0.12.4" +description = "IAM API client library" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} +grpcio = ">=1.0.0,<2.0.0dev" + [[package]] name = "grpcio" -version = "1.48.0" +version = "1.49.1" description = "HTTP/2-based RPC framework" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] six = ">=1.5.2" [package.extras] -protobuf = ["grpcio-tools (>=1.48.0)"] +protobuf = ["grpcio-tools (>=1.49.1)"] + +[[package]] +name = "grpcio-status" +version = "1.48.2" +description = "Status proto mapping for gRPC" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.48.2" +protobuf = ">=3.12.0" [[package]] name = "gtfs-realtime-bindings" @@ -441,7 +475,7 @@ protobuf = "*" [[package]] name = "humanize" -version = "4.3.0" +version = "4.4.0" description = "Python humanize utilities" category = "main" optional = false @@ -452,7 +486,7 @@ tests = ["freezegun", "pytest", "pytest-cov"] [[package]] name = "hypothesis" -version = "6.54.3" +version = "6.54.6" description = "A library for property-based testing" category = "dev" optional = false @@ -464,7 +498,7 @@ exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} sortedcontainers = ">=2.1.0,<3.0.0" [package.extras] -all = ["black (>=19.10b0)", "click (>=7.0)", "django (>=3.2)", "dpcontracts (>=0.4)", "lark-parser (>=0.6.5)", "libcst (>=0.3.16)", "numpy (>=1.9.0)", "pandas (>=1.0)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "importlib-metadata (>=3.6)", "backports.zoneinfo (>=0.2.1)", "tzdata (>=2022.1)"] +all = ["black (>=19.10b0)", "click (>=7.0)", "django (>=3.2)", "dpcontracts (>=0.4)", "lark-parser (>=0.6.5)", "libcst (>=0.3.16)", "numpy (>=1.9.0)", "pandas (>=1.0)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "importlib-metadata (>=3.6)", "backports.zoneinfo (>=0.2.1)", "tzdata (>=2022.2)"] cli = ["click (>=7.0)", "black (>=19.10b0)", "rich (>=9.0.0)"] codemods = ["libcst (>=0.3.16)"] dateutil = ["python-dateutil (>=1.4)"] @@ -477,11 +511,11 @@ pandas = ["pandas (>=1.0)"] pytest = ["pytest (>=4.6)"] pytz = ["pytz (>=2014.1)"] redis = ["redis (>=3.0.0)"] -zoneinfo = ["backports.zoneinfo (>=0.2.1)", "tzdata (>=2022.1)"] +zoneinfo = ["backports.zoneinfo (>=0.2.1)", "tzdata (>=2022.2)"] [[package]] name = "idna" -version = "3.3" +version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" category = "main" optional = false @@ -535,7 +569,7 @@ python-versions = "*" [[package]] name = "numpy" -version = "1.23.2" +version = "1.23.3" description = "NumPy is the fundamental package for array computing with Python." category = "main" optional = false @@ -543,7 +577,7 @@ python-versions = ">=3.8" [[package]] name = "oauthlib" -version = "3.2.0" +version = "3.2.1" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" category = "main" optional = false @@ -567,7 +601,7 @@ pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" [[package]] name = "pandas" -version = "1.4.3" +version = "1.5.0" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false @@ -575,16 +609,14 @@ python-versions = ">=3.8" [package.dependencies] numpy = [ - {version = ">=1.18.5", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, ] python-dateutil = ">=2.8.1" pytz = ">=2020.1" [package.extras] -test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] +test = ["pytest-xdist (>=1.31)", "pytest (>=6.0)", "hypothesis (>=5.5.3)"] [[package]] name = "pandas-gbq" @@ -606,11 +638,11 @@ tqdm = ["tqdm (>=4.23.0)"] [[package]] name = "pathspec" -version = "0.9.0" +version = "0.10.1" description = "Utility library for gitignore style pattern matching of file paths." category = "dev" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +python-versions = ">=3.7" [[package]] name = "pendulum" @@ -650,7 +682,7 @@ dev = ["tox", "pre-commit"] [[package]] name = "proto-plus" -version = "1.22.0" +version = "1.22.1" description = "Beautiful, Pythonic protocol buffers." category = "main" optional = false @@ -664,7 +696,7 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] [[package]] name = "protobuf" -version = "3.20.1" +version = "3.20.2" description = "Protocol Buffers" category = "main" optional = false @@ -710,14 +742,14 @@ pyasn1 = ">=0.4.6,<0.5.0" [[package]] name = "pydantic" -version = "1.9.2" +version = "1.10.2" description = "Data validation and settings management using python type hints" category = "main" optional = false -python-versions = ">=3.6.1" +python-versions = ">=3.7" [package.dependencies] -typing-extensions = ">=3.7.4.3" +typing-extensions = ">=4.1.0" [package.extras] dotenv = ["python-dotenv (>=0.10.4)"] @@ -748,14 +780,13 @@ diagrams = ["railroad-diagrams", "jinja2"] [[package]] name = "pytest" -version = "7.1.2" +version = "7.1.3" description = "pytest: simple powerful testing with Python" category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" @@ -945,7 +976,7 @@ python-versions = ">=3.7" [[package]] name = "tqdm" -version = "4.64.0" +version = "4.64.1" description = "Fast, Extensible Progress Meter" category = "main" optional = false @@ -970,7 +1001,7 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.11" +version = "1.26.12" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = false @@ -978,7 +1009,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] -secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -996,13 +1027,12 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = ">=3.8,<3.11" -content-hash = "ed740146ace27830227f7ca6acc8c1598b3357aee96ecf003f8d684e6e37e521" +content-hash = "622e1b796652460aeea1cc1ce43b899da90cfeb0542ab2a9e63553e65199b14d" [metadata.files] aiohttp = [] aiosignal = [] async-timeout = [] -atomicwrites = [] attrs = [] backoff = [] black = [] @@ -1023,12 +1053,15 @@ google-auth-oauthlib = [] google-cloud-bigquery = [] google-cloud-bigquery-storage = [] google-cloud-core = [] +google-cloud-secret-manager = [] google-cloud-storage = [] google-crc32c = [] google-resumable-media = [] googleapis-common-protos = [] greenlet = [] +grpc-google-iam-v1 = [] grpcio = [] +grpcio-status = [] gtfs-realtime-bindings = [] humanize = [] hypothesis = [] diff --git a/pyproject.toml b/pyproject.toml index d16e2dd..389deb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ google-api-core = "<2.0.0dev,>=1.32.0" protobuf = ">=3.19.0,<4.0.0dev" tqdm = "^4.64.0" backoff = "^2.1.2" +google-cloud-secret-manager = "^2.12.4" [tool.poetry.dev-dependencies] black = "^22.6.0" From 0e8b1932ed802e05a10bd7c96c09fee9e91aaef6 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Fri, 30 Sep 2022 15:17:53 -0400 Subject: [PATCH 02/10] bring secret loading into calitp py --- calitp/auth.py | 27 +++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 calitp/auth.py diff --git a/calitp/auth.py b/calitp/auth.py new file mode 100644 index 0000000..15bdaf1 --- /dev/null +++ b/calitp/auth.py @@ -0,0 +1,27 @@ +import os +from typing import Sequence + +import google_crc32c +from google.cloud import secretmanager + +AUTH_KEYS_ENV_VAR = "CALITP_AUTH_KEYS" +DEFAULT_AUTH_KEYS = tuple(os.environ[AUTH_KEYS_ENV_VAR].split(",")) if AUTH_KEYS_ENV_VAR in os.environ else tuple() + + +def load_secrets(keys: Sequence[str] = DEFAULT_AUTH_KEYS, secret_client=secretmanager.SecretManagerServiceClient()): + for key in keys: + if key not in os.environ: + print(f"fetching secret {key}") + name = f"projects/cal-itp-data-infra/secrets/{key}/versions/latest" + response = secret_client.access_secret_version(request={"name": name}) + + crc32c = google_crc32c.Checksum() + crc32c.update(response.payload.data) + if response.payload.data_crc32c != int(crc32c.hexdigest(), 16): + raise ValueError(f"Data corruption detected for secret {name}.") + + os.environ[key] = response.payload.data.decode("UTF-8").strip() + + +if __name__ == "__main__": + load_secrets() diff --git a/pyproject.toml b/pyproject.toml index 389deb7..a4feaa8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "calitp" -version = "2022.9.21" +version = "2022.9.30a0" description = "Shared code for the Cal-ITP data codebases" authors = ["Andrew Vaccaro "] license = "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007" From 467403af2d84c9f44dfd56790048a9f9dc12a9f1 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Fri, 30 Sep 2022 16:23:30 -0400 Subject: [PATCH 03/10] auth_dict should just be a mapping type --- calitp/storage.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/calitp/storage.py b/calitp/storage.py index 15db413..76ab718 100644 --- a/calitp/storage.py +++ b/calitp/storage.py @@ -10,7 +10,17 @@ from abc import ABC from datetime import datetime from enum import Enum -from typing import ClassVar, Dict, List, Optional, Tuple, Type, Union, get_type_hints +from typing import ( + ClassVar, + Dict, + List, + Mapping, + Optional, + Tuple, + Type, + Union, + get_type_hints, +) import backoff import gcsfs @@ -556,7 +566,7 @@ def convert_feed_type(cls, v): return v - def build_request(self, auth_dict: dict) -> Request: + def build_request(self, auth_dict: Mapping[str, str]) -> Request: params = {k: auth_dict[v] for k, v in self.auth_query_params.items()} headers = {k: auth_dict[v] for k, v in self.auth_headers.items()} @@ -668,7 +678,7 @@ def timestamped_filename(self): def download_feed( config: GTFSDownloadConfig, - auth_dict: Dict, + auth_dict: Mapping[str, str], ts: pendulum.DateTime, default_filename="feed", ) -> Tuple[GTFSFeedExtract, bytes]: From 74517aabc292919d193747c6b85a0351de1a9f5a Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 25 Oct 2022 20:23:01 -0400 Subject: [PATCH 04/10] remove unnecessary docker/jupyter stuff now that it lives in infra --- .github/workflows/ci.yml | 33 ---------------------- Dockerfile | 27 ------------------ _jupyterhub/custom.sh | 6 ---- _jupyterhub/examples/example_ipyleaflet.py | 10 ------- _jupyterhub/overrides.json | 24 ---------------- _jupyterhub/requirements.txt | 27 ------------------ 6 files changed, 127 deletions(-) delete mode 100644 Dockerfile delete mode 100644 _jupyterhub/custom.sh delete mode 100644 _jupyterhub/examples/example_ipyleaflet.py delete mode 100644 _jupyterhub/overrides.json delete mode 100644 _jupyterhub/requirements.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 833ac34..758bd9e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,36 +67,3 @@ jobs: repository_username: __token__ repository_password: ${{ secrets.PYPI_API_TOKEN }} build_format: "sdist" - -# This is disabled for now; I don't hate the image definition living here, but the CI/CD is overly complicated I think -# build_push: -# name: Package docker image -# runs-on: ubuntu-18.04 -# needs: -# - lint -# - test -# - release -# if: "success('lint') && success('test') && !failure('release')" -# steps: -# - name: Login to GitHub Container Registry -# uses: docker/login-action@v1 -# with: -# registry: ghcr.io -# username: ${{ github.actor }} -# password: ${{ secrets.GITHUB_TOKEN }} -# -# # build release image ---- -# - name: "Release: Build and push" -# uses: docker/build-push-action@v2 -# if: ${{ github.event_name == 'release' && startsWith(github.event.release.tag_name, 'hub') }} -# with: -# push: true -# tags: ghcr.io/${{github.repository}}:${{ github.event.release.tag_name }} -# -# # build any image pushed on a branch starting with development ---- -# - name: "Development: Build and push" -# uses: docker/build-push-action@v2 -# if: ${{ github.event_name != 'release' && startsWith(github.ref_name, 'development') }} -# with: -# push: true -# tags: ghcr.io/${{github.repository}}:${{github.ref_name}} diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 0cc8e4d..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM jupyter/datascience-notebook - -LABEL org.opencontainers.image.source https://github.com/cal-itp/calitp-py - -USER root -RUN curl -sL https://deb.nodesource.com/setup_14.x | bash - -# GitHub CLI https://github.com/cli/cli/blob/trunk/docs/install_linux.md -RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg -RUN echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null -RUN apt update \ - && apt install -y keychain nodejs git-lfs gh libspatialindex-dev -USER $NB_UID -RUN npm install -g --unsafe-perm=true --allow-root netlify-cli - -# gcloud CLI https://cloud.google.com/sdk/docs/install#deb -RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-377.0.0-linux-x86_64.tar.gz \ - && tar -zxvf google-cloud-sdk-377.0.0-linux-x86_64.tar.gz \ - && ./google-cloud-sdk/install.sh - -ADD _jupyterhub/requirements.txt /app/requirements.txt - -RUN pip install -r /app/requirements.txt - -RUN mkdir /opt/conda/share/jupyter/lab/settings/ -COPY _jupyterhub/overrides.json /opt/conda/share/jupyter/lab/settings/overrides.json - -COPY _jupyterhub/custom.sh /tmp/custom.sh diff --git a/_jupyterhub/custom.sh b/_jupyterhub/custom.sh deleted file mode 100644 index 9033190..0000000 --- a/_jupyterhub/custom.sh +++ /dev/null @@ -1,6 +0,0 @@ -parse_git_branch() { - git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \(.*\)/ (\1)/' -} -export PS1="\u@\h \[\033[32m\]\w\[\033[33m\]\$(parse_git_branch)\[\033[00m\] $ " -chmod 600 ~/.ssh/id_ed25519 -eval `keychain --eval --agents ssh id_ed25519` diff --git a/_jupyterhub/examples/example_ipyleaflet.py b/_jupyterhub/examples/example_ipyleaflet.py deleted file mode 100644 index 6075262..0000000 --- a/_jupyterhub/examples/example_ipyleaflet.py +++ /dev/null @@ -1,10 +0,0 @@ -# flake8: noqa - -# + -from ipyleaflet import Map, Marker - -center = (52.204793, 360.121558) -m = Map(center=center, zoom=15) -marker = Marker(location=center, draggable=True) -m.add_layer(marker) -display(m) diff --git a/_jupyterhub/overrides.json b/_jupyterhub/overrides.json deleted file mode 100644 index b250ab6..0000000 --- a/_jupyterhub/overrides.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "@ryantam626/jupyterlab_code_formatter:settings": { - "preferences": { - "default_formatter": { - "python": [ - "isort", - "black" - ] - } - }, - "formatOnSave": false - }, - "@jupyterlab/shortcuts-extension:shortcuts": { - "shortcuts": [ - { - "command": "jupyterlab_code_formatter:black", - "keys": [ - "Ctrl Shift K" - ], - "selector": ".jp-Notebook.jp-mod-editMode" - } - ] - } -} diff --git a/_jupyterhub/requirements.txt b/_jupyterhub/requirements.txt deleted file mode 100644 index e8269ad..0000000 --- a/_jupyterhub/requirements.txt +++ /dev/null @@ -1,27 +0,0 @@ -black==22.3.0 -calitp==0.0.16 -intake==0.6.4 -intake-dcat==0.4.0 -intake-geopandas==0.3.0 -intake-parquet==0.2.3 -ipyleaflet==0.15.0 -jupytext==1.13.5 -jupyterlab-code-formatter==1.4.10 -siuba==1.0.0a2 -voila==0.3.6 -plotnine==0.8.0 -plotly==5.5.0 -folium==0.12.1.post1 -branca==0.4.2 -altair_saver==0.5.0 -vega==3.5.0 -pygeos==0.12.0 -rtree==0.9.7 -openpyxl==3.0.9 -python-dotenv==0.19.2 -isort==5.10.1 -seaborn==0.11.2 -csvkit==1.0.7 -pandas-profiling==3.1.0 -pre-commit==2.18.1 -jupyter-resource-usage==0.6.1 From b218a99509548f922c434fe48cdc7115865487c9 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 25 Oct 2022 20:38:00 -0400 Subject: [PATCH 05/10] bump version, get latest siuba, rename tbl to tbls --- calitp/tables.py | 4 ++-- docs/index.md | 6 +++--- poetry.lock | 4 ++-- pyproject.toml | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/calitp/tables.py b/calitp/tables.py index 0638773..81a1d96 100644 --- a/calitp/tables.py +++ b/calitp/tables.py @@ -111,10 +111,10 @@ def _repr_html_(self): """ -tbl = AutoTable( +tbls = AutoTable( get_engine(), lambda s: s, # s.replace(".", "_"), lambda s: "zzz_test_" not in s if not is_development() else True, ) -tbl._init() +tbls._init() diff --git a/docs/index.md b/docs/index.md index 938a9da..2b04003 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,8 @@ # calitp ```python -from calitp.tables import tbl -from calitp.import query_sql +from calitp.tables import tbls +from calitp. import query_sql ``` @@ -16,7 +16,7 @@ from calitp.import query_sql ### siuba ```python -tbl.gtfs_schedule.agency() +tbls.gtfs_schedule.agency() ``` ### sql diff --git a/poetry.lock b/poetry.lock index f1ee197..aeb2028 100644 --- a/poetry.lock +++ b/poetry.lock @@ -879,7 +879,7 @@ pyasn1 = ">=0.1.3" [[package]] name = "siuba" -version = "0.3.0" +version = "0.4.0" description = "A package for quick, scrappy analyses with pandas and SQL" category = "main" optional = false @@ -1027,7 +1027,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = ">=3.8,<3.11" -content-hash = "622e1b796652460aeea1cc1ce43b899da90cfeb0542ab2a9e63553e65199b14d" +content-hash = "eccb5b8a84b80a0d51f37238464708a16cafc78b28716b31429595e8d4fdb1b8" [metadata.files] aiohttp = [] diff --git a/pyproject.toml b/pyproject.toml index a4feaa8..50b6124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "calitp" -version = "2022.9.30a0" +version = "2022.10.25a0" description = "Shared code for the Cal-ITP data codebases" authors = ["Andrew Vaccaro "] license = "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007" @@ -17,7 +17,7 @@ gtfs-realtime-bindings = "^0.0.7" humanize = "^4.2.3" pendulum = "^2.1.2" pydantic = "^1.9.1" -siuba = "^0.3.0" +siuba = "^0.4.0" Jinja2 = "<3.1.0" # These are just to play nicely with composer google-cloud-bigquery-storage = "2.14.1" From e96139429614eb67c4529d1a70ca946642f166a3 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 25 Oct 2022 20:38:11 -0400 Subject: [PATCH 06/10] add a bit of logging --- calitp/auth.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/calitp/auth.py b/calitp/auth.py index 15bdaf1..d087269 100644 --- a/calitp/auth.py +++ b/calitp/auth.py @@ -9,8 +9,14 @@ def load_secrets(keys: Sequence[str] = DEFAULT_AUTH_KEYS, secret_client=secretmanager.SecretManagerServiceClient()): + if not keys: + print("no secrets to load") + return + for key in keys: - if key not in os.environ: + if key in os.environ: + print(f"found {key} already in os.environ, skipping") + else: print(f"fetching secret {key}") name = f"projects/cal-itp-data-infra/secrets/{key}/versions/latest" response = secret_client.access_secret_version(request={"name": name}) @@ -24,4 +30,5 @@ def load_secrets(keys: Sequence[str] = DEFAULT_AUTH_KEYS, secret_client=secretma if __name__ == "__main__": + print("loading secrets...") load_secrets() From eac2b4659f4c3f6599a87ef406c64a3525a2ebc6 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 25 Oct 2022 20:39:06 -0400 Subject: [PATCH 07/10] make dependabot happy --- poetry.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index aeb2028..10b9121 100644 --- a/poetry.lock +++ b/poetry.lock @@ -577,7 +577,7 @@ python-versions = ">=3.8" [[package]] name = "oauthlib" -version = "3.2.1" +version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" category = "main" optional = false @@ -696,7 +696,7 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] [[package]] name = "protobuf" -version = "3.20.2" +version = "3.20.3" description = "Protocol Buffers" category = "main" optional = false From 4eb8c3f33533002a736996b6eba9fe9bc434d64c Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Tue, 25 Oct 2022 20:52:13 -0400 Subject: [PATCH 08/10] update pandas-gbq --- poetry.lock | 39 ++++++++++++++++++++++++++++----------- pyproject.toml | 2 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/poetry.lock b/poetry.lock index 10b9121..acd1531 100644 --- a/poetry.lock +++ b/poetry.lock @@ -127,6 +127,20 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "db-dtypes" +version = "1.0.4" +description = "Pandas Data Types for SQL systems (BigQuery, Spanner)" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +numpy = ">=1.16.6,<2.0dev" +packaging = ">=17.0" +pandas = ">=0.24.2,<2.0dev" +pyarrow = ">=3.0.0,<10.0dev" + [[package]] name = "decorator" version = "5.1.1" @@ -280,15 +294,12 @@ python-versions = ">=3.6, <3.11" [package.dependencies] google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} -google-cloud-bigquery-storage = {version = ">=2.0.0,<3.0.0dev", optional = true, markers = "extra == \"bqstorage\""} google-cloud-core = ">=1.4.1,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" grpcio = ">=1.38.1,<2.0dev" packaging = ">=14.3,<22.0dev" -pandas = {version = ">=0.24.2", optional = true, markers = "extra == \"pandas\""} proto-plus = ">=1.15.0,<2.0.0dev" protobuf = ">=3.12.0,<4.0.0dev" -pyarrow = {version = ">=3.0.0,<8.0dev", optional = true, markers = "extra == \"bqstorage\""} python-dateutil = ">=2.7.2,<3.0dev" requests = ">=2.18.0,<3.0.0dev" @@ -620,17 +631,22 @@ test = ["pytest-xdist (>=1.31)", "pytest (>=6.0)", "hypothesis (>=5.5.3)"] [[package]] name = "pandas-gbq" -version = "0.14.1" -description = "Pandas interface to Google BigQuery" +version = "0.17.9" +description = "Google BigQuery connector for pandas" category = "main" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7, <3.11" [package.dependencies] -google-auth = "*" -google-auth-oauthlib = "*" -google-cloud-bigquery = {version = ">=1.11.1,<3.0.0dev", extras = ["bqstorage", "pandas"]} -pandas = ">=0.20.1" +db-dtypes = ">=0.3.1,<2.0.0" +google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0" +google-auth-oauthlib = ">=0.0.1" +google-cloud-bigquery = ">=1.27.2,<2.4.0 || >=2.5.0,<4.0.0dev" +google-cloud-bigquery-storage = ">=1.1.0,<3.0.0dev" +numpy = ">=1.16.6" +pandas = ">=0.24.2" +pyarrow = ">=3.0.0,<10.0dev" pydata-google-auth = "*" [package.extras] @@ -1027,7 +1043,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = ">=3.8,<3.11" -content-hash = "eccb5b8a84b80a0d51f37238464708a16cafc78b28716b31429595e8d4fdb1b8" +content-hash = "fb1600d3864591a40ac992513ba967b8fd6b6ec37386ac90fb2b15bd6f02a89d" [metadata.files] aiohttp = [] @@ -1041,6 +1057,7 @@ certifi = [] charset-normalizer = [] click = [] colorama = [] +db-dtypes = [] decorator = [] exceptiongroup = [] frozenlist = [] diff --git a/pyproject.toml b/pyproject.toml index 50b6124..173393d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,6 @@ python = ">=3.8,<3.11" fsspec = "^2022.5.0,!=2022.7.1" # https://github.com/fsspec/gcsfs/issues/486 gcsfs = "^2022.5.0,!=2022.7.1" # https://github.com/fsspec/gcsfs/issues/486 pandas = "^1.3.3" -pandas-gbq = "<0.15.0" sqlalchemy-bigquery = "^1.4.4" google-cloud-bigquery = "<3.0.0dev,>=1.15.0" gtfs-realtime-bindings = "^0.0.7" @@ -26,6 +25,7 @@ protobuf = ">=3.19.0,<4.0.0dev" tqdm = "^4.64.0" backoff = "^2.1.2" google-cloud-secret-manager = "^2.12.4" +pandas-gbq = "^0.17.9" [tool.poetry.dev-dependencies] black = "^22.6.0" From 58a0934730f380f2353cca6de9b4f63d3780ed17 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Wed, 26 Oct 2022 11:39:36 -0400 Subject: [PATCH 09/10] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 173393d..c4bbffd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "calitp" -version = "2022.10.25a0" +version = "2022.10.26a0" description = "Shared code for the Cal-ITP data codebases" authors = ["Andrew Vaccaro "] license = "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007" From 2cf89a88212efd3400de8af9d5bd9051aea2d312 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Wed, 26 Oct 2022 13:20:40 -0400 Subject: [PATCH 10/10] bump release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c4bbffd..87e3d8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "calitp" -version = "2022.10.26a0" +version = "2022.10.26" description = "Shared code for the Cal-ITP data codebases" authors = ["Andrew Vaccaro "] license = "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007"