Skip to content

Commit

Permalink
Merge pull request #84 from cal-itp/retry-metadata
Browse files Browse the repository at this point in the history
add backoff and allow callers to use it for setting metadata
  • Loading branch information
atvaccaro authored Sep 21, 2022
2 parents 5dcbedb + 0e55421 commit 8b524ee
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
33 changes: 30 additions & 3 deletions calitp/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from enum import Enum
from typing import ClassVar, Dict, List, Optional, Tuple, Type, Union, get_type_hints

import backoff
import gcsfs
import humanize
import pendulum
Expand Down Expand Up @@ -160,6 +161,21 @@ def is_rt(self) -> bool:
raise RuntimeError(f"managed to end up with an invalid enum type of {self}")


def set_metadata(blob: storage.Blob, model: BaseModel, exclude=None):
    """
    Store the JSON form of `model` as the custom metadata of `blob`.

    The blob's metadata is replaced by a single-entry dict keyed by
    PARTITIONED_ARTIFACT_METADATA_KEY, and `blob.patch()` is then called
    to push the change.

    `exclude` is forwarded unchanged to `model.json()`.
    """
    serialized_model = model.json(exclude=exclude)
    blob.metadata = {PARTITIONED_ARTIFACT_METADATA_KEY: serialized_model}
    blob.patch()


# Is there a better pattern for making this retry optional by the caller?
@backoff.on_exception(backoff.expo, (Exception,), max_tries=2)
def set_metadata_with_retry(*args, **kwargs):
    """
    Call `set_metadata` with the given arguments, retrying on failure.

    Any `Exception` raised by `set_metadata` triggers an exponential
    backoff wait followed by another attempt, for at most 2 attempts in
    total; after that the exception propagates to the caller.
    """
    outcome = set_metadata(*args, **kwargs)
    return outcome


class PartitionedGCSArtifact(BaseModel, abc.ABC):
"""
This class is designed to be subclassed to model "extracts", i.e. a particular
Expand Down Expand Up @@ -228,7 +244,14 @@ def name(self):
def path(self):
return os.path.join(self.bucket, self.name)

def save_content(self, content: bytes, exclude=None, fs: gcsfs.GCSFileSystem = None, client: storage.Client = None):
def save_content(
self,
content: bytes,
exclude=None,
fs: gcsfs.GCSFileSystem = None,
client: storage.Client = None,
retry_metadata: bool = False,
):
if (fs is None) == (client is None):
raise TypeError("must provide a gcsfs file system OR a storage client")

Expand All @@ -254,8 +277,12 @@ def save_content(self, content: bytes, exclude=None, fs: gcsfs.GCSFileSystem = N
client=client,
)

blob.metadata = {PARTITIONED_ARTIFACT_METADATA_KEY: self.json(exclude=exclude)}
blob.patch()
set_metadata_func = set_metadata_with_retry if retry_metadata else set_metadata
set_metadata_func(
blob=blob,
model=self,
exclude=exclude,
)


# TODO: this should really use a typevar
Expand Down
11 changes: 10 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "calitp"
version = "2022.9.15"
version = "2022.9.21"
description = "Shared code for the Cal-ITP data codebases"
authors = ["Andrew Vaccaro <[email protected]>"]
license = "GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007"
Expand All @@ -24,6 +24,7 @@ google-cloud-bigquery-storage = "2.14.1"
google-api-core = "<2.0.0dev,>=1.32.0"
protobuf = ">=3.19.0,<4.0.0dev"
tqdm = "^4.64.0"
backoff = "^2.1.2"

[tool.poetry.dev-dependencies]
black = "^22.6.0"
Expand Down

0 comments on commit 8b524ee

Please sign in to comment.