Skip to content

Commit

Permalink
Merge pull request #37 from cal-itp/feature-data-infra-1142
Browse files Browse the repository at this point in the history
adding random protobuff function and a cli to access it
  • Loading branch information
chriscauley authored Mar 2, 2022
2 parents b9c3269 + 44f6c0a commit fde76be
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 1 deletion.
2 changes: 1 addition & 1 deletion calitp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# flake8: noqa

__version__ = "0.0.11"
__version__ = "0.0.12"

from .sql import get_table, write_table, query_sql, to_snakecase, get_engine
from .storage import save_to_gcfs, read_gcfs
42 changes: 42 additions & 0 deletions calitp/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from calitp.protobuff import get_random_protobuff
import datetime
import typer

# Typer application object; the command and the callback defined in this
# module are registered on it through the @app.command() / @app.callback()
# decorators.
app = typer.Typer()


# CLI command: fetch one random file via get_random_protobuff() and print a
# bounded preview of it (at most 20 lines, or 20 * 80 characters when those
# lines are very long).
# NOTE: documented with comments rather than a docstring on purpose; Typer
# would surface a docstring as this command's --help text.
@app.command()
def random_protobuff(
    glob=typer.Argument("*", help="A glob matching itp_id/url_number/string."),
    bucket=typer.Option("gtfs-data", help="GCS bucket to search."),
    date=typer.Option(f"{datetime.date.today()}*", help="Date glob."),
    format=typer.Option("protobuff", help="format to output, json or protobuff."),
):
    blob, payload, error = get_random_protobuff(
        glob, bucket=bucket, date=date, format=format
    )
    # The payload may be a parsed message object or already a string.
    data = str(payload)

    print(f"downloaded {blob}")
    if error:
        print(error)

    lines = data.split("\n")
    preview = "\n".join(lines[:20])

    if len(preview) <= 20 * 80:
        # The first 20 lines fit the character budget: show them as-is.
        print(preview)
        if len(lines) > 20:
            print(f"... (only showing 20/{len(lines)} lines)")
    else:
        # Very long lines: fall back to a hard character cut-off instead.
        print(data[: 20 * 80])
        print(f"... (only showing {20*80}/{len(data)} characters)")


# The docstring below is user-facing: Typer displays the callback's docstring
# as the application's top-level --help text.
@app.callback()
def callback():
    """
    Pull a random protobuff file from the rt archiver storage.

    Without --date, defaults to midnight this morning.
    A full --date string looks like 2022-01-01T10:42:16.
    --date must be at least YYYY-MM-DD and will default to zeros after that.
    """


# Only start the CLI when this module is executed (e.g. ``python -m calitp``),
# not when it is merely imported.
if __name__ == "__main__":
    app()
44 changes: 44 additions & 0 deletions calitp/protobuff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from calitp.storage import get_fs
import datetime
from google.protobuf import json_format
from google.transit import gtfs_realtime_pb2
import json
import random


def get_random_protobuff(glob, bucket="gtfs-data", date="", format="protobuff"):
    """Pick a random file under ``gs://{bucket}/rt/{date}/`` and parse it.

    The file is parsed as a GTFS-realtime ``FeedMessage``.

    Args:
        glob: Glob for the file name part of the path; a trailing ``*`` is
            always appended to it.
        bucket: Name of the bucket to search.
        date: Date/time prefix of at least ``YYYY-MM-DD``. Asterisks at
            either end are ignored. Anything shorter than minute precision
            is padded with zeros (``2022-01-01`` becomes
            ``2022-01-01T00:00``) and a trailing ``*`` is then appended.
            The default empty string is rejected with ``ValueError``.
        format: ``"json"`` returns the feed as an indented JSON string; any
            other value returns the parsed ``FeedMessage`` object.

    Returns:
        A ``(blob, data, error)`` tuple. ``blob`` is the chosen entry from
        the filesystem glob. On success ``data`` is the feed (or its JSON
        string) and ``error`` is ``None``. If the file cannot be parsed,
        ``data`` is the raw file content decoded as text and ``error`` is a
        message describing the problem.

    Raises:
        ValueError: If ``date`` is shorter than ``YYYY-MM-DD``.
        FileNotFoundError: If no file matches the resulting glob.
    """
    date = date.strip("*")  # the wildcard is re-added after padding
    if len(date) < len("2022-01-01"):
        raise ValueError("You must at least specify YYYY-MM-DD in the date string.")

    # Pad a partial timestamp with zeros up to minute precision; only the
    # part of the template beyond what the caller supplied is used.  Dates
    # that are already at least that precise are left as-is.  The wildcard
    # is appended in every case so a prefix such as "2022-01-01T10:42:" still
    # matches on the seconds.
    minute_template = "YYYY-MM-DDT00:00"
    date = date + minute_template[len(date):] + "*"

    # Imported here because only the parsing step below needs it.
    from google.protobuf.message import DecodeError

    feed = gtfs_realtime_pb2.FeedMessage()
    fs = get_fs()
    glob = f"gs://{bucket}/rt/{date}/{glob}*"
    blobs = fs.glob(glob)
    if not blobs:
        raise FileNotFoundError(f"No files were found matching glob {glob}")
    blob = random.choice(blobs)

    with fs.open(blob, "rb") as f:
        result = f.read()

    try:
        feed.ParseFromString(result)
        # A message whose text form is empty is treated as a failed parse.
        parsed = bool(str(feed))
    except DecodeError:
        parsed = False

    if not parsed:
        error = "ERROR: File could not be parsed as a protobuff."
        error += " Displaying raw file instead."
        # errors="replace" so arbitrary binary content can still be shown.
        return blob, result.decode(encoding="utf-8", errors="replace"), error

    if format == "json":
        feed = json.dumps(json_format.MessageToDict(feed), indent=2)
    return blob, feed, None
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ google-crc32c==1.1.2
google-resumable-media==1.3.0
googleapis-common-protos==1.53.0
grpcio==1.38.0
gtfs-realtime-bindings==0.0.7
identify==2.2.10
idna==2.10
iniconfig==1.1.1
Expand Down Expand Up @@ -68,6 +69,7 @@ six==1.16.0
SQLAlchemy==1.3.24
toml==0.10.2
traitlets==5.0.5
typer==0.4.0
typing-extensions==3.10.0.0
ujson==4.0.2
urllib3==1.26.5
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"pandas-gbq",
"pybigquery",
"google-cloud-bigquery",
"gtfs-realtime-bindings",
],
description="",
author="",
Expand Down

0 comments on commit fde76be

Please sign in to comment.