-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #37 from cal-itp/feature-data-infra-1142
adding random protobuff function and a cli to access it
- Loading branch information
Showing
5 changed files
with
90 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# flake8: noqa | ||
|
||
__version__ = "0.0.11" | ||
__version__ = "0.0.12" | ||
|
||
from .sql import get_table, write_table, query_sql, to_snakecase, get_engine | ||
from .storage import save_to_gcfs, read_gcfs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from calitp.protobuff import get_random_protobuff | ||
import datetime | ||
import typer | ||
|
||
app = typer.Typer() | ||
|
||
|
||
@app.command()
def random_protobuff(
    glob=typer.Argument("*", help="A glob matching itp_id/url_number/string.",),
    bucket=typer.Option("gtfs-data", help="GCS bucket to search.",),
    date=typer.Option(f"{datetime.date.today()}*", help="Date glob.",),
    format=typer.Option("protobuff", help="format to output, json or protobuff.",),
):
    # NOTE: intentionally no docstring here -- typer would surface it as the
    # command's help text and change the CLI output.
    #
    # Downloads one random file via get_random_protobuff and prints a preview
    # capped at 20 lines or 20 * 80 characters, whichever limit is hit first.
    max_lines = 20
    max_chars = max_lines * 80

    blob, payload, error = get_random_protobuff(
        glob, bucket=bucket, date=date, format=format,
    )
    text = str(payload)

    print(f"downloaded {blob}")
    if error:
        print(error)

    lines = text.split("\n")
    preview = "\n".join(lines[:max_lines])

    if len(preview) <= max_chars:
        # The first lines are short enough: truncate by line count only.
        print(preview)
        if len(lines) > max_lines:
            print(f"... (only showing {max_lines}/{len(lines)} lines)")
        return

    # Very long lines (e.g. single-line output): truncate by characters instead.
    print(text[:max_chars])
    print(f"... (only showing {max_chars}/{len(text)} characters)")
|
||
|
||
@app.callback()
def callback():
    """
    Pull a random protobuff file from the rt archiver storage.

    Without --date, defaults to midnight this morning.
    A full --date string looks like 2022-01-01T10:42:16.
    --date must be at least YYYY-MM-DD and will default to zeros after that.
    """
|
||
|
||
app() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from calitp.storage import get_fs | ||
import datetime | ||
from google.protobuf import json_format | ||
from google.transit import gtfs_realtime_pb2 | ||
import json | ||
import random | ||
|
||
|
||
def get_random_protobuff(glob, bucket="gtfs-data", date="", format="protobuff"):
    """Fetch one randomly chosen file from the rt archive and parse it.

    Args:
        glob: Glob matched against object names below the timestamp
            directory. A trailing "*" is always appended.
        bucket: Name of the GCS bucket to search.
        date: Timestamp prefix, at least ``YYYY-MM-DD``. Anything missing up
            to the minutes is filled with zeros (``2022-01-01`` becomes
            ``2022-01-01T00:00*``); longer values are used as a prefix as
            given. Surrounding asterisks are ignored.
        format: ``"json"`` returns the feed as an indented JSON string; any
            other value returns the parsed ``FeedMessage`` object.

    Returns:
        A ``(blob, data, error)`` tuple. ``blob`` is the chosen path. On
        success ``data`` is the feed (message or JSON string) and ``error``
        is ``None``. If the file cannot be parsed, ``data`` is the raw file
        content decoded as text and ``error`` is a message string.

    Raises:
        ValueError: If ``date`` is shorter than ``YYYY-MM-DD``.
        Exception: If no file matches the resulting glob.
    """
    # Local import: DecodeError lives in the protobuf package this module
    # already depends on; importing here keeps the block self-contained.
    from google.protobuf.message import DecodeError

    date = date.strip("*")  # ignore surrounding asterisks
    if len(date) < len("2022-01-01"):
        raise ValueError("You must at least specify YYYY-MM-DD in the date string.")
    if len(date) > len("2022-01-01T00:00"):
        # The user specified (part of) the seconds: use it as a prefix as-is.
        # This also covers a value ending right after the minutes' colon,
        # which previously ended up without any wildcard.
        date = date + "*"
    else:
        # Pad with zeros up to the minutes so that a date like 2022-01-01
        # becomes 2022-01-01T00:00*, i.e. we prefix on everything up to the
        # seconds.
        date = date + "T00:00*"[len(date) - len("2022-01-01") :]

    fs = get_fs()
    glob = glob + "*"
    glob = f"gs://{bucket}/rt/{date}/{glob}"
    blobs = fs.glob(glob)
    if not blobs:
        raise Exception(f"No files were found matching glob {glob}")
    blob = random.choice(blobs)

    with fs.open(blob, "rb") as f:
        result = f.read()

    # FeedMessage is the top-level message type of the GTFS-realtime schema.
    feed = gtfs_realtime_pb2.FeedMessage()
    try:
        feed.ParseFromString(result)
        # A payload can parse without error yet yield an empty message.
        parsed = bool(str(feed))
    except DecodeError:
        parsed = False

    if not parsed:
        error = "ERROR: File could not be parsed as a protobuff."
        error += " Displaying raw file instead."
        # errors="replace" so that showing a binary file cannot itself fail.
        return blob, result.decode(encoding="utf-8", errors="replace"), error
    if format == "json":
        feed = json.dumps(json_format.MessageToDict(feed), indent=2)
    return blob, feed, None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters