Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some bugs in upload benchmark scripts #6429

Merged
merged 3 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": false, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [3086359081]}}]
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": false, "device": "cpu", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_eager", "type": "add_loop", "backend": "eager"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [3086359081]}}]
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cuda", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_inductor_dynamic_gpu", "type": "add_loop", "backend": "inductor"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [40859830085]}}]
[{"benchmark": {"name": "pr_time_benchmarks", "extra_info": {"is_dynamic": true, "device": "cuda", "description": "a loop over 100 add node"}}, "model": {"name": "add_loop_inductor_dynamic_gpu", "type": "add_loop", "backend": "inductor"}, "metric": {"name": "compile_time_instruction_count", "benchmark_values": [40859830085]}}]
38 changes: 27 additions & 11 deletions .github/scripts/upload_benchmark_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@
"""
Copied from upload stats script
"""
info(f"Writing {len(docs)} documents to DynamoDB {dynamodb_table}")
msg = f"Writing {len(docs)} documents to DynamoDB {dynamodb_table}"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comes from https://google.github.io/styleguide/pyguide.html#3101-logging — I took the easy route and just moved the message into a variable to silence the warning.

info(msg)
if not dry_run:
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/dynamodb.html#batch-writing
with boto3.resource("dynamodb").Table(dynamodb_table).batch_writer() as batch:
Expand All @@ -187,19 +188,31 @@
benchmark_results = []
with open(filepath) as f:
try:
benchmark_results = json.load(f)
r = json.load(f)
# Handle the JSONEachRow case where there is only one record in the
# JSON file, it can still be loaded normally, but will need to be
# added into the list of benchmark results with the length of 1
if isinstance(r, dict):
benchmark_results.append(r)
elif isinstance(r, (list, tuple)):
benchmark_results = r

except JSONDecodeError:
f.seek(0)

# Try again in ClickHouse JSONEachRow format
for line in f:
try:
r = json.loads(line)
# Each row needs to be a dictionary in JSON format
if not isinstance(r, dict):
warn(f"Not a JSON dict {line}, skipping")
# Each row needs to be a dictionary in JSON format or a list
if isinstance(r, dict):
benchmark_results.append(r)
elif isinstance(r, (list, tuple)):
benchmark_results.extend(r)
else:
warn(f"Not a JSON dict or list {line}, skipping")
continue
benchmark_results.append(r)

except JSONDecodeError:
warn(f"Invalid JSON {line}, skipping")

Expand All @@ -220,7 +233,7 @@
for result in benchmark_results:
# This is a required field
if "metric" not in result:
warn(f"{result} is not a benchmark record, skipping")
warn(f"{result} from {filepath} is not a benchmark record, skipping")
continue

record: Dict[str, Any] = {**metadata, **result}
Expand Down Expand Up @@ -284,10 +297,12 @@
"""
s3_path = generate_s3_path(benchmark_results, filepath, schema_version)
if not s3_path:
info(f"Could not generate an S3 path for {filepath}, skipping...")
msg = f"Could not generate an S3 path for {filepath}, skipping..."
info(msg)
return

info(f"Upload {filepath} to s3://{s3_bucket}/{s3_path}")
msg = f"Upload {filepath} to s3://{s3_bucket}/{s3_path}"
info(msg)
if not dry_run:
# Write in JSONEachRow format
data = "\n".join([json.dumps(result) for result in benchmark_results])
Expand All @@ -314,7 +329,8 @@
# NB: This is for backward compatibility before we move to schema v3
if schema_version == "v2":
with open(filepath) as f:
info(f"Uploading {filepath} to dynamoDB ({schema_version})")
msg = f"Uploading {filepath} to dynamoDB ({schema_version})"
info(msg)
upload_to_dynamodb(
dynamodb_table=args.dynamodb_table,
# NB: DynamoDB only accepts decimal number, not float
Expand All @@ -331,7 +347,7 @@
)

if not benchmark_results:
return
continue

upload_to_s3(
s3_bucket=OSSCI_BENCHMARKS_BUCKET,
Expand Down
Loading