@@ -172,7 +172,8 @@ def upload_to_dynamodb(
     """
     Copied from upload stats script
     """
-    info(f"Writing {len(docs)} documents to DynamoDB {dynamodb_table}")
+    msg = f"Writing {len(docs)} documents to DynamoDB {dynamodb_table}"
+    info(msg)
     if not dry_run:
         # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/dynamodb.html#batch-writing
         with boto3.resource("dynamodb").Table(dynamodb_table).batch_writer() as batch:
@@ -187,19 +188,31 @@ def read_benchmark_results(filepath: str) -> List[Dict[str, Any]]:
     benchmark_results = []
     with open(filepath) as f:
         try:
-            benchmark_results = json.load(f)
+            r = json.load(f)
+            # Handle the JSONEachRow case where there is only one record in the
+            # JSON file, it can still be loaded normally, but will need to be
+            # added into the list of benchmark results with the length of 1
+            if isinstance(r, dict):
+                benchmark_results.append(r)
+            elif isinstance(r, list):
+                benchmark_results = r
+
         except JSONDecodeError:
             f.seek(0)

             # Try again in ClickHouse JSONEachRow format
             for line in f:
                 try:
                     r = json.loads(line)
-                    # Each row needs to be a dictionary in JSON format
-                    if not isinstance(r, dict):
-                        warn(f"Not a JSON dict {line}, skipping")
+                    # Each row needs to be a dictionary in JSON format or a list
+                    if isinstance(r, dict):
+                        benchmark_results.append(r)
+                    elif isinstance(r, list):
+                        benchmark_results.extend(r)
+                    else:
+                        warn(f"Not a JSON dict or list {line}, skipping")
                         continue
-                    benchmark_results.append(r)
+
                 except JSONDecodeError:
                     warn(f"Invalid JSON {line}, skipping")
@@ -220,7 +233,7 @@ def process_benchmark_results(
     for result in benchmark_results:
         # This is a required field
         if "metric" not in result:
-            warn(f"{result} is not a benchmark record, skipping")
+            warn(f"{result} from {filepath} is not a benchmark record, skipping")
             continue

         record: Dict[str, Any] = {**metadata, **result}
@@ -284,10 +297,12 @@ def upload_to_s3(
     """
     s3_path = generate_s3_path(benchmark_results, filepath, schema_version)
     if not s3_path:
-        info(f"Could not generate an S3 path for {filepath}, skipping...")
+        msg = f"Could not generate an S3 path for {filepath}, skipping..."
+        info(msg)
         return

-    info(f"Upload {filepath} to s3://{s3_bucket}/{s3_path}")
+    msg = f"Upload {filepath} to s3://{s3_bucket}/{s3_path}"
+    info(msg)
     if not dry_run:
         # Write in JSONEachRow format
         data = "\n".join([json.dumps(result) for result in benchmark_results])
@@ -314,7 +329,8 @@ def main() -> None:
         # NB: This is for backward compatibility before we move to schema v3
         if schema_version == "v2":
             with open(filepath) as f:
-                info(f"Uploading {filepath} to dynamoDB ({schema_version})")
+                msg = f"Uploading {filepath} to dynamoDB ({schema_version})"
+                info(msg)
                 upload_to_dynamodb(
                     dynamodb_table=args.dynamodb_table,
                     # NB: DynamoDB only accepts decimal number, not float
@@ -331,7 +347,7 @@ def main() -> None:
         )

         if not benchmark_results:
-            return
+            continue

         upload_to_s3(
             s3_bucket=OSSCI_BENCHMARKS_BUCKET,
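For reference (not part of the diff itself), the revised reading logic amounts to accepting three input shapes: a single JSON object, a JSON array, or ClickHouse JSONEachRow with one object or array per line. Below is a minimal standalone sketch of that behavior, using only the standard library and a hypothetical read_results name in place of the script's read_benchmark_results:

import json
from json.decoder import JSONDecodeError
from typing import Any, Dict, List


def read_results(filepath: str) -> List[Dict[str, Any]]:
    # First try to parse the whole file as regular JSON
    with open(filepath) as f:
        try:
            r = json.load(f)
            if isinstance(r, dict):
                return [r]  # a single record still becomes a list of length 1
            if isinstance(r, list):
                return r
            return []
        except JSONDecodeError:
            # Fall back to ClickHouse JSONEachRow: one JSON value per line
            f.seek(0)
            results: List[Dict[str, Any]] = []
            for line in f:
                try:
                    row = json.loads(line)
                except JSONDecodeError:
                    continue  # skip malformed lines instead of failing the upload
                if isinstance(row, dict):
                    results.append(row)
                elif isinstance(row, list):
                    results.extend(row)
            return results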