File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -259,6 +259,7 @@ def check_json(
259
259
if not os .path .isfile (file ):
260
260
report_dict ["file_present" ] = f"File not found at given file path { file } "
261
261
report_dict ["is_check_passed" ] = False
262
+ return report_dict
262
263
else :
263
264
report_dict ["file_present" ] = "File found"
264
265
@@ -272,9 +273,22 @@ def check_json(
272
273
elif file_size == 0 :
273
274
report_dict ["file_size" ] = "File is empty"
274
275
report_dict ["is_check_passed" ] = False
276
+ return report_dict
275
277
else :
276
278
report_dict ["file_size" ] = f"File size { round (file_size / (2 ** 30 ) ,3 )} GB"
277
279
280
+ # Check that the file is UTF-8 encoded. If not, report where the error occurs.
281
+ try :
282
+ with open (file , "r" , encoding = "utf-8" ) as f :
283
+ f .read ()
284
+ except UnicodeDecodeError as e :
285
+ report_dict ["utf8" ] = (
286
+ f"File is not UTF-8 encoded. Error raised: { e } ."
287
+ f"See https://docs.together.ai/docs/fine-tuning for more information."
288
+ )
289
+ report_dict ["is_check_passed" ] = False
290
+ return report_dict
291
+
278
292
with open (file ) as f :
279
293
# idx must be instantiated so decode errors (e.g. file is a tar) or empty files are caught
280
294
idx = - 1
You can’t perform that action at this time.
0 commit comments