@@ -19,7 +19,7 @@ class Remediation(NamedTuple):
1919 error_msg : Optional [str ] = None
2020
2121
22- OptionalDataFrameT = TypeVar ("OptionalDataFrameT" , bound = "Optional[pd.Dataframe ]" )
22+ OptionalDataFrameT = TypeVar ("OptionalDataFrameT" , bound = "Optional[pd.DataFrame ]" )
2323
2424
2525def num_examples_validator (df : pd .DataFrame ) -> Remediation :
@@ -490,28 +490,28 @@ def read_any_format(
490490 dtype = str ,
491491 ).fillna ("" )
492492 elif fname .lower ().endswith (".jsonl" ):
493- df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" )
493+ df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" ) # type: ignore
494494 if len (df ) == 1 : # type: ignore
495495 # this is NOT what we expect for a .jsonl file
496496 immediate_msg = "\n - Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format"
497497 necessary_msg = "Your format `JSON` will be converted to `JSONL`"
498- df = pd .read_json (fname , dtype = str ).fillna ("" )
498+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
499499 else :
500500 pass # this is what we expect for a .jsonl file
501501 elif fname .lower ().endswith (".json" ):
502502 try :
503503 # to handle case where .json file is actually a .jsonl file
504- df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" )
504+ df = pd .read_json (fname , lines = True , dtype = str ).fillna ("" ) # type: ignore
505505 if len (df ) == 1 : # type: ignore
506506 # this code path corresponds to a .json file that has one line
507- df = pd .read_json (fname , dtype = str ).fillna ("" )
507+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
508508 else :
509509 # this is NOT what we expect for a .json file
510510 immediate_msg = "\n - Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
511511 necessary_msg = "Your format `JSON` will be converted to `JSONL`"
512512 except ValueError :
513513 # this code path corresponds to a .json file that has multiple lines (i.e. it is indented)
514- df = pd .read_json (fname , dtype = str ).fillna ("" )
514+ df = pd .read_json (fname , dtype = str ).fillna ("" ) # type: ignore
515515 else :
516516 error_msg = (
517517 "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL"
0 commit comments