Skip to content

Commit

Permalink
Fix Check Data Tables throwing exception. (#158)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jon-ide committed Jan 4, 2024
1 parent 29a1be0 commit b1fed9e
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
3 changes: 0 additions & 3 deletions webapp/home/check_data_table_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,9 +603,6 @@ def check_data_table(eml_file_url:str=None,
eml_node, _ = load_eml_file(eml_file_url)
df, truncated = load_df(eml_node, csv_file_url, data_table_name, max_rows=None)

- import sys
- foo = sys.getsizeof(df)

if truncated:
flash(f'The number of rows in {os.path.basename(unquote_plus(csv_file_url))} is greater than 5 million. ezEML checks '
f'only the first 5 million rows. Often this suffices to indicate the kinds of errors that are present. The full '
Expand Down
15 changes: 10 additions & 5 deletions webapp/home/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ def check_data_table():
csv_file_url = request.headers.get('csv_file_url')
data_table_name = request.headers.get('data_table_name')
column_names = request.headers.get('column_names').split(',')
- return profile_and_save("check_data_table", check_data_table_contents.check_data_table(eml_file_url, csv_file_url, data_table_name, column_names))
+ # return profile_and_save("check_data_table", check_data_table_contents.check_data_table(eml_file_url, csv_file_url, data_table_name, column_names))
+ return check_data_table_contents.check_data_table(eml_file_url, csv_file_url, data_table_name, column_names)


@home_bp.route('/data_table_errors/<data_table_name>', methods=['GET', 'POST'])
Expand Down Expand Up @@ -451,11 +452,15 @@ def data_table_errors(data_table_name:str=None):
errors = check_data_table_contents.get_data_file_eval(current_document, csv_filename, metadata_hash)
if not errors:
try:
+ errors = check_data_table_contents.check_data_table(eml_file_url,
+ csv_file_url,
+ data_table_name,
+ max_errs_per_column=None)
# start = datetime.now()
- errors = profile_and_save(#"check_data_table",
- check_data_table_contents.check_data_table(eml_file_url, csv_file_url,
- data_table_name,
- max_errs_per_column=None))
+ # errors = profile_and_save(#"check_data_table",
+ # check_data_table_contents.check_data_table(eml_file_url, csv_file_url,
+ # data_table_name,
+ # max_errs_per_column=None))
# log_info(f'check_data_table() returned {errors[:1000]}')
# end = datetime.now()
# elapsed = (end - start).total_seconds()
Expand Down

0 comments on commit b1fed9e

Please sign in to comment.