diff --git a/app/controllers/bulkrax/guided_imports_controller.rb b/app/controllers/bulkrax/guided_imports_controller.rb index 26f43e65..2bccd26e 100644 --- a/app/controllers/bulkrax/guided_imports_controller.rb +++ b/app/controllers/bulkrax/guided_imports_controller.rb @@ -39,7 +39,7 @@ def validate admin_set_id = params[:importer]&.[](:admin_set_id) validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id) raw_csv_data = validation_result.delete(:raw_csv_data) - cache_key = cache_validation_errors(validation_result, raw_csv_data) + cache_key = cache_validation_errors(validation_result, raw_csv_data, csv_file) formatted = StepperResponseFormatter.format(validation_result) formatted[:validationErrorsCacheKey] = cache_key render json: formatted, status: :ok @@ -58,9 +58,10 @@ def download_validation_errors csv = ValidationErrorCsvBuilder.build( headers: cached[:headers], csv_data: cached[:csv_data], - row_errors: cached[:row_errors] + row_errors: cached[:row_errors], + file_errors: cached[:file_errors] ) - send_data csv, filename: 'import_validation_errors.csv', type: 'text/csv', disposition: 'attachment' + send_data csv, filename: error_csv_filename(cached[:original_filename]), type: 'text/csv', disposition: 'attachment' end def create @@ -105,13 +106,29 @@ def render_invalid_uploaded_files_response # @param zip_file [File, nil] an optional ZIP containing file attachments # @param admin_set_id [String, nil] optional admin set ID for validation context # @return [Hash] validation result data - def cache_validation_errors(validation_result, raw_csv_data) - return nil unless validation_result[:rowErrors]&.any? + def cache_validation_errors(validation_result, raw_csv_data, csv_file) + has_errors = validation_result[:rowErrors]&.any? || + validation_result[:missingRequired]&.any? || + validation_result[:unrecognized]&.any? || + validation_result[:emptyColumns]&.any? || + validation_result[:missingFiles]&.any? + return nil unless has_errors key = "guided_import_errors:#{session.id}:#{Time.now.to_i}" Rails.cache.write( key, - { headers: validation_result[:headers], csv_data: raw_csv_data, row_errors: validation_result[:rowErrors] }, + { + headers: validation_result[:headers], + csv_data: raw_csv_data, + row_errors: validation_result[:rowErrors] || [], + file_errors: { + missing_required: validation_result[:missingRequired] || [], + unrecognized: validation_result[:unrecognized] || {}, + empty_columns: validation_result[:emptyColumns] || [], + missing_files: validation_result[:missingFiles] || [] + }, + original_filename: filename_for(csv_file) + }, expires_in: 1.hour ) key @@ -134,6 +151,13 @@ def apply_field_mapping @importer.field_mapping = Bulkrax.field_mappings['Bulkrax::CsvParser'] end + def error_csv_filename(original_filename) + return 'import_errors.csv' if original_filename.blank? + + base = File.basename(original_filename, '.*') + "#{base}_errors.csv" + end + def set_locale_from_params I18n.locale = params[:locale] if params[:locale].present? && I18n.available_locales.include?(params[:locale].to_sym) end diff --git a/app/services/bulkrax/validation_error_csv_builder.rb b/app/services/bulkrax/validation_error_csv_builder.rb index f6a57474..744b49ad 100644 --- a/app/services/bulkrax/validation_error_csv_builder.rb +++ b/app/services/bulkrax/validation_error_csv_builder.rb @@ -3,44 +3,59 @@ require 'csv' module Bulkrax - # Builds a CSV string containing only the rows from a validated CSV that have - # row-level errors. An `errors` column is prepended as column 1; multiple - # errors on the same row are joined with " | ". + # Builds a CSV string containing all validation errors from a guided import. + # File-level errors (missing required columns, unrecognized headers, empty + # columns, missing files) appear first as summary rows with a blank `row` + # cell. Row-level errors follow, one output row per errored data row. # # Usage: # csv = Bulkrax::ValidationErrorCsvBuilder.build( - # headers: result[:headers], - # csv_data: result[:raw_csv_data], - # row_errors: result[:rowErrors] + # headers: result[:headers], + # csv_data: result[:raw_csv_data], + # row_errors: result[:rowErrors], + # file_errors: { + # missing_required: result[:missingRequired], + # unrecognized: result[:unrecognized], + # empty_columns: result[:emptyColumns], + # missing_files: result[:missingFiles] + # } # ) class ValidationErrorCsvBuilder # @param headers [Array] original CSV headers in order - # @param csv_data [Array] one entry per data row; each hash must have - # :row_number (Integer, 1-indexed data row, so first data row == 2 matching - # validator convention) and :raw_row (String-keyed hash of column=>value) + # @param csv_data [Array] one entry per data row; each hash has + # :raw_row (String-keyed hash of column=>value) # @param row_errors [Array] each hash has :row (Integer) and :message (String) + # @param file_errors [Hash] file-level issues: + # - :missing_required [Array] each hash has :model and :field + # - :unrecognized [Hash] column_name => suggestion_or_nil + # - :empty_columns [Array] 1-based column positions with no header + # - :missing_files [Array] filenames referenced but not found # @return [String] CSV content - def self.build(headers:, csv_data:, row_errors:) - new(headers: headers, csv_data: csv_data, row_errors: row_errors).build + def self.build(headers:, csv_data:, row_errors:, file_errors: {}) + new(headers: headers, csv_data: csv_data, row_errors: row_errors, file_errors: file_errors).build end - def initialize(headers:, csv_data:, row_errors:) + def initialize(headers:, csv_data:, row_errors:, file_errors:) @headers = headers @csv_data = csv_data @row_errors = row_errors + @file_errors = file_errors end def build errors_by_row = group_errors_by_row + blank_data = Array.new(@headers.length) CSV.generate(force_quotes: false) do |csv| csv << ['row', 'errors'] + @headers + file_level_error_rows.each do |message| + csv << [nil, message] + blank_data + end + @csv_data.each_with_index do |record, index| row_number = index + 2 # header is row 1; first data row is row 2 - next unless errors_by_row.key?(row_number) - - error_messages = errors_by_row[row_number].map { |e| e[:message] }.join(' | ') + error_messages = errors_by_row[row_number]&.map { |e| e[:message] }&.join(' | ') raw_row = record[:raw_row] || {} csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] } end @@ -56,5 +71,29 @@ def group_errors_by_row hash[row_num] << error end end + + def file_level_error_rows + messages = [] + + Array(@file_errors[:missing_required]).each do |entry| + messages << "Missing required column '#{entry[:field]}' (#{entry[:model]})" + end + + Hash(@file_errors[:unrecognized]).each do |col, suggestion| + msg = "Unrecognized column '#{col}'" + msg += " (did you mean '#{suggestion}'?)" if suggestion.present? + messages << msg + end + + Array(@file_errors[:empty_columns]).each do |pos| + messages << "Column #{pos + 2} has no header and will be ignored during import" + end + + Array(@file_errors[:missing_files]).each do |filename| + messages << "Missing file: #{filename}" + end + + messages + end end end diff --git a/spec/services/bulkrax/validation_error_csv_builder_spec.rb b/spec/services/bulkrax/validation_error_csv_builder_spec.rb index 010bc89a..22c9c0e2 100644 --- a/spec/services/bulkrax/validation_error_csv_builder_spec.rb +++ b/spec/services/bulkrax/validation_error_csv_builder_spec.rb @@ -19,10 +19,10 @@ [{ row: 2, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: "Required field 'title' is missing" }] end - it 'includes only the errored row plus the header' do + it 'includes all data rows plus the header' do result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) rows = CSV.parse(result) - expect(rows.length).to eq(2) + expect(rows.length).to eq(csv_data.length + 1) end it 'puts the original row number in column 1' do @@ -45,11 +45,12 @@ expect(rows[1][4]).to eq('My Title') end - it 'excludes clean rows' do + it 'includes clean rows with a blank errors cell' do result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) rows = CSV.parse(result) - source_ids = rows[1..].map { |r| r[3] } - expect(source_ids).not_to include('id-002') + clean_row = rows.find { |r| r[3] == 'id-002' } + expect(clean_row).not_to be_nil + expect(clean_row[1]).to be_nil end end @@ -64,7 +65,7 @@ it 'joins multiple error messages with " | "' do result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) rows = CSV.parse(result) - expect(rows[1][1]).to eq('Title is required | Description is required') + expect(rows[3][1]).to eq('Title is required | Description is required') end end @@ -76,17 +77,19 @@ ] end - it 'includes one output row per errored input row' do + it 'includes all data rows' do result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) rows = CSV.parse(result) - expect(rows.length).to eq(3) # header + 2 errored rows + expect(rows.length).to eq(csv_data.length + 1) end - it 'outputs errored rows in original order' do + it 'outputs rows in original order with errors on the correct rows' do result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) rows = CSV.parse(result) - expect(rows[1][3]).to eq('id-001') - expect(rows[2][3]).to eq('id-003') + row_by_id = rows[1..].index_by { |r| r[3] } + expect(row_by_id['id-001'][1]).to eq('Duplicate source_identifier') + expect(row_by_id['id-003'][1]).to eq('Title is required') + expect(row_by_id['id-002'][1]).to be_nil end end @@ -106,5 +109,80 @@ expect(rows.first[2..]).to eq(headers) end end + + context 'file-level errors' do + let(:row_errors) { [] } + + it 'emits a row for each missing required column' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { missing_required: [{ model: 'GenericWork', field: 'title' }] } + ) + rows = CSV.parse(result) + expect(rows[1][0]).to be_nil + expect(rows[1][1]).to eq("Missing required column 'title' (GenericWork)") + end + + it 'emits a row for each unrecognized header, with suggestion when present' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { unrecognized: { 'legacy_id' => 'identifier', 'notes' => nil } } + ) + rows = CSV.parse(result) + messages = rows[1..].map { |r| r[1] } + expect(messages).to include("Unrecognized column 'legacy_id' (did you mean 'identifier'?)") + expect(messages).to include("Unrecognized column 'notes'") + end + + it 'emits a row for each empty column position' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { empty_columns: [3] } + ) + rows = CSV.parse(result) + expect(rows[1][1]).to eq('Column 5 has no header and will be ignored during import') + end + + it 'emits a row for each missing file' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { missing_files: ['photo.jpg'] } + ) + rows = CSV.parse(result) + expect(rows[1][1]).to eq('Missing file: photo.jpg') + end + + it 'leaves the row number blank for file-level rows' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { missing_files: ['photo.jpg'] } + ) + rows = CSV.parse(result) + expect(rows[1][0]).to be_nil + end + + it 'leaves data columns blank for file-level rows' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { missing_files: ['photo.jpg'] } + ) + rows = CSV.parse(result) + expect(rows[1][2..]).to all(be_nil) + end + + context 'when both file-level and row-level errors are present' do + let(:row_errors) { [{ row: 2, severity: 'error', category: 'test', column: 'title', value: nil, message: 'Row error' }] } + + it 'outputs file-level rows before row-level rows' do + result = described_class.build( + headers: headers, csv_data: csv_data, row_errors: row_errors, + file_errors: { missing_required: [{ model: 'GenericWork', field: 'title' }] } + ) + rows = CSV.parse(result) + expect(rows[1][0]).to be_nil # file-level row has no row number + expect(rows[2][0]).to eq('2') # row-level row has row number + end + end + end end end