Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions app/controllers/bulkrax/guided_imports_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def validate
admin_set_id = params[:importer]&.[](:admin_set_id)
validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id)
raw_csv_data = validation_result.delete(:raw_csv_data)
cache_key = cache_validation_errors(validation_result, raw_csv_data)
cache_key = cache_validation_errors(validation_result, raw_csv_data, csv_file)
formatted = StepperResponseFormatter.format(validation_result)
formatted[:validationErrorsCacheKey] = cache_key
render json: formatted, status: :ok
Expand All @@ -58,9 +58,10 @@ def download_validation_errors
csv = ValidationErrorCsvBuilder.build(
headers: cached[:headers],
csv_data: cached[:csv_data],
row_errors: cached[:row_errors]
row_errors: cached[:row_errors],
file_errors: cached[:file_errors]
)
send_data csv, filename: 'import_validation_errors.csv', type: 'text/csv', disposition: 'attachment'
send_data csv, filename: error_csv_filename(cached[:original_filename]), type: 'text/csv', disposition: 'attachment'
end

def create
Expand Down Expand Up @@ -105,13 +106,29 @@ def render_invalid_uploaded_files_response
# @param zip_file [File, nil] an optional ZIP containing file attachments
# @param admin_set_id [String, nil] optional admin set ID for validation context
# @return [Hash] validation result data
def cache_validation_errors(validation_result, raw_csv_data)
return nil unless validation_result[:rowErrors]&.any?
def cache_validation_errors(validation_result, raw_csv_data, csv_file)
has_errors = validation_result[:rowErrors]&.any? ||
validation_result[:missingRequired]&.any? ||
validation_result[:unrecognized]&.any? ||
validation_result[:emptyColumns]&.any? ||
validation_result[:missingFiles]&.any?
return nil unless has_errors

key = "guided_import_errors:#{session.id}:#{Time.now.to_i}"
Rails.cache.write(
key,
{ headers: validation_result[:headers], csv_data: raw_csv_data, row_errors: validation_result[:rowErrors] },
{
headers: validation_result[:headers],
csv_data: raw_csv_data,
row_errors: validation_result[:rowErrors] || [],
file_errors: {
missing_required: validation_result[:missingRequired] || [],
unrecognized: validation_result[:unrecognized] || {},
empty_columns: validation_result[:emptyColumns] || [],
missing_files: validation_result[:missingFiles] || []
},
original_filename: filename_for(csv_file)
},
expires_in: 1.hour
)
key
Expand All @@ -134,6 +151,13 @@ def apply_field_mapping
@importer.field_mapping = Bulkrax.field_mappings['Bulkrax::CsvParser']
end

def error_csv_filename(original_filename)
return 'import_errors.csv' if original_filename.blank?

base = File.basename(original_filename, '.*')
"#{base}_errors.csv"
end

def set_locale_from_params
I18n.locale = params[:locale] if params[:locale].present? && I18n.available_locales.include?(params[:locale].to_sym)
end
Expand Down
69 changes: 54 additions & 15 deletions app/services/bulkrax/validation_error_csv_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,59 @@
require 'csv'

module Bulkrax
# Builds a CSV string containing only the rows from a validated CSV that have
# row-level errors. An `errors` column is prepended as column 1; multiple
# errors on the same row are joined with " | ".
# Builds a CSV string containing all validation errors from a guided import.
# File-level errors (missing required columns, unrecognized headers, empty
# columns, missing files) appear first as summary rows with a blank `row`
# cell. Row-level errors follow, one output row per errored data row.
#
# Usage:
# csv = Bulkrax::ValidationErrorCsvBuilder.build(
# headers: result[:headers],
# csv_data: result[:raw_csv_data],
# row_errors: result[:rowErrors]
# headers: result[:headers],
# csv_data: result[:raw_csv_data],
# row_errors: result[:rowErrors],
# file_errors: {
# missing_required: result[:missingRequired],
# unrecognized: result[:unrecognized],
# empty_columns: result[:emptyColumns],
# missing_files: result[:missingFiles]
# }
# )
class ValidationErrorCsvBuilder
# @param headers [Array<String>] original CSV headers in order
# @param csv_data [Array<Hash>] one entry per data row; each hash must have
# :row_number (Integer, 1-indexed data row, so first data row == 2 matching
# validator convention) and :raw_row (String-keyed hash of column=>value)
# @param csv_data [Array<Hash>] one entry per data row; each hash has
# :raw_row (String-keyed hash of column=>value)
# @param row_errors [Array<Hash>] each hash has :row (Integer) and :message (String)
# @param file_errors [Hash] file-level issues:
# - :missing_required [Array<Hash>] each hash has :model and :field
# - :unrecognized [Hash] column_name => suggestion_or_nil
# - :empty_columns [Array<Integer>] 1-based column positions with no header
# - :missing_files [Array<String>] filenames referenced but not found
# @return [String] CSV content
def self.build(headers:, csv_data:, row_errors:)
new(headers: headers, csv_data: csv_data, row_errors: row_errors).build
def self.build(headers:, csv_data:, row_errors:, file_errors: {})
new(headers: headers, csv_data: csv_data, row_errors: row_errors, file_errors: file_errors).build
end

def initialize(headers:, csv_data:, row_errors:)
def initialize(headers:, csv_data:, row_errors:, file_errors:)
@headers = headers
@csv_data = csv_data
@row_errors = row_errors
@file_errors = file_errors
end

def build
errors_by_row = group_errors_by_row
blank_data = Array.new(@headers.length)

CSV.generate(force_quotes: false) do |csv|
csv << ['row', 'errors'] + @headers

file_level_error_rows.each do |message|
csv << [nil, message] + blank_data
end

@csv_data.each_with_index do |record, index|
row_number = index + 2 # header is row 1; first data row is row 2
next unless errors_by_row.key?(row_number)

error_messages = errors_by_row[row_number].map { |e| e[:message] }.join(' | ')
error_messages = errors_by_row[row_number]&.map { |e| e[:message] }&.join(' | ')
raw_row = record[:raw_row] || {}
csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] }
end
Expand All @@ -56,5 +71,29 @@ def group_errors_by_row
hash[row_num] << error
end
end

def file_level_error_rows
messages = []

Array(@file_errors[:missing_required]).each do |entry|
messages << "Missing required column '#{entry[:field]}' (#{entry[:model]})"
end

Hash(@file_errors[:unrecognized]).each do |col, suggestion|
msg = "Unrecognized column '#{col}'"
msg += " (did you mean '#{suggestion}'?)" if suggestion.present?
messages << msg
end

Array(@file_errors[:empty_columns]).each do |pos|
messages << "Column #{pos + 2} has no header and will be ignored during import"
end

Array(@file_errors[:missing_files]).each do |filename|
messages << "Missing file: #{filename}"
end

messages
end
end
end
100 changes: 89 additions & 11 deletions spec/services/bulkrax/validation_error_csv_builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
[{ row: 2, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: "Required field 'title' is missing" }]
end

it 'includes only the errored row plus the header' do
it 'includes all data rows plus the header' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.length).to eq(2)
expect(rows.length).to eq(csv_data.length + 1)
end

it 'puts the original row number in column 1' do
Expand All @@ -45,11 +45,12 @@
expect(rows[1][4]).to eq('My Title')
end

it 'excludes clean rows' do
it 'includes clean rows with a blank errors cell' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
source_ids = rows[1..].map { |r| r[3] }
expect(source_ids).not_to include('id-002')
clean_row = rows.find { |r| r[3] == 'id-002' }
expect(clean_row).not_to be_nil
expect(clean_row[1]).to be_nil
end
end

Expand All @@ -64,7 +65,7 @@
it 'joins multiple error messages with " | "' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][1]).to eq('Title is required | Description is required')
expect(rows[3][1]).to eq('Title is required | Description is required')
end
end

Expand All @@ -76,17 +77,19 @@
]
end

it 'includes one output row per errored input row' do
it 'includes all data rows' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.length).to eq(3) # header + 2 errored rows
expect(rows.length).to eq(csv_data.length + 1)
end

it 'outputs errored rows in original order' do
it 'outputs rows in original order with errors on the correct rows' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][3]).to eq('id-001')
expect(rows[2][3]).to eq('id-003')
row_by_id = rows[1..].index_by { |r| r[3] }
expect(row_by_id['id-001'][1]).to eq('Duplicate source_identifier')
expect(row_by_id['id-003'][1]).to eq('Title is required')
expect(row_by_id['id-002'][1]).to be_nil
end
end

Expand All @@ -106,5 +109,80 @@
expect(rows.first[2..]).to eq(headers)
end
end

context 'file-level errors' do
let(:row_errors) { [] }

it 'emits a row for each missing required column' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { missing_required: [{ model: 'GenericWork', field: 'title' }] }
)
rows = CSV.parse(result)
expect(rows[1][0]).to be_nil
expect(rows[1][1]).to eq("Missing required column 'title' (GenericWork)")
end

it 'emits a row for each unrecognized header, with suggestion when present' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { unrecognized: { 'legacy_id' => 'identifier', 'notes' => nil } }
)
rows = CSV.parse(result)
messages = rows[1..].map { |r| r[1] }
expect(messages).to include("Unrecognized column 'legacy_id' (did you mean 'identifier'?)")
expect(messages).to include("Unrecognized column 'notes'")
end

it 'emits a row for each empty column position' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { empty_columns: [3] }
)
rows = CSV.parse(result)
expect(rows[1][1]).to eq('Column 5 has no header and will be ignored during import')
end

it 'emits a row for each missing file' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { missing_files: ['photo.jpg'] }
)
rows = CSV.parse(result)
expect(rows[1][1]).to eq('Missing file: photo.jpg')
end

it 'leaves the row number blank for file-level rows' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { missing_files: ['photo.jpg'] }
)
rows = CSV.parse(result)
expect(rows[1][0]).to be_nil
end

it 'leaves data columns blank for file-level rows' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { missing_files: ['photo.jpg'] }
)
rows = CSV.parse(result)
expect(rows[1][2..]).to all(be_nil)
end

context 'when both file-level and row-level errors are present' do
let(:row_errors) { [{ row: 2, severity: 'error', category: 'test', column: 'title', value: nil, message: 'Row error' }] }

it 'outputs file-level rows before row-level rows' do
result = described_class.build(
headers: headers, csv_data: csv_data, row_errors: row_errors,
file_errors: { missing_required: [{ model: 'GenericWork', field: 'title' }] }
)
rows = CSV.parse(result)
expect(rows[1][0]).to be_nil # file-level row has no row number
expect(rows[2][0]).to eq('2') # row-level row has row number
end
end
end
end
end
Loading