diff --git a/app/assets/javascripts/bulkrax/importers_stepper.js b/app/assets/javascripts/bulkrax/importers_stepper.js
index 6a5ab211..066af38c 100644
--- a/app/assets/javascripts/bulkrax/importers_stepper.js
+++ b/app/assets/javascripts/bulkrax/importers_stepper.js
@@ -94,7 +94,8 @@
// API endpoints
ENDPOINTS: {
DEMO_SCENARIOS: '/importers/guided_import/demo_scenarios',
- VALIDATE: '/importers/guided_import/validate'
+ VALIDATE: '/importers/guided_import/validate',
+ DOWNLOAD_VALIDATION_ERRORS: '/importers/guided_import/download_validation_errors'
}
}
@@ -1560,7 +1561,8 @@
missingFiles: data.missingFiles || data.missing_files,
foundFiles: data.foundFiles != null ? data.foundFiles : data.found_files,
zipIncluded: data.zipIncluded != null ? data.zipIncluded : data.zip_included,
- messages: data.messages
+ messages: data.messages,
+ validationErrorsCacheKey: data.validationErrorsCacheKey || null
}
}
@@ -1653,6 +1655,24 @@
if (data.hasWarnings || !data.isValid) {
$('.warning-acknowledgment').show()
}
+
+ // Download errors button
+ if (data.validationErrorsCacheKey) {
+ var downloadUrl =
+ CONSTANTS.ENDPOINTS.DOWNLOAD_VALIDATION_ERRORS + '?key=' + encodeURIComponent(data.validationErrorsCacheKey)
+ var $btn = $(
+ '' +
+ ' ' +
+ t('download_validation_errors_csv') +
+ ''
+ )
+ $('.validation-results .download-errors-container').remove()
+ $('.validation-results').append($('
').append($btn))
+ } else {
+ $('.validation-results .download-errors-container').remove()
+ }
}
// Render import size gauge
diff --git a/app/controllers/bulkrax/guided_imports_controller.rb b/app/controllers/bulkrax/guided_imports_controller.rb
index ea6a490d..26f43e65 100644
--- a/app/controllers/bulkrax/guided_imports_controller.rb
+++ b/app/controllers/bulkrax/guided_imports_controller.rb
@@ -37,11 +37,32 @@ def validate
end
admin_set_id = params[:importer]&.[](:admin_set_id)
- render json: StepperResponseFormatter.format(run_validation(csv_file, zip_file, admin_set_id: admin_set_id)), status: :ok
+ validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id)
+ raw_csv_data = validation_result.delete(:raw_csv_data)
+ cache_key = cache_validation_errors(validation_result, raw_csv_data)
+ formatted = StepperResponseFormatter.format(validation_result)
+ formatted[:validationErrorsCacheKey] = cache_key
+ render json: formatted, status: :ok
ensure
close_file_handles(files)
end
+ def download_validation_errors
+ cache_key = params[:key].to_s
+ expected_prefix = "guided_import_errors:#{session.id}:"
+ return head :not_found unless cache_key.start_with?(expected_prefix)
+
+ cached = Rails.cache.read(cache_key)
+ return head :not_found unless cached
+
+ csv = ValidationErrorCsvBuilder.build(
+ headers: cached[:headers],
+ csv_data: cached[:csv_data],
+ row_errors: cached[:row_errors]
+ )
+ send_data csv, filename: 'import_validation_errors.csv', type: 'text/csv', disposition: 'attachment'
+ end
+
def create
files = nil
files = resolve_create_files
@@ -84,6 +105,18 @@ def render_invalid_uploaded_files_response
# @param zip_file [File, nil] an optional ZIP containing file attachments
# @param admin_set_id [String, nil] optional admin set ID for validation context
# @return [Hash] validation result data
+ def cache_validation_errors(validation_result, raw_csv_data)
+ return nil unless validation_result[:rowErrors]&.any?
+
+ key = "guided_import_errors:#{session.id}:#{Time.now.to_i}"
+ Rails.cache.write(
+ key,
+ { headers: validation_result[:headers], csv_data: raw_csv_data, row_errors: validation_result[:rowErrors] },
+ expires_in: 1.hour
+ )
+ key
+ end
+
def run_validation(csv_file, zip_file, admin_set_id: nil)
CsvParser.validate_csv(csv_file: csv_file, zip_file: zip_file, admin_set_id: admin_set_id)
end
diff --git a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
index fc483605..02e79764 100644
--- a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
+++ b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
@@ -47,6 +47,7 @@ def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
file_sets: file_sets
)
apply_rights_statement_validation_override!(result, missing_required)
+ result[:raw_csv_data] = csv_data
result
end
diff --git a/app/services/bulkrax/validation_error_csv_builder.rb b/app/services/bulkrax/validation_error_csv_builder.rb
new file mode 100644
index 00000000..f6a57474
--- /dev/null
+++ b/app/services/bulkrax/validation_error_csv_builder.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+module Bulkrax
+ # Builds a CSV string containing only the rows from a validated CSV that have
+  # row-level errors. A `row` column (the original 1-based row number) and an
+  # `errors` column are prepended; multiple errors on the same row are joined with " | ".
+ #
+ # Usage:
+ # csv = Bulkrax::ValidationErrorCsvBuilder.build(
+ # headers: result[:headers],
+ # csv_data: result[:raw_csv_data],
+ # row_errors: result[:rowErrors]
+ # )
+ class ValidationErrorCsvBuilder
+ # @param headers [Array] original CSV headers in order
+    # @param csv_data [Array] one entry per data row, in original file order; each
+    #   hash must have :raw_row (String-keyed hash of column => value). Row numbers
+    #   are positional: the first data row is row 2 (the header is row 1), matching
+ # @param row_errors [Array] each hash has :row (Integer) and :message (String)
+ # @return [String] CSV content
+ def self.build(headers:, csv_data:, row_errors:)
+ new(headers: headers, csv_data: csv_data, row_errors: row_errors).build
+ end
+
+ def initialize(headers:, csv_data:, row_errors:)
+ @headers = headers
+ @csv_data = csv_data
+ @row_errors = row_errors
+ end
+
+ def build
+ errors_by_row = group_errors_by_row
+
+ CSV.generate(force_quotes: false) do |csv|
+ csv << ['row', 'errors'] + @headers
+
+ @csv_data.each_with_index do |record, index|
+ row_number = index + 2 # header is row 1; first data row is row 2
+ next unless errors_by_row.key?(row_number)
+
+ error_messages = errors_by_row[row_number].map { |e| e[:message] }.join(' | ')
+ raw_row = record[:raw_row] || {}
+ csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] }
+ end
+ end
+ end
+
+ private
+
+ def group_errors_by_row
+ @row_errors.each_with_object({}) do |error, hash|
+ row_num = error[:row]
+ hash[row_num] ||= []
+ hash[row_num] << error
+ end
+ end
+ end
+end
diff --git a/config/locales/bulkrax.de.yml b/config/locales/bulkrax.de.yml
index 5b7994aa..8e4d7a4e 100644
--- a/config/locales/bulkrax.de.yml
+++ b/config/locales/bulkrax.de.yml
@@ -158,6 +158,7 @@ de:
gauge_large_msg: Große Importe dauern länger und sind schwieriger zu debuggen. Wir empfehlen dringend, sie in kleinere Pakete von maximal %{limit} aufzuteilen.
gauge_moderate: Mäßig
gauge_moderate_msg: Erwägen Sie, die Arbeit in kleinere Chargen aufzuteilen, um die Fehlerbehebung zu vereinfachen.
+ download_validation_errors_csv: Fehler-CSV herunterladen
gauge_optimal: Optimal
gauge_optimal_msg: Großartig! Kleinere Importe lassen sich leichter überprüfen und Fehler beheben.
hierarchy_too_deep: Hierarchie zu tief (maximal %{max} Ebenen)
diff --git a/config/locales/bulkrax.en.yml b/config/locales/bulkrax.en.yml
index 33588ad8..ed105464 100644
--- a/config/locales/bulkrax.en.yml
+++ b/config/locales/bulkrax.en.yml
@@ -183,6 +183,7 @@ en:
gauge_large_msg: Large imports take longer and are harder to debug. We strongly recommend splitting into batches of %{limit} or fewer.
gauge_moderate: Moderate
gauge_moderate_msg: Consider splitting into smaller batches for easier error resolution.
+ download_validation_errors_csv: Download errors CSV
gauge_optimal: Optimal
gauge_optimal_msg: Great! Smaller imports are easier to validate and troubleshoot.
hierarchy_too_deep: Hierarchy too deep (max %{max} levels)
diff --git a/config/locales/bulkrax.es.yml b/config/locales/bulkrax.es.yml
index c5507ec3..52d6cf53 100644
--- a/config/locales/bulkrax.es.yml
+++ b/config/locales/bulkrax.es.yml
@@ -158,6 +158,7 @@ es:
gauge_large_msg: Las importaciones grandes tardan más y son más difíciles de depurar. Recomendamos encarecidamente dividirlas en lotes de %{limit} o menos.
gauge_moderate: Moderado
gauge_moderate_msg: Considere dividirlo en lotes más pequeños para facilitar la resolución de errores.
+ download_validation_errors_csv: Descargar CSV de errores
gauge_optimal: Óptimo
gauge_optimal_msg: "¡Genial! Las importaciones más pequeñas son más fáciles de validar y solucionar."
hierarchy_too_deep: Jerarquía demasiado profunda (máximo %{max} niveles)
diff --git a/config/locales/bulkrax.fr.yml b/config/locales/bulkrax.fr.yml
index 6ee6653f..c010c444 100644
--- a/config/locales/bulkrax.fr.yml
+++ b/config/locales/bulkrax.fr.yml
@@ -158,6 +158,7 @@ fr:
gauge_large_msg: Les importations volumineuses sont plus longues et plus difficiles à déboguer. Nous vous recommandons vivement de les diviser en lots de %{limit} ou moins.
gauge_moderate: Modéré
gauge_moderate_msg: Envisagez de diviser le travail en lots plus petits pour faciliter la résolution des erreurs.
+ download_validation_errors_csv: Télécharger le CSV des erreurs
gauge_optimal: Optimal
gauge_optimal_msg: Super ! Les importations de petite taille sont plus faciles à valider et à dépanner.
hierarchy_too_deep: Hiérarchie trop profonde (max. %{max} niveaux)
diff --git a/config/locales/bulkrax.it.yml b/config/locales/bulkrax.it.yml
index 5b6f3b41..38028a14 100644
--- a/config/locales/bulkrax.it.yml
+++ b/config/locales/bulkrax.it.yml
@@ -158,6 +158,7 @@ it:
gauge_large_msg: Le importazioni di grandi dimensioni richiedono più tempo e sono più difficili da eseguire il debug. Consigliamo vivamente di suddividere i dati in lotti di %{limit} o meno.
gauge_moderate: Moderare
gauge_moderate_msg: Per una più facile risoluzione degli errori, si consiglia di suddividere il lavoro in lotti più piccoli.
+ download_validation_errors_csv: Scarica CSV degli errori
gauge_optimal: Ottimale
gauge_optimal_msg: Ottimo! Le importazioni più piccole sono più facili da convalidare e risolvere i problemi.
hierarchy_too_deep: Gerarchia troppo profonda (livelli massimi %{max})
diff --git a/config/locales/bulkrax.pt-BR.yml b/config/locales/bulkrax.pt-BR.yml
index c1f9cab4..a1619f8d 100644
--- a/config/locales/bulkrax.pt-BR.yml
+++ b/config/locales/bulkrax.pt-BR.yml
@@ -158,6 +158,7 @@ pt-BR:
gauge_large_msg: Importações grandes demoram mais e são mais difíceis de depurar. Recomendamos fortemente dividi-las em lotes de %{limit} ou menos.
gauge_moderate: Moderado
gauge_moderate_msg: Considere dividir em lotes menores para facilitar a resolução de erros.
+ download_validation_errors_csv: Baixar CSV de erros
gauge_optimal: Ótimo
gauge_optimal_msg: Ótimo! Importações menores são mais fáceis de validar e solucionar problemas.
hierarchy_too_deep: Hierarquia muito profunda (máximo de %{max} níveis)
diff --git a/config/locales/bulkrax.zh.yml b/config/locales/bulkrax.zh.yml
index 5b7111df..de4540a5 100644
--- a/config/locales/bulkrax.zh.yml
+++ b/config/locales/bulkrax.zh.yml
@@ -158,6 +158,7 @@ zh:
gauge_large_msg: 大型导入操作耗时更长,调试难度也更大。我们强烈建议将导入操作拆分成若干批次,每批次不超过 %{limit} 个文件。
gauge_moderate: 缓和
gauge_moderate_msg: 为了便于排查错误,可以考虑分成更小的批次进行处理。
+ download_validation_errors_csv: 下载错误 CSV
gauge_optimal: 最佳的
gauge_optimal_msg: 太好了!较小的导入操作更容易验证和排查问题。
hierarchy_too_deep: 层级过深(最多 %{max} 层)
diff --git a/config/routes.rb b/config/routes.rb
index df66c396..186e96c2 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -14,6 +14,7 @@
get 'new/guided_import', to: 'guided_imports#new', as: :guided_import_new
post 'guided_import', to: 'guided_imports#create', as: :guided_import_create
post 'guided_import/validate', to: 'guided_imports#validate', as: :guided_import_validate
+ get 'guided_import/download_validation_errors', to: 'guided_imports#download_validation_errors', as: :guided_import_download_validation_errors
get 'guided_import/demo_scenarios', to: 'guided_imports#demo_scenarios', as: :guided_import_demo_scenarios if Bulkrax.config.guided_import_demo_scenarios_enabled
end
diff --git a/spec/services/bulkrax/validation_error_csv_builder_spec.rb b/spec/services/bulkrax/validation_error_csv_builder_spec.rb
new file mode 100644
index 00000000..010bc89a
--- /dev/null
+++ b/spec/services/bulkrax/validation_error_csv_builder_spec.rb
@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+RSpec.describe Bulkrax::ValidationErrorCsvBuilder do
+ let(:headers) { ['model', 'source_identifier', 'title', 'description'] }
+
+ let(:csv_data) do
+ [
+ { source_identifier: 'id-001', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-001', 'title' => 'My Title', 'description' => 'A desc' } },
+ { source_identifier: 'id-002', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-002', 'title' => 'Good Row', 'description' => '' } },
+ { source_identifier: 'id-003', raw_row: { 'model' => 'Collection', 'source_identifier' => 'id-003', 'title' => '', 'description' => '' } }
+ ]
+ end
+
+ describe '.build' do
+ context 'when one row has a single error' do
+ let(:row_errors) do
+ [{ row: 2, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: "Required field 'title' is missing" }]
+ end
+
+ it 'includes only the errored row plus the header' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows.length).to eq(2)
+ end
+
+ it 'puts the original row number in column 1' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows[1][0]).to eq('2')
+ end
+
+ it 'puts the error message in column 2' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows[1][1]).to eq("Required field 'title' is missing")
+ end
+
+ it 'preserves the original row values in subsequent columns' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows[1][2]).to eq('GenericWork')
+ expect(rows[1][3]).to eq('id-001')
+ expect(rows[1][4]).to eq('My Title')
+ end
+
+ it 'excludes clean rows' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ source_ids = rows[1..].map { |r| r[3] }
+ expect(source_ids).not_to include('id-002')
+ end
+ end
+
+ context 'when one row has multiple errors' do
+ let(:row_errors) do
+ [
+ { row: 4, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' },
+ { row: 4, severity: 'error', category: 'missing_required_value', column: 'description', value: nil, message: 'Description is required' }
+ ]
+ end
+
+ it 'joins multiple error messages with " | "' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows[1][1]).to eq('Title is required | Description is required')
+ end
+ end
+
+ context 'when errors span multiple rows' do
+ let(:row_errors) do
+ [
+ { row: 2, severity: 'error', category: 'duplicate_source_identifier', column: 'source_identifier', value: 'id-001', message: 'Duplicate source_identifier' },
+ { row: 4, severity: 'warning', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' }
+ ]
+ end
+
+ it 'includes one output row per errored input row' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows.length).to eq(3) # header + 2 errored rows
+ end
+
+ it 'outputs errored rows in original order' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows[1][3]).to eq('id-001')
+ expect(rows[2][3]).to eq('id-003')
+ end
+ end
+
+ context 'header row' do
+ let(:row_errors) { [{ row: 2, severity: 'error', category: 'test', column: 'title', value: nil, message: 'Error' }] }
+
+ it 'has "row" as the first column and "errors" as the second' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows.first[0]).to eq('row')
+ expect(rows.first[1]).to eq('errors')
+ end
+
+ it 'preserves the original headers after the row and errors columns' do
+ result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
+ rows = CSV.parse(result)
+ expect(rows.first[2..]).to eq(headers)
+ end
+ end
+ end
+end