diff --git a/app/assets/javascripts/bulkrax/importers_stepper.js b/app/assets/javascripts/bulkrax/importers_stepper.js index 6a5ab211..066af38c 100644 --- a/app/assets/javascripts/bulkrax/importers_stepper.js +++ b/app/assets/javascripts/bulkrax/importers_stepper.js @@ -94,7 +94,8 @@ // API endpoints ENDPOINTS: { DEMO_SCENARIOS: '/importers/guided_import/demo_scenarios', - VALIDATE: '/importers/guided_import/validate' + VALIDATE: '/importers/guided_import/validate', + DOWNLOAD_VALIDATION_ERRORS: '/importers/guided_import/download_validation_errors' } } @@ -1560,7 +1561,8 @@ missingFiles: data.missingFiles || data.missing_files, foundFiles: data.foundFiles != null ? data.foundFiles : data.found_files, zipIncluded: data.zipIncluded != null ? data.zipIncluded : data.zip_included, - messages: data.messages + messages: data.messages, + validationErrorsCacheKey: data.validationErrorsCacheKey || null } } @@ -1653,6 +1655,24 @@ if (data.hasWarnings || !data.isValid) { $('.warning-acknowledgment').show() } + + // Download errors button + if (data.validationErrorsCacheKey) { + var downloadUrl = + CONSTANTS.ENDPOINTS.DOWNLOAD_VALIDATION_ERRORS + '?key=' + encodeURIComponent(data.validationErrorsCacheKey) + var $btn = $( + '' + + ' ' + + t('download_validation_errors_csv') + + '' + ) + $('.validation-results .download-errors-container').remove() + $('.validation-results').append($('
').append($btn)) + } else { + $('.validation-results .download-errors-container').remove() + } } // Render import size gauge diff --git a/app/controllers/bulkrax/guided_imports_controller.rb b/app/controllers/bulkrax/guided_imports_controller.rb index ea6a490d..26f43e65 100644 --- a/app/controllers/bulkrax/guided_imports_controller.rb +++ b/app/controllers/bulkrax/guided_imports_controller.rb @@ -37,11 +37,32 @@ def validate end admin_set_id = params[:importer]&.[](:admin_set_id) - render json: StepperResponseFormatter.format(run_validation(csv_file, zip_file, admin_set_id: admin_set_id)), status: :ok + validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id) + raw_csv_data = validation_result.delete(:raw_csv_data) + cache_key = cache_validation_errors(validation_result, raw_csv_data) + formatted = StepperResponseFormatter.format(validation_result) + formatted[:validationErrorsCacheKey] = cache_key + render json: formatted, status: :ok ensure close_file_handles(files) end + def download_validation_errors + cache_key = params[:key].to_s + expected_prefix = "guided_import_errors:#{session.id}:" + return head :not_found unless cache_key.start_with?(expected_prefix) + + cached = Rails.cache.read(cache_key) + return head :not_found unless cached + + csv = ValidationErrorCsvBuilder.build( + headers: cached[:headers], + csv_data: cached[:csv_data], + row_errors: cached[:row_errors] + ) + send_data csv, filename: 'import_validation_errors.csv', type: 'text/csv', disposition: 'attachment' + end + def create files = nil files = resolve_create_files @@ -84,6 +105,18 @@ def render_invalid_uploaded_files_response # @param zip_file [File, nil] an optional ZIP containing file attachments # @param admin_set_id [String, nil] optional admin set ID for validation context # @return [Hash] validation result data + def cache_validation_errors(validation_result, raw_csv_data) + return nil unless validation_result[:rowErrors]&.any? 
+ + key = "guided_import_errors:#{session.id}:#{Time.now.to_i}" + Rails.cache.write( + key, + { headers: validation_result[:headers], csv_data: raw_csv_data, row_errors: validation_result[:rowErrors] }, + expires_in: 1.hour + ) + key + end + def run_validation(csv_file, zip_file, admin_set_id: nil) CsvParser.validate_csv(csv_file: csv_file, zip_file: zip_file, admin_set_id: admin_set_id) end diff --git a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb index fc483605..02e79764 100644 --- a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb +++ b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb @@ -47,6 +47,7 @@ def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil) file_sets: file_sets ) apply_rights_statement_validation_override!(result, missing_required) + result[:raw_csv_data] = csv_data result end diff --git a/app/services/bulkrax/validation_error_csv_builder.rb b/app/services/bulkrax/validation_error_csv_builder.rb new file mode 100644 index 00000000..f6a57474 --- /dev/null +++ b/app/services/bulkrax/validation_error_csv_builder.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'csv' + +module Bulkrax + # Builds a CSV string containing only the rows from a validated CSV that have + # row-level errors. An `errors` column is prepended as column 1; multiple + # errors on the same row are joined with " | ". 
# + # Usage: + #   csv = Bulkrax::ValidationErrorCsvBuilder.build( + #     headers: result[:headers], + #     csv_data: result[:raw_csv_data], + #     row_errors: result[:rowErrors] + #   ) + class ValidationErrorCsvBuilder + # @param headers [Array] original CSV headers in order + # @param csv_data [Array] one entry per data row, in original file order; each + #   hash must have :raw_row (String-keyed hash of column=>value); row numbers + #   are derived from position (header is row 1, first data row is row 2) + # @param row_errors [Array] each hash has :row (Integer) and :message (String) + # @return [String] CSV content + def self.build(headers:, csv_data:, row_errors:) + new(headers: headers, csv_data: csv_data, row_errors: row_errors).build + end + + def initialize(headers:, csv_data:, row_errors:) + @headers = headers + @csv_data = csv_data + @row_errors = row_errors + end + + def build + errors_by_row = group_errors_by_row + + CSV.generate(force_quotes: false) do |csv| + csv << ['row', 'errors'] + @headers + + @csv_data.each_with_index do |record, index| + row_number = index + 2 # header is row 1; first data row is row 2 + next unless errors_by_row.key?(row_number) + + error_messages = errors_by_row[row_number].map { |e| e[:message] }.join(' | ') + raw_row = record[:raw_row] || {} + csv << [row_number, error_messages] + @headers.map { |h| raw_row[h] } + end + end + end + + private + + def group_errors_by_row + @row_errors.each_with_object({}) do |error, hash| + row_num = error[:row] + hash[row_num] ||= [] + hash[row_num] << error + end + end + end +end diff --git a/config/locales/bulkrax.de.yml b/config/locales/bulkrax.de.yml index 5b7994aa..8e4d7a4e 100644 --- a/config/locales/bulkrax.de.yml +++ b/config/locales/bulkrax.de.yml @@ -158,6 +158,7 @@ de: gauge_large_msg: Große Importe dauern länger und sind schwieriger zu debuggen. Wir empfehlen dringend, sie in kleinere Pakete von maximal %{limit} aufzuteilen. 
gauge_moderate: Mäßig gauge_moderate_msg: Erwägen Sie, die Arbeit in kleinere Chargen aufzuteilen, um die Fehlerbehebung zu vereinfachen. + download_validation_errors_csv: Fehler-CSV herunterladen gauge_optimal: Optimal gauge_optimal_msg: Großartig! Kleinere Importe lassen sich leichter überprüfen und Fehler beheben. hierarchy_too_deep: Hierarchie zu tief (maximal %{max} Ebenen) diff --git a/config/locales/bulkrax.en.yml b/config/locales/bulkrax.en.yml index 33588ad8..ed105464 100644 --- a/config/locales/bulkrax.en.yml +++ b/config/locales/bulkrax.en.yml @@ -183,6 +183,7 @@ en: gauge_large_msg: Large imports take longer and are harder to debug. We strongly recommend splitting into batches of %{limit} or fewer. gauge_moderate: Moderate gauge_moderate_msg: Consider splitting into smaller batches for easier error resolution. + download_validation_errors_csv: Download errors CSV gauge_optimal: Optimal gauge_optimal_msg: Great! Smaller imports are easier to validate and troubleshoot. hierarchy_too_deep: Hierarchy too deep (max %{max} levels) diff --git a/config/locales/bulkrax.es.yml b/config/locales/bulkrax.es.yml index c5507ec3..52d6cf53 100644 --- a/config/locales/bulkrax.es.yml +++ b/config/locales/bulkrax.es.yml @@ -158,6 +158,7 @@ es: gauge_large_msg: Las importaciones grandes tardan más y son más difíciles de depurar. Recomendamos encarecidamente dividirlas en lotes de %{limit} o menos. gauge_moderate: Moderado gauge_moderate_msg: Considere dividirlo en lotes más pequeños para facilitar la resolución de errores. + download_validation_errors_csv: Descargar CSV de errores gauge_optimal: Óptimo gauge_optimal_msg: "¡Genial! Las importaciones más pequeñas son más fáciles de validar y solucionar." 
hierarchy_too_deep: Jerarquía demasiado profunda (máximo %{max} niveles) diff --git a/config/locales/bulkrax.fr.yml b/config/locales/bulkrax.fr.yml index 6ee6653f..c010c444 100644 --- a/config/locales/bulkrax.fr.yml +++ b/config/locales/bulkrax.fr.yml @@ -158,6 +158,7 @@ fr: gauge_large_msg: Les importations volumineuses sont plus longues et plus difficiles à déboguer. Nous vous recommandons vivement de les diviser en lots de %{limit} ou moins. gauge_moderate: Modéré gauge_moderate_msg: Envisagez de diviser le travail en lots plus petits pour faciliter la résolution des erreurs. + download_validation_errors_csv: Télécharger le CSV des erreurs gauge_optimal: Optimal gauge_optimal_msg: Super ! Les importations de petite taille sont plus faciles à valider et à dépanner. hierarchy_too_deep: Hiérarchie trop profonde (max. %{max} niveaux) diff --git a/config/locales/bulkrax.it.yml b/config/locales/bulkrax.it.yml index 5b6f3b41..38028a14 100644 --- a/config/locales/bulkrax.it.yml +++ b/config/locales/bulkrax.it.yml @@ -158,6 +158,7 @@ it: gauge_large_msg: Le importazioni di grandi dimensioni richiedono più tempo e sono più difficili da eseguire il debug. Consigliamo vivamente di suddividere i dati in lotti di %{limit} o meno. gauge_moderate: Moderare gauge_moderate_msg: Per una più facile risoluzione degli errori, si consiglia di suddividere il lavoro in lotti più piccoli. + download_validation_errors_csv: Scarica CSV degli errori gauge_optimal: Ottimale gauge_optimal_msg: Ottimo! Le importazioni più piccole sono più facili da convalidare e risolvere i problemi. hierarchy_too_deep: Gerarchia troppo profonda (livelli massimi %{max}) diff --git a/config/locales/bulkrax.pt-BR.yml b/config/locales/bulkrax.pt-BR.yml index c1f9cab4..a1619f8d 100644 --- a/config/locales/bulkrax.pt-BR.yml +++ b/config/locales/bulkrax.pt-BR.yml @@ -158,6 +158,7 @@ pt-BR: gauge_large_msg: Importações grandes demoram mais e são mais difíceis de depurar. 
Recomendamos fortemente dividi-las em lotes de %{limit} ou menos. gauge_moderate: Moderado gauge_moderate_msg: Considere dividir em lotes menores para facilitar a resolução de erros. + download_validation_errors_csv: Baixar CSV de erros gauge_optimal: Ótimo gauge_optimal_msg: Ótimo! Importações menores são mais fáceis de validar e solucionar problemas. hierarchy_too_deep: Hierarquia muito profunda (máximo de %{max} níveis) diff --git a/config/locales/bulkrax.zh.yml b/config/locales/bulkrax.zh.yml index 5b7111df..de4540a5 100644 --- a/config/locales/bulkrax.zh.yml +++ b/config/locales/bulkrax.zh.yml @@ -158,6 +158,7 @@ zh: gauge_large_msg: 大型导入操作耗时更长,调试难度也更大。我们强烈建议将导入操作拆分成若干批次,每批次不超过 %{limit} 个文件。 gauge_moderate: 缓和 gauge_moderate_msg: 为了便于排查错误,可以考虑分成更小的批次进行处理。 + download_validation_errors_csv: 下载错误 CSV gauge_optimal: 最佳的 gauge_optimal_msg: 太好了!较小的导入操作更容易验证和排查问题。 hierarchy_too_deep: 层级过深(最多 %{max} 层) diff --git a/config/routes.rb b/config/routes.rb index df66c396..186e96c2 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -14,6 +14,7 @@ get 'new/guided_import', to: 'guided_imports#new', as: :guided_import_new post 'guided_import', to: 'guided_imports#create', as: :guided_import_create post 'guided_import/validate', to: 'guided_imports#validate', as: :guided_import_validate + get 'guided_import/download_validation_errors', to: 'guided_imports#download_validation_errors', as: :guided_import_download_validation_errors get 'guided_import/demo_scenarios', to: 'guided_imports#demo_scenarios', as: :guided_import_demo_scenarios if Bulkrax.config.guided_import_demo_scenarios_enabled end diff --git a/spec/services/bulkrax/validation_error_csv_builder_spec.rb b/spec/services/bulkrax/validation_error_csv_builder_spec.rb new file mode 100644 index 00000000..010bc89a --- /dev/null +++ b/spec/services/bulkrax/validation_error_csv_builder_spec.rb @@ -0,0 +1,110 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Bulkrax::ValidationErrorCsvBuilder do 
+ let(:headers) { ['model', 'source_identifier', 'title', 'description'] } + + let(:csv_data) do + [ + { source_identifier: 'id-001', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-001', 'title' => 'My Title', 'description' => 'A desc' } }, + { source_identifier: 'id-002', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-002', 'title' => 'Good Row', 'description' => '' } }, + { source_identifier: 'id-003', raw_row: { 'model' => 'Collection', 'source_identifier' => 'id-003', 'title' => '', 'description' => '' } } + ] + end + + describe '.build' do + context 'when one row has a single error' do + let(:row_errors) do + [{ row: 2, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: "Required field 'title' is missing" }] + end + + it 'includes only the errored row plus the header' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows.length).to eq(2) + end + + it 'puts the original row number in column 1' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows[1][0]).to eq('2') + end + + it 'puts the error message in column 2' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows[1][1]).to eq("Required field 'title' is missing") + end + + it 'preserves the original row values in subsequent columns' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows[1][2]).to eq('GenericWork') + expect(rows[1][3]).to eq('id-001') + expect(rows[1][4]).to eq('My Title') + end + + it 'excludes clean rows' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + source_ids = rows[1..].map { |r| r[3] } + 
expect(source_ids).not_to include('id-002') + end + end + + context 'when one row has multiple errors' do + let(:row_errors) do + [ + { row: 4, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' }, + { row: 4, severity: 'error', category: 'missing_required_value', column: 'description', value: nil, message: 'Description is required' } + ] + end + + it 'joins multiple error messages with " | "' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows[1][1]).to eq('Title is required | Description is required') + end + end + + context 'when errors span multiple rows' do + let(:row_errors) do + [ + { row: 2, severity: 'error', category: 'duplicate_source_identifier', column: 'source_identifier', value: 'id-001', message: 'Duplicate source_identifier' }, + { row: 4, severity: 'warning', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' } + ] + end + + it 'includes one output row per errored input row' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows.length).to eq(3) # header + 2 errored rows + end + + it 'outputs errored rows in original order' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows[1][3]).to eq('id-001') + expect(rows[2][3]).to eq('id-003') + end + end + + context 'header row' do + let(:row_errors) { [{ row: 2, severity: 'error', category: 'test', column: 'title', value: nil, message: 'Error' }] } + + it 'has "row" as the first column and "errors" as the second' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows.first[0]).to eq('row') + expect(rows.first[1]).to eq('errors') + end + + it 'preserves the original 
headers after the row and errors columns' do + result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors) + rows = CSV.parse(result) + expect(rows.first[2..]).to eq(headers) + end + end + end +end