Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions app/assets/javascripts/bulkrax/importers_stepper.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@
// API endpoints
ENDPOINTS: {
DEMO_SCENARIOS: '/importers/guided_import/demo_scenarios',
VALIDATE: '/importers/guided_import/validate'
VALIDATE: '/importers/guided_import/validate',
DOWNLOAD_VALIDATION_ERRORS: '/importers/guided_import/download_validation_errors'
}
}

Expand Down Expand Up @@ -1560,7 +1561,8 @@
missingFiles: data.missingFiles || data.missing_files,
foundFiles: data.foundFiles != null ? data.foundFiles : data.found_files,
zipIncluded: data.zipIncluded != null ? data.zipIncluded : data.zip_included,
messages: data.messages
messages: data.messages,
validationErrorsCacheKey: data.validationErrorsCacheKey || null
}
}

Expand Down Expand Up @@ -1653,6 +1655,24 @@
if (data.hasWarnings || !data.isValid) {
$('.warning-acknowledgment').show()
}

// Download errors button
if (data.validationErrorsCacheKey) {
var downloadUrl =
CONSTANTS.ENDPOINTS.DOWNLOAD_VALIDATION_ERRORS + '?key=' + encodeURIComponent(data.validationErrorsCacheKey)
var $btn = $(
'<a class="btn btn-outline-secondary btn-sm mt-2" href="' +
downloadUrl +
'" download>' +
'<i class="fa fa-download"></i> ' +
t('download_validation_errors_csv') +
'</a>'
)
$('.validation-results .download-errors-container').remove()
$('.validation-results').append($('<div class="download-errors-container mt-2"></div>').append($btn))
} else {
$('.validation-results .download-errors-container').remove()
}
}

// Render import size gauge
Expand Down
35 changes: 34 additions & 1 deletion app/controllers/bulkrax/guided_imports_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,32 @@ def validate
end

admin_set_id = params[:importer]&.[](:admin_set_id)
render json: StepperResponseFormatter.format(run_validation(csv_file, zip_file, admin_set_id: admin_set_id)), status: :ok
validation_result = run_validation(csv_file, zip_file, admin_set_id: admin_set_id)
raw_csv_data = validation_result.delete(:raw_csv_data)
cache_key = cache_validation_errors(validation_result, raw_csv_data)
formatted = StepperResponseFormatter.format(validation_result)
formatted[:validationErrorsCacheKey] = cache_key
render json: formatted, status: :ok
ensure
close_file_handles(files)
end

# GET action: streams the cached row-level validation errors for the current
# session as a CSV attachment. Responds 404 when the key was not minted for
# this session or the cache entry has expired/been evicted.
def download_validation_errors
  requested_key = params[:key].to_s
  # Only honor keys scoped to this session (written by #cache_validation_errors).
  return head :not_found unless requested_key.start_with?("guided_import_errors:#{session.id}:")

  payload = Rails.cache.read(requested_key)
  return head :not_found unless payload

  error_csv = ValidationErrorCsvBuilder.build(
    headers: payload[:headers],
    csv_data: payload[:csv_data],
    row_errors: payload[:row_errors]
  )
  send_data error_csv, filename: 'import_validation_errors.csv', type: 'text/csv', disposition: 'attachment'
end

def create
files = nil
files = resolve_create_files
Expand Down Expand Up @@ -84,6 +105,18 @@ def render_invalid_uploaded_files_response
# @param zip_file [File, nil] an optional ZIP containing file attachments
# @param admin_set_id [String, nil] optional admin set ID for validation context
# @return [Hash] validation result data
# NOTE(review): the YARD tags above document #run_validation, but
# #cache_validation_errors was inserted between this comment and that method —
# relocate the comment directly above #run_validation so the docs attach to
# the method they describe.
# Persists row-level validation errors (plus the raw CSV rows needed to
# rebuild them) into Rails.cache so the client can fetch them later via
# #download_validation_errors.
#
# @param validation_result [Hash] formatted validation output; only cached
#   when :rowErrors contains at least one entry
# @param raw_csv_data [Array<Hash>, nil] parsed rows from the uploaded CSV
# @return [String, nil] the cache key handed back to the client, or nil when
#   there were no row errors to cache
def cache_validation_errors(validation_result, raw_csv_data)
  row_errors = validation_result[:rowErrors]
  return nil if row_errors.nil? || row_errors.empty?

  # Scope the key to the session so one user cannot fetch another's errors;
  # the epoch-second suffix distinguishes successive validation runs.
  cache_key = "guided_import_errors:#{session.id}:#{Time.now.to_i}"
  payload = {
    headers: validation_result[:headers],
    csv_data: raw_csv_data,
    row_errors: row_errors
  }
  Rails.cache.write(cache_key, payload, expires_in: 1.hour)
  cache_key
end

# Delegates CSV + ZIP validation to the parser.
# @param csv_file [File] the uploaded metadata CSV
# @param zip_file [File, nil] optional ZIP of file attachments
# @param admin_set_id [String, nil] optional admin set context for validation
# @return [Hash] validation result data (includes :raw_csv_data for error export)
def run_validation(csv_file, zip_file, admin_set_id: nil)
  CsvParser.validate_csv(csv_file: csv_file, zip_file: zip_file, admin_set_id: admin_set_id)
end
Expand Down
1 change: 1 addition & 0 deletions app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
file_sets: file_sets
)
apply_rights_statement_validation_override!(result, missing_required)
result[:raw_csv_data] = csv_data
result
end

Expand Down
60 changes: 60 additions & 0 deletions app/services/bulkrax/validation_error_csv_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# frozen_string_literal: true

require 'csv'

module Bulkrax
  # Builds a CSV string containing only the rows from a validated CSV that
  # have row-level errors. A `row` column (original 1-indexed row number,
  # where the header line is row 1) and an `errors` column are prepended;
  # multiple errors on the same row are joined with " | ".
  #
  # Usage:
  #   csv = Bulkrax::ValidationErrorCsvBuilder.build(
  #     headers: result[:headers],
  #     csv_data: result[:raw_csv_data],
  #     row_errors: result[:rowErrors]
  #   )
  class ValidationErrorCsvBuilder
    # @param headers [Array<String>] original CSV headers in order
    # @param csv_data [Array<Hash>] one entry per data row, in file order;
    #   each hash has :raw_row (String-keyed hash of column=>value) and may
    #   carry an explicit :row_number (Integer; validator convention: the
    #   first data row is 2). When :row_number is absent it is derived from
    #   the entry's position.
    # @param row_errors [Array<Hash>] each hash has :row (Integer) and :message (String)
    # @return [String] CSV content
    def self.build(headers:, csv_data:, row_errors:)
      new(headers: headers, csv_data: csv_data, row_errors: row_errors).build
    end

    def initialize(headers:, csv_data:, row_errors:)
      # Array() tolerates nil inputs (e.g. a cache entry missing a key).
      @headers = Array(headers)
      @csv_data = Array(csv_data)
      @row_errors = Array(row_errors)
    end

    # @return [String] CSV with a header line plus one line per errored row,
    #   in original file order
    def build
      errors_by_row = group_errors_by_row

      CSV.generate do |csv|
        csv << ['row', 'errors'] + @headers

        @csv_data.each_with_index do |record, index|
          # Prefer an explicit :row_number; otherwise assume file order
          # (header is row 1, so the first data row is row 2).
          row_number = record[:row_number] || index + 2
          errors = errors_by_row[row_number]
          next unless errors

          messages = errors.map { |e| e[:message] }.join(' | ')
          raw_row = record[:raw_row] || {}
          csv << [row_number, messages] + @headers.map { |h| raw_row[h] }
        end
      end
    end

    private

    # Index the flat error list by row number for O(1) lookup per data row.
    def group_errors_by_row
      @row_errors.group_by { |error| error[:row] }
    end
  end
end
1 change: 1 addition & 0 deletions config/locales/bulkrax.de.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ de:
download_validation_errors_csv: Fehler-CSV herunterladen
gauge_large_msg: Große Importe dauern länger und sind schwieriger zu debuggen. Wir empfehlen dringend, sie in kleinere Pakete von maximal %{limit} aufzuteilen.
gauge_moderate: Mäßig
gauge_moderate_msg: Erwägen Sie, die Arbeit in kleinere Chargen aufzuteilen, um die Fehlerbehebung zu vereinfachen.
gauge_optimal: Optimal
gauge_optimal_msg: Großartig! Kleinere Importe lassen sich leichter überprüfen und Fehler beheben.
hierarchy_too_deep: Hierarchie zu tief (maximal %{max} Ebenen)
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ en:
download_validation_errors_csv: Download errors CSV
gauge_large_msg: Large imports take longer and are harder to debug. We strongly recommend splitting into batches of %{limit} or fewer.
gauge_moderate: Moderate
gauge_moderate_msg: Consider splitting into smaller batches for easier error resolution.
gauge_optimal: Optimal
gauge_optimal_msg: Great! Smaller imports are easier to validate and troubleshoot.
hierarchy_too_deep: Hierarchy too deep (max %{max} levels)
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.es.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ es:
download_validation_errors_csv: Descargar CSV de errores
gauge_large_msg: Las importaciones grandes tardan más y son más difíciles de depurar. Recomendamos encarecidamente dividirlas en lotes de %{limit} o menos.
gauge_moderate: Moderado
gauge_moderate_msg: Considere dividirlo en lotes más pequeños para facilitar la resolución de errores.
gauge_optimal: Óptimo
gauge_optimal_msg: "¡Genial! Las importaciones más pequeñas son más fáciles de validar y solucionar."
hierarchy_too_deep: Jerarquía demasiado profunda (máximo %{max} niveles)
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.fr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ fr:
download_validation_errors_csv: Télécharger le CSV des erreurs
gauge_large_msg: Les importations volumineuses sont plus longues et plus difficiles à déboguer. Nous vous recommandons vivement de les diviser en lots de %{limit} ou moins.
gauge_moderate: Modéré
gauge_moderate_msg: Envisagez de diviser le travail en lots plus petits pour faciliter la résolution des erreurs.
gauge_optimal: Optimal
gauge_optimal_msg: Super ! Les importations de petite taille sont plus faciles à valider et à dépanner.
hierarchy_too_deep: Hiérarchie trop profonde (max. %{max} niveaux)
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.it.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ it:
download_validation_errors_csv: Scarica CSV degli errori
gauge_large_msg: Le importazioni di grandi dimensioni richiedono più tempo e sono più difficili da eseguire il debug. Consigliamo vivamente di suddividere i dati in lotti di %{limit} o meno.
gauge_moderate: Moderare
gauge_moderate_msg: Per una più facile risoluzione degli errori, si consiglia di suddividere il lavoro in lotti più piccoli.
gauge_optimal: Ottimale
gauge_optimal_msg: Ottimo! Le importazioni più piccole sono più facili da convalidare e risolvere i problemi.
hierarchy_too_deep: Gerarchia troppo profonda (livelli massimi %{max})
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.pt-BR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ pt-BR:
download_validation_errors_csv: Baixar CSV de erros
gauge_large_msg: Importações grandes demoram mais e são mais difíceis de depurar. Recomendamos fortemente dividi-las em lotes de %{limit} ou menos.
gauge_moderate: Moderado
gauge_moderate_msg: Considere dividir em lotes menores para facilitar a resolução de erros.
gauge_optimal: Ótimo
gauge_optimal_msg: Ótimo! Importações menores são mais fáceis de validar e solucionar problemas.
hierarchy_too_deep: Hierarquia muito profunda (máximo de %{max} níveis)
Expand Down
1 change: 1 addition & 0 deletions config/locales/bulkrax.zh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ zh:
download_validation_errors_csv: 下载错误 CSV
gauge_large_msg: 大型导入操作耗时更长,调试难度也更大。我们强烈建议将导入操作拆分成若干批次,每批次不超过 %{limit} 个文件。
gauge_moderate: 缓和
gauge_moderate_msg: 为了便于排查错误,可以考虑分成更小的批次进行处理。
gauge_optimal: 最佳的
gauge_optimal_msg: 太好了!较小的导入操作更容易验证和排查问题。
hierarchy_too_deep: 层级过深(最多 %{max} 层)
Expand Down
1 change: 1 addition & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
get 'new/guided_import', to: 'guided_imports#new', as: :guided_import_new
post 'guided_import', to: 'guided_imports#create', as: :guided_import_create
post 'guided_import/validate', to: 'guided_imports#validate', as: :guided_import_validate
get 'guided_import/download_validation_errors', to: 'guided_imports#download_validation_errors', as: :guided_import_download_validation_errors
get 'guided_import/demo_scenarios', to: 'guided_imports#demo_scenarios', as: :guided_import_demo_scenarios if Bulkrax.config.guided_import_demo_scenarios_enabled
end

Expand Down
110 changes: 110 additions & 0 deletions spec/services/bulkrax/validation_error_csv_builder_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# frozen_string_literal: true

# Specs for Bulkrax::ValidationErrorCsvBuilder, which emits a CSV containing
# only the errored rows of a validated import CSV, prefixed with `row` and
# `errors` columns. Fixtures mirror the validator's data shapes: csv_data is
# one hash per data row (with a String-keyed :raw_row), and row_errors use
# the 1-indexed :row convention where the header line is row 1 (so the three
# data rows below are rows 2, 3, and 4).
require 'rails_helper'

RSpec.describe Bulkrax::ValidationErrorCsvBuilder do
# Original CSV column order; expected to reappear after the two prepended columns.
let(:headers) { ['model', 'source_identifier', 'title', 'description'] }

# Three data rows: id-001 (row 2), id-002 (row 3), id-003 (row 4).
let(:csv_data) do
[
{ source_identifier: 'id-001', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-001', 'title' => 'My Title', 'description' => 'A desc' } },
{ source_identifier: 'id-002', raw_row: { 'model' => 'GenericWork', 'source_identifier' => 'id-002', 'title' => 'Good Row', 'description' => '' } },
{ source_identifier: 'id-003', raw_row: { 'model' => 'Collection', 'source_identifier' => 'id-003', 'title' => '', 'description' => '' } }
]
end

describe '.build' do
# Single error on row 2: output should be header + exactly one data line.
context 'when one row has a single error' do
let(:row_errors) do
[{ row: 2, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: "Required field 'title' is missing" }]
end

it 'includes only the errored row plus the header' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.length).to eq(2)
end

it 'puts the original row number in column 1' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
# CSV.parse yields strings, hence '2' rather than 2.
expect(rows[1][0]).to eq('2')
end

it 'puts the error message in column 2' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][1]).to eq("Required field 'title' is missing")
end

it 'preserves the original row values in subsequent columns' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][2]).to eq('GenericWork')
expect(rows[1][3]).to eq('id-001')
expect(rows[1][4]).to eq('My Title')
end

it 'excludes clean rows' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
# Column 3 of the output is source_identifier (after row/errors columns).
source_ids = rows[1..].map { |r| r[3] }
expect(source_ids).not_to include('id-002')
end
end

# Two errors on the same row (row 4) should collapse into one output line.
context 'when one row has multiple errors' do
let(:row_errors) do
[
{ row: 4, severity: 'error', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' },
{ row: 4, severity: 'error', category: 'missing_required_value', column: 'description', value: nil, message: 'Description is required' }
]
end

it 'joins multiple error messages with " | "' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][1]).to eq('Title is required | Description is required')
end
end

# Errors on rows 2 and 4 (row 3 is clean) — output order must follow file order.
context 'when errors span multiple rows' do
let(:row_errors) do
[
{ row: 2, severity: 'error', category: 'duplicate_source_identifier', column: 'source_identifier', value: 'id-001', message: 'Duplicate source_identifier' },
{ row: 4, severity: 'warning', category: 'missing_required_value', column: 'title', value: nil, message: 'Title is required' }
]
end

it 'includes one output row per errored input row' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.length).to eq(3) # header + 2 errored rows
end

it 'outputs errored rows in original order' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows[1][3]).to eq('id-001')
expect(rows[2][3]).to eq('id-003')
end
end

# The prepended columns and original headers in the output's first line.
context 'header row' do
let(:row_errors) { [{ row: 2, severity: 'error', category: 'test', column: 'title', value: nil, message: 'Error' }] }

it 'has "row" as the first column and "errors" as the second' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.first[0]).to eq('row')
expect(rows.first[1]).to eq('errors')
end

it 'preserves the original headers after the row and errors columns' do
result = described_class.build(headers: headers, csv_data: csv_data, row_errors: row_errors)
rows = CSV.parse(result)
expect(rows.first[2..]).to eq(headers)
end
end
end
end
Loading