diff --git a/app/assets/javascripts/bulkrax/importers_stepper.js b/app/assets/javascripts/bulkrax/importers_stepper.js
index 0f7631c1..e2c9d314 100644
--- a/app/assets/javascripts/bulkrax/importers_stepper.js
+++ b/app/assets/javascripts/bulkrax/importers_stepper.js
@@ -1579,8 +1579,8 @@
})
allItems.forEach(function (item) {
- if (item.childrenIds && item.childrenIds.length > 0) {
- item.childrenIds.forEach(function (childId) {
+ if (item.childIds && item.childIds.length > 0) {
+ item.childIds.forEach(function (childId) {
var child = itemMap[childId]
if (child) {
if (child.parentIds.indexOf(item.id) === -1) {
@@ -1591,12 +1591,38 @@
}
})
- // Build hierarchy lookup map from normalized parentIds
- var hierarchyMap = {}
+ // Inject stub nodes for existing-record relationships so the tree can
+ // render them as children/parents even though they are not in the CSV.
allItems.forEach(function (item) {
+ // existingChildIds → the item in the CSV has a child that lives in the repo
+ ;(item.existingChildIds || []).forEach(function (childId) {
+ if (!itemMap[childId]) {
+ itemMap[childId] = { id: childId, title: childId, type: 'existing', parentIds: [], existing: true }
+ }
+ if (itemMap[childId].parentIds.indexOf(item.id) === -1) {
+ itemMap[childId].parentIds.push(item.id)
+ }
+ })
+ // existingParentIds → the item in the CSV has a parent that lives in the repo
+ ;(item.existingParentIds || []).forEach(function (parentId) {
+ if (!itemMap[parentId]) {
+ itemMap[parentId] = { id: parentId, title: parentId, type: 'existing', parentIds: [], existing: true }
+ }
+ if (item.parentIds.indexOf(parentId) === -1) {
+ item.parentIds.push(parentId)
+ }
+ })
+ })
+
+ // Build hierarchy lookup map from normalized parentIds (including existing stubs)
+ var hierarchyMap = {}
+ Object.keys(itemMap).forEach(function (id) {
+ var item = itemMap[id]
item.parentIds.forEach(function (parentId) {
if (!hierarchyMap[parentId]) { hierarchyMap[parentId] = [] }
- hierarchyMap[parentId].push(item)
+ if (hierarchyMap[parentId].indexOf(item) === -1) {
+ hierarchyMap[parentId].push(item)
+ }
})
})
@@ -1870,9 +1896,23 @@
var orphanWorks = data.works.filter(function (w) {
return !w.parentIds || w.parentIds.length === 0
})
+ // Existing-record stubs that appear as parents in the hierarchy but are not rows in the CSV
+ var existingRoots = Object.keys(hierarchyMap)
+ .filter(function (id) {
+ var allCsvIds = data.collections.concat(data.works).map(function (i) { return i.id })
+ return allCsvIds.indexOf(id) === -1
+ })
+ .map(function (id) {
+ return { id: id, title: id, type: 'existing', parentIds: [], existing: true }
+ })
var visited = new Set()
var hierarchyContent =
'
' +
+ existingRoots
+ .map(function (e) {
+ return renderTreeItem(e, hierarchyMap, 0, visited)
+ })
+ .join('') +
topLevelCollections
.map(function (c) {
return renderTreeItem(c, hierarchyMap, 0, visited)
@@ -1925,8 +1965,9 @@
var children = hierarchyMap[item.id] || []
var hasChildren = children.length > 0
+ var isExisting = !!item.existing
var icon = item.type === 'collection' ? 'fa-folder' : 'fa-file-o'
- var iconColor = item.type === 'collection' ? 'text-primary' : 'text-muted'
+ var iconColor = isExisting ? 'text-muted' : (item.type === 'collection' ? 'text-primary' : 'text-muted')
// Hidden chevron still takes up space (via fixed width in CSS) to prevent icon shifting
var chevronClass = hasChildren ? 'tree-chevron' : 'tree-chevron tree-chevron-hidden'
var chevron = '
'
@@ -1944,8 +1985,13 @@
? ' tabindex="0" role="treeitem" aria-expanded="false"'
: ''
+ var existingBadge = isExisting
+ ? '
' +
+ t('existing_record_badge') + ''
+ : ''
+
var html =
- '
' +
- '' +
+ '' +
safeTitle +
'' +
(item.parentIds && item.parentIds.length > 1
@@ -1965,6 +2011,7 @@
' ' + t('shared_badge') + ''
: '') +
count +
+ existingBadge +
'
'
if (hasChildren) {
diff --git a/app/assets/stylesheets/bulkrax/stepper/_summary.scss b/app/assets/stylesheets/bulkrax/stepper/_summary.scss
index c59620b8..edfa7995 100644
--- a/app/assets/stylesheets/bulkrax/stepper/_summary.scss
+++ b/app/assets/stylesheets/bulkrax/stepper/_summary.scss
@@ -100,6 +100,28 @@ $summary-variants: (
color: $color-text-dark;
}
+.tree-item-existing {
+ opacity: 0.7;
+}
+
+.tree-label-existing {
+ color: $color-text-muted;
+ font-style: italic;
+}
+
+.tree-existing-badge {
+ display: inline-block;
+ font-size: 10px;
+ font-weight: 600;
+ color: $color-text-muted;
+ background: $bg-muted;
+ border-radius: 10px;
+ padding: 1px 7px;
+ margin-left: 6px;
+ white-space: nowrap;
+ vertical-align: middle;
+}
+
.tree-shared-badge {
display: inline-flex;
align-items: center;
diff --git a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
index ad9e9e40..cd3613bd 100644
--- a/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
+++ b/app/parsers/concerns/bulkrax/csv_parser/csv_validation.rb
@@ -2,7 +2,7 @@
module Bulkrax
class CsvParser < ApplicationParser
- module CsvValidation # rubocop:disable Metrics/ModuleLength
+ module CsvValidation
extend ActiveSupport::Concern
included do
@@ -12,265 +12,102 @@ module CsvValidation # rubocop:disable Metrics/ModuleLength
end
class_methods do
+ include CsvValidationHelpers
+
# Validate a CSV (and optional zip) without a persisted Importer record.
#
# @param csv_file [File, ActionDispatch::Http::UploadedFile, String] path or file object
# @param zip_file [File, ActionDispatch::Http::UploadedFile, nil]
# @param admin_set_id [String, nil]
# @return [Hash] validation result compatible with the guided import UI
- def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
- # 1. Read headers — use CsvEntry.read_data so header normalisation
- # (special-char stripping, symbolisation) is identical to a real import.
+ def validate_csv(csv_file:, zip_file: nil, admin_set_id: nil)
+ raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer =
+ parse_csv_inputs(csv_file, admin_set_id)
+
+ all_ids = csv_data.map { |r| r[:source_identifier] }.compact.to_set
+
+ header_issues = check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer)
+ missing_required = header_issues[:missing_required]
+ find_record = build_find_record(mapping_manager, mappings)
+ row_errors = run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record)
+ file_validator = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
+ collections, works, file_sets = extract_validation_items(csv_data, all_ids, find_record)
+
+ append_missing_source_id!(missing_required, headers, source_id_key, csv_data.map { |r| r[:model] }.compact.uniq)
+
+ result = assemble_result(
+ headers: headers,
+ missing_required: missing_required,
+ header_issues: header_issues,
+ row_errors: row_errors,
+ csv_data: csv_data,
+ file_validator: file_validator,
+ collections: collections,
+ works: works,
+ file_sets: file_sets
+ )
+ apply_rights_statement_validation_override!(result, missing_required)
+ result
+ end
+
+ private
+
+ # Reads the CSV, resolves mappings, parses rows, and builds field metadata.
+ # Returns the values needed by all subsequent validation steps.
+ def parse_csv_inputs(csv_file, admin_set_id)
+ # Use CsvEntry.read_data so header normalisation is identical to a real import.
raw_csv = CsvEntry.read_data(csv_file)
headers = raw_csv.headers.map(&:to_s)
- # 2. Field mappings / column name resolution
mapping_manager = CsvTemplate::MappingManager.new
mappings = mapping_manager.mappings
- source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
- parent_key = resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping', default: :parents)
- children_key = resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children)
- file_key = resolve_validation_key(mapping_manager, key: 'file', default: :file)
+ source_id_key = resolve_validation_key(mapping_manager, flag: 'source_identifier', default: :source_identifier)
+ parent_key = resolve_validation_key(mapping_manager, flag: 'related_parents_field_mapping', default: :parents)
+ children_key = resolve_validation_key(mapping_manager, flag: 'related_children_field_mapping', default: :children)
+ file_key = resolve_validation_key(mapping_manager, key: 'file', default: :file)
- # 3. Parse rows — CsvEntry.read_data already filters blank rows and
- # returns symbol-keyed rows (same as a real import).
- csv_data = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
-
- # 4. Field metadata
+ csv_data = parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
all_models = csv_data.map { |r| r[:model] }.compact.uniq
field_analyzer = CsvTemplate::FieldAnalyzer.new(mappings, admin_set_id)
field_metadata = build_validation_field_metadata(all_models, field_analyzer)
- # 5. Valid-header set (drives unrecognised-header detection)
- valid_headers = build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
-
- # 6. Suffixed variants seen in this specific CSV (e.g. title_1, creator_2)
- suffixed_headers = headers.select { |h| h.match?(/_\d+\z/) }
- valid_headers = (valid_headers + suffixed_headers).uniq
+ [raw_csv, headers, mapping_manager, mappings, source_id_key, csv_data, field_metadata, field_analyzer]
+ end
- # 7. Header-level checks
- missing_required = find_missing_required_headers(headers, field_metadata, mapping_manager)
- unrecognized = find_unrecognized_validation_headers(headers, valid_headers)
- empty_columns = find_empty_column_positions(headers, raw_csv)
+ # Runs all header-level checks and returns a hash of results.
+ def check_headers(headers, raw_csv, mapping_manager, mappings, field_metadata, field_analyzer) # rubocop:disable Metrics/ParameterLists
+ all_models = field_metadata.keys
+ valid_headers = build_valid_validation_headers(mapping_manager, field_analyzer,
+ all_models, mappings, field_metadata)
+ suffixed = headers.select { |h| h.match?(/_\d+\z/) }
+ valid_headers = (valid_headers + suffixed).uniq
+
+ {
+ missing_required: find_missing_required_headers(headers, field_metadata, mapping_manager),
+ unrecognized: find_unrecognized_validation_headers(headers, valid_headers),
+ empty_columns: find_empty_column_positions(headers, raw_csv)
+ }
+ end
- # 8. Row-level validators
- parent_split = resolve_parent_split_pattern(mappings)
- all_ids = csv_data.map { |r| r[:source_identifier] }.compact.to_set
- validator_context = {
+ # Runs all registered row validators and returns the collected errors.
+ def run_row_validators(csv_data, all_ids, source_id_key, mappings, field_metadata, find_record) # rubocop:disable Metrics/ParameterLists
+ context = {
errors: [],
warnings: [],
seen_ids: {},
all_ids: all_ids,
source_identifier: source_id_key.to_s,
- parent_split_pattern: parent_split,
+ parent_split_pattern: resolve_parent_split_pattern(mappings),
mappings: mappings,
- field_metadata: field_metadata
+ field_metadata: field_metadata,
+ find_record_by_source_identifier: find_record
}
-
csv_data.each_with_index do |record, index|
row_number = index + 2 # 1-indexed, plus header row
- Bulkrax.csv_row_validators.each { |v| v.call(record, row_number, validator_context) }
- end
-
- # 9. File validation
- file_validator = CsvTemplate::FileValidator.new(csv_data, zip_file, admin_set_id)
-
- # 10. Item hierarchy for UI display
- collections, works, file_sets = extract_validation_items(csv_data, all_ids)
-
- # 11. Assemble result
- source_id_missing = !headers.map(&:to_s).include?(source_id_key.to_s)
- if source_id_missing && Bulkrax.fill_in_blank_source_identifiers.blank?
- all_models.each do |model|
- missing_required << { model: model, field: source_id_key.to_s }
- end
- end
-
- row_errors = validator_context[:errors]
- has_errors = missing_required.any? || headers.blank? || csv_data.empty? ||
- file_validator.missing_files.any? || row_errors.any?
- has_warnings = unrecognized.any? || empty_columns.any? || file_validator.possible_missing_files?
-
- result = {
- headers: headers,
- missingRequired: missing_required,
- unrecognized: unrecognized,
- emptyColumns: empty_columns,
- rowCount: csv_data.length,
- isValid: !has_errors,
- hasWarnings: has_warnings,
- rowErrors: row_errors,
- collections: collections,
- works: works,
- fileSets: file_sets,
- totalItems: csv_data.length,
- fileReferences: file_validator.count_references,
- missingFiles: file_validator.missing_files,
- foundFiles: file_validator.found_files_count,
- zipIncluded: file_validator.zip_included?
- }
-
- apply_rights_statement_validation_override!(result, missing_required)
- result
- end
-
- private
-
- # Resolve a symbol key from mappings for use as a record hash key.
- # Returns a Symbol matching the parser's symbol-keyed record hash.
- def resolve_validation_key(mapping_manager, key: nil, flag: nil, default:)
- options = mapping_manager.resolve_column_name(key: key, flag: flag, default: default.to_s)
- options.first&.to_sym || default
- end
-
- # Parse rows from a CsvEntry.read_data result into the canonical record shape.
- # CsvEntry.read_data returns CSV::Row objects with symbol headers; blank rows
- # are already filtered by CsvWrapper.
- def parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
- raw_csv.map do |row|
- # CSV::Row#to_h converts symbol headers → string-keyed hash
- row_hash = row.to_h.transform_keys(&:to_s)
- {
- source_identifier: row[source_id_key],
- model: row[:model],
- parent: row[parent_key],
- children: row[children_key],
- file: row[file_key],
- raw_row: row_hash
- }
- end
- rescue StandardError => e
- Rails.logger.error("CsvParser.validate_csv: error parsing rows – #{e.message}")
- []
- end
-
- def build_validation_field_metadata(all_models, field_analyzer)
- all_models.each_with_object({}) do |model, hash|
- field_list = field_analyzer.find_or_create_field_list_for(model_name: model)
- hash[model] = {
- properties: field_list.dig(model, 'properties') || [],
- required_terms: field_list.dig(model, 'required_terms') || [],
- controlled_vocab_terms: field_list.dig(model, 'controlled_vocab_terms') || []
- }
- end
- end
-
- def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
- svc = ValidationContext.new(
- mapping_manager: mapping_manager,
- field_analyzer: field_analyzer,
- all_models: all_models,
- mappings: mappings
- )
- all_cols = CsvTemplate::ColumnBuilder.new(svc).all_columns
- all_cols - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
- rescue StandardError => e
- Rails.logger.error("CsvParser.validate_csv: error building valid headers – #{e.message}")
- standard = %w[model source_identifier parent parents file]
- model_fields = field_metadata.values.flat_map { |m| m[:properties] }
- (standard + model_fields).uniq
- end
-
- def find_missing_required_headers(headers, field_metadata, mapping_manager)
- csv_keys = headers.map { |h| mapping_manager.mapped_to_key(h).sub(/_\d+\z/, '') }.uniq
- missing = []
- field_metadata.each do |model, meta|
- (meta[:required_terms] || []).each do |field|
- missing << { model: model, field: field } unless csv_keys.include?(field)
- end
- end
- missing.uniq
- end
-
- def find_unrecognized_validation_headers(headers, valid_headers)
- checker = DidYouMean::SpellChecker.new(dictionary: valid_headers)
- headers
- .reject { |h| h.blank? || valid_headers.include?(h) || valid_headers.include?(h.sub(/_\d+\z/, '')) }
- .index_with { |h| checker.correct(h).first }
- end
-
- def find_empty_column_positions(headers, raw_csv)
- headers.each_with_index.filter_map do |h, i|
- next if h.present?
- has_data = raw_csv.any? { |row| row.fields[i].present? }
- i + 1 if has_data
+ Bulkrax.csv_row_validators.each { |v| v.call(record, row_number, context) }
end
- end
-
- def resolve_parent_split_pattern(mappings)
- split_val = mappings.dig('parents', 'split') || mappings.dig(:parents, :split)
- return nil if split_val.blank?
- return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
-
- split_val
- end
-
- def extract_validation_items(csv_data, all_ids = Set.new) # rubocop:disable Metrics/MethodLength
- child_to_parents = build_child_to_parents_map(csv_data)
- collections = []
- works = []
- file_sets = []
-
- csv_data.each do |item|
- categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets)
- end
-
- [collections, works, file_sets]
- end
-
- def build_child_to_parents_map(csv_data)
- Hash.new { |h, k| h[k] = [] }.tap do |map|
- csv_data.each do |item|
- next if item[:source_identifier].blank?
-
- parse_relationship_field(item[:children]).each do |child_id|
- map[child_id] << item[:source_identifier]
- end
- end
- end
- end
-
- def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets) # rubocop:disable Metrics/ParameterLists
- item_id = item[:source_identifier]
- title = item[:raw_row]['title'] || item_id
- model_str = item[:model].to_s
-
- if model_str.casecmp('collection').zero? || model_str.casecmp('collectionresource').zero?
- explicit = resolvable_ids(parse_relationship_field(item[:parent]), all_ids)
- inferred = resolvable_ids(child_to_parents[item_id] || [], all_ids)
- collections << { id: item_id, title: title, type: 'collection',
- parentIds: (explicit + inferred).uniq,
- childIds: resolvable_ids(parse_relationship_field(item[:children]), all_ids) }
- elsif model_str.casecmp('fileset').zero? || model_str.casecmp('hyrax::fileset').zero?
- file_sets << { id: item_id, title: title, type: 'file_set' }
- else
- explicit = resolvable_ids(parse_relationship_field(item[:parent]), all_ids)
- inferred = resolvable_ids(child_to_parents[item_id] || [], all_ids)
- works << { id: item_id, title: title, type: 'work',
- parentIds: (explicit + inferred).uniq,
- childIds: resolvable_ids(parse_relationship_field(item[:children]), all_ids) }
- end
- end
-
- def parse_relationship_field(value)
- return [] if value.blank?
- value.to_s.split('|').map(&:strip).reject(&:blank?)
- end
-
- def resolvable_ids(ids, all_ids)
- ids.select { |id| all_ids.include?(id) }
- end
-
- def apply_rights_statement_validation_override!(result, missing_required)
- only_rights = missing_required.present? &&
- missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
- return unless only_rights && !result[:isValid]
- return if result[:headers].blank?
- return if result[:missingFiles]&.any?
-
- result[:isValid] = true
- result[:hasWarnings] = true
+ context[:errors]
end
end
end
diff --git a/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb b/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb
new file mode 100644
index 00000000..5630200d
--- /dev/null
+++ b/app/parsers/concerns/bulkrax/csv_parser/csv_validation_helpers.rb
@@ -0,0 +1,181 @@
+# frozen_string_literal: true
+
+module Bulkrax
+ class CsvParser < ApplicationParser
+ # Private helper methods for CsvValidation.
+ module CsvValidationHelpers # rubocop:disable Metrics/ModuleLength
+ include CsvValidationHierarchy
+
+ # Resolve a symbol key from mappings for use as a record hash key.
+ # Returns a Symbol matching the parser's symbol-keyed record hash.
+ def resolve_validation_key(mapping_manager, key: nil, flag: nil, default:)
+ options = mapping_manager.resolve_column_name(key: key, flag: flag, default: default.to_s)
+ options.first&.to_sym || default
+ end
+
+ # Parse rows from a CsvEntry.read_data result into the canonical record shape.
+ # CsvEntry.read_data returns CSV::Row objects with symbol headers; blank rows
+ # are already filtered by CsvWrapper.
+ def parse_validation_rows(raw_csv, source_id_key, parent_key, children_key, file_key)
+ raw_csv.map do |row|
+ # CSV::Row#to_h converts symbol headers → string-keyed hash
+ row_hash = row.to_h.transform_keys(&:to_s)
+ {
+ source_identifier: row[source_id_key],
+ model: row[:model],
+ parent: row[parent_key],
+ children: row[children_key],
+ file: row[file_key],
+ raw_row: row_hash
+ }
+ end
+ rescue StandardError => e
+ Rails.logger.error("CsvParser.validate_csv: error parsing rows – #{e.message}")
+ []
+ end
+
+ def build_validation_field_metadata(all_models, field_analyzer)
+ all_models.each_with_object({}) do |model, hash|
+ field_list = field_analyzer.find_or_create_field_list_for(model_name: model)
+ hash[model] = {
+ properties: field_list.dig(model, 'properties') || [],
+ required_terms: field_list.dig(model, 'required_terms') || [],
+ controlled_vocab_terms: field_list.dig(model, 'controlled_vocab_terms') || []
+ }
+ end
+ end
+
+ def build_valid_validation_headers(mapping_manager, field_analyzer, all_models, mappings, field_metadata)
+ svc = ValidationContext.new(
+ mapping_manager: mapping_manager,
+ field_analyzer: field_analyzer,
+ all_models: all_models,
+ mappings: mappings
+ )
+ all_cols = CsvTemplate::ColumnBuilder.new(svc).all_columns
+ all_cols - CsvTemplate::CsvBuilder::IGNORED_PROPERTIES
+ rescue StandardError => e
+ Rails.logger.error("CsvParser.validate_csv: error building valid headers – #{e.message}")
+ standard = %w[model source_identifier parent parents file]
+ model_fields = field_metadata.values.flat_map { |m| m[:properties] }
+ (standard + model_fields).uniq
+ end
+
+ def find_missing_required_headers(headers, field_metadata, mapping_manager)
+ csv_keys = headers.map { |h| mapping_manager.mapped_to_key(h).sub(/_\d+\z/, '') }.uniq
+ missing = []
+ field_metadata.each do |model, meta|
+ (meta[:required_terms] || []).each do |field|
+ missing << { model: model, field: field } unless csv_keys.include?(field)
+ end
+ end
+ missing.uniq
+ end
+
+ def find_unrecognized_validation_headers(headers, valid_headers)
+ checker = DidYouMean::SpellChecker.new(dictionary: valid_headers)
+ headers
+ .reject { |h| h.blank? || valid_headers.include?(h) || valid_headers.include?(h.sub(/_\d+\z/, '')) }
+ .index_with { |h| checker.correct(h).first }
+ end
+
+ def find_empty_column_positions(headers, raw_csv)
+ headers.each_with_index.filter_map do |h, i|
+ next if h.present?
+ has_data = raw_csv.any? { |row| row.fields[i].present? }
+ i + 1 if has_data
+ end
+ end
+
+ # Adds a missing source_identifier entry to missing_required when the column
+ # is absent and fill_in_blank_source_identifiers is not configured.
+ def append_missing_source_id!(missing_required, headers, source_id_key, all_models)
+ return if headers.map(&:to_s).include?(source_id_key.to_s)
+ return if Bulkrax.fill_in_blank_source_identifiers.present?
+
+ all_models.each { |model| missing_required << { model: model, field: source_id_key.to_s } }
+ end
+
+ def apply_rights_statement_validation_override!(result, missing_required)
+ only_rights = missing_required.present? &&
+ missing_required.all? { |h| h[:field].to_s == 'rights_statement' }
+ return unless only_rights && !result[:isValid]
+ return if result[:headers].blank?
+ return if result[:missingFiles]&.any?
+
+ result[:isValid] = true
+ result[:hasWarnings] = true
+ end
+
+ # Assembles the final result hash returned to the guided import UI.
+ def assemble_result(headers:, missing_required:, header_issues:, row_errors:, csv_data:, file_validator:, collections:, works:, file_sets:) # rubocop:disable Metrics/ParameterLists
+ has_errors = missing_required.any? || headers.blank? || csv_data.empty? ||
+ file_validator.missing_files.any? || row_errors.any?
+ has_warnings = header_issues[:unrecognized].any? || header_issues[:empty_columns].any? ||
+ file_validator.possible_missing_files?
+
+ {
+ headers: headers,
+ missingRequired: missing_required,
+ unrecognized: header_issues[:unrecognized],
+ emptyColumns: header_issues[:empty_columns],
+ rowCount: csv_data.length,
+ isValid: !has_errors,
+ hasWarnings: has_warnings,
+ rowErrors: row_errors,
+ collections: collections,
+ works: works,
+ fileSets: file_sets,
+ totalItems: csv_data.length,
+ fileReferences: file_validator.count_references,
+ missingFiles: file_validator.missing_files,
+ foundFiles: file_validator.found_files_count,
+ zipIncluded: file_validator.zip_included?
+ }
+ end
+
+ # Builds the find_record lambda used by row validators and hierarchy extraction.
+ def build_find_record(mapping_manager, mappings)
+ work_identifier = mapping_manager.resolve_column_name(flag: 'source_identifier', default: 'source').first&.to_s || 'source'
+ work_identifier_search = Array.wrap(mappings.dig(work_identifier, 'search_field')).first&.to_s ||
+ "#{work_identifier}_sim"
+ ->(id) { find_record_by_source_identifier(id, work_identifier, work_identifier_search) }
+ end
+
+ # Attempt to locate an existing repository record by its identifier.
+ # The identifier may be a Bulkrax source_identifier or a repository object ID.
+ # Mimics the importer's existing-record lookup (which decides create vs. update).
+ # Since we don't have the full importer context here, we check both the Entry model and the repository directly.
+ #
+ # @param identifier [String]
+ # @param work_identifier [String] the source_identifier property name (e.g. "source")
+ # @param work_identifier_search [String] the Solr field for source_identifier (e.g. "source_sim")
+ # @return [Boolean] true if a matching Entry or repository object is found
+ def find_record_by_source_identifier(identifier, work_identifier, work_identifier_search)
+ return false if identifier.blank?
+
+ return true if Entry.exists?(identifier: identifier, importerexporter_type: 'Bulkrax::Importer')
+ return true if Bulkrax.object_factory.find_or_nil(identifier).present?
+
+ [Bulkrax.collection_model_class, *Bulkrax.curation_concerns].any? do |klass|
+ Bulkrax.object_factory.search_by_property(
+ value: identifier,
+ klass: klass,
+ search_field: work_identifier_search,
+ name_field: work_identifier
+ ).present?
+ end
+ rescue StandardError
+ false
+ end
+
+ def resolve_parent_split_pattern(mappings)
+ split_val = mappings.dig('parents', 'split') || mappings.dig(:parents, :split)
+ return nil if split_val.blank?
+ return Bulkrax::DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON if split_val == true
+
+ split_val
+ end
+ end
+ end
+end
diff --git a/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb b/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb
new file mode 100644
index 00000000..69188f0b
--- /dev/null
+++ b/app/parsers/concerns/bulkrax/csv_parser/csv_validation_hierarchy.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+module Bulkrax
+ class CsvParser < ApplicationParser
+ # Hierarchy-building helpers for CsvValidation. Handles extracting and
+ # categorising items from parsed CSV data for the guided import tree view.
+ module CsvValidationHierarchy
+ def extract_validation_items(csv_data, all_ids = Set.new, find_record = nil)
+ child_to_parents = build_child_to_parents_map(csv_data)
+ collections = []
+ works = []
+ file_sets = []
+
+ csv_data.each do |item|
+ categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record)
+ end
+
+ [collections, works, file_sets]
+ end
+
+ def build_child_to_parents_map(csv_data)
+ Hash.new { |h, k| h[k] = [] }.tap do |map|
+ csv_data.each do |item|
+ next if item[:source_identifier].blank?
+
+ parse_relationship_field(item[:children]).each do |child_id|
+ map[child_id] << item[:source_identifier]
+ end
+ end
+ end
+ end
+
+ def categorise_validation_item(item, child_to_parents, all_ids, collections, works, file_sets, find_record = nil) # rubocop:disable Metrics/ParameterLists
+ item_id = item[:source_identifier]
+ model_str = item[:model].to_s
+
+ if model_str.casecmp('collection').zero? || model_str.casecmp('collectionresource').zero?
+ collections << build_item_hash(item, child_to_parents, all_ids, find_record, type: 'collection')
+ elsif model_str.casecmp('fileset').zero? || model_str.casecmp('hyrax::fileset').zero?
+ file_sets << { id: item_id, title: item[:raw_row]['title'] || item_id, type: 'file_set' }
+ else
+ works << build_item_hash(item, child_to_parents, all_ids, find_record, type: 'work')
+ end
+ end
+
+ def build_item_hash(item, child_to_parents, all_ids, find_record, type:)
+ item_id = item[:source_identifier]
+ title = item[:raw_row]['title'] || item_id
+ parents = parse_relationship_field(item[:parent])
+ children = parse_relationship_field(item[:children])
+
+ {
+ id: item_id,
+ title: title,
+ type: type,
+ parentIds: (resolvable_ids(parents, all_ids) + resolvable_ids(child_to_parents[item_id] || [], all_ids)).uniq,
+ childIds: resolvable_ids(children, all_ids),
+ existingParentIds: external_ids(parents, all_ids, find_record),
+ existingChildIds: external_ids(children, all_ids, find_record)
+ }
+ end
+
+ def parse_relationship_field(value)
+ return [] if value.blank?
+ value.to_s.split('|').map(&:strip).reject(&:blank?)
+ end
+
+ def resolvable_ids(ids, all_ids)
+ ids.select { |id| all_ids.include?(id) }
+ end
+
+ # Returns ids from the list that are NOT in the CSV but exist in the repository.
+ def external_ids(ids, all_ids, find_record)
+ return [] if find_record.nil?
+
+ ids.reject { |id| all_ids.include?(id) }
+ .select { |id| find_record.call(id) }
+ end
+ end
+ end
+end
diff --git a/app/validators/bulkrax/csv_row/child_reference.rb b/app/validators/bulkrax/csv_row/child_reference.rb
new file mode 100644
index 00000000..4a464ae5
--- /dev/null
+++ b/app/validators/bulkrax/csv_row/child_reference.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module Bulkrax
+ module CsvRow
+ ##
+ # Validates that any child references in a row point to source identifiers
+ # that exist either elsewhere in the same CSV or as existing repository records.
+ # Uses context[:all_ids] (Set of all source identifiers) to validate references
+ # within the CSV, and context[:find_record_by_source_identifier] (callable) to
+ # look up existing records in the same way the importer does at runtime.
+ # Skips validation when all_ids is empty and fill_in_blank_source_identifiers is
+ # configured, since generated identifiers cannot be cross-referenced at validation time.
+ module ChildReference
+ def self.call(record, row_index, context)
+ children = record[:children]
+ return if children.blank?
+
+ all_ids = context[:all_ids]
+ return if all_ids.empty? && Bulkrax.fill_in_blank_source_identifiers.present?
+
+ find_record = context[:find_record_by_source_identifier]
+ child_ids = children.to_s.split('|').map(&:strip).reject(&:blank?)
+
+ child_ids.each do |child_id|
+ next if all_ids.include?(child_id)
+ next if find_record&.call(child_id)
+
+ context[:errors] << {
+ row: row_index,
+ source_identifier: record[:source_identifier],
+ severity: 'error',
+ category: 'invalid_child_reference',
+ column: 'children',
+ value: child_id,
+ message: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.message',
+ value: child_id,
+ field: 'source_identifier'),
+ suggestion: I18n.t('bulkrax.importer.guided_import.validation.child_reference_validator.errors.suggestion')
+ }
+ end
+ end
+ end
+ end
+end
diff --git a/app/validators/bulkrax/csv_row/parent_reference.rb b/app/validators/bulkrax/csv_row/parent_reference.rb
index fd64f410..f1e6a4f8 100644
--- a/app/validators/bulkrax/csv_row/parent_reference.rb
+++ b/app/validators/bulkrax/csv_row/parent_reference.rb
@@ -4,8 +4,10 @@ module Bulkrax
module CsvRow
##
# Validates that any parent references in a row point to source identifiers
- # that exist elsewhere in the same CSV.
- # Uses context[:all_ids] (Set of all source identifiers) to validate references.
+ # that exist either elsewhere in the same CSV or as existing repository records.
+ # Uses context[:all_ids] (Set of all source identifiers) to validate references
+ # within the CSV, and context[:find_record_by_source_identifier] (callable) to
+ # look up existing records in the same way the importer does at runtime.
# Uses context[:parent_split_pattern] (String/Regexp, may be nil) for multi-value splitting.
module ParentReference
def self.call(record, row_index, context)
@@ -14,6 +16,7 @@ def self.call(record, row_index, context)
all_ids = context[:all_ids]
split_pattern = context[:parent_split_pattern]
+ find_record = context[:find_record_by_source_identifier]
parent_ids = if split_pattern
parents.to_s.split(split_pattern).map(&:strip).reject(&:blank?)
@@ -23,6 +26,7 @@ def self.call(record, row_index, context)
parent_ids.each do |parent_id|
next if all_ids.include?(parent_id)
+ next if find_record&.call(parent_id)
context[:errors] << {
row: row_index,
diff --git a/config/locales/bulkrax.en.yml b/config/locales/bulkrax.en.yml
index a39529f3..f13d6efb 100644
--- a/config/locales/bulkrax.en.yml
+++ b/config/locales/bulkrax.en.yml
@@ -209,6 +209,8 @@ en:
review_total: "%{total} total — %{collections} collections, %{works} works, %{file_sets} file sets"
review_visibility: 'Visibility:'
server_error: Server error during validation. Please try again or contact support.
+ existing_record_badge: existing
+ existing_record_title: This record already exists in the repository and will be linked during import
shared_badge: shared
starting: Starting...
upload_csv_and_zip: CSV + files uploaded separately
@@ -340,6 +342,10 @@ en:
errors:
message: "Referenced parent '%{value}' does not exist as a %{field} in this CSV."
suggestion: "Check for typos or add the parent record."
+ child_reference_validator:
+ errors:
+ message: "Referenced child '%{value}' does not exist as a %{field} in this CSV."
+ suggestion: "Check for typos or add the child record."
passed: Validation Passed
passed_warnings: Validation Passed with Warnings
recognized_fields: 'Recognized fields: %{fields}'
diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb
index 4f8b10b2..12649e29 100644
--- a/lib/bulkrax.rb
+++ b/lib/bulkrax.rb
@@ -183,6 +183,7 @@ def csv_row_validators
@csv_row_validators ||= [
Bulkrax::CsvRow::DuplicateIdentifier,
Bulkrax::CsvRow::ParentReference,
+ Bulkrax::CsvRow::ChildReference,
Bulkrax::CsvRow::RequiredValues,
Bulkrax::CsvRow::ControlledVocabulary
]
diff --git a/spec/parsers/bulkrax/csv_parser/csv_validation_helpers_spec.rb b/spec/parsers/bulkrax/csv_parser/csv_validation_helpers_spec.rb
new file mode 100644
index 00000000..7fe7c6d7
--- /dev/null
+++ b/spec/parsers/bulkrax/csv_parser/csv_validation_helpers_spec.rb
@@ -0,0 +1,207 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+RSpec.describe Bulkrax::CsvParser::CsvValidationHelpers do
+ # Minimal host object that mixes in the concern under test.
+ let(:host) do
+ Object.new.tap { |o| o.extend(described_class) }
+ end
+
+ # All specs in this file exercise the Valkyrie path. ActiveFedora / Wings is
+ # not verified to work with this feature, so we configure the factory
+ # globally for the file rather than repeating it in every context.
+ before { Bulkrax.object_factory = Bulkrax::ValkyrieObjectFactory }
+ after { Bulkrax.object_factory = Bulkrax::ObjectFactory }
+
+ describe '#find_record_by_source_identifier' do
+ let(:work_identifier) { 'source' }
+ let(:work_identifier_search) { 'source_sim' }
+
+ def find(id)
+ host.find_record_by_source_identifier(id, work_identifier, work_identifier_search)
+ end
+
+ context 'when the identifier is blank' do
+ it 'returns false for nil' do
+ expect(find(nil)).to be false
+ end
+
+ it 'returns false for an empty string' do
+ expect(find('')).to be false
+ end
+ end
+
+ context 'when a matching Bulkrax::Entry exists in the database' do
+ let!(:importer) { FactoryBot.create(:bulkrax_importer) }
+ let!(:entry) { FactoryBot.create(:bulkrax_csv_entry, identifier: 'entry_id_001', importerexporter: importer) }
+
+ it 'returns true without querying the repository' do
+ # ValkyrieObjectFactory.find delegates to Hyrax.query_service.find_by;
+ # the Entry short-circuit means it should never be reached.
+ expect(Hyrax.query_service).not_to receive(:find_by)
+ expect(find('entry_id_001')).to be true
+ end
+ end
+
+ context 'when no Entry exists but the repository has a matching object by ID' do
+ # ValkyrieObjectFactory.find_or_nil calls ValkyrieObjectFactory.find which
+ # calls Hyrax.query_service.find_by(id:). Stub at that level so we verify
+ # the full Valkyrie call chain.
+ before do
+ allow(Hyrax.query_service).to receive(:find_by)
+ .with(id: 'repo-uuid-001')
+ .and_return(instance_double(Hyrax::Work))
+ end
+
+ it 'returns true' do
+ expect(find('repo-uuid-001')).to be true
+ end
+
+ it 'does not fall through to search_by_property' do
+ expect(Bulkrax::ValkyrieObjectFactory).not_to receive(:search_by_property)
+ find('repo-uuid-001')
+ end
+ end
+
+ context 'when no Entry exists and find_or_nil returns nil' do
+ # ValkyrieObjectFactory.find raises ObjectNotFoundError when the object
+ # does not exist; find_or_nil rescues that to nil.
+ before do
+ allow(Hyrax.query_service).to receive(:find_by)
+ .and_raise(Hyrax::ObjectNotFoundError)
+ end
+
+ context 'when search_by_property finds a match on one of the model classes' do
+ before do
+ allow(Bulkrax).to receive(:collection_model_class).and_return(Collection)
+ allow(Bulkrax).to receive(:curation_concerns).and_return([Work])
+
+ # Collection misses, Work hits.
+ allow(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property)
+ .with(value: 'custom_source_001', klass: Collection,
+ search_field: work_identifier_search, name_field: work_identifier)
+ .and_return(nil)
+ allow(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property)
+ .with(value: 'custom_source_001', klass: Work,
+ search_field: work_identifier_search, name_field: work_identifier)
+ .and_return(instance_double(Hyrax::Work))
+ end
+
+ it 'returns true' do
+ expect(find('custom_source_001')).to be true
+ end
+ end
+
+ context 'when search_by_property finds nothing across all model classes' do
+ before do
+ allow(Bulkrax).to receive(:collection_model_class).and_return(Collection)
+ allow(Bulkrax).to receive(:curation_concerns).and_return([Work])
+ allow(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).and_return(nil)
+ end
+
+ it 'returns false' do
+ expect(find('nonexistent_id')).to be false
+ end
+ end
+
+ context 'when search_by_property is called with the correct field arguments' do
+ let(:work_identifier) { 'local_id' }
+ let(:work_identifier_search) { 'local_id_sim' }
+
+ before do
+ allow(Bulkrax).to receive(:collection_model_class).and_return(Collection)
+ allow(Bulkrax).to receive(:curation_concerns).and_return([])
+ allow(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).and_return(nil)
+ end
+
+ it 'passes the resolved work_identifier and search field through to search_by_property' do
+ expect(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).with(
+ value: 'some_local_id',
+ klass: Collection,
+ search_field: 'local_id_sim',
+ name_field: 'local_id'
+ )
+ find('some_local_id')
+ end
+ end
+ end
+
+ context 'when an exception is raised during lookup' do
+ before do
+ allow(Bulkrax::Entry).to receive(:exists?).and_raise(StandardError, 'DB unavailable')
+ end
+
+ it 'returns false instead of propagating the error' do
+ expect(find('some_id')).to be false
+ end
+ end
+ end
+
+ describe '#build_find_record' do
+ let(:mapping_manager) { instance_double(Bulkrax::CsvTemplate::MappingManager) }
+ let(:mappings) { {} }
+
+ before do
+ allow(Hyrax.query_service).to receive(:find_by).and_raise(Hyrax::ObjectNotFoundError)
+ allow(Bulkrax).to receive(:collection_model_class).and_return(Collection)
+ allow(Bulkrax).to receive(:curation_concerns).and_return([Work])
+ allow(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).and_return(nil)
+ end
+
+ context 'with default source_identifier mapping' do
+ before do
+ allow(mapping_manager).to receive(:resolve_column_name)
+ .with(flag: 'source_identifier', default: 'source')
+ .and_return(['source'])
+ end
+
+ it 'returns a callable lambda' do
+ lam = host.build_find_record(mapping_manager, mappings)
+ expect(lam).to respond_to(:call)
+ end
+
+ it 'defaults the search field to _sim when no search_field mapping is present' do
+ lam = host.build_find_record(mapping_manager, mappings)
+ expect(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).with(
+ hash_including(search_field: 'source_sim', name_field: 'source')
+ ).and_return(nil)
+ lam.call('anything')
+ end
+ end
+
+ context 'when the mapping provides a custom search_field' do
+ let(:mappings) { { 'local_id' => { 'search_field' => 'local_id_tesim' } } }
+
+ before do
+ allow(mapping_manager).to receive(:resolve_column_name)
+ .with(flag: 'source_identifier', default: 'source')
+ .and_return(['local_id'])
+ end
+
+ it 'uses the mapped search_field instead of the default _sim suffix' do
+ lam = host.build_find_record(mapping_manager, mappings)
+ expect(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).with(
+ hash_including(search_field: 'local_id_tesim', name_field: 'local_id')
+ ).and_return(nil)
+ lam.call('anything')
+ end
+ end
+
+ context 'when resolve_column_name returns nothing' do
+ before do
+ allow(mapping_manager).to receive(:resolve_column_name)
+ .with(flag: 'source_identifier', default: 'source')
+ .and_return([])
+ end
+
+ it 'falls back to "source" as the work_identifier' do
+ lam = host.build_find_record(mapping_manager, mappings)
+ expect(Bulkrax::ValkyrieObjectFactory).to receive(:search_by_property).with(
+ hash_including(search_field: 'source_sim', name_field: 'source')
+ ).and_return(nil)
+ lam.call('anything')
+ end
+ end
+ end
+end
diff --git a/spec/validators/bulkrax/csv_row/child_reference_spec.rb b/spec/validators/bulkrax/csv_row/child_reference_spec.rb
new file mode 100644
index 00000000..d788e7c9
--- /dev/null
+++ b/spec/validators/bulkrax/csv_row/child_reference_spec.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+
+RSpec.describe Bulkrax::CsvRow::ChildReference do
+ def make_context(all_ids: Set.new(%w[col1 work1]), find_record: nil)
+ { errors: [], warnings: [], all_ids: all_ids, parent_split_pattern: nil,
+ find_record_by_source_identifier: find_record }
+ end
+
+ def make_record(children: nil)
+ { source_identifier: 'col1', model: 'Collection', children: children, raw_row: {} }
+ end
+
+ it 'adds no error when children field is blank' do
+ context = make_context
+ described_class.call(make_record(children: nil), 2, context)
+ expect(context[:errors]).to be_empty
+ end
+
+ it 'adds no error when the child exists in the CSV' do
+ context = make_context
+ described_class.call(make_record(children: 'work1'), 2, context)
+ expect(context[:errors]).to be_empty
+ end
+
+ it 'adds an error when the child does not exist in the CSV' do
+ context = make_context
+ described_class.call(make_record(children: 'missing_child'), 2, context)
+ expect(context[:errors].length).to eq(1)
+ expect(context[:errors].first[:category]).to eq('invalid_child_reference')
+ expect(context[:errors].first[:value]).to eq('missing_child')
+ end
+
+ it 'adds an error for each unresolvable id in a pipe-separated list' do
+ context = make_context
+ described_class.call(make_record(children: 'work1|missing1|missing2'), 2, context)
+ expect(context[:errors].length).to eq(2)
+ expect(context[:errors].map { |e| e[:value] }).to contain_exactly('missing1', 'missing2')
+ end
+
+ it 'adds no error when the child is not in the CSV but exists as a repository record' do
+ find_record = ->(id) { id == 'existing_repo_child' }
+ context = make_context(find_record: find_record)
+ described_class.call(make_record(children: 'existing_repo_child'), 2, context)
+ expect(context[:errors]).to be_empty
+ end
+
+ it 'adds an error when the child is not in the CSV and not found in the repository' do
+ find_record = ->(_id) { false }
+ context = make_context(find_record: find_record)
+ described_class.call(make_record(children: 'truly_missing'), 2, context)
+ expect(context[:errors].length).to eq(1)
+ expect(context[:errors].first[:category]).to eq('invalid_child_reference')
+ end
+
+ it 'resolves mixed pipe-separated ids using both CSV and repository lookup' do
+ find_record = ->(id) { id == 'repo_child' }
+ context = make_context(find_record: find_record)
+ described_class.call(make_record(children: 'work1|repo_child|truly_missing'), 2, context)
+ expect(context[:errors].length).to eq(1)
+ expect(context[:errors].first[:value]).to eq('truly_missing')
+ end
+
+ context 'when fill_in_blank_source_identifiers is configured and all_ids is empty' do
+ before do
+ allow(Bulkrax).to receive(:fill_in_blank_source_identifiers)
+ .and_return(->(_parser, _index) { SecureRandom.uuid })
+ end
+
+ it 'skips the check — child ids cannot be validated against generated identifiers' do
+ context = make_context(all_ids: Set.new)
+ described_class.call(make_record(children: 'bcd123'), 2, context)
+ expect(context[:errors]).to be_empty
+ end
+ end
+end
diff --git a/spec/validators/bulkrax/csv_row/parent_reference_spec.rb b/spec/validators/bulkrax/csv_row/parent_reference_spec.rb
index 53ba10f9..9ff0ae22 100644
--- a/spec/validators/bulkrax/csv_row/parent_reference_spec.rb
+++ b/spec/validators/bulkrax/csv_row/parent_reference_spec.rb
@@ -3,8 +3,9 @@
require 'rails_helper'
RSpec.describe Bulkrax::CsvRow::ParentReference do
- def make_context(all_ids: Set.new(%w[col1 work1]))
- { errors: [], warnings: [], all_ids: all_ids, parent_split_pattern: nil }
+ def make_context(all_ids: Set.new(%w[col1 work1]), find_record: nil)
+ { errors: [], warnings: [], all_ids: all_ids, parent_split_pattern: nil,
+ find_record_by_source_identifier: find_record }
end
def make_record(parent: nil)
@@ -30,4 +31,19 @@ def make_record(parent: nil)
expect(context[:errors].first[:category]).to eq('invalid_parent_reference')
expect(context[:errors].first[:value]).to eq('missing_parent')
end
+
+ it 'adds no error when the parent is not in the CSV but exists as a repository record' do
+ find_record = ->(id) { id == 'existing_repo_parent' }
+ context = make_context(find_record: find_record)
+ described_class.call(make_record(parent: 'existing_repo_parent'), 2, context)
+ expect(context[:errors]).to be_empty
+ end
+
+ it 'adds an error when the parent is not in the CSV and not found in the repository' do
+ find_record = ->(_id) { false }
+ context = make_context(find_record: find_record)
+ described_class.call(make_record(parent: 'truly_missing'), 2, context)
+ expect(context[:errors].length).to eq(1)
+ expect(context[:errors].first[:category]).to eq('invalid_parent_reference')
+ end
end