Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions app/jobs/cleanup_sub_directory_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,21 @@
class CleanupSubDirectoryJob < ApplicationJob
non_tenant_job

# Avoid ApplicationJob's retry_on(StandardError) burning 5 attempts
# and swallowing the error when the retry block is present.
discard_on ArgumentError do |_job, error|
logger.warn(error.message)
end

# Assumptions:
# The second to last element in the path is the ID of the associated FileSet
# The directory has a pair-tree structure
# +directory+ must be a single hex pair-tree top segment (00-ff), same filter as CleanupUploadFilesJob
# (never tenant UUID roots, site/, hyrax/, etc.).
attr_reader :delete_ingested_after_days, :delete_all_after_days, :directory, :files_checked, :files_deleted
def perform(delete_ingested_after_days:, directory:, delete_all_after_days: 730)
assert_hex_pair_tree_directory!(directory)

@directory = directory
@delete_ingested_after_days = delete_ingested_after_days
@delete_all_after_days = delete_all_after_days
Expand All @@ -21,6 +31,16 @@ def perform(delete_ingested_after_days:, directory:, delete_all_after_days: 730)

private

def assert_hex_pair_tree_directory!(dir)
path = File.expand_path(dir.to_s)
basename = File.basename(path)
return if basename.match?(CleanupUploadFilesJob::HEX_TOP_DIR_PATTERN)

raise ArgumentError,
"CleanupSubDirectoryJob only accepts pair-tree hex top-level directories (basename 00-ff per " \
"CleanupUploadFilesJob::HEX_TOP_DIR_PATTERN); got #{dir.inspect}"
end

def delete_files
Dir.glob("#{directory}/**/*").each do |path|
next unless should_be_deleted?(path)
Expand Down
48 changes: 41 additions & 7 deletions spec/jobs/cleanup_sub_directory_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
let(:old_time) { Time.zone.now - 1.year }
let(:very_old_time) { Time.zone.now - 3.years }
let(:new_time) { Time.zone.now - 1.week }
let(:hex_pair_tree_root) { '/app/samvera/uploads/ff' }
let(:path_1) { '/app/samvera/uploads/ff/00/27/d1/file-set-id-1/path_1' }
let(:path_2) { '/app/samvera/uploads/ff/11/28/19/file-set-id-2/path_2' }
let(:path_3) { '/app/samvera/uploads/ff/22/17/de/file-set-id-3/path_3' }
Expand Down Expand Up @@ -42,39 +43,72 @@
allow(FileSet).to receive(:exists?).with('file-set-id-6').and_return(false)
end

describe 'hex pair-tree directory guard' do
# Call #perform directly so ArgumentError is not intercepted by ApplicationJob's retry_on(StandardError).
it 'raises when given a tenant UUID directory (site branding lives here)' do
tenant_root = '/app/samvera/uploads/56e0eb81-c2d5-4d5d-9171-b251bf7299a4'
expect do
described_class.new.perform(delete_ingested_after_days: 180, directory: tenant_root)
end.to raise_error(ArgumentError, /only accepts pair-tree hex top-level directories/)
end

it 'raises when given a site banner path' do
banner = '/app/samvera/uploads/56e0eb81-c2d5-4d5d-9171-b251bf7299a4/site/banner_images/1'
expect do
described_class.new.perform(delete_ingested_after_days: 180, directory: banner)
end.to raise_error(ArgumentError, /only accepts pair-tree hex top-level directories/)
end

it 'raises when given the hyrax uploaded_file cache root' do
hyrax_dir = '/app/samvera/uploads/hyrax'
expect do
described_class.new.perform(delete_ingested_after_days: 180, directory: hyrax_dir)
end.to raise_error(ArgumentError, /only accepts pair-tree hex top-level directories/)
end

it 'accepts a valid hex pair-tree root with a trailing slash' do
allow(Dir).to receive(:glob).and_return([])
allow(FileUtils).to receive(:rmdir)

expect do
described_class.new.perform(delete_ingested_after_days: 180, directory: "#{hex_pair_tree_root}/")
end.not_to raise_error
end
end

it 'deletes old files with matching FileSets' do
expect(File).to receive(:delete).with(path_1)
expect(File).to receive(:delete).with(path_2)
described_class.perform_now(delete_ingested_after_days: 180, directory: '/app/samvera/uploads/ff')
described_class.perform_now(delete_ingested_after_days: 180, directory: hex_pair_tree_root)
end

it 'does not delete files newer than delete_ingested_after_days threshold' do
expect(File).not_to receive(:delete).with(path_3)
described_class.perform_now(delete_ingested_after_days: 180, directory: '/app/samvera/uploads/ff')
described_class.perform_now(delete_ingested_after_days: 180, directory: hex_pair_tree_root)
end

it 'does not delete directories' do
expect(File).not_to receive(:delete).with(path_4)
described_class.perform_now(delete_ingested_after_days: 180, directory: '/app/samvera/uploads/ff')
described_class.perform_now(delete_ingested_after_days: 180, directory: hex_pair_tree_root)
end

it 'does not delete old files without matching FileSets (orphaned but not old enough)' do
expect(File).not_to receive(:delete).with(path_5)
described_class.perform_now(delete_ingested_after_days: 180, directory: '/app/samvera/uploads/ff')
described_class.perform_now(delete_ingested_after_days: 180, directory: hex_pair_tree_root)
end

it 'deletes orphaned files older than delete_all_after_days' do
expect(File).to receive(:delete).with(path_6)
described_class.perform_now(delete_ingested_after_days: 180,
directory: '/app/samvera/uploads/ff',
directory: hex_pair_tree_root,
delete_all_after_days: 730)
end

it 'uses configurable delete_all_after_days threshold' do
# With delete_all_after_days: 300, path_5 (1 year old) should be deleted
expect(File).to receive(:delete).with(path_5)
described_class.perform_now(delete_ingested_after_days: 180,
directory: '/app/samvera/uploads/ff',
directory: hex_pair_tree_root,
delete_all_after_days: 300)
end

Expand All @@ -100,7 +134,7 @@
expect(FileUtils).to receive(:rmdir).with('/app/samvera/uploads/ff/11/28/19/file-set-id-2', parents: true)
expect(FileUtils).to receive(:rmdir).with('/app/samvera/uploads/ff/22/17/de/file-set-id-3', parents: true)

described_class.perform_now(delete_ingested_after_days: 180, directory: '/app/samvera/uploads/ff')
described_class.perform_now(delete_ingested_after_days: 180, directory: hex_pair_tree_root)
end
end
end
Loading