diff --git a/app/jobs/cleanup_upload_files_job.rb b/app/jobs/cleanup_upload_files_job.rb
index 8aa66052..a5286a95 100644
--- a/app/jobs/cleanup_upload_files_job.rb
+++ b/app/jobs/cleanup_upload_files_job.rb
@@ -4,6 +4,11 @@
 class CleanupUploadFilesJob < ApplicationJob
   non_tenant_job
 
+  # Only process pair-tree hex directories (00-ff); do not process tenant UUID directories
+  # which contain permanent site/branding files (e.g. banner_images), or other protected
+  # dirs (uploaded_collection_thumbnails, identity_provider, hyrax).
+  HEX_TOP_DIR_PATTERN = /\A[0-9a-f]{2}\z/
+
   attr_reader :uploads_path
   def perform(delete_ingested_after_days:, uploads_path:, delete_all_after_days: 730)
     @uploads_path = uploads_path
@@ -20,7 +25,10 @@ def perform(delete_ingested_after_days:, uploads_path:, delete_all_after_days: 7
   private
 
+  # Memoized list of directories directly under uploads_path whose names match HEX_TOP_DIR_PATTERN.
   def top_level_directories
-    @top_level_directories ||= Dir.glob("#{uploads_path}/*").select { |path| File.directory?(path) }
+    @top_level_directories ||= Dir.glob("#{uploads_path}/*").select do |path|
+      File.directory?(path) && File.basename(path).match?(HEX_TOP_DIR_PATTERN)
+    end
   end
 
   def message(delete_ingested_after_days, delete_all_after_days)
diff --git a/spec/jobs/cleanup_upload_files_job_spec.rb b/spec/jobs/cleanup_upload_files_job_spec.rb
index b770ad4d..1bfcd6aa 100644
--- a/spec/jobs/cleanup_upload_files_job_spec.rb
+++ b/spec/jobs/cleanup_upload_files_job_spec.rb
@@ -1,33 +1,92 @@
 # frozen_string_literal: true
 
 RSpec.describe CleanupUploadFilesJob do
+  let(:hex_dir_ff) { '/app/samvera/uploads/ff' }
+  let(:hex_dir_00) { '/app/samvera/uploads/00' }
+  let(:hex_dir_ab) { '/app/samvera/uploads/ab' }
+  let(:uuid_tenant_dir) { '/app/samvera/uploads/56e0eb81-c2d5-4d5d-9171-b251bf7299a4' }
+  let(:uploaded_collection_thumbnails_dir) { '/app/samvera/uploads/uploaded_collection_thumbnails' }
+  let(:identity_provider_dir) { '/app/samvera/uploads/identity_provider' }
+  let(:hyrax_uploaded_file_dir) { '/app/samvera/uploads/hyrax' }
+
+  let(:all_top_level_entries) do
+    [
+      hex_dir_ff, hex_dir_00, hex_dir_ab,
+      uuid_tenant_dir,
+      uploaded_collection_thumbnails_dir,
+      identity_provider_dir,
+      hyrax_uploaded_file_dir,
+      '/app/samvera/uploads/somefile'
+    ]
+  end
+
   before do
     allow(Dir).to receive(:glob).and_call_original
-    allow(Dir).to receive(:glob).with('/app/samvera/uploads/*').and_return(['path_1', 'path_2', 'path_3', 'path_4'])
+    allow(Dir).to receive(:glob).with('/app/samvera/uploads/*').and_return(all_top_level_entries)
     allow(File).to receive(:directory?).and_call_original
-    allow(File).to receive(:directory?).with('path_1').and_return(true)
-    allow(File).to receive(:directory?).with('path_2').and_return(true)
-    allow(File).to receive(:directory?).with('path_3').and_return(true)
-    allow(File).to receive(:directory?).with('path_4').and_return(false)
+    [hex_dir_ff, hex_dir_00, hex_dir_ab, uuid_tenant_dir,
+     uploaded_collection_thumbnails_dir, identity_provider_dir, hyrax_uploaded_file_dir].each do |dir|
+      allow(File).to receive(:directory?).with(dir).and_return(true)
+    end
+    allow(File).to receive(:directory?).with('/app/samvera/uploads/somefile').and_return(false)
   end
 
-  it 'spawns child jobs for each sub-directory' do
+  it 'spawns child jobs only for hex pair-tree directories (00-ff)' do
     expect { described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads') }
       .to have_enqueued_job(CleanupSubDirectoryJob).exactly(3).times
   end
 
+  # `have_enqueued_job(...).with` compares the complete argument list, so the negative
+  # expectations below use `hash_including` to match any child job aimed at the directory,
+  # whatever the other keyword arguments are.
+  it 'does not create CleanupSubDirectoryJob for tenant UUID directories (site/banner_images, etc.)' do
+    expect do
+      described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads')
+    end.not_to have_enqueued_job(CleanupSubDirectoryJob)
+      .with(hash_including(directory: uuid_tenant_dir))
+  end
+
+  it 'does not create CleanupSubDirectoryJob for uploaded_collection_thumbnails directory' do
+    expect do
+      described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads')
+    end.not_to have_enqueued_job(CleanupSubDirectoryJob)
+      .with(hash_including(directory: uploaded_collection_thumbnails_dir))
+  end
+
+  it 'does not create CleanupSubDirectoryJob for identity_provider directory (LogoUploader)' do
+    expect do
+      described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads')
+    end.not_to have_enqueued_job(CleanupSubDirectoryJob)
+      .with(hash_including(directory: identity_provider_dir))
+  end
+
+  it 'does not create CleanupSubDirectoryJob for hyrax directory (UploadedFile cache)' do
+    expect do
+      described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads')
+    end.not_to have_enqueued_job(CleanupSubDirectoryJob)
+      .with(hash_including(directory: hyrax_uploaded_file_dir))
+  end
+
   it 'passes delete_all_after_days parameter to child jobs' do
     expect do
       described_class.perform_now(delete_ingested_after_days: 180,
                                   uploads_path: '/app/samvera/uploads',
                                   delete_all_after_days: 365)
     end.to have_enqueued_job(CleanupSubDirectoryJob)
-      .with(delete_ingested_after_days: 180, directory: 'path_1', delete_all_after_days: 365)
+      .with(
+        delete_ingested_after_days: 180,
+        directory: hex_dir_ff,
+        delete_all_after_days: 365
+      )
   end
 
   it 'uses default delete_all_after_days of 730 when not specified' do
     expect { described_class.perform_now(delete_ingested_after_days: 180, uploads_path: '/app/samvera/uploads') }
       .to have_enqueued_job(CleanupSubDirectoryJob)
-      .with(delete_ingested_after_days: 180, directory: 'path_1', delete_all_after_days: 730)
+      .with(
+        delete_ingested_after_days: 180,
+        directory: hex_dir_ff,
+        delete_all_after_days: 730
+      )
   end
 end