diff --git a/seqBackupLib/backup.py b/seqBackupLib/backup.py
index f9745a0..56fd0d7 100644
--- a/seqBackupLib/backup.py
+++ b/seqBackupLib/backup.py
@@ -41,6 +41,21 @@ def return_md5(fp: Path) -> str:
     return hash_md5.hexdigest()
 
 
+def check_backup(forward_reads: Path, dest_dir: Path) -> bool:
+    """Return whether the archive for ``forward_reads`` exists in ``dest_dir``.
+
+    Only looks for the archive directory and its md5 file; nothing is copied
+    or modified.
+    """
+    with gzip.open(forward_reads, mode="rt") as f:
+        r1 = IlluminaFastq(f)
+        # Compute once: the name is both the directory and the md5 file stem.
+        archive_name = r1.build_archive_dir()
+    archive_dir = dest_dir / archive_name
+    md5_fp = archive_dir / f"{archive_name}.md5"
+    return archive_dir.is_dir() and md5_fp.is_file()
+
+
 def backup_fastq(
     forward_reads: Path,
     dest_dir: Path,
@@ -144,7 +159,15 @@ def main(argv=None):
         default=DEFAULT_MIN_FILE_SIZE,
         help="Minimum file size to register in bytes",
     )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check if the target directory and md5 file exist without backing up",
+    )
     args = parser.parse_args(argv)
+    # --check only reports on an existing backup; it never copies files.
+    if args.check:
+        return check_backup(args.forward_reads, args.destination_dir)
     return backup_fastq(
         args.forward_reads,
         args.destination_dir,
diff --git a/test/test_backup.py b/test/test_backup.py
index b571898..75c07c6 100644
--- a/test/test_backup.py
+++ b/test/test_backup.py
@@ -132,3 +132,38 @@ def test_main_returns_archive_path(tmp_path, full_miseq_dir):
     expected_dir = raw / "250407_M03543_0443_000000000-DTHBL_L001"
     assert out_dir == expected_dir
     assert expected_dir.is_dir()
+
+
+def test_main_check(tmp_path, full_miseq_dir):
+    """Exercise ``--check`` before a backup, after it, and after md5 removal."""
+    raw = tmp_path / "raw_reads"
+    raw.mkdir(parents=True, exist_ok=True)
+    sample_sheet_fp = full_miseq_dir / "sample_sheet.csv"
+
+    base_args = [
+        "--forward-reads",
+        str(full_miseq_dir / "Undetermined_S0_L001_R1_001.fastq.gz"),
+        "--destination-dir",
+        str(raw),
+        "--sample-sheet",
+        str(sample_sheet_fp),
+        "--min-file-size",
+        "100",
+    ]
+
+    # The destination is empty, so the check must report no backup.
+    assert not main(base_args + ["--check"])
+
+    # Run the real backup, after which the check must succeed.
+    main(base_args)
+
+    assert main(base_args + ["--check"])
+
+    # The md5 file alone is enough to invalidate the check when missing.
+    md5_fp = (
+        raw
+        / "250407_M03543_0443_000000000-DTHBL_L001"
+        / "250407_M03543_0443_000000000-DTHBL_L001.md5"
+    )
+    md5_fp.unlink()
+    assert not main(base_args + ["--check"])