Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions seqBackupLib/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ def return_md5(fp: Path) -> str:
return hash_md5.hexdigest()


def check_backup(forward_reads: Path, dest_dir: Path) -> bool:
Copy link

Copilot AI Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function signature is inconsistent with that of backup_fastq, which takes sample_sheet_fp and other parameters. Consider adding a sample_sheet_fp parameter for consistency and to enable more comprehensive validation.

Suggested change
def check_backup(forward_reads: Path, dest_dir: Path) -> bool:
def check_backup(forward_reads: Path, dest_dir: Path, sample_sheet_fp: Path) -> bool:

Copilot uses AI. Check for mistakes.
with gzip.open(forward_reads, mode="rt") as f:
Copy link

Copilot AI Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing error handling for file operations. If forward_reads doesn't exist or isn't a valid gzip file, this will raise an unhandled exception instead of returning False.

Copilot uses AI. Check for mistakes.
r1 = IlluminaFastq(f)
archive_dir = dest_dir / r1.build_archive_dir()
md5_fp = archive_dir / f"{r1.build_archive_dir()}.md5"
return archive_dir.is_dir() and md5_fp.is_file()


def backup_fastq(
forward_reads: Path,
dest_dir: Path,
Expand Down Expand Up @@ -144,7 +152,14 @@ def main(argv=None):
default=DEFAULT_MIN_FILE_SIZE,
help="Minimum file size to register in bytes",
)
parser.add_argument(
"--check",
action="store_true",
help="Check if the target directory and md5 file exist without backing up",
)
args = parser.parse_args(argv)
if args.check:
return check_backup(args.forward_reads, args.destination_dir)
return backup_fastq(
args.forward_reads,
args.destination_dir,
Expand Down
31 changes: 31 additions & 0 deletions test/test_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,34 @@ def test_main_returns_archive_path(tmp_path, full_miseq_dir):
expected_dir = raw / "250407_M03543_0443_000000000-DTHBL_L001"
assert out_dir == expected_dir
assert expected_dir.is_dir()


def test_main_check(tmp_path, full_miseq_dir):
    """Exercise ``main`` with ``--check`` around a real backup run.

    The check result is expected to be falsy before any backup has been
    made, truthy right after one, and falsy again once the md5 file inside
    the archive directory has been removed.
    """
    dest = tmp_path / "raw_reads"
    dest.mkdir(parents=True, exist_ok=True)

    r1_fp = full_miseq_dir / "Undetermined_S0_L001_R1_001.fastq.gz"
    sheet_fp = full_miseq_dir / "sample_sheet.csv"
    run_name = "250407_M03543_0443_000000000-DTHBL_L001"

    backup_argv = [
        "--forward-reads",
        str(r1_fp),
        "--destination-dir",
        str(dest),
        "--sample-sheet",
        str(sheet_fp),
        "--min-file-size",
        "100",
    ]
    check_argv = [*backup_argv, "--check"]

    # Nothing has been archived yet, so the check must fail.
    assert not main(check_argv)

    # Perform the actual backup; afterwards the check must succeed.
    main(backup_argv)
    assert main(check_argv)

    # Removing only the md5 file is enough to make the check fail again.
    (dest / run_name / f"{run_name}.md5").unlink()
    assert not main(check_argv)