diff --git a/python/lib/config.py b/python/lib/config.py
index e011164bc..e503725d2 100644
--- a/python/lib/config.py
+++ b/python/lib/config.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 from typing import Literal
 
 from lib.db.queries.config import try_get_config_with_setting_name
@@ -26,15 +27,15 @@ def get_patient_id_dicom_header_config(env: Env) -> Literal['PatientID', 'Patien
     return patient_id_dicom_header
 
 
-def get_data_dir_path_config(env: Env) -> str:
+def get_data_dir_path_config(env: Env) -> Path:
     """
     Get the LORIS base data directory path from the in-database configuration, or exit the
     program with an error if that configuration value does not exist or is incorrect.
     """
 
-    data_dir_path = os.path.normpath(_get_config_value(env, 'dataDirBasepath'))
+    data_dir_path = Path(_get_config_value(env, 'dataDirBasepath'))
 
-    if not os.path.isdir(data_dir_path):
+    if not data_dir_path.is_dir():
         log_error_exit(
             env,
             (
@@ -52,20 +53,20 @@ def get_data_dir_path_config(env: Env) -> str:
     return data_dir_path
 
 
-def get_dicom_archive_dir_path_config(env: Env) -> str:
+def get_dicom_archive_dir_path_config(env: Env) -> Path:
     """
     Get the LORIS DICOM archive directory path from the in-database configuration, or exit the
     program with an error if that configuration value does not exist or is incorrect.
     """
 
-    dicom_archive_dir_path = os.path.normpath(_get_config_value(env, 'tarchiveLibraryDir'))
+    dicom_archive_dir_path = Path(_get_config_value(env, 'tarchiveLibraryDir'))
 
-    if not os.path.isdir(dicom_archive_dir_path):
+    if not dicom_archive_dir_path.is_dir():
         log_error_exit(
             env,
             (
                 f"The LORIS DICOM archive directory path configuration value '{dicom_archive_dir_path}' does not refer"
-                " to an existing diretory."
+                " to an existing directory."
             ),
         )
diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
index 5a241623f..9ec4112f1 100644
--- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
+++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
@@ -689,7 +689,7 @@ def _create_pic_image(self):
         """
         file_info = {
             'cand_id': self.session.candidate.cand_id,
-            'data_dir_path': self.data_dir,
+            'data_dir_path': str(self.data_dir),
             'file_rel_path': self.assembly_nifti_rel_path,
             'is_4D_dataset': self.json_file_dict['time'] is not None,
             'file_id': self.file_id
diff --git a/python/lib/import_dicom_study/dicom_database.py b/python/lib/import_dicom_study/dicom_database.py
index 90fc9354d..ef2472c44 100644
--- a/python/lib/import_dicom_study/dicom_database.py
+++ b/python/lib/import_dicom_study/dicom_database.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 from functools import cmp_to_key
+from pathlib import Path
 
 from sqlalchemy.orm import Session as Database
 
@@ -17,14 +18,14 @@ def insert_dicom_archive(
     db: Database,
     dicom_summary: DicomStudySummary,
     dicom_import_log: DicomStudyImportLog,
-    archive_location: str,
+    archive_path: Path,
 ):
     """
     Insert a DICOM archive in the database.
""" dicom_archive = DbDicomArchive() - populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_path) dicom_archive.date_first_archived = datetime.now() db.add(dicom_archive) db.commit() @@ -37,7 +38,7 @@ def update_dicom_archive( dicom_archive: DbDicomArchive, dicom_summary: DicomStudySummary, dicom_import_log: DicomStudyImportLog, - archive_location: str, + archive_path: Path, ): """ Update a DICOM archive in the database. @@ -47,7 +48,7 @@ def update_dicom_archive( delete_dicom_archive_file_series(db, dicom_archive) # Update the database record with the new DICOM information. - populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_location) + populate_dicom_archive(dicom_archive, dicom_summary, dicom_import_log, archive_path) db.commit() # Insert the new DICOM files and series. @@ -58,7 +59,7 @@ def populate_dicom_archive( dicom_archive: DbDicomArchive, dicom_summary: DicomStudySummary, dicom_import_log: DicomStudyImportLog, - archive_location: str, + archive_path: Path, ): """ Populate a DICOM archive database object with information from its DICOM summary and DICOM @@ -83,8 +84,8 @@ def populate_dicom_archive( dicom_archive.creating_user = dicom_import_log.creator_name dicom_archive.sum_type_version = dicom_import_log.summary_version dicom_archive.tar_type_version = dicom_import_log.archive_version - dicom_archive.source_location = dicom_import_log.source_path - dicom_archive.archive_location = archive_location + dicom_archive.source_location = str(dicom_import_log.source_path) + dicom_archive.archive_location = str(archive_path) dicom_archive.scanner_manufacturer = dicom_summary.info.scanner.manufacturer or '' dicom_archive.scanner_model = dicom_summary.info.scanner.model or '' dicom_archive.scanner_serial_number = dicom_summary.info.scanner.serial_number or '' diff --git a/python/lib/import_dicom_study/import_log.py b/python/lib/import_dicom_study/import_log.py index 1edf68a14..3c58e067e 100644 --- a/python/lib/import_dicom_study/import_log.py +++ b/python/lib/import_dicom_study/import_log.py @@ -3,6 +3,7 @@ import socket from dataclasses import dataclass from datetime import datetime +from pathlib import Path from lib.import_dicom_study.text_dict import DictWriter @@ -13,8 +14,8 @@ class DicomStudyImportLog: Information about the past import of a DICOM study. """ - source_path: str - target_path: str + source_path: Path + target_path: Path creator_host: str creator_os: str creator_name: str @@ -32,8 +33,8 @@ def write_dicom_study_import_log_to_string(import_log: DicomStudyImportLog): """ return DictWriter([ - ("Taken from dir", import_log.source_path), - ("Archive target location", import_log.target_path), + ("Taken from dir", str(import_log.source_path)), + ("Archive target location", str(import_log.target_path)), ("Name of creating host", import_log.creator_host), ("Name of host OS", import_log.creator_os), ("Created by user", import_log.creator_name), @@ -46,7 +47,7 @@ def write_dicom_study_import_log_to_string(import_log: DicomStudyImportLog): ]).write() -def write_dicom_study_import_log_to_file(import_log: DicomStudyImportLog, file_path: str): +def write_dicom_study_import_log_to_file(import_log: DicomStudyImportLog, file_path: Path): """ Serialize a DICOM study import log into a text file. 
""" @@ -56,7 +57,7 @@ def write_dicom_study_import_log_to_file(import_log: DicomStudyImportLog, file_p file.write(string) -def make_dicom_study_import_log(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): +def make_dicom_study_import_log(source: Path, target: Path, tarball_md5_sum: str, zipball_md5_sum: str): """ Create a DICOM study import log from the provided arguments about a DICOM study, as well as the current execution environment. diff --git a/python/lib/import_dicom_study/summary_get.py b/python/lib/import_dicom_study/summary_get.py index 7b6c09053..6ed083f07 100644 --- a/python/lib/import_dicom_study/summary_get.py +++ b/python/lib/import_dicom_study/summary_get.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import pydicom import pydicom.errors @@ -17,7 +18,7 @@ from lib.util.fs import iter_all_dir_files -def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): +def get_dicom_study_summary(dicom_study_dir_path: Path, verbose: bool): """ Get information about a DICOM study by reading the files in the DICOM study directory. """ @@ -31,7 +32,7 @@ def get_dicom_study_summary(dicom_study_dir_path: str, verbose: bool): if verbose: print(f"Processing file '{file_rel_path}' ({i}/{len(file_rel_paths)})") - file_path = os.path.join(dicom_study_dir_path, file_rel_path) + file_path = dicom_study_dir_path / file_rel_path try: dicom = pydicom.dcmread(file_path) # type: ignore @@ -112,13 +113,13 @@ def get_dicom_file_info(dicom: pydicom.Dataset) -> DicomStudyDicomFile: ) -def get_other_file_info(file_path: str) -> DicomStudyOtherFile: +def get_other_file_info(file_path: Path) -> DicomStudyOtherFile: """ Get information about a non-DICOM file within a DICOM study. """ return DicomStudyOtherFile( - os.path.basename(file_path), + file_path.name, compute_file_md5_hash(file_path), ) diff --git a/python/lib/import_dicom_study/summary_write.py b/python/lib/import_dicom_study/summary_write.py index aad415ebd..604dc36ac 100644 --- a/python/lib/import_dicom_study/summary_write.py +++ b/python/lib/import_dicom_study/summary_write.py @@ -1,5 +1,6 @@ import xml.etree.ElementTree as ET from functools import cmp_to_key +from pathlib import Path from lib.import_dicom_study.summary_type import ( DicomStudyDicomFile, @@ -14,14 +15,14 @@ from lib.util.iter import count, flatten -def write_dicom_study_summary_to_file(dicom_summary: DicomStudySummary, filename: str): +def write_dicom_study_summary_to_file(dicom_summary: DicomStudySummary, file_path: Path): """ Serialize a DICOM study summary object into a text file. """ - string = write_dicom_study_summary(dicom_summary) - with open(filename, 'w') as file: - file.write(string) + summary = write_dicom_study_summary(dicom_summary) + with open(file_path, 'w') as file: + file.write(summary) def write_dicom_study_summary(dicom_summary: DicomStudySummary) -> str: diff --git a/python/lib/import_dicom_study/text.py b/python/lib/import_dicom_study/text.py index b5f3004fe..01159dd76 100644 --- a/python/lib/import_dicom_study/text.py +++ b/python/lib/import_dicom_study/text.py @@ -3,8 +3,8 @@ different types of values. """ -import os from datetime import date, datetime +from pathlib import Path from lib.util.crypto import compute_file_md5_hash @@ -66,9 +66,9 @@ def read_float_none(string: str | None): return float(string) -def compute_md5_hash_with_name(path: str): +def compute_md5_hash_with_name(path: Path): """ Get the MD5 sum hash of a file with the filename appended. 
""" - return f'{compute_file_md5_hash(path)} {os.path.basename(path)}' + return f'{compute_file_md5_hash(path)} {path.name}' diff --git a/python/lib/util/crypto.py b/python/lib/util/crypto.py index 72a790512..84c25910c 100644 --- a/python/lib/util/crypto.py +++ b/python/lib/util/crypto.py @@ -1,7 +1,8 @@ import hashlib +from pathlib import Path -def compute_file_blake2b_hash(file_path: str) -> str: +def compute_file_blake2b_hash(file_path: Path | str) -> str: """ Compute the BLAKE2b hash of a file. """ @@ -15,7 +16,7 @@ def compute_file_blake2b_hash(file_path: str) -> str: return hash.hexdigest() -def compute_file_md5_hash(file_path: str) -> str: +def compute_file_md5_hash(file_path: Path | str) -> str: """ Compute the MD5 hash of a file. """ diff --git a/python/lib/util/fs.py b/python/lib/util/fs.py index 126a7c217..411211da5 100644 --- a/python/lib/util/fs.py +++ b/python/lib/util/fs.py @@ -5,6 +5,7 @@ import tempfile from collections.abc import Iterator from datetime import datetime +from pathlib import Path import lib.exitcode from lib.env import Env @@ -26,16 +27,17 @@ def extract_archive(env: Env, tar_path: str, prefix: str, dir_path: str) -> str: return extract_path -def iter_all_dir_files(dir_path: str) -> Iterator[str]: +def iter_all_dir_files(dir_path: Path) -> Iterator[Path]: """ Iterate through all the files in a directory recursively, and yield the path of each file relative to that directory. """ - for sub_dir_path, _, file_names in os.walk(dir_path): - for file_name in file_names: - file_path = os.path.join(sub_dir_path, file_name) - yield os.path.relpath(file_path, start=dir_path) + for item_path in dir_path.iterdir(): + if item_path.is_dir(): + yield from iter_all_dir_files(item_path) + elif item_path.is_file(): + yield item_path.relative_to(dir_path) def remove_directory(env: Env, path: str): diff --git a/python/scripts/import_dicom_study.py b/python/scripts/import_dicom_study.py index bec34dd11..19f549fab 100755 --- a/python/scripts/import_dicom_study.py +++ b/python/scripts/import_dicom_study.py @@ -5,6 +5,7 @@ import shutil import tarfile import tempfile +from pathlib import Path from typing import Any, cast import lib.exitcode @@ -30,7 +31,7 @@ class Args: profile: str - source: str + source: Path insert: bool update: bool session: bool @@ -39,7 +40,7 @@ class Args: def __init__(self, options_dict: dict[str, Any]): self.profile = options_dict['profile']['value'] - self.source = os.path.normpath(options_dict['source']['value']) + self.source = Path(options_dict['source']['value']) self.overwrite = options_dict['overwrite']['value'] self.insert = options_dict['insert']['value'] self.update = options_dict['update']['value'] @@ -108,13 +109,13 @@ def main() -> None: # Get the CLI arguments and connect to the database. - loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + loris_getopt_obj = LorisGetOpt(usage, options_dict, 'import_dicom_study') env = make_env(loris_getopt_obj) args = Args(loris_getopt_obj.options_dict) # Check arguments. - if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): + if not args.source.is_dir() or not os.access(args.source, os.R_OK): log_error_exit( env, "Argument '--source' must be a readable directory path.", @@ -141,7 +142,7 @@ def main() -> None: # Utility variables. - dicom_study_name = os.path.basename(args.source) + dicom_study_name = args.source.name log(env, "Extracting DICOM information... 
@@ -193,24 +194,24 @@
     if dicom_summary.info.scan_date is None:
         log_warning(env, "No DICOM scan date found in the DICOM files.")
 
-        dicom_archive_rel_path = f'DCM_{dicom_study_name}.tar'
+        dicom_archive_rel_path = Path(f'DCM_{dicom_study_name}.tar')
     else:
         log(env, f"Found DICOM scan date: {dicom_summary.info.scan_date}")
 
         scan_date_string = lib.import_dicom_study.text.write_date(dicom_summary.info.scan_date)
-        dicom_archive_rel_path = os.path.join(
-            str(dicom_summary.info.scan_date.year),
-            f'DCM_{scan_date_string}_{dicom_study_name}.tar',
+        dicom_archive_rel_path = (
+            Path(str(dicom_summary.info.scan_date.year))
+            / f'DCM_{scan_date_string}_{dicom_study_name}.tar'
         )
 
-        dicom_archive_year_dir_path = os.path.join(dicom_archive_dir_path, str(dicom_summary.info.scan_date.year))
-        if not os.path.exists(dicom_archive_year_dir_path):
+        dicom_archive_year_dir_path = dicom_archive_dir_path / str(dicom_summary.info.scan_date.year)
+        if not dicom_archive_year_dir_path.exists():
             log(env, f"Creating year directory '{dicom_archive_year_dir_path}'...")
-            os.mkdir(dicom_archive_year_dir_path)
+            dicom_archive_year_dir_path.mkdir()
 
-    dicom_archive_path = os.path.join(dicom_archive_dir_path, dicom_archive_rel_path)
+    dicom_archive_path = dicom_archive_dir_path / dicom_archive_rel_path
 
-    if os.path.exists(dicom_archive_path):
+    if dicom_archive_path.exists():
         if not args.overwrite:
             log_error_exit(
                 env,
@@ -219,20 +220,21 @@
 
         log_warning(env, f"Overwriting file '{dicom_archive_path}'...")
 
-        os.remove(dicom_archive_path)
+        dicom_archive_path.unlink()
 
-    with tempfile.TemporaryDirectory() as tmp_dir_path:
-        tar_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar')
-        zip_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.tar.gz')
-        summary_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.meta')
-        log_path = os.path.join(tmp_dir_path, f'{dicom_study_name}.log')
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        tmp_dir_path = Path(tmp_dir)
+        tar_path = tmp_dir_path / f'{dicom_study_name}.tar'
+        zip_path = tmp_dir_path / f'{dicom_study_name}.tar.gz'
+        summary_path = tmp_dir_path / f'{dicom_study_name}.meta'
+        log_path = tmp_dir_path / f'{dicom_study_name}.log'
 
         log(env, "Copying the DICOM files into a new tar archive...")
 
         with tarfile.open(tar_path, 'w') as tar:
             for file_rel_path in iter_all_dir_files(args.source):
-                file_path = os.path.join(args.source, file_rel_path)
-                file_tar_path = os.path.join(os.path.basename(args.source), file_rel_path)
+                file_path = args.source / file_rel_path
+                file_tar_path = Path(args.source.name) / file_rel_path
                 tar.add(file_path, arcname=file_tar_path)
 
         log(env, "Calculating the tar archive MD5 sum...")
@@ -270,9 +272,9 @@
         log(env, 'Copying files into the final DICOM study archive...')
 
         with tarfile.open(dicom_archive_path, 'w') as tar:
-            tar.add(zip_path, os.path.basename(zip_path))
-            tar.add(summary_path, os.path.basename(summary_path))
-            tar.add(log_path, os.path.basename(log_path))
+            tar.add(zip_path, zip_path.name)
+            tar.add(summary_path, summary_path.name)
+            tar.add(log_path, log_path.name)
 
         log(env, "Calculating final DICOM study archive MD5 sum...")
diff --git a/python/scripts/summarize_dicom_study.py b/python/scripts/summarize_dicom_study.py
index a0fffbb32..7df1d12cb 100755
--- a/python/scripts/summarize_dicom_study.py
+++ b/python/scripts/summarize_dicom_study.py
@@ -3,6 +3,7 @@
 import argparse
 import sys
 from dataclasses import dataclass
+from pathlib import Path
 
 import lib.exitcode
 from lib.import_dicom_study.summary_get import get_dicom_study_summary
@@ -15,6 +16,7 @@
 
 parser.add_argument(
     'directory',
+    type=Path,
     help='The DICOM directory')
 
 parser.add_argument(
@@ -25,7 +27,7 @@
 
 @dataclass
 class Args:
-    directory: str
+    directory: Path
     verbose: bool
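
Illustrative usage sketch (not part of the patch): the snippet below shows the behaviour the pathlib-based iter_all_dir_files is expected to preserve from the old os.walk version, namely that files in nested directories are yielded relative to the top-level directory so that callers can re-join them with dir_path / rel_path. The import path (lib.util.fs) comes from the diff; the temporary directory layout and file names are invented for the example.

import tempfile
from pathlib import Path

from lib.util.fs import iter_all_dir_files

with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_dir_path = Path(tmp_dir)

    # Hypothetical study layout: one nested DICOM file and one loose file.
    (tmp_dir_path / 'series1').mkdir()
    (tmp_dir_path / 'series1' / '001.dcm').touch()
    (tmp_dir_path / 'notes.txt').touch()

    rel_paths = sorted(iter_all_dir_files(tmp_dir_path))

    # Relative paths keep their subdirectory prefix, matching the previous
    # os.walk + os.path.relpath behaviour, so `tmp_dir_path / rel_path` is valid.
    assert rel_paths == [Path('notes.txt'), Path('series1/001.dcm')]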