diff --git a/pyproject.toml b/pyproject.toml
index 929bf2b8a..417072c00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "nose",
     "numpy",
     "pybids==0.17.0",
+    "pydantic",
     "pydicom",
     "python-dateutil",
     "scikit-learn",
@@ -45,6 +46,7 @@ allow-direct-references = true
 [tool.hatch.build.targets.wheel]
 packages = [
     "python/lib",
+    "python/loris_bids_reader",
     "python/tests",
 ]
 
@@ -55,7 +57,7 @@ line-length = 120
 preview = true
 
 [tool.ruff.lint]
-ignore = ["E202", "E203", "E221", "E241", "E251", "E272"]
+ignore = ["E202", "E203", "E221", "E241", "E251", "E271", "E272"]
 select = ["E", "EXE", "F", "I", "N", "RUF", "UP", "W"]
 
 [tool.ruff.lint.per-file-ignores]
@@ -70,15 +72,17 @@ include = [
     "python/tests",
     "python/lib/db",
     "python/lib/imaging_lib",
+    "python/lib/import_bids_dataset",
     "python/lib/import_dicom_study",
     "python/lib/util",
-    "python/lib/bids.py",
     "python/lib/config.py",
     "python/lib/config_file.py",
     "python/lib/env.py",
     "python/lib/get_session_info.py",
     "python/lib/logging.py",
     "python/lib/make_env.py",
+    "python/loris_bids_reader",
+    "python/scripts/import_bids_dataset.py",
     "python/scripts/import_dicom_study.py",
     "python/scripts/summarize_dicom_study.py",
 ]
diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py
deleted file mode 100644
index bd7da2508..000000000
--- a/python/lib/bidsreader.py
+++ /dev/null
@@ -1,283 +0,0 @@
-"""Reads a BIDS structure into a data dictionary using bids.grabbids."""
-
-import json
-import re
-import sys
-
-from bids import BIDSLayout
-
-import lib.exitcode
-import lib.utilities as utilities
-
-# import bids
-# BIDSLayoutIndexer is required for PyBIDS >= 0.12.1
-# bids_pack_version = list(map(int, bids.__version__.split('.')))
-# if (bids_pack_version[0] > 0
-#         or bids_pack_version[1] > 12
-#         or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)):
-
-#     from bids import BIDSLayoutIndexer
-
-
-class BidsReader:
-    """
-    This class reads a BIDS structure into a data dictionary using BIDS grabbids.
-    This dictionary will then be used to determine what to register into the
-    database.
-
-    :Example:
-
-        from lib.bidsreader import BidsReader
-
-        # load the BIDS directory
-        bids_reader = BidsReader(bids_dir)
-    """
-
-    def __init__(self, bids_dir, verbose, validate = True):
-        """
-        Constructor method for the BidsReader class.
-
-        :param bids_dir: path to the BIDS structure to read
-        :type bids_dir: str
-        :param verbose : boolean to print verbose information
-        :type verbose : bool
-        :param validate : boolean to validate the BIDS dataset
-        :type validate : bool
-        """
-
-        self.verbose = verbose
-        self.bids_dir = bids_dir
-        self.bids_layout = self.load_bids_data(validate)
-
-        # load dataset name and BIDS version
-        self.dataset_name = None
-        self.bids_version = None
-        try:
-            dataset_json = bids_dir + "/dataset_description.json"
-            dataset_description = {}
-            with open(dataset_json) as json_file:
-                dataset_description = json.load(json_file)
-            self.dataset_name = dataset_description['Name']
-            self.bids_version = dataset_description['BIDSVersion']
-        except Exception:
-            print("WARNING: Cannot read dataset_description.json")
-
-        # load BIDS candidates information
-        self.participants_info = self.load_candidates_from_bids()
-
-        # load BIDS sessions information
-        self.cand_sessions_list = self.load_sessions_from_bids()
-
-        # load BIDS modality information
-        self.cand_session_modalities_list = self.load_modalities_from_bids()
-
-    def load_bids_data(self, validate):
-        """
-        Loads the BIDS study using the BIDSLayout function (part of the pybids
-        package) and return the object.
-
-        :return: bids structure
-        """
-
-        if self.verbose:
-            print('Loading the BIDS dataset with BIDS layout library...\n')
-
-        exclude_arr = ['code/', 'sourcedata/', 'log/', '.git']
-        force_arr = [re.compile(r"_annotations\.(tsv|json)$")]
-
-        # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1
-        # bids_pack_version = list(map(int, bids.__version__.split('.')))
-        # disabled until is a workaround for https://github.com/bids-standard/pybids/issues/760 is found
-        #   [file] bids_import.py
-        #   [function] read_and_insert_bids
-        #   [line] for modality in row['modalities']: (row['modalities'] is empty)
-        # if (bids_pack_version[0] > 0
-        #         or bids_pack_version[1] > 12
-        #         or (bids_pack_version[1] == 12 and bids_pack_version[2] > 0)):
-        #     bids_layout = BIDSLayout(
-        #         root=self.bids_dir,
-        #         indexer=BIDSLayoutIndexer(ignore=exclude_arr, force_index=force_arr)
-        #     )
-        # else:
-        bids_layout = BIDSLayout(
-            root=self.bids_dir,
-            ignore=exclude_arr,
-            force_index=force_arr,
-            derivatives=True,
-            validate=validate
-        )
-
-        if self.verbose:
-            print('\t=> BIDS dataset loaded with BIDS layout\n')
-
-        return bids_layout
-
-    def load_candidates_from_bids(self):
-        """
-        Loads the list of candidates from the BIDS study. List of
-        participants and their information will be stored in participants_info.
-
-        :return: list of dictionaries with participant information from BIDS
-        :rtype: list
-        """
-
-        if self.verbose:
-            print('Grepping candidates from the BIDS layout...')
-
-        # grep the participant.tsv file and parse it
-        participants_info = None
-        for file in self.bids_layout.get(suffix='participants', return_type='filename'):
-            # note file[0] returns the path to participants.tsv
-            if 'participants.tsv' in file:
-                participants_info = utilities.read_tsv_file(file)
-            else:
-                continue
-
-        if participants_info:
-            self.candidates_list_validation(participants_info)
-        else:
-            bids_subjects = self.bids_layout.get_subjects()
-            participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects]
-
-        if self.verbose:
-            print('\t=> List of participants found:')
-            for participant in participants_info:
-                print('\t\t' + participant['participant_id'])
-            print('\n')
-
-        return participants_info
-
-    def candidates_list_validation(self, participants_info):
-        """
-        Validates whether the subjects listed in participants.tsv match the
-        list of participant directory. If there is a mismatch, will exit with
-        error code from lib.exitcode.
-        """
-
-        if self.verbose:
-            print('Validating the list of participants...')
-
-        subjects = self.bids_layout.get_subjects()
-
-        mismatch_message = ("\nERROR: Participant ID mismatch between "
-                            "participants.tsv and raw data found in the BIDS "
-                            "directory")
-
-        # check that all subjects listed in participants_info are also in
-        # subjects array and vice versa
-        for row in participants_info:
-            # remove the "sub-" in front of the subject ID if present
-            row['participant_id'] = row['participant_id'].replace('sub-', '')
-            if row['participant_id'] not in subjects:
-                print(mismatch_message)
-                print(row['participant_id'] + 'is missing from the BIDS Layout')
-                print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects))
-                sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH)
-            # remove the subject from the list of subjects
-            subjects.remove(row['participant_id'])
-
-        # check that no subjects are left in subjects array
-        if subjects:
-            print(mismatch_message)
-            sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH)
-
-        if self.verbose:
-            print('\t=> Passed validation of the list of participants\n')
-
-    def load_sessions_from_bids(self):
-        """
-        Grep the list of sessions for each candidate directly from the BIDS
-        structure.
-
-        :return: dictionary with the list of sessions and candidates found in the
-                 BIDS structure
-        :rtype: dict
-        """
-
-        if self.verbose:
-            print('Grepping list of sessions from the BIDS layout...')
-
-        cand_sessions = {}
-
-        for row in self.participants_info:
-            ses = self.bids_layout.get_sessions(subject=row['participant_id'])
-            cand_sessions[row['participant_id']] = ses
-
-        if self.verbose:
-            print('\t=> List of sessions found:\n')
-            for candidate in cand_sessions:
-                if cand_sessions[candidate]:
-                    print('\t\t' + candidate + ': ' + ', '.join(cand_sessions[candidate]))
-                else:
-                    print('\t\tNo session found for candidate ' + candidate)
-            print('\n')
-
-        return cand_sessions
-
-    def load_modalities_from_bids(self):
-        """
-        Grep the list of modalities available for each session and candidate directly
-        from the BIDS structure.
-
-        :return: dictionary for candidate and session with list of modalities
-        :rtype: dict
-        """
-
-        if self.verbose:
-            print('Grepping the different modalities from the BIDS layout...')
-
-        cand_session_modalities_list = []
-
-        for subject, visit_list in self.cand_sessions_list.items():
-            if visit_list:
-                for visit in visit_list:
-                    modalities = self.bids_layout.get_datatype(subject=subject, session=visit)
-                    cand_session_modalities_list.append({
-                        'bids_sub_id': subject,
-                        'bids_ses_id': visit,
-                        'modalities' : modalities
-                    })
-            else:
-                modalities = self.bids_layout.get_datatype(subject=subject)
-                cand_session_modalities_list.append({
-                    'bids_sub_id': subject,
-                    'bids_ses_id': None,
-                    'modalities' : modalities
-                })
-
-        if self.verbose:
-            print('\t=> Done grepping the different modalities from the BIDS layout\n')
-
-        return cand_session_modalities_list
-
-    @staticmethod
-    def grep_file(files_list, match_pattern, derivative_pattern=None):
-        """
-        Grep a unique file based on a match pattern and returns it.
-
-        :param files_list : list of files to look into
-        :type files_list : list
-        :param match_pattern : pattern to use to find the file
-        :type match_pattern : str
-        :param derivative_pattern: derivative pattern to use if the file we look for
-                                   is a derivative file
-        :type derivative_pattern: str
-
-        :return: name of the first file that matches the pattern
-        :rtype: str
-        """
-
-        for filename in files_list:
-            if not derivative_pattern:
-                if 'derivatives' in filename:
-                    # skip all files with 'derivatives' string in their path
-                    continue
-                elif re.search(match_pattern, filename):
-                    # grep the file that matches the match_pattern (extension)
-                    return filename
-            else:
-                matches_derivative = re.search(derivative_pattern, filename)
-                if re.search(match_pattern, filename) and matches_derivative:
-                    return filename
-
-        return None
diff --git a/python/lib/candidate.py b/python/lib/candidate.py
index d0994beb1..8f617f466 100644
--- a/python/lib/candidate.py
+++ b/python/lib/candidate.py
@@ -1,11 +1,6 @@
 """This class gather functions for candidate handling."""
 
 import random
-import sys
-
-from dateutil.parser import parse
-
-import lib.exitcode
 
 
 class Candidate:
@@ -57,127 +52,6 @@ def __init__(self, verbose, psc_id=None, cand_id=None, sex=None, dob=None):
         self.center_id = None
         self.project_id = None
 
-    def create_candidate(self, db, participants_info):
-        """
-        Creates a candidate using BIDS information provided in the
-        participants_info's list.
-
-        :param db : database handler object
-        :type db : object
-        :param participants_info: list of dictionary with participants
-                                  information from BIDS
-        :type participants_info: list
-
-        :return: dictionary with candidate info from the candidate's table
-        :rtype: dict
-        """
-
-        if not self.psc_id:
-            print("Cannot create a candidate without a PSCID.\n")
-            sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE)
-
-        if not self.cand_id:
-            self.cand_id = self.generate_cand_id(db)
-
-        for row in participants_info:
-            if not row['participant_id'] == self.psc_id:
-                continue
-            self.grep_bids_dob(row)
-            if 'sex' in row:
-                self.map_sex(row['sex'])
-            if 'age' in row:
-                self.age = row['age']
-
-            # three steps to find site:
-            # 1. try matching full name from 'site' column in participants.tsv in db
-            # 2. try extracting alias from pscid
-            # 3. try finding previous site in candidate table
-
-            if 'site' in row and row['site'].lower() not in ("null", ""):
-                # search site id in psc table by its full name
-                site_info = db.pselect(
-                    "SELECT CenterID FROM psc WHERE Name = %s",
-                    [row['site'], ]
-                )
-                if len(site_info) > 0:
-                    self.center_id = site_info[0]['CenterID']
-
-            if self.center_id is None:
-                # search site id in psc table by its alias extracted from pscid
-                db_sites = db.pselect("SELECT CenterID, Alias FROM psc")
-                for site in db_sites:
-                    if site['Alias'] in row['participant_id']:
-                        self.center_id = site['CenterID']
-
-            if self.center_id is None:
-                # try to find participant site in db
-                candidate_site_project = db.pselect(
-                    "SELECT RegistrationCenterID FROM candidate WHERE pscid = %s",
-                    [self.psc_id, ]
-                )
-                if len(candidate_site_project) > 0:
-                    self.center_id = candidate_site_project[0]['RegistrationCenterID']
-
-            # two steps to find project:
-            # 1. find full name in 'project' column in participants.tsv
-            # 2. find previous in candidate table
-
-            if 'project' in row and row['project'].lower() not in ("null", ""):
-                # search project id in Project table by its full name
-                project_info = db.pselect(
-                    "SELECT ProjectID FROM Project WHERE Name = %s OR Alias = %s",
-                    [row['project'], row['project']]
-                )
-                if len(project_info) > 0:
-                    self.project_id = project_info[0]['ProjectID']
-
-            if self.project_id is None:
-                # try to find participant project
-                candidate_site_project = db.pselect(
-                    "SELECT RegistrationProjectID FROM candidate WHERE pscid = %s",
-                    [self.psc_id, ]
-                )
-                if len(candidate_site_project) > 0:
-                    self.center_id = candidate_site_project[0]['RegistrationProjectID']
-
-        if not self.center_id:
-            print("ERROR: could not determine site for " + self.psc_id + "."
-                  + " Please check that your psc table contains a site with an"
-                  + " alias matching the BIDS participant_id or a name matching the site mentioned in"
-                  + " participants.tsv's site column")
-            sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE)
-
-        if not self.project_id:
-            print("ERROR: could not determine project for " + self.psc_id + "."
-                  + " Please check that your project table contains a project with a"
-                  + " name matching the participants.tsv's project column")
-            sys.exit(lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE)
-
-        if self.verbose:
-            print("Creating candidate with \n"
-                  + "PSCID = " + self.psc_id + ",\n"
-                  + "CandID = " + str(self.cand_id) + ",\n"
-                  + "CenterID = " + str(self.center_id) + ",\n"
-                  + "ProjectID = " + str(self.project_id))
-
-        insert_col = ('PSCID', 'CandID', 'RegistrationCenterID', 'RegistrationProjectID')
-        insert_val = (self.psc_id, str(self.cand_id), str(self.center_id), str(self.project_id))
-
-        if self.sex:
-            insert_col = (*insert_col, 'Sex')
-            insert_val = (*insert_val, self.sex)
-        if self.dob:
-            insert_col = (*insert_col, 'DoB')
-            insert_val = (*insert_val, self.dob)
-
-        db.insert(
-            table_name='candidate',
-            column_names=insert_col,
-            values=insert_val
-        )
-
-        return self.get_candidate_info_from_loris(db)
-
     def get_candidate_info_from_loris(self, db):
         """
         Grep candidate information from the candidate table using the PSCID or CandID.
@@ -218,22 +92,6 @@ def map_sex(self, sex):
         if sex.lower() in ('f', 'female'):
             self.sex = 'Female'
 
-    def grep_bids_dob(self, subject_info):
-        """
-        Greps the date of birth from the BIDS structure and add it to self.dob which
-        will be inserted into the DoB field of the candidate table
-
-        :param subject_info: dictionary with all information present in the BIDS
-                             participants.tsv file for a given candidate
-        :type subject_info: dict
-        """
-
-        dob_names = ['date_of_birth', 'birth_date', 'dob']
-        for name in dob_names:
-            if name in subject_info:
-                dob = parse(subject_info[name])
-                self.dob = dob.strftime('%Y-%m-%d')
-
     @staticmethod
     def generate_cand_id(db):
         """
diff --git a/python/lib/config.py b/python/lib/config.py
index e503725d2..5fc033c0c 100644
--- a/python/lib/config.py
+++ b/python/lib/config.py
@@ -27,6 +27,15 @@ def get_patient_id_dicom_header_config(env: Env) -> Literal['PatientID', 'Patien
     return patient_id_dicom_header
 
 
+def get_default_bids_visit_label_config(env: Env) -> str:
+    """
+    Get the default BIDS visit label from the in-database configuration, or exit the program with
+    an error if that configuration value does not exist.
+    """
+
+    return _get_config_value(env, 'default_bids_vl')
+
+
 def get_data_dir_path_config(env: Env) -> Path:
     """
     Get the LORIS base data directory path from the in-database configuration, or exit the program
@@ -79,6 +88,15 @@ def get_dicom_archive_dir_path_config(env: Env) -> Path:
     return dicom_archive_dir_path
 
 
+def get_eeg_viz_enabled_config(env: Env) -> bool:
+    """
+    Get whether the EEG visualization is enabled from the in-database configuration.
+    """
+
+    eeg_viz_enabled = _try_get_config_value(env, 'useEEGBrowserVisualizationComponents')
+    return eeg_viz_enabled == 'true' or eeg_viz_enabled == '1'
+
+
 def _get_config_value(env: Env, setting_name: str) -> str:
     """
     Get a configuration value from the database using a configuration setting name, or exit the
@@ -99,3 +117,13 @@ def _get_config_value(env: Env, setting_name: str) -> str:
     )
 
     return config.value
+
+
+def _try_get_config_value(env: Env, setting_name: str) -> str | None:
+    """
+    Get a configuration value from the database using a configuration setting name, or return
+    `None` if that value does not exist.
+    """
+
+    config = try_get_config_with_setting_name(env.db, setting_name)
+    return config.value if config is not None else None
diff --git a/python/lib/db/models/physio_file.py b/python/lib/db/models/physio_file.py
index 4c8819ddd..dde1dcdcd 100644
--- a/python/lib/db/models/physio_file.py
+++ b/python/lib/db/models/physio_file.py
@@ -16,10 +16,10 @@ class DbPhysioFile(Base):
 
     id               : Mapped[int]             = mapped_column('PhysiologicalFileID', primary_key=True)
     modality_id      : Mapped[int | None]      = mapped_column('PhysiologicalModalityID', ForeignKey('physiological_modality.PhysiologicalModalityID'))
-    output_type_id   : Mapped[int      ]       = mapped_column('PhysiologicalOutputTypeID', ForeignKey('physiological_output_type.PhysiologicalOutputTypeID'))
-    session_id       : Mapped[int      ]       = mapped_column('SessionID')
-    insert_time      : Mapped[datetime]        = mapped_column('InsertTime')
-    file_type        : Mapped[str | None]      = mapped_column('FileType')
+    output_type_id   : Mapped[int]             = mapped_column('PhysiologicalOutputTypeID', ForeignKey('physiological_output_type.PhysiologicalOutputTypeID'))
+    session_id       : Mapped[int]             = mapped_column('SessionID')
+    insert_time      : Mapped[datetime]        = mapped_column('InsertTime', default=datetime.now)
+    type             : Mapped[str | None]      = mapped_column('FileType')
     acquisition_time : Mapped[datetime | None] = mapped_column('AcquisitionTime')
     inserted_by_user : Mapped[str]             = mapped_column('InsertedByUser')
     path             : Mapped[Path]            = mapped_column('FilePath', StringPath)
diff --git a/python/lib/db/queries/imaging_file_type.py b/python/lib/db/queries/imaging_file_type.py
index 403a2bf2e..2b38e449b 100644
--- a/python/lib/db/queries/imaging_file_type.py
+++ b/python/lib/db/queries/imaging_file_type.py
@@ -12,3 +12,14 @@ def get_all_imaging_file_types(db: Database) -> Sequence[DbImagingFileType]:
     """
 
     return db.execute(select(DbImagingFileType)).scalars().all()
+
+
+def try_get_imaging_file_type_with_type(db: Database, type: str) -> DbImagingFileType | None:
+    """
+    Get an imaging file type from the database using its type, or return `None` if no imaging file
+    type is found.
+    """
+
+    return db.execute(select(DbImagingFileType)
+        .where(DbImagingFileType.type == type)
+    ).scalar_one_or_none()
diff --git a/python/lib/db/queries/parameter_type.py b/python/lib/db/queries/parameter_type.py
index a4102feec..d9ba90d2a 100644
--- a/python/lib/db/queries/parameter_type.py
+++ b/python/lib/db/queries/parameter_type.py
@@ -15,14 +15,17 @@ def get_all_parameter_types(db: Database) -> Sequence[DbParameterType]:
     return db.execute(select(DbParameterType)).scalars().all()
 
 
-def try_get_parameter_type_with_name(db: Database, name: str) -> DbParameterType | None:
+def try_get_parameter_type_with_name_source(db: Database, name: str, source: str) -> DbParameterType | None:
     """
-    Get a parameter type from the database using its name, or return `None` if no parameter type is
-    found.
+    Get a parameter type from the database using its name and source, or return `None` if no
+    parameter type is found.
""" return db.execute(select(DbParameterType) - .where(DbParameterType.name == name) + .where( + DbParameterType.name == name, + DbParameterType.source_from == source, + ) ).scalar_one_or_none() diff --git a/python/lib/db/queries/physio.py b/python/lib/db/queries/physio.py new file mode 100644 index 000000000..42f9859c7 --- /dev/null +++ b/python/lib/db/queries/physio.py @@ -0,0 +1,41 @@ +from pathlib import Path + +from sqlalchemy import select +from sqlalchemy.orm import Session as Database + +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.physio_modality import DbPhysioModality +from lib.db.models.physio_output_type import DbPhysioOutputType + + +def try_get_physio_file_with_path(db: Database, path: Path) -> DbPhysioFile | None: + """ + Get a physiological file from the database using its path, or return `None` if no file was + found. + """ + + return db.execute(select(DbPhysioFile) + .where(DbPhysioFile.path == str(path)) + ).scalar_one_or_none() + + +def try_get_physio_modality_with_name(db: Database, name: str) -> DbPhysioModality | None: + """ + Get a physiological modality from the database using its name, or return `None` if no modality + was found. + """ + + return db.execute(select(DbPhysioModality) + .where(DbPhysioModality.name == name) + ).scalar_one_or_none() + + +def try_get_physio_output_type_with_name(db: Database, name: str) -> DbPhysioOutputType | None: + """ + Get a physiological output type from the database using its name, or return `None` if no + output type was found. + """ + + return db.execute(select(DbPhysioOutputType) + .where(DbPhysioOutputType.name == name) + ).scalar_one_or_none() diff --git a/python/lib/db/queries/physio_parameter.py b/python/lib/db/queries/physio_parameter.py index b062b47dc..4d5bf0146 100644 --- a/python/lib/db/queries/physio_parameter.py +++ b/python/lib/db/queries/physio_parameter.py @@ -5,6 +5,54 @@ from lib.db.models.parameter_type import DbParameterType from lib.db.models.physio_file_parameter import DbPhysioFileParameter +from lib.db.queries.parameter_type import try_get_parameter_type_with_name_source + + +def try_get_physio_parameter_type_with_name( + db: Database, + name: str +) -> DbParameterType | None: + """ + Try to get a physiological parameter type using its name, or return `None` if no physiological + parameter is found. + """ + + return try_get_parameter_type_with_name_source(db, name, 'physiological_file') + + +def try_get_physio_file_parameter_with_file_id_type_id( + db: Database, + file_id: int, + type_id: int, +) -> DbPhysioFileParameter | None: + """ + Get a physiological file parameter from the database using its file ID and type ID, or return + `None` if no physiological file parameter is found. + """ + + return db.execute(select(DbPhysioFileParameter) + .where( + DbPhysioFileParameter.type_id == type_id, + DbPhysioFileParameter.file_id == file_id, + ) + ).scalar_one_or_none() + + +def try_get_physio_file_parameter_with_file_id_name( + db: Database, + file_id: int, + name: str, +) -> DbPhysioFileParameter | None: + """ + Try to get a physiological file parameter using its file ID and parameter type name, or return + `None` if no parameter is found. 
+    """
+
+    parameter_type = try_get_physio_parameter_type_with_name(db, name)
+    if parameter_type is None:
+        return None
+
+    return try_get_physio_file_parameter_with_file_id_type_id(db, file_id, parameter_type.id)
 
 
 def get_physio_file_parameters(
diff --git a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
index dbdd3ab49..79a04b81d 100644
--- a/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
+++ b/python/lib/dcm2bids_imaging_pipeline_lib/nifti_insertion_pipeline.py
@@ -7,13 +7,13 @@
 import sys
 
 import lib.exitcode
-from lib.bids import get_bids_json_session_info
 from lib.db.queries.dicom_archive import try_get_dicom_archive_series_with_series_uid_echo_time
 from lib.dcm2bids_imaging_pipeline_lib.base_pipeline import BasePipeline
 from lib.get_session_info import SessionConfigError, get_dicom_archive_session_info
-from lib.imaging_lib.nifti import add_nifti_spatial_file_parameters
+from lib.imaging_lib.nifti import add_nifti_file_parameters
 from lib.logging import log_error_exit, log_verbose
 from lib.util.crypto import compute_file_blake2b_hash, compute_file_md5_hash
+from loris_bids_reader.json import get_bids_json_session_info
 
 
 class NiftiInsertionPipeline(BasePipeline):
@@ -74,7 +74,7 @@ def __init__(self, loris_getopt_obj, script_name):
         # Load the JSON file object with scan parameters if a JSON file was provided
         # ---------------------------------------------------------------------------------------------
         self.json_file_dict = self._load_json_sidecar_file()
-        add_nifti_spatial_file_parameters(self.nifti_path, self.json_file_dict)
+        add_nifti_file_parameters(self.nifti_path, self.nifti_blake2, self.json_file_dict)
 
         # ---------------------------------------------------------------------------------
         # Determine subject IDs based on DICOM headers and validate the IDs against the DB
@@ -560,7 +560,6 @@ def _create_destination_dir_and_move_image_files(self, destination):
             self.move_file(original_file_path, new_file_path)
 
         if destination == 'assembly_bids':
-            self.json_file_dict['file_blake2b_hash'] = self.nifti_blake2
             if self.json_path:
                 self.json_file_dict['bids_json_file'] = json_rel_path
                 self.json_file_dict['bids_json_file_blake2b_hash'] = self.json_blake2
diff --git a/python/lib/eeg.py b/python/lib/eeg.py
index cfddb1bad..eb402969c 100644
--- a/python/lib/eeg.py
+++ b/python/lib/eeg.py
@@ -3,117 +3,53 @@
 import getpass
 import json
 import os
-import sys
+from typing import Any, Literal
 
 import lib.exitcode
 import lib.utilities as utilities
-from lib.candidate import Candidate
+from lib.database import Database
 from lib.database_lib.config import Config
 from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive
 from lib.database_lib.physiological_event_file import PhysiologicalEventFile
 from lib.database_lib.physiological_modality import PhysiologicalModality
 from lib.database_lib.physiological_output_type import PhysiologicalOutputType
+from lib.db.models.session import DbSession
 from lib.physiological import Physiological
-from lib.scanstsv import ScansTSV
-from lib.session import Session
 from lib.util.crypto import compute_file_blake2b_hash
+from loris_bids_reader.eeg.data_type import BIDSEEGDataType
 
 
 class Eeg:
     """
     This class reads the BIDS EEG data structure and register the EEG datasets
     into the database by calling the lib.physiological class.
-
-    :Example:
-
-        from lib.bidsreader import BidsReader
-        from lib.eeg import Eeg
-        from lib.database import Database
-        from lib.database_lib.config import Config
-
-        # database connection
-        db = Database(config_file.mysql, verbose)
-        db.connect()
-
-        # grep config settings from the Config module
-        config_obj = Config(db, verbose)
-        default_bids_vl = config_obj.get_config('default_bids_vl')
-        data_dir = config_obj.get_config('dataDirBasepath')
-
-        # load the BIDS directory
-        bids_reader = BidsReader(bids_dir)
-
-        # create the LORIS_BIDS directory in data_dir based on Name and BIDS version
-        loris_bids_root_dir = create_loris_bids_directory(
-            bids_reader, data_dir, verbose
-        )
-        for row in bids_reader.cand_session_modalities_list:
-            for modality in row['modalities']:
-                if modality == 'eeg':
-                    bids_session = row['bids_ses_id']
-                    visit_label = bids_session if bids_session else default_bids_vl
-                    loris_bids_eeg_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \
-                                             "ses-" + visit_label + "/eeg/"
-                    lib.utilities.create_dir(
-                        loris_bids_root_dir + loris_bids_eeg_rel_dir, verbose
-                    )
-                    Eeg(
-                        bids_reader            = bids_reader,
-                        bids_sub_id            = row['bids_sub_id'],
-                        bids_ses_id            = row['bids_ses_id'],
-                        bids_modality          = modality,
-                        db                     = db,
-                        verbose                = verbose,
-                        data_dir               = data_dir,
-                        default_visit_label    = default_bids_vl,
-                        loris_bids_eeg_rel_dir = loris_bids_eeg_rel_dir,
-                        loris_bids_root_dir    = loris_bids_root_dir,
-                        dataset_tag_dict       = dataset_tag_dict
-                    )
-
-        # disconnect from the database
-        db.disconnect()
     """
 
-    def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db,
-                 verbose, data_dir, default_visit_label, loris_bids_eeg_rel_dir,
-                 loris_bids_root_dir, dataset_tag_dict, dataset_type):
+    def __init__(
+        self, data_type: BIDSEEGDataType, session: DbSession, db: Database, verbose: bool, data_dir: str,
+        loris_bids_eeg_rel_dir: str, loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any],
+        dataset_type: Literal['raw', 'derivative'] | None,
+    ):
         """
         Constructor method for the Eeg class.
 
-        :param bids_reader  : dictionary with BIDS reader information
-        :type bids_reader   : dict
-        :param bids_sub_id  : BIDS subject ID (that will be used as PSCID)
-        :type bids_sub_id   : str
-        :param bids_ses_id  : BIDS session ID (that will be used for the visit label)
-        :type bids_ses_id   : str
-        :param bids_modality: BIDS modality (a.k.a. EEG)
-        :tyoe bids_modality: str
+        :param data_type : The BIDS data type object.
+        :param session   : The session database object.
         :param db : Database class object
-        :type db : object
         :param verbose : whether to be verbose
-        :type verbose : bool
         :param data_dir : LORIS data directory path (usually /data/PROJECT/data)
-        :type data_dir : str
-        :param default_visit_label : default visit label to be used if no BIDS
-                                     session are present in the BIDS structure
-        :type default_visit_label : str
         :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir
-        :type loris_bids_eeg_rel_dir: str
        :param loris_bids_root_dir : LORIS BIDS root directory path
-        :type loris_bids_root_dir : str
         :param dataset_tag_dict : Dict of dataset-inherited HED tags
-        :type dataset_tag_dict : dict
         :param dataset_type : raw | derivative. Type of the dataset
-        :type dataset_type : string
         """
 
         # config
         self.config_db_obj = Config(db, verbose)
 
         # load bids objects
-        self.bids_reader = bids_reader
-        self.bids_layout = bids_reader.bids_layout
+        self.data_type = data_type
+        self.bids_layout = data_type.root_dataset.layout
 
         # load the LORIS BIDS import root directory where the eeg files will
         # be copied
@@ -121,11 +57,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db,
         self.loris_bids_root_dir = loris_bids_root_dir
         self.data_dir = data_dir
 
-        # load bids subject, visit and modality
-        self.bids_sub_id = bids_sub_id
-        self.bids_ses_id = bids_ses_id
-        self.bids_modality = bids_modality
-
         # load dataset tag dict. Used to ensure HED tags aren't duplicated
         self.dataset_tag_dict = dataset_tag_dict
 
@@ -134,35 +65,19 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db,
         self.verbose = verbose
 
         # find corresponding CandID and SessionID in LORIS
-        self.loris_cand_info = self.get_loris_cand_info()
-        self.default_vl = default_visit_label
-        self.psc_id = self.loris_cand_info['PSCID']
-        self.cand_id = self.loris_cand_info['CandID']
-        self.center_id = self.loris_cand_info['RegistrationCenterID']
-        self.project_id = self.loris_cand_info['RegistrationProjectID']
+        self.session = session
 
         hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1'
         self.hed_union = self.db.pselect(query=hed_query, args=())
 
-        self.cohort_id = None
-        for row in bids_reader.participants_info:
-            if not row['participant_id'] == self.bids_sub_id:
-                continue
-            if 'cohort' in row:
-                cohort_info = db.pselect(
-                    "SELECT CohortID FROM cohort WHERE title = %s",
-                    [row['cohort'], ]
-                )
-                if len(cohort_info) > 0:
-                    self.cohort_id = cohort_info[0]['CohortID']
-            break
-
-        self.session_id = self.get_loris_session_id()
-
         # check if a tsv with acquisition dates or age is available for the subject
         self.scans_file = None
-        if self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename'):
-            self.scans_file = self.bids_layout.get(suffix='scans', subject=self.bids_sub_id, return_type='filename')[0]
+        if self.bids_layout.get(suffix='scans', subject=self.data_type.subject.label, return_type='filename'):
+            self.scans_file = self.bids_layout.get(
+                suffix='scans',
+                subject=self.data_type.subject.label,
+                return_type='filename'
+            )[0]
 
         # register the data into LORIS
         if (dataset_type and dataset_type == 'raw'):
@@ -173,59 +88,6 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db,
             self.register_data()
             self.register_data(derivatives=True)
 
-    def get_loris_cand_info(self):
-        """
-        Gets the LORIS Candidate info for the BIDS subject.
-
-        :return: Candidate info of the subject found in the database
-        :rtype: list
-        """
-
-        candidate = Candidate(verbose=self.verbose, cand_id=self.bids_sub_id)
-        loris_cand_info = candidate.get_candidate_info_from_loris(self.db)
-
-        if not loris_cand_info:
-            candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id)
-            loris_cand_info = candidate.get_candidate_info_from_loris(self.db)
-
-        if not loris_cand_info:
-            print("Candidate " + self.bids_sub_id + " not found. You can retry with the --createcandidate option.\n")
-            sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND)
-
-        return loris_cand_info
-
-    def get_loris_session_id(self):
-        """
-        Greps the LORIS session.ID corresponding to the BIDS visit. Note,
-        if no BIDS visit are set, will use the default visit label value set
-        in the config module
-
-        :return: the session's ID in LORIS
-        :rtype: int
-        """
-
-        # check if there are any visit label in BIDS structure, if not,
-        # will use the default visit label set in the config module
-        visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl
-
-        session = Session(
-            self.db, self.verbose, self.cand_id, visit_label,
-            self.center_id, self.project_id, self.cohort_id
-        )
-        loris_vl_info = session.get_session_info_from_loris()
-
-        if not loris_vl_info:
-            message = "ERROR: visit label " + visit_label + " does not exist in " + \
-                      "the session table for candidate " + str(self.cand_id) + \
-                      "\nPlease make sure the visit label is created in the " + \
-                      "database or run bids_import.py with the -s option -s if " + \
-                      "you wish that the insertion pipeline creates the visit " + \
-                      "label in the session table."
-            print(message)
-            exit(lib.exitcode.SELECT_FAILURE)
-
-        return loris_vl_info['ID']
-
     def grep_bids_files(self, bids_type):
         """
         Greps the BIDS files and their layout information from the BIDSLayout
@@ -239,18 +101,18 @@ def grep_bids_files(self, bids_type):
         :rtype: list
         """
 
-        if self.bids_ses_id:
+        if self.data_type.session.label:
             return self.bids_layout.get(
-                subject     = self.bids_sub_id,
-                session     = self.bids_ses_id,
-                datatype    = self.bids_modality,
+                subject     = self.data_type.subject.label,
+                session     = self.data_type.session.label,
+                datatype    = self.data_type.name,
                 suffix      = bids_type,
                 return_type = 'filename'
             )
         else:
             return self.bids_layout.get(
-                subject     = self.bids_sub_id,
-                datatype    = self.bids_modality,
+                subject     = self.data_type.subject.label,
+                datatype    = self.data_type.name,
                 suffix      = bids_type,
                 return_type = 'filename'
            )
@@ -371,17 +233,17 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
 
         if detect:
             # TODO if derivatives, grep the source file as well as the input file ID???
             eeg_files = self.bids_layout.get(
-                subject   = self.bids_sub_id,
-                session   = self.bids_ses_id,
                 scope     = 'derivatives' if derivatives else 'raw',
-                suffix    = self.bids_modality,
+                subject   = self.data_type.subject.label,
+                session   = self.data_type.session.label,
+                suffix    = self.data_type.name,
                 extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf']
             )
         else:
             eeg_files = self.bids_layout.get(
-                subject   = self.bids_sub_id,
-                session   = self.bids_ses_id,
-                suffix    = self.bids_modality,
+                subject   = self.data_type.subject.label,
+                session   = self.data_type.session.label,
+                suffix    = self.data_type.name,
                 extension = ['set', 'edf', 'vhdr', 'vmrk', 'eeg', 'bdf']
             )
@@ -395,7 +257,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
                 return_type = 'tuple',
                 strict=False,
                 extension   = 'json',
-                suffix      = self.bids_modality,
+                suffix      = self.data_type.name,
                 all_        = False,
                 full_search = False,
             )
@@ -438,15 +300,14 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
             # get the acquisition date of the EEG file or the age at the time of the EEG recording
             eeg_acq_time = None
             if self.scans_file:
-                scan_info = ScansTSV(self.scans_file, eeg_file.path, self.verbose)
-                eeg_acq_time = scan_info.get_acquisition_time()
-                eeg_file_data['age_at_scan'] = scan_info.get_age_at_scan()
+                tsv_scan = self.data_type.session.get_tsv_scan(os.path.basename(self.scans_file))
+
+                eeg_acq_time = tsv_scan.acquisition_time
+                eeg_file_data['age_at_scan'] = tsv_scan.age_at_scan
                 if self.loris_bids_root_dir:
                     # copy the scans.tsv file to the LORIS BIDS import directory
-                    scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir(
-                        self.bids_sub_id, self.loris_bids_root_dir, self.data_dir
-                    )
+                    scans_path = self.copy_scans_tsv_file_to_loris_bids_dir()
                     eeg_file_data['scans_tsv_file'] = scans_path
                     scans_blake2 = compute_file_blake2b_hash(self.scans_file)
@@ -481,7 +342,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
 
             if not physio_file_id:
                 # grep the modality ID from physiological_modality table
-                modality_id = physiological_modality.grep_id_from_modality_value(self.bids_modality)
+                modality_id = physiological_modality.grep_id_from_modality_value(self.data_type.name)
 
                 eeg_path = os.path.relpath(eeg_file.path, self.data_dir)
                 if self.loris_bids_root_dir:
@@ -495,7 +356,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
                 eeg_file_info = {
                     'FileType': file_type,
                     'FilePath': eeg_path,
-                    'SessionID': self.session_id,
+                    'SessionID': self.session.id,
                     'AcquisitionTime': eeg_acq_time,
                     'InsertedByUser': getpass.getuser(),
                     'PhysiologicalOutputTypeID': output_type_id,
@@ -601,7 +462,7 @@ def fetch_and_insert_electrode_file(
             suffix      = 'coordsystem',
             all_        = False,
             full_search = False,
-            subject=self.bids_sub_id,
+            subject=self.data_type.subject.label,
         )
         if not coordsystem_metadata_file:
             message = '\nWARNING: no electrode metadata files (coordsystem.json) '
@@ -762,7 +623,7 @@ def fetch_and_insert_event_files(
             suffix      = 'events',
             all_        = False,
             full_search = False,
-            subject=self.bids_sub_id,
+            subject=self.data_type.subject.label,
         )
 
         inheritance = False
@@ -787,7 +648,7 @@ def fetch_and_insert_event_files(
                 event_metadata=event_metadata,
                 event_metadata_file=event_metadata_path,
                 physiological_file_id=physiological_file_id,
-                project_id=self.project_id,
+                project_id=self.session.project_id,
                 blake2=blake2,
                 project_wide=False,
                 hed_union=self.hed_union
@@ -810,7 +671,7 @@ def fetch_and_insert_event_files(
                 event_data=event_data,
                 event_file=event_path,
                 physiological_file_id=physiological_file_id,
-                project_id=self.project_id,
+                project_id=self.session.project_id,
                 blake2=blake2,
                 dataset_tag_dict=self.dataset_tag_dict,
                 file_tag_dict=file_tag_dict,
@@ -847,7 +708,7 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False
         copy_file = ""
         if not inheritance:
             copy_file = self.loris_bids_eeg_rel_dir
-            if self.bids_ses_id:
+            if self.data_type.session.label:
                 copy_file = os.path.join(copy_file, os.path.basename(file))
             else:
                 # make sure the ses- is included in the new filename if using
@@ -859,7 +720,8 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False
                         "sub-" + self.data_type.subject.label + "_ses-" + self.default_vl
                     )
                 )
-        copy_file = self.loris_bids_root_dir + copy_file
+
+        copy_file = os.path.join(self.loris_bids_root_dir, copy_file)
 
         # create the directory if it does not exist
         lib.utilities.create_dir(
diff --git a/python/lib/imaging_lib/file.py b/python/lib/imaging_lib/file.py
new file mode 100644
index 000000000..741d843a9
--- /dev/null
+++ b/python/lib/imaging_lib/file.py
@@ -0,0 +1,46 @@
+import getpass
+from datetime import datetime
+from pathlib import Path
+
+from lib.db.models.file import DbFile
+from lib.db.models.mri_scan_type import DbMriScanType
+from lib.db.models.session import DbSession
+from lib.env import Env
+
+
+def register_imaging_file(
+    env: Env,
+    file_type: str,
+    file_rel_path: Path,
+    session: DbSession,
+    mri_scan_type: DbMriScanType | None,
+    echo_time: float | None,
+    echo_number: str | None,
+    phase_encoding_direction: str | None,
+) -> DbFile:
+    """
+    Register an imaging file in the database.
+    """
+
+    user = getpass.getuser()
+    time = datetime.now()
+
+    file = DbFile(
+        file_type                = file_type,
+        rel_path                 = str(file_rel_path),
+        session_id               = session.id,
+        inserted_by_user_id      = user,
+        insert_time              = time,
+        coordinate_space         = 'native',
+        output_type              = 'native',
+        echo_time                = echo_time,
+        echo_number              = echo_number,
+        phase_encoding_direction = phase_encoding_direction,
+        source_file_id           = None,
+        scan_type_id             = mri_scan_type.id if mri_scan_type is not None else None,
+    )
+
+    env.db.add(file)
+    env.db.commit()
+
+    return file
diff --git a/python/lib/imaging_lib/file_parameter.py b/python/lib/imaging_lib/file_parameter.py
new file mode 100644
index 000000000..bfadfe4a3
--- /dev/null
+++ b/python/lib/imaging_lib/file_parameter.py
@@ -0,0 +1,46 @@
+from datetime import datetime
+from typing import Any
+
+from lib.db.models.file import DbFile
+from lib.db.models.file_parameter import DbFileParameter
+from lib.db.queries.file_parameter import try_get_file_parameter_with_file_id_type_id
+from lib.env import Env
+from lib.imaging_lib.parameter import get_or_create_parameter_type
+
+
+def register_file_parameters(env: Env, file: DbFile, parameter_infos: dict[str, Any]):
+    """
+    Insert or update some file parameters with the provided parameter names and values.
+    """
+
+    for parameter_name, parameter_value in parameter_infos.items():
+        register_file_parameter(env, file, parameter_name, parameter_value)
+
+
+def register_file_parameter(env: Env, file: DbFile, parameter_name: str, parameter_value: Any):
+    """
+    Insert or update a file parameter with the provided parameter name and value.
+    """
+
+    if isinstance(parameter_value, list):
+        parameter_values = map(lambda parameter_value: str(parameter_value), parameter_value)  # type: ignore
+        parameter_value = f"[{', '.join(parameter_values)}]"
+
+    parameter_type = get_or_create_parameter_type(env, parameter_name, 'MRI Variables', 'parameter_file')
+
+    parameter = try_get_file_parameter_with_file_id_type_id(env.db, file.id, parameter_type.id)
+    if parameter is None:
+        time = datetime.now()
+
+        parameter = DbFileParameter(
+            type_id     = parameter_type.id,
+            file_id     = file.id,
+            value       = parameter_value,
+            insert_time = time,
+        )
+
+        env.db.add(parameter)
+    else:
+        parameter.value = parameter_value
+
+    env.db.commit()
diff --git a/python/lib/imaging_lib/mri_scan_type.py b/python/lib/imaging_lib/mri_scan_type.py
new file mode 100644
index 000000000..df648affd
--- /dev/null
+++ b/python/lib/imaging_lib/mri_scan_type.py
@@ -0,0 +1,17 @@
+from lib.db.models.mri_scan_type import DbMriScanType
+from lib.env import Env
+
+
+def create_mri_scan_type(env: Env, name: str) -> DbMriScanType:
+    """
+    Create an MRI scan type in the database.
+    """
+
+    mri_scan_type = DbMriScanType(
+        name = name,
+    )
+
+    env.db.add(mri_scan_type)
+    env.db.commit()
+
+    return mri_scan_type
diff --git a/python/lib/imaging_lib/nifti.py b/python/lib/imaging_lib/nifti.py
index 7d8c5697f..267b597e7 100644
--- a/python/lib/imaging_lib/nifti.py
+++ b/python/lib/imaging_lib/nifti.py
@@ -1,13 +1,12 @@
-import os
-from collections.abc import Iterator
+from pathlib import Path
 from typing import Any, cast
 
 import nibabel as nib
 
 
-def add_nifti_spatial_file_parameters(nifti_path: str, file_parameters: dict[str, Any]):
+def add_nifti_file_parameters(nifti_path: Path, nifti_file_hash: str, file_parameters: dict[str, Any]):
     """
-    Read a NIfTI image and add its spatial metadata to the file parameters.
+    Read a NIfTI image and add some of its properties to the file parameters.
     """
 
     img = nib.load(nifti_path)  # type: ignore
@@ -30,12 +29,5 @@ def add_nifti_spatial_file_parameters(nifti_path: str, file_parameters: dict[str
     else:
         file_parameters['time'] = None
 
-
-def find_dir_nifti_names(dir_path: str) -> Iterator[str]:
-    """
-    Iterate over the names of the NIfTI files found in a directory.
-    """
-
-    for file_name in os.listdir(dir_path):
-        if file_name.endswith(('.nii', '.nii.gz')):
-            yield file_name
+    # Add the file BLAKE2b hash.
+    file_parameters['file_blake2b_hash'] = nifti_file_hash
diff --git a/python/lib/imaging_lib/nifti_pic.py b/python/lib/imaging_lib/nifti_pic.py
new file mode 100644
index 000000000..f69228887
--- /dev/null
+++ b/python/lib/imaging_lib/nifti_pic.py
@@ -0,0 +1,67 @@
+import re
+from pathlib import Path
+
+import nibabel as nib
+import numpy as np
+from nibabel.nifti1 import Nifti1Image
+from nilearn import plotting
+
+from lib.config import get_data_dir_path_config
+from lib.db.models.file import DbFile
+from lib.env import Env
+
+
+def create_imaging_pic(env: Env, file: DbFile, is_4d_data: bool) -> Path:
+    """
+    Creates the preview pic that will show in the imaging browser view session
+    page. This pic will be stored in the data_dir/pic folder.
+
+    :param env       : the environment object
+    :param file      : the imaging file database object
+    :param is_4d_data: whether the image is a 4D dataset
+
+    :return: relative path to the created pic
+    """
+
+    data_dir_path = get_data_dir_path_config(env)
+
+    cand_id = file.session.candidate.cand_id
+    file_path = data_dir_path / file.path
+
+    pic_name = re.sub(r"\.nii(\.gz)?$", f'_{file.id}_check.png', file.path.name)
+    pic_rel_path = Path(str(cand_id)) / pic_name
+    pic_dir_path = data_dir_path / 'pic' / str(cand_id)
+    pic_path = data_dir_path / 'pic' / pic_rel_path
+
+    # create the candID directory where the pic will go if it does not already exist
+    if not pic_dir_path.exists():
+        pic_dir_path.mkdir()
+
+    img = nib.load(file_path)  # type: ignore
+
+    if is_4d_data:
+        # Only load the first volume of a 4D image.
+        data = img.dataobj[..., 0]  # type: ignore
+    else:
+        data = img.dataobj[...]  # type: ignore
+
+    # Load the image as float32 for plotting.
+    volume = Nifti1Image(
+        data.astype(np.float32, copy=False),  # type: ignore
+        img.affine,  # type: ignore
+    )
+
+    plotting.plot_anat(  # type: ignore
+        anat_img=volume,
+        output_file=pic_path,
+        display_mode='ortho',
+        black_bg=True,  # type: ignore
+        draw_cross=False,
+        annotate=False,
+    )
+
+    return pic_rel_path
diff --git a/python/lib/imaging_lib/parameter.py b/python/lib/imaging_lib/parameter.py
new file mode 100644
index 000000000..20d18d283
--- /dev/null
+++ b/python/lib/imaging_lib/parameter.py
@@ -0,0 +1,44 @@
+from typing import Literal
+
+from lib.db.models.parameter_type import DbParameterType
+from lib.db.models.parameter_type_category_rel import DbParameterTypeCategoryRel
+from lib.db.queries.parameter_type import get_parameter_type_category_with_name, try_get_parameter_type_with_name_source
+from lib.env import Env
+
+
+def get_or_create_parameter_type(
+    env: Env,
+    parameter_name: str,
+    category: Literal['Electrophysiology Variables', 'MRI Variables'],
+    source: Literal['parameter_file', 'physiological_parameter_file']
+) -> DbParameterType:
+    """
+    Get a parameter type using its name, or create that parameter type if it does not exist.
+    """
+
+    parameter_type = try_get_parameter_type_with_name_source(env.db, parameter_name, source)
+    if parameter_type is not None:
+        return parameter_type
+
+    parameter_type = DbParameterType(
+        name        = parameter_name,
+        alias       = None,
+        data_type   = 'text',
+        description = f'{parameter_name} created by the lib.imaging.parameter Python module',
+        source_from = source,
+        queryable   = False,
+    )
+
+    env.db.add(parameter_type)
+    env.db.flush()
+
+    parameter_type_category = get_parameter_type_category_with_name(env.db, category)
+    parameter_type_category_rel = DbParameterTypeCategoryRel(
+        parameter_type_id          = parameter_type.id,
+        parameter_type_category_id = parameter_type_category.id,
+    )
+
+    env.db.add(parameter_type_category_rel)
+    env.db.flush()
+
+    return parameter_type
diff --git a/python/lib/imaging_lib/physio.py b/python/lib/imaging_lib/physio.py
index 958cfcd68..7576a665f 100644
--- a/python/lib/imaging_lib/physio.py
+++ b/python/lib/imaging_lib/physio.py
@@ -1,6 +1,21 @@
+import getpass
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
 from sqlalchemy.orm import Session as Database
 
-from lib.db.queries.physio_parameter import get_physio_file_parameters
+from lib.db.models.physio_file import DbPhysioFile
+from lib.db.models.physio_file_parameter import DbPhysioFileParameter
+from lib.db.models.physio_modality import DbPhysioModality
+from lib.db.models.physio_output_type import DbPhysioOutputType
+from lib.db.models.session import DbSession
+from lib.db.queries.physio_parameter import (
+    get_physio_file_parameters,
+    try_get_physio_file_parameter_with_file_id_type_id,
+)
+from lib.env import Env
+from lib.imaging_lib.parameter import get_or_create_parameter_type
 
 
 def get_physio_file_parameters_dict(db: Database, physio_file_id: int) -> dict[str, str | None]:
@@ -13,3 +28,65 @@ def get_physio_file_parameters_dict(db: Database, physio_file_id: int) -> dict[s
     return {
         parameter_type.name: parameter.value for parameter_type, parameter in parameters
     }
+
+
+def insert_physio_file(
+    env: Env,
+    session: DbSession,
+    modality: DbPhysioModality,
+    output_type: DbPhysioOutputType,
+    file_path: Path,
+    file_type: str,
+    acquisition_time: datetime | None,
+) -> DbPhysioFile:
+    file = DbPhysioFile(
+        path             = file_path,
+        type             = file_type,
+        session_id       = session.id,
+        modality_id      = modality.id,
+        output_type_id   = output_type.id,
+        acquisition_time = acquisition_time,
+        inserted_by_user = getpass.getuser(),
+    )
+
+    env.db.add(file)
+    env.db.flush()
+    return file
+
+
+def insert_physio_file_parameter(
+    env: Env,
+    file: DbPhysioFile,
+    session: DbSession,
+    parameter_name: str,
+    parameter_value: Any,
+):
+    """
+    Insert or update a physiological file parameter with the provided parameter name and value.
+    """
+
+    if isinstance(parameter_value, list):
+        parameter_values = map(lambda parameter_value: str(parameter_value), parameter_value)  # type: ignore
+        parameter_value = f"[{', '.join(parameter_values)}]"
+
+    parameter_type = get_or_create_parameter_type(
+        env,
+        parameter_name,
+        'Electrophysiology Variables',
+        'physiological_parameter_file',
+    )
+
+    parameter = try_get_physio_file_parameter_with_file_id_type_id(env.db, file.id, parameter_type.id)
+    if parameter is None:
+        parameter = DbPhysioFileParameter(
+            file_id    = file.id,
+            project_id = session.project.id,
+            type_id    = parameter_type.id,
+            value      = parameter_value,
+        )
+
+        env.db.add(parameter)
+    else:
+        parameter.value = parameter_value
+
+    return parameter
diff --git a/python/lib/import_bids_dataset/args.py b/python/lib/import_bids_dataset/args.py
new file mode 100644
index 000000000..b4d8f549d
--- /dev/null
+++ b/python/lib/import_bids_dataset/args.py
@@ -0,0 +1,14 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+
+@dataclass
+class Args:
+    source_bids_path: Path
+    type: Literal[None, 'raw', 'derivative']
+    bids_validation: bool
+    create_candidate: bool
+    create_session: bool
+    copy: bool
+    verbose: bool
diff --git a/python/lib/import_bids_dataset/check_subjects_sessions.py b/python/lib/import_bids_dataset/check_subjects_sessions.py
new file mode 100644
index 000000000..dfdd29a8d
--- /dev/null
+++ b/python/lib/import_bids_dataset/check_subjects_sessions.py
@@ -0,0 +1,428 @@
+import random
+from datetime import datetime
+
+from dateutil.parser import ParserError, parse
+from sqlalchemy.orm import Session as Database
+
+from lib.config import get_default_bids_visit_label_config
+from lib.db.models.candidate import DbCandidate
+from lib.db.models.cohort import DbCohort
+from lib.db.models.project import DbProject
+from lib.db.models.session import DbSession
+from lib.db.models.site import DbSite
+from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id
+from lib.db.queries.cohort import try_get_cohort_with_name
+from lib.db.queries.project import try_get_project_with_alias, try_get_project_with_name
+from lib.db.queries.session import try_get_session_with_cand_id_visit_label
+from lib.db.queries.sex import try_get_sex_with_name
+from lib.db.queries.site import try_get_site_with_alias, try_get_site_with_name
+from lib.db.queries.visit import try_get_visit_with_visit_label
+from lib.env import Env
+from lib.logging import log, log_error, log_error_exit
+from loris_bids_reader.dataset import BIDSDataset, BIDSSubject
+from loris_bids_reader.participants import BIDSParticipantRow
+
+
+class CheckBidsSubjectSessionError(Exception):
+    """
+    Exception raised if the check or creation of a candidate or session from a BIDS dataset fails.
+    """
+
+    def __init__(self, message: str):
+        super().__init__(message)
+
+
+def check_bids_session_labels(
+    env: Env,
+    bids: BIDSDataset,
+):
+    """
+    Check that all the session labels in a BIDS dataset correspond to a LORIS visit, or exit the
+    program with an error if that is not the case.
+    """
+
+    unknown_session_labels: list[str] = []
+
+    for session_label in bids.session_labels:
+        visit = try_get_visit_with_visit_label(env.db, session_label)
+        if visit is None:
+            unknown_session_labels.append(session_label)
+
+    if unknown_session_labels != []:
+        log_error_exit(
+            env,
+            (
+                f"Found {len(unknown_session_labels)} unknown session labels in the BIDS dataset. Unknown session"
+                f" labels are: {', '.join(unknown_session_labels)}. Each BIDS session label should correspond to a"
+                " LORIS visit label."
+            )
+        )
+
+
+def check_or_create_bids_subjects_and_sessions(
+    env: Env,
+    bids: BIDSDataset,
+    create_candidate: bool,
+    create_session: bool,
+) -> int:
+    """
+    Check that the subjects and sessions of a BIDS dataset correspond to LORIS candidates and
+    sessions, or create them using information extracted from the BIDS dataset if the relevant
+    arguments are passed.
+
+    Exit the program with an error if the check or creation of any candidate or session fails.
+    Return the project ID of the last candidate processed.
+    """
+
+    try:
+        # Read the participants.tsv property to raise an exception if the file is incorrect.
+        bids.tsv_participants
+    except Exception as exception:
+        log_error_exit(env, f"Error while reading the participants.tsv file. Full error:\n{exception}")
+
+    candidate = None
+    errors: list[Exception] = []
+
+    for subject in bids.subjects:
+        try:
+            candidate = check_or_create_bids_subject_and_sessions(env, subject, create_candidate, create_session)
+        except Exception as error:
+            log_error(env, str(error))
+            errors.append(error)
+
+    if errors != []:
+        error_message = f"Found {len(errors)} errors while checking BIDS subjects and sessions."
+        if create_candidate or create_session:
+            error_message += " No candidate or session has been created."
+
+        log_error_exit(env, error_message)
+
+    if candidate is None:
+        log_error_exit(env, "No subject found in the BIDS dataset.")
+
+    # Only commit the new candidates and sessions if no error has occurred.
+    env.db.commit()
+
+    # Return the project ID of a candidate of the BIDS dataset. For this value to be used, it
+    # should be assumed that all the candidates of the BIDS dataset are in the same project.
+    return candidate.registration_project_id
+
+
+def check_or_create_bids_subject_and_sessions(
+    env: Env,
+    subject: BIDSSubject,
+    create_candidate: bool,
+    create_session: bool,
+) -> DbCandidate:
+    """
+    Check that a BIDS subject and its sessions correspond to a LORIS candidate and its sessions, or
+    create them using information extracted from the BIDS dataset if the relevant arguments are
+    passed.
+
+    Raise an error if the check or creation of the candidate or any of its sessions fails. Return
+    the candidate corresponding to the BIDS subject.
+    """
+
+    tsv_participant = subject.root_dataset.tsv_participants and subject.root_dataset.tsv_participants.get(subject.label)
+    if tsv_participant is None:
+        raise CheckBidsSubjectSessionError(
+            f"No participants.tsv entry found for subject label '{subject.label}' in the BIDS dataset. The BIDS"
+            " directory subjects do not match the participants.tsv file."
+        )
+
+    candidate = check_or_create_bids_subject(env, tsv_participant, create_candidate)
+
+    if create_session:
+        cohort = get_tsv_participant_cohort(env, tsv_participant)
+    else:
+        cohort = None
+
+    for session in subject.sessions:
+        if session.label is not None:
+            visit_label = session.label
+        else:
+            visit_label = get_default_bids_visit_label_config(env)
+
+        check_or_create_bids_session(env, candidate, cohort, visit_label, create_session)
+
+    return candidate
+
+
+def check_or_create_bids_subject(env: Env, tsv_participant: BIDSParticipantRow, create_candidate: bool) -> DbCandidate:
+    """
+    Check that the subject of a BIDS participants.tsv row exists in LORIS, or create them using the
+    information of that row if the relevant argument is passed. Raise an exception if the candidate
Raise an exception if the candidate + does not exist or cannot be created. + """ + + try: + cand_id = int(tsv_participant.participant_id) + candidate = try_get_candidate_with_cand_id(env.db, cand_id) + if candidate is None: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.participant_id}' (identified as a CandID)." + ) + + return candidate + except ValueError: + pass + + candidate = try_get_candidate_with_psc_id(env.db, tsv_participant.participant_id) + if candidate is not None: + return candidate + + if not create_candidate: + raise CheckBidsSubjectSessionError( + f"No LORIS candidate found for the BIDS participant ID '{tsv_participant.participant_id}' (identified as a PSCID)." + ) + + return create_bids_candidate(env, tsv_participant) + + +def create_bids_candidate(env: Env, tsv_participant: BIDSParticipantRow) -> DbCandidate: + """ + Check a candidate using the information of a BIDS participants.tsv row, or raise an exception + if that candidate cannot be created. + """ + + log(env, f"Creating LORIS candidate for BIDS subject '{tsv_participant.participant_id}'...") + + psc_id = tsv_participant.participant_id + + cand_id = generate_new_cand_id(env.db) + + birth_date = get_tsv_participant_birth_date(tsv_participant) + + sex = get_tsv_participant_sex(env, tsv_participant) + + site = get_tsv_participant_site(env, tsv_participant) + + project = get_tsv_participant_project(env, tsv_participant) + + log( + env, + ( + "Creating candidate with information:\n" + f" PSCID = {psc_id}\n" + f" CandID = {cand_id}\n" + f" Site = {site.name}\n" + f" Project = {project.name}" + ) + ) + + now = datetime.now() + + candidate = DbCandidate( + cand_id = cand_id, + psc_id = psc_id, + date_of_birth = birth_date, + sex = sex, + registration_site_id = site.id, + registration_project_id = project.id, + user_id = 'imaging.py', + entity_type = 'Human', + date_active = now, + date_registered = now, + active = True, + ) + + env.db.add(candidate) + env.db.flush() + + return candidate + + +def check_or_create_bids_session( + env: Env, + candidate: DbCandidate, + cohort: DbCohort | None, + visit_label: str, + create_session: bool, +) -> DbSession: + """ + Check that a BIDS session exists in LORIS, or create it using information previously obtained + from the BIDS dataset if the relevant argument is passed. Raise an exception if the session + does not exist or cannot be created. + """ + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is not None: + return session + + if not create_session: + log_error_exit( + env, + f"No session found for candidate '{candidate.psc_id}' and visit label '{visit_label}'." + ) + + return create_bids_session(env, candidate, cohort, visit_label) + + +def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort | None, visit_label: str) -> DbSession: + """ + Create a session using information previously obtained from the BIDS dataset, or raise an + exception if the session does not exist or cannot be created. 
+ """ + + if cohort is None: + log_error_exit(env, f"No cohort found for candidate '{candidate.psc_id}', cannot create session.") + + log( + env, + ( + "Creating session with:\n" + f" PSCID = {candidate.psc_id}\n" + f" Visit label = {visit_label}" + ) + ) + + session = DbSession( + candidate_id = candidate.id, + visit_label = visit_label, + current_stage = 'Not Started', + site_id = candidate.registration_site_id, + project_id = candidate.registration_project_id, + cohort_id = cohort.id, + scan_done = True, + submitted = False, + active = True, + user_id = '', + hardcopy_request = '-', + mri_qc_status = '', + mri_qc_pending = False, + mri_caveat = True, + ) + + env.db.add(session) + env.db.flush() + + return session + + +def get_tsv_participant_birth_date(tsv_participant: BIDSParticipantRow) -> datetime | None: + """ + Get the birth date of a BIDS participants.tsv row, or return `None` if no birth date is + specified. Raise an exception if a birth date is specified but cannot be parsed. + """ + + if tsv_participant.birth_date is None: + return None + + try: + return parse(tsv_participant.birth_date) + except ParserError: + raise CheckBidsSubjectSessionError( + f"Could not parse the BIDS participants.tsv birth date '{tsv_participant.birth_date}'." + ) + + +def get_tsv_participant_sex(env: Env, tsv_participant: BIDSParticipantRow) -> str | None: + """ + Get the sex of a BIDS participants.tsv row, or return `None` if no sex is specified. Raise an + exception if a sex is specified but does not exist in LORIS. + """ + + if tsv_participant.sex is None: + return None + + tsv_participant_sex = tsv_participant.sex.lower() + + if tsv_participant_sex in ['m', 'male']: + sex_name = 'Male' + elif tsv_participant_sex in ['f', 'female']: + sex_name = 'Female' + elif tsv_participant_sex in ['o', 'other']: + sex_name = 'Other' + else: + sex_name = tsv_participant.sex + + sex = try_get_sex_with_name(env.db, sex_name) + if sex is None: + raise CheckBidsSubjectSessionError( + f"No LORIS sex found for the BIDS participants.tsv sex name or alias '{tsv_participant.sex}'." + ) + + return sex.name + + +def get_tsv_participant_site(env: Env, tsv_participant: BIDSParticipantRow) -> DbSite: + """ + Get the site of a BIDS participants.tsv row, or raise an exception if no site is specified or + the site does not exist in LORIS. + """ + + if tsv_participant.site is None: + raise CheckBidsSubjectSessionError( + "No 'site' column found in the BIDS participants.tsv file, this field is required to create candidates or" + " sessions. " + ) + + site = try_get_site_with_name(env.db, tsv_participant.site) + if site is not None: + return site + + site = try_get_site_with_alias(env.db, tsv_participant.site) + if site is not None: + return site + + raise CheckBidsSubjectSessionError( + f"No site found for the BIDS participants.tsv site name or alias '{tsv_participant.site}'." + ) + + +def get_tsv_participant_project(env: Env, tsv_participant: BIDSParticipantRow) -> DbProject: + """ + Get the project of a BIDS participants.tsv row, or raise an exception if no project is + specified or the project does not exist in LORIS. + """ + + if tsv_participant.project is None: + raise CheckBidsSubjectSessionError( + "No 'project' column found in the BIDS participants.tsv file, this field is required to create candidates" + " or sessions. 
" + ) + + project = try_get_project_with_name(env.db, tsv_participant.project) + if project is not None: + return project + + project = try_get_project_with_alias(env.db, tsv_participant.project) + if project is not None: + return project + + raise CheckBidsSubjectSessionError( + f"No project found for the BIDS participants.tsv project name or alias '{tsv_participant.project}'." + ) + + +def get_tsv_participant_cohort(env: Env, tsv_participant: BIDSParticipantRow) -> DbCohort: + """ + Get the cohort of a BIDS participants.tsv row, or raise an exception if no cohort is specified + or the cohort does not exist in LORIS. + """ + + if tsv_participant.cohort is None: + raise CheckBidsSubjectSessionError( + "No 'cohort' column found in the BIDS participants.tsv file, this field is required to create session." + ) + + cohort = try_get_cohort_with_name(env.db, tsv_participant.cohort) + if cohort is None: + raise CheckBidsSubjectSessionError( + f"No cohort found for the BIDS participants.tsv cohort name '{tsv_participant.cohort}'." + ) + + return cohort + + +# TODO: Move this function to a more appropriate place. +def generate_new_cand_id(db: Database) -> int: + """ + Generate a new random CandID that is not already in the database. + """ + + while True: + cand_id = random.randint(100000, 999999) + candidate = try_get_candidate_with_cand_id(db, cand_id) + if candidate is None: + return cand_id diff --git a/python/lib/import_bids_dataset/copy_files.py b/python/lib/import_bids_dataset/copy_files.py new file mode 100644 index 000000000..11822521e --- /dev/null +++ b/python/lib/import_bids_dataset/copy_files.py @@ -0,0 +1,125 @@ + +import re +import shutil +from pathlib import Path + +from lib.db.models.session import DbSession +from lib.env import Env +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.logging import log_error_exit +from loris_bids_reader.dataset import BIDSAcquisition, BIDSDataset, BIDSDataType +from loris_bids_reader.participants import BIDSParticipantsFile +from loris_bids_reader.scans import BIDSScansFile + + +def get_loris_bids_path(env: Env, bids: BIDSDataset, data_dir_path: Path) -> Path: + """ + Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if + it does not exist yet. + """ + + try: + dataset_description = bids.get_dataset_description() + except Exception as error: + log_error_exit(env, str(error)) + + if dataset_description is None: + log_error_exit( + env, + "No file 'dataset_description.json' found in the input BIDS dataset.", + ) + + # Sanitize the dataset metadata to have a usable name for the directory. 
+ dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.data.name) + dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.data.bids_version) + + loris_bids_path = data_dir_path / 'bids_imports' / f'{dataset_name}_BIDSVersion_{dataset_version}' + + if not loris_bids_path.exists(): + loris_bids_path.mkdir() + + return loris_bids_path + + +def get_loris_file_path( + import_env: BIDSImportEnv, + session: DbSession, + acquisition: BIDSAcquisition[BIDSDataType], + file_path: Path, +) -> Path: + if import_env.loris_bids_path is None: + return file_path + + loris_file_path = ( + import_env.loris_bids_path + / f'sub-{session.candidate.psc_id}' + / f'ses-{session.visit_label}' + / acquisition.data_type.name + / file_path.name + ) + + return loris_file_path.relative_to(import_env.data_dir_path) + + +def copy_static_dataset_files(source_bids_path: Path, loris_bids_path: Path): + """ + Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. + """ + + for file_name in ['README', 'dataset_description.json']: + source_file_path = source_bids_path / file_name + if not source_file_path.is_file(): + continue + + loris_file_path = loris_bids_path / file_name + shutil.copyfile(source_file_path, loris_file_path) + + +def copy_bids_tsv_participants(tsv_participants: BIDSParticipantsFile, loris_participants_tsv_path: Path): + """ + Copy some participants.tsv rows into the LORIS participants.tsv file, creating it if necessary. + """ + + if loris_participants_tsv_path.exists(): + tsv_participants.merge(BIDSParticipantsFile(loris_participants_tsv_path)) + + tsv_participants.write(loris_participants_tsv_path, ['participant_id']) + + +def copy_bids_tsv_scans(tsv_scans: BIDSScansFile, loris_scans_tsv_path: Path): + """ + Copy some scans.tsv rows into a LORIS scans.tsv file, creating it if necessary. + """ + + if loris_scans_tsv_path.exists(): + tsv_scans.merge(BIDSScansFile(loris_scans_tsv_path)) + + tsv_scans.write(loris_scans_tsv_path, ['filename', 'acq_time', 'age_at_scan']) + + +def copy_bids_file( + loris_bids_path: Path, + session: DbSession, + acquisition: BIDSAcquisition[BIDSDataType], + file_path: Path, +): + """ + Copy a BIDS file to a directory. + """ + + loris_file_path = ( + loris_bids_path + / f'sub-{session.candidate.psc_id}' + / f'ses-{session.visit_label}' + / acquisition.data_type.name + / file_path.name + ) + + if loris_file_path.exists(): + raise Exception(f"File '{loris_file_path}' already exists in LORIS.") + + loris_file_path.parent.mkdir(exist_ok=True) + if file_path.is_file(): + shutil.copyfile(file_path, loris_file_path) + elif file_path.is_dir(): + shutil.copytree(file_path, loris_file_path) diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py new file mode 100644 index 000000000..85b98df8e --- /dev/null +++ b/python/lib/import_bids_dataset/env.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class BIDSImportEnv: + """ + Pipeline-specific variables of the BIDS dataset import pipeline. 
+ """ + + data_dir_path : Path + loris_bids_path : Path | None + total_files_count : int + imported_files_count : int + ignored_files_count : int + failed_files_count : int + unknown_scan_types : list[str] + + def __init__(self, data_dir_path: Path, loris_bids_path: Path | None, total_files_count: int): + self.data_dir_path = data_dir_path + self.loris_bids_path = loris_bids_path + self.total_files_count = total_files_count + self.imported_files_count = 0 + self.ignored_files_count = 0 + self.failed_files_count = 0 + self.unknown_scan_types = [] + + @property + def processed_files_count(self) -> int: + return self.imported_files_count + self.ignored_files_count + self.failed_files_count diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py new file mode 100644 index 000000000..91e46634a --- /dev/null +++ b/python/lib/import_bids_dataset/events.py @@ -0,0 +1,70 @@ +import json +import os +from pathlib import Path +from typing import Any + +import lib.utilities +from lib.database import Database +from lib.env import Env +from lib.import_bids_dataset.args import Args +from lib.logging import log_warning +from lib.physiological import Physiological +from lib.util.crypto import compute_file_blake2b_hash +from loris_bids_reader.dataset import BIDSDataset + + +def get_events_metadata( + env: Env, + args: Args, + bids: BIDSDataset, + legacy_db: Database, + loris_bids_path: Path | None, + project_id: int, +) -> dict[Any, Any]: + """ + Get the root level 'events.json' data, assuming a singe project for the BIDS dataset. + """ + + root_event_metadata_file = bids.layout.get_nearest( # type: ignore + bids.path, + return_type='tuple', + strict=False, + extension='json', + suffix='events', + all_=False, + subject=None, + session=None, + ) + + if not root_event_metadata_file: + log_warning(env, "No event metadata files (events.json) in the BIDS root directory.") + return {} + + # Copy the event file to the LORIS BIDS import directory. + + copy_file = str.replace(root_event_metadata_file.path, bids.layout.root, '') # type: ignore + + if loris_bids_path is not None: + event_metadata_path = os.path.join(loris_bids_path, copy_file) + lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, args.verbose) # type: ignore + + hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' + hed_union = legacy_db.pselect(query=hed_query, args=()) # type: ignore + + # load json data + with open(root_event_metadata_file.path) as metadata_file: # type: ignore + event_metadata = json.load(metadata_file) + + blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) # type: ignore + physio = Physiological(legacy_db, args.verbose) + _, dataset_tag_dict = physio.insert_event_metadata( # type: ignore + event_metadata=event_metadata, + event_metadata_file=event_metadata_path, # type: ignore + physiological_file_id=None, + project_id=project_id, + blake2=blake2, + project_wide=True, + hed_union=hed_union # type: ignore + ) + + return dataset_tag_dict # type: ignore diff --git a/python/lib/import_bids_dataset/file_type.py b/python/lib/import_bids_dataset/file_type.py new file mode 100644 index 000000000..d0b491906 --- /dev/null +++ b/python/lib/import_bids_dataset/file_type.py @@ -0,0 +1,35 @@ +import re + +from lib.db.queries.imaging_file_type import get_all_imaging_file_types +from lib.env import Env + + +# TODO: This code seems to be MRI-specific code that makes assumptions that are not true for MEG. +# Create good abstractions for both MRI and MEG. 
+def determine_imaging_file_type(env: Env, file_name: str) -> str | None: + """ + Determine the file type of an imaging file from the database using its name, or return `None` + if no corresponding file type is found. + """ + + imaging_file_types = get_all_imaging_file_types(env.db) + + for imaging_file_type in imaging_file_types: + regex = re.escape(imaging_file_type.type) + r'(\.gz)?$' + if re.search(regex, file_name): + return imaging_file_type.type + + return None + + +def get_check_imaging_file_type(env: Env, file_name: str) -> str: + """ + Get the file type of an imaging file or raise an exception if that file type is not + registered in the database. + """ + + file_type = determine_imaging_file_type(env, file_name) + if file_type is None: + raise Exception("No matching file type found in the database.") + + return file_type diff --git a/python/lib/import_bids_dataset/imaging.py b/python/lib/import_bids_dataset/imaging.py new file mode 100644 index 000000000..9390aa18b --- /dev/null +++ b/python/lib/import_bids_dataset/imaging.py @@ -0,0 +1,48 @@ +from typing import Any + +from lib.db.queries.parameter_type import get_all_parameter_types +from lib.env import Env + + +def map_bids_param_to_loris_param(env: Env, file_parameters: dict[str, Any]): + """ + Maps the BIDS parameters found in the BIDS JSON file with the + parameter type names of LORIS. + + :param file_parameters: dictionary with the list of parameters + found in the BIDS JSON file + :type file_parameters: dict + + :return: returns a dictionary with the BIDS JSON parameter names + as well as their LORIS equivalent + :rtype: dict + """ + + parameter_types_mapping = get_bids_to_minc_parameter_types_mapping(env) + + # Map BIDS parameters with the LORIS ones. + for file_parameter in list(file_parameters.keys()): + file_parameter_type = parameter_types_mapping.get(file_parameter) + if file_parameter_type is not None: + file_parameters[file_parameter_type] = file_parameters[file_parameter] + + +def get_bids_to_minc_parameter_types_mapping(env: Env) -> dict[str, str]: + """ + Queries the BIDS to MINC mapping dictionary stored in the paramater_type table and returns a + dictionary with the BIDS term as keys and the MINC terms as values. 
+ + :return: BIDS to MINC mapping dictionary + :rtype: dict + """ + + parameter_types = get_all_parameter_types(env.db) + + parameter_types_mapping: dict[str, str] = {} + for parameter_type in parameter_types: + if parameter_type.alias is None: + continue + + parameter_types_mapping[parameter_type.alias] = parameter_type.name + + return parameter_types_mapping diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py new file mode 100644 index 000000000..0362efc10 --- /dev/null +++ b/python/lib/import_bids_dataset/main.py @@ -0,0 +1,292 @@ +import os +from collections.abc import Callable +from typing import Any + +from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config +from lib.database import Database +from lib.db.models.session import DbSession +from lib.db.queries.candidate import try_get_candidate_with_psc_id +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.eeg import Eeg +from lib.env import Env +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.check_subjects_sessions import ( + check_bids_session_labels, + check_or_create_bids_subjects_and_sessions, +) +from lib.import_bids_dataset.copy_files import ( + copy_bids_tsv_participants, + copy_bids_tsv_scans, + copy_static_dataset_files, + get_loris_bids_path, +) +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.import_bids_dataset.events import get_events_metadata +from lib.import_bids_dataset.meg import import_bids_meg_acquisition +from lib.import_bids_dataset.mri import import_bids_mri_acquisition +from lib.import_bids_dataset.print import print_bids_import_summary +from lib.logging import log, log_error, log_error_exit, log_warning +from lib.util.iter import count +from loris_bids_reader.dataset import BIDSAcquisition, BIDSDataset, BIDSDataType, BIDSSession +from loris_bids_reader.eeg.data_type import BIDSEEGDataType +from loris_bids_reader.meg.data_type import BIDSMEGAcquisition, BIDSMEGDataType +from loris_bids_reader.mri.data_type import BIDSMRIAcquisition, BIDSMRIDataType + + +def import_bids_dataset(env: Env, args: Args, legacy_db: Database): + """ + Read the provided BIDS dataset and import it into LORIS. + """ + + data_dir_path = get_data_dir_path_config(env) + + log(env, "Parsing BIDS dataset...") + + bids = BIDSDataset(args.source_bids_path, args.bids_validation) + + acquisitions_count = count(bids.acquisitions) + + log(env, f"Found {acquisitions_count} acquisitions.") + + log(env, f"Found {len(bids.subject_labels)} subjects:") + for subject_label in bids.subject_labels: + log(env, f"- {subject_label}") + + log(env, f"Found {len(bids.session_labels)} sessions:") + for session_label in bids.session_labels: + log(env, f"- {session_label}") + + # Check the BIDS subject and session labels and create their candidates and sessions in LORIS + # if needed. + + check_bids_session_labels(env, bids) + + project_id = check_or_create_bids_subjects_and_sessions(env, bids, args.create_candidate, args.create_session) + + # Get the LORIS BIDS import directory path and create the directory if needed. + + if args.copy: + loris_bids_path = get_loris_bids_path(env, bids, data_dir_path) + else: + loris_bids_path = None + + # Get the BIDS events metadata. + + events_metadata = get_events_metadata(env, args, bids, legacy_db, loris_bids_path, project_id) + + # Copy the `participants.tsv` file rows. 
+ + if loris_bids_path is not None and bids.tsv_participants is not None: + loris_participants_tsv_path = loris_bids_path / 'participants.tsv' + copy_bids_tsv_participants(bids.tsv_participants, loris_participants_tsv_path) + + # Process each session directory. + + import_env = BIDSImportEnv( + data_dir_path = data_dir_path, + loris_bids_path = loris_bids_path, + total_files_count = acquisitions_count, + ) + + for bids_session in bids.sessions: + import_bids_session(env, import_env, args, bids_session, events_metadata, legacy_db) + + # Copy the static BIDS files. + + if loris_bids_path is not None: + copy_static_dataset_files(bids.path, loris_bids_path) + + # Print import summary. + + print_bids_import_summary(env, import_env) + + +def import_bids_session( + env: Env, + import_env: BIDSImportEnv, + args: Args, + bids_session: BIDSSession, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS session directory and import it into LORIS. + """ + + log(env, f"Importing files for subject '{bids_session.subject.label}' and session '{bids_session.label}'.") + + candidate = try_get_candidate_with_psc_id(env.db, bids_session.subject.label) + if candidate is None: + # This should not happen as BIDS subject labels should have been checked previously. + log_error_exit(env, f"Candidate not found for PSCID '{bids_session.subject.label}'.") + + if bids_session.label is not None: + visit_label = bids_session.label + else: + visit_label = get_default_bids_visit_label_config(env) + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is None: + # This should not happen as BIDS session labels should have been checked previously. + log_error_exit(env, f"Visit not found for visit label '{visit_label}'.") + + try: + # Read the scans.tsv property to raise an exception if the file is incorrect. + tsv_scans = bids_session.tsv_scans + + if import_env.loris_bids_path is not None and tsv_scans is not None: + loris_scans_tsv_path = ( + import_env.loris_bids_path + / f'sub-{bids_session.subject.label}' + / f'ses-{bids_session.label}' + / f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv' + ) + + copy_bids_tsv_scans(tsv_scans, loris_scans_tsv_path) + except Exception as exception: + log_warning( + env, + f"Error while reading the session scans.tsv file, scans.tsv data will be ignored. Full error:\n{exception}" + ) + + # Process each data type directory. + + for data_type in bids_session.data_types: + import_bids_data_type(env, import_env, args, session, data_type, events_metadata, legacy_db) + + +def import_bids_data_type( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + data_type: BIDSDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS data type directory and import it into LORIS. + """ + + match data_type: + case BIDSMRIDataType() | BIDSMEGDataType(): + import_bids_data_type_acquisitions( + env, + import_env, + data_type, + lambda acquisition: import_bids_acquisition(env, import_env, args, session, acquisition), + ) + case BIDSEEGDataType(): + import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + case _: + log_warning(env, f"Unknown data type '{data_type.name}'. 
Skipping.") + + +def import_bids_data_type_acquisitions( + env: Env, + import_env: BIDSImportEnv, + data_type: BIDSDataType, + import_acquisition: Callable[[BIDSAcquisition[BIDSDataType]], None], +): + """ + Read the BIDS MRI data type directory and import its files into LORIS. + Read a BIDS data type directory and import its acquisitions into LORIS. + """ + + log(env, f"Importing data type {data_type.name}") + + if data_type.session.tsv_scans is None: + log_warning(env, "No 'scans.tsv' file found, 'scans.tsv' data will be ignored.") + + for acquisition in data_type.acquisitions: + try: + import_acquisition(acquisition) + except Exception as exception: + import_env.failed_files_count += 1 + log_error( + env, + ( + f"Error while importing acquisition '{acquisition.name}'. Error message:\n" + f"{exception}\n" + "Skipping." + ) + ) + import traceback + print(traceback.format_exc()) + + +def import_bids_acquisition( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + acquisition: BIDSAcquisition[BIDSDataType], +): + """ + Import a BIDS acquisition and its associated files in LORIS. + """ + + log( + env, + ( + f"Importing {acquisition.data_type.name} acquisition '{acquisition.name}'..." + f" ({import_env.processed_files_count + 1} / {import_env.total_files_count})" + ), + ) + + # Get the relevant `scans.tsv` row if there is one. + + if acquisition.session.tsv_scans is not None: + tsv_scan = acquisition.session.tsv_scans.get(acquisition.name) + if tsv_scan is None: + log_warning( + env, + f"No row for acquisition '{acquisition.name}' found in 'scans.tsv', 'scans.tsv' data will be ignored.", + ) + + else: + tsv_scan = None + + # Get the path at which to copy the file. + + match acquisition: + case BIDSMRIAcquisition(): + import_bids_mri_acquisition(env, import_env, session, acquisition, tsv_scan) + case BIDSMEGAcquisition(): + import_bids_meg_acquisition(env, import_env, args, session, acquisition, tsv_scan) + case _: + log_warning(env, f"Unknown acquisition type '{acquisition.name}'. Skipping.") + + print(f"Successfully imported acquisition '{acquisition.name}'.") + + +def import_bids_eeg_data_type_files( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + data_type: BIDSEEGDataType, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS EEG data type directory and import it into LORIS. 
+ """ + + loris_data_type_dir_rel_path = os.path.join( + f'sub-{session.candidate.psc_id}', + f'ses-{session.visit_label}', + data_type.name, + ) + + Eeg( + data_type = data_type, + db = legacy_db, + verbose = env.verbose, + data_dir = str(import_env.data_dir_path), + session = session, + loris_bids_eeg_rel_dir = loris_data_type_dir_rel_path, + loris_bids_root_dir = str(import_env.loris_bids_path), + dataset_tag_dict = events_metadata, + dataset_type = args.type, + ) diff --git a/python/lib/import_bids_dataset/meg.py b/python/lib/import_bids_dataset/meg.py new file mode 100644 index 000000000..129cd890f --- /dev/null +++ b/python/lib/import_bids_dataset/meg.py @@ -0,0 +1,153 @@ +from pathlib import Path + +from lib.config import get_eeg_viz_enabled_config +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.session import DbSession +from lib.db.queries.imaging_file_type import try_get_imaging_file_type_with_type +from lib.db.queries.physio import ( + try_get_physio_file_with_path, + try_get_physio_modality_with_name, + try_get_physio_output_type_with_name, +) +from lib.db.queries.physio_parameter import try_get_physio_file_parameter_with_file_id_name +from lib.env import Env +from lib.imaging_lib.physio import insert_physio_file, insert_physio_file_parameter +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.copy_files import copy_bids_file, get_loris_file_path +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.logging import log +from loris_bids_reader.meg.data_type import BIDSMEGAcquisition +from loris_bids_reader.scans import BIDSScanRow + + +def import_bids_meg_acquisition( + env: Env, + import_env: BIDSImportEnv, + args: Args, + session: DbSession, + acquisition: BIDSMEGAcquisition, + scan_row: BIDSScanRow | None, +): + log(env, f"Found MEG acquisition '{acquisition.path}'.") + log(env, f"Sidecar:\n{acquisition.sidecar.path}") + if acquisition.channels is not None: + log(env, f"Channels:\n{acquisition.channels.path}") + if acquisition.events is not None: + log(env, f"Events:\n{acquisition.events.path}") + + modality = try_get_physio_modality_with_name(env.db, acquisition.data_type.name) + if modality is None: + raise Exception('TODO: Modality not found') + + output_type = try_get_physio_output_type_with_name(env.db, args.type or 'raw') # TODO: Make this pretty + if output_type is None: + raise Exception('TODO: Output type not found') + + file_type = try_get_imaging_file_type_with_type(env.db, 'ctf') + if file_type is None: + raise Exception('TODO: File type not found') + + loris_file_path = get_loris_file_path(import_env, session, acquisition, acquisition.ctf_path) + + loris_file = try_get_physio_file_with_path(env.db, loris_file_path) + if loris_file is not None: + import_env.ignored_files_count += 1 + log(env, f"File '{loris_file_path}' is already registered in LORIS. 
Skipping.") + return + + file = insert_physio_file( + env, + session, + modality, + output_type, + loris_file_path, + file_type.type, + scan_row.get_acquisition_time() if scan_row is not None else None + ) + + for name, value in acquisition.sidecar.data.model_dump(by_alias=True).items(): + insert_physio_file_parameter(env, file, session, name, value) + + if import_env.loris_bids_path is not None: + copy_bids_file(import_env.loris_bids_path, session, acquisition, acquisition.ctf_path) + + env.db.commit() + + print(f"FILE INSERTED WITH ID {file.id}") + + if get_eeg_viz_enabled_config(env): + print("CREATE EEG visualization chunks") + + +# TODO: Make this prettier and likelize factorize somewhere else. +def create_chunks_for_visualization(env: Env, physio_file: DbPhysioFile, data_dir: Path): + """ + Calls chunking scripts if no chunk datasets yet available for + PhysiologicalFileID based on the file type of the original + electrophysiology dataset. + + :param physio_file_id: PhysiologicalFileID of the dataset to chunk + :type physio_file_id: int + :param data_dir : LORIS data directory (/data/%PROJECT%/data) + :type data_dir : str + """ + + # check if chunks already exists for this PhysiologicalFileID + chunk_path = try_get_physio_file_parameter_with_file_id_name( + env.db, + physio_file.id, + 'electrophysiology_chunked_dataset_path', + ) + + if chunk_path is not None: + return + + """ + # No chunks found + script = None + file_path = self.grep_file_path_from_file_id(physio_file_id) + + chunk_root_dir_config = self.config_db_obj.get_config("EEGChunksPath") + chunk_root_dir = chunk_root_dir_config + file_path_parts = Path(file_path).parts + if chunk_root_dir_config: + chunk_root_dir = chunk_root_dir_config + else: + chunk_root_dir = os.path.join(data_dir, file_path_parts[0]) + + chunk_root_dir = os.path.join(chunk_root_dir, f'{file_path_parts[1]}_chunks') + + full_file_path = os.path.join(data_dir, file_path) + + # determine which script to run based on the file type + file_type = self.grep_file_type_from_file_id(physio_file_id) + match file_type: + case 'set': + script = 'eeglab-to-chunks' + case 'edf': + script = 'edf-to-chunks' + + command = script + ' ' + full_file_path + ' --destination ' + chunk_root_dir + + # chunk the electrophysiology dataset if a command was determined above + try: + subprocess.call( + command, + shell = True, + stdout = open(os.devnull, 'wb') + ) + except subprocess.CalledProcessError as err: + print(f'ERROR: {script} execution failure. 
Error was:\n {err}') + sys.exit(lib.exitcode.CHUNK_CREATION_FAILURE) + except OSError: + print('ERROR: ' + script + ' not found') + sys.exit(lib.exitcode.CHUNK_CREATION_FAILURE) + + chunk_path = os.path.join(chunk_root_dir, os.path.splitext(os.path.basename(file_path))[0] + '.chunks') + if os.path.isdir(chunk_path): + self.insert_physio_parameter_file( + physiological_file_id = physio_file_id, + parameter_name = 'electrophysiology_chunked_dataset_path', + value = os.path.relpath(chunk_path, data_dir) + ) + """ diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py new file mode 100644 index 000000000..a56fbcf4f --- /dev/null +++ b/python/lib/import_bids_dataset/mri.py @@ -0,0 +1,177 @@ +from pathlib import Path +from typing import Any + +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_path +from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name +from lib.env import Env +from lib.imaging_lib.file import register_imaging_file +from lib.imaging_lib.file_parameter import register_file_parameter, register_file_parameters +from lib.imaging_lib.mri_scan_type import create_mri_scan_type +from lib.imaging_lib.nifti import add_nifti_file_parameters +from lib.imaging_lib.nifti_pic import create_imaging_pic +from lib.import_bids_dataset.copy_files import copy_bids_file, get_loris_file_path +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.import_bids_dataset.file_type import get_check_imaging_file_type +from lib.logging import log +from lib.util.crypto import compute_file_blake2b_hash +from lib.util.fs import get_path_extension +from loris_bids_reader.json import add_bids_json_file_parameters +from loris_bids_reader.mri.data_type import BIDSMRIAcquisition +from loris_bids_reader.scans import BIDSScanRow +from loris_bids_reader.tsv_scans import add_scan_tsv_file_parameters + +KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { + 'anat': [ + 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', 'FLASH', 'PD', 'PDmap', 'PDT2', + 'inplaneT1', 'inplaneT2', 'angio', + ], + 'func': [ + 'bold', 'cbv', 'phase', + ], + 'dwi': [ + 'dwi', 'sbref', + ], + 'fmap': [ + 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'fieldmap', 'epi', + ], +} + + +def import_bids_mri_acquisition( + env: Env, + import_env: BIDSImportEnv, + session: DbSession, + acquisition: BIDSMRIAcquisition, + tsv_scan: BIDSScanRow | None, +): + """ + Import a BIDS NIfTI file and its associated files in LORIS. + """ + + loris_file_path = get_loris_file_path(import_env, session, acquisition, acquisition.nifti_path) + + # Check whether the file is already registered in LORIS. + + loris_file = try_get_file_with_path(env.db, loris_file_path) + if loris_file is not None: + import_env.ignored_files_count += 1 + log(env, f"File '{loris_file_path}' is already registered in LORIS. Skipping.") + return + + # Get information about the file. + + file_type = get_check_imaging_file_type(env, acquisition.nifti_path.name) + file_hash = get_check_nifti_file_hash(env, acquisition) + mri_scan_type = get_nifti_mri_scan_type(env, import_env, acquisition) + + # Get the auxiliary files. + + aux_file_paths: list[Path] = [] + + if acquisition.bval_path is not None: + aux_file_paths.append(acquisition.bval_path) + + if acquisition.bvec_path is not None: + aux_file_paths.append(acquisition.bvec_path) + + # Get the file parameters. 
+ + file_parameters: dict[str, Any] = {} + + if acquisition.sidecar_path is not None: + json_loris_path = get_loris_file_path(import_env, session, acquisition, acquisition.sidecar_path) + add_bids_json_file_parameters(env, acquisition.sidecar_path, json_loris_path, file_parameters) + + add_nifti_file_parameters(acquisition.nifti_path, file_hash, file_parameters) + + if acquisition.session.tsv_scans is not None and tsv_scan is not None: + add_scan_tsv_file_parameters(tsv_scan, acquisition.session.tsv_scans.path, file_parameters) + + for aux_file_path in aux_file_paths: + aux_file_type = get_path_extension(aux_file_path) + aux_file_hash = compute_file_blake2b_hash(aux_file_path) + aux_file_loris_path = get_loris_file_path(import_env, session, acquisition, aux_file_path) + file_parameters[f'bids_{aux_file_type}'] = str(aux_file_loris_path) + file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash + + # Copy the files on the file system. + + if import_env.loris_bids_path is not None: + copy_bids_file(import_env.loris_bids_path, session, acquisition, acquisition.nifti_path) + + if acquisition.sidecar_path is not None: + copy_bids_file(import_env.loris_bids_path, session, acquisition, acquisition.sidecar_path) + + for aux_file_path in aux_file_paths: + copy_bids_file(import_env.loris_bids_path, session, acquisition, aux_file_path) + + # Register the file and its parameters in the database. + + echo_time = file_parameters.get('EchoTime') + echo_number = file_parameters.get('EchoNumber') + phase_encoding_direction = file_parameters.get('PhaseEncodingDirection') + + file = register_imaging_file( + env, + file_type, + loris_file_path, + session, + mri_scan_type, + echo_time, + echo_number, + phase_encoding_direction, + ) + + register_file_parameters(env, file, file_parameters) + + # Create and register the file picture. + + pic_rel_path = create_imaging_pic(env, file, True if 'time' in file_parameters else False) + + register_file_parameter(env, file, 'check_pic_filename', str(pic_rel_path)) + + import_env.imported_files_count += 1 + + +def get_check_nifti_file_hash(env: Env, acquisition: BIDSMRIAcquisition) -> str: + """ + Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already + registered in the database. + """ + + file_hash = compute_file_blake2b_hash(acquisition.nifti_path) + + file = try_get_file_with_hash(env.db, file_hash) + if file is not None: + raise Exception(f"File with hash '{file_hash}' already present in the database.") + + return file_hash + + +def get_nifti_mri_scan_type( + env: Env, + import_env: BIDSImportEnv, + acquisition: BIDSMRIAcquisition, +) -> DbMriScanType | None: + """ + Get the MRI scan type corresponding to a BIDS MRI acquisition using its BIDS suffix. Create the + MRI scan type in the database the suffix is a standard BIDS suffix and the scan type does not + already exist in the database, or raise an exception if no known scan type is found. 
+ """ + + if acquisition.suffix is None: + raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") + + mri_scan_type = try_get_mri_scan_type_with_name(env.db, acquisition.suffix) + if mri_scan_type is not None: + return mri_scan_type + + if acquisition.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[acquisition.data_type.name]: + if acquisition.suffix not in import_env.unknown_scan_types: + import_env.unknown_scan_types.append(acquisition.suffix) + + raise Exception(f"Found unknown MRI file suffix '{acquisition.suffix}'.") + + return create_mri_scan_type(env, acquisition.suffix) diff --git a/python/lib/import_bids_dataset/print.py b/python/lib/import_bids_dataset/print.py new file mode 100644 index 000000000..0782a9522 --- /dev/null +++ b/python/lib/import_bids_dataset/print.py @@ -0,0 +1,30 @@ +from lib.env import Env +from lib.import_bids_dataset.env import BIDSImportEnv +from lib.logging import log + + +def print_bids_import_summary(env: Env, import_env: BIDSImportEnv): + """ + Print a summary of this BIDS import process. + """ + + log( + env, + ( + f"Processed {import_env.processed_files_count} MRI files, including {import_env.imported_files_count}" + f" imported files, {import_env.ignored_files_count} ignored files, and {import_env.failed_files_count}" + " errors." + ), + ) + + if import_env.unknown_scan_types != []: + import_env.unknown_scan_types.sort() + + unknwon_scan_types_string = "" + for unknown_scan_type in import_env.unknown_scan_types: + unknwon_scan_types_string += f"\n- {unknown_scan_type}" + + log( + env, + f"Found {len(import_env.unknown_scan_types)} unknown MRI scan types:{unknwon_scan_types_string}" + ) diff --git a/python/lib/mri.py b/python/lib/mri.py deleted file mode 100644 index 03259b5ca..000000000 --- a/python/lib/mri.py +++ /dev/null @@ -1,455 +0,0 @@ -"""Deals with MRI BIDS datasets and register them into the database.""" - -import getpass -import json -import os -import re -import sys - -import lib.exitcode -import lib.utilities as utilities -from lib.candidate import Candidate -from lib.imaging import Imaging -from lib.scanstsv import ScansTSV -from lib.session import Session -from lib.util.crypto import compute_file_blake2b_hash - - -class Mri: - """ - This class reads the BIDS MRI data structure and registers the MRI datasets into the - database by calling lib.imaging class. 
- - :Example: - - from lib.bidsreader import BidsReader - from lib.mri import Mri - from lib.database import Database - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # load the BIDS directory - bids_reader = BidsReader(bids_dir) - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality in ['anat', 'dwi', 'fmap', 'func']: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_mri_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/mri/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_mri_rel_dir, verbose - ) - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_mri_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - - # enumerate the different suffixes supported by BIDS per modality type - self.possible_suffix_per_modality = { - 'anat' : [ - 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', - 'FLASH', 'PD', 'PDmap', 'PDT2', 'inplaneT1', 'inplaneT2', 'angio' - ], - 'func' : [ - 'bold', 'cbv', 'phase' - ], - 'dwi' : [ - 'dwi', 'sbref' - ], - 'fmap' : [ - 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 'phase2', - 'fieldmap', 'epi' - ] - } - - # load bids objects - self.bids_reader = bids_reader - self.bids_layout = bids_reader.bids_layout - - # load the LORIS BIDS import root directory where the files will be copied - self.loris_bids_mri_rel_dir = loris_bids_mri_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir - - # load BIDS subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - - # load database handler object and verbose bool - self.db = db - self.verbose = verbose - - # find corresponding CandID and SessionID in LORIS - self.loris_cand_info = self.get_loris_cand_info() - self.default_vl = default_visit_label - self.psc_id = self.loris_cand_info['PSCID'] - self.cand_id = self.loris_cand_info['CandID'] - self.center_id = self.loris_cand_info['RegistrationCenterID'] - self.project_id = self.loris_cand_info['RegistrationProjectID'] - self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: - continue - if 'cohort' in row: - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] - ) - if len(cohort_info) > 0: - self.cohort_id = cohort_info[0]['CohortID'] - break - - self.session_id = self.get_loris_session_id() - - # grep all the NIfTI files for the modality - self.nifti_files = self.grep_nifti_files() - - # check if a tsv with acquisition dates or age is available for the subject - 
self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.psc_id, return_type='filename'): - self.scans_file = self.bids_layout.get(suffix='scans', subject=self.psc_id, - return_type='filename', extension='tsv')[0] - - # loop through NIfTI files and register them in the DB - for nifti_file in self.nifti_files: - self.register_raw_file(nifti_file) - - def get_loris_cand_info(self): - """ - Gets the LORIS Candidate info for the BIDS subject. - - :return: Candidate info of the subject found in the database - :rtype: list - """ - - candidate = Candidate(verbose=self.verbose, psc_id=self.bids_sub_id) - loris_cand_info = candidate.get_candidate_info_from_loris(self.db) - - return loris_cand_info - - def get_loris_session_id(self): - """ - Greps the LORIS session.ID corresponding to the BIDS visit. Note, - if no BIDS visit are set, will use the default visit label value set - in the config module - - :return: the session's ID in LORIS - :rtype: int - """ - - # check if there are any visit label in BIDS structure, if not, - # will use the default visit label set in the config module - visit_label = self.bids_ses_id if self.bids_ses_id else self.default_vl - - session = Session( - self.db, self.verbose, self.cand_id, visit_label, - self.center_id, self.project_id, self.cohort_id - ) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info: - message = "ERROR: visit label " + visit_label + "does not exist in " + \ - "the session table for candidate " + self.cand_id + \ - "\nPlease make sure the visit label is created in the " + \ - "database or run bids_import.py with the -s option -s if " + \ - "you wish that the insertion pipeline creates the visit " + \ - "label in the session table." - print(message) - exit(lib.exitcode.SELECT_FAILURE) - - return loris_vl_info['ID'] - - def grep_nifti_files(self): - """ - Returns the list of NIfTI files found for the modality. - - :return: list of NIfTI files found for the modality - :rtype: list - """ - - # grep all the possible suffixes for the modality - modality_possible_suffix = self.possible_suffix_per_modality[self.bids_modality] - - # loop through the possible suffixes and grep the NIfTI files - nii_files_list = [] - for suffix in modality_possible_suffix: - nii_files_list.extend(self.grep_bids_files(suffix, 'nii.gz')) - - # return the list of found NIfTI files - return nii_files_list - - def grep_bids_files(self, bids_type, extension): - """ - Greps the BIDS files and their layout information from the BIDSLayout - and return that list. - - :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str - :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str - - :return: list of files from the BIDS layout - :rtype: list - """ - - if self.bids_ses_id: - return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - else: - return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - - def register_raw_file(self, nifti_file): - """ - Registers raw MRI files and related files into the files and parameter_file tables. 
- - :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object - """ - - # insert the NIfTI file - self.fetch_and_insert_nifti_file(nifti_file) - - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): - """ - Gather NIfTI file information to insert into the files and parameter_file tables. - Once all the information has been gathered, it will call imaging.insert_imaging_file - that will perform the insertion into the files and parameter_file tables. - - :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object - :param derivatives: whether the file to be registered is a derivative file - :type derivatives: bool - - :return: dictionary with the inserted file_id and file_path - :rtype: dict - """ - - # load the Imaging object that will be used to insert the imaging data into the database - imaging = Imaging(self.db, self.verbose) - - # load the list of associated files with the NIfTI file - associated_files = nifti_file.get_associations() - - # load the entity information from the NIfTI file - entities = nifti_file.get_entities() - scan_type = entities['suffix'] - - # loop through the associated files to grep JSON, bval, bvec... - json_file = None - other_assoc_files = {} - for assoc_file in associated_files: - file_info = assoc_file.get_entities() - if re.search(r'json$', file_info['extension']): - json_file = assoc_file.path - elif re.search(r'bvec$', file_info['extension']): - other_assoc_files['bvec_file'] = assoc_file.path - elif re.search(r'bval$', file_info['extension']): - other_assoc_files['bval_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'events': - other_assoc_files['task_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'physio': - other_assoc_files['physio_file'] = assoc_file.path - - # read the json file if it exists - file_parameters = {} - if json_file: - with open(json_file) as data_file: - file_parameters = json.load(data_file) - file_parameters = imaging.map_bids_param_to_loris_param(file_parameters) - # copy the JSON file to the LORIS BIDS import directory - json_path = self.copy_file_to_loris_bids_dir(json_file) - file_parameters['bids_json_file'] = json_path - json_blake2 = compute_file_blake2b_hash(json_file) - file_parameters['bids_json_file_blake2b_hash'] = json_blake2 - - # grep the file type from the ImagingFileTypes table - file_type = imaging.determine_file_type(nifti_file.filename) - if not file_type: - message = "\nERROR: File type for " + nifti_file.filename \ - + " does not exist in ImagingFileTypes database table\n" - print(message) - sys.exit(lib.exitcode.SELECT_FAILURE) - - # determine the output type - output_type = 'derivatives' if derivatives else 'native' - if not derivatives: - coordinate_space = 'native' - - # get the acquisition date of the MRI or the age at the time of acquisition - if self.scans_file: - scan_info = ScansTSV(self.scans_file, nifti_file.filename, self.verbose) - file_parameters['scan_acquisition_time'] = scan_info.get_acquisition_time() - file_parameters['age_at_scan'] = scan_info.get_age_at_scan() - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = scan_info.copy_scans_tsv_file_to_loris_bids_dir( - self.bids_sub_id, self.loris_bids_root_dir, self.data_dir - ) - file_parameters['scans_tsv_file'] = scans_path - scans_blake2 = compute_file_blake2b_hash(self.scans_file) - file_parameters['scans_tsv_file_bake2hash'] = scans_blake2 - - # 
grep voxel step from the NIfTI file header - step_parameters = imaging.get_nifti_image_step_parameters(nifti_file.path) - file_parameters['xstep'] = step_parameters[0] - file_parameters['ystep'] = step_parameters[1] - file_parameters['zstep'] = step_parameters[2] - - # grep the time length from the NIfTI file header - is_4d_dataset = False - length_parameters = imaging.get_nifti_image_length_parameters(nifti_file.path) - if len(length_parameters) == 4: - file_parameters['time'] = length_parameters[3] - is_4d_dataset = True - - # add all other associated files to the file_parameters so they get inserted - # in parameter_file - for type in other_assoc_files: - original_file_path = other_assoc_files[type] - copied_path = self.copy_file_to_loris_bids_dir(original_file_path) - file_param_name = 'bids_' + type - file_parameters[file_param_name] = copied_path - file_blake2 = compute_file_blake2b_hash(original_file_path) - hash_param_name = file_param_name + '_blake2b_hash' - file_parameters[hash_param_name] = file_blake2 - - # append the blake2b to the MRI file parameters dictionary - blake2 = compute_file_blake2b_hash(nifti_file.path) - file_parameters['file_blake2b_hash'] = blake2 - - # check that the file is not already inserted before inserting it - result = imaging.grep_file_info_from_hash(blake2) - file_id = result['FileID'] if result else None - file_path = result['File'] if result else None - if not file_id: - # grep the scan type ID from the mri_scan_type table (if it is not already in - # the table, it will add a row to the mri_scan_type table) - scan_type_id = self.db.grep_id_from_lookup_table( - id_field_name = 'MriScanTypeID', - table_name = 'mri_scan_type', - where_field_name = 'MriScanTypeName', - where_value = scan_type, - insert_if_not_found = True - ) - - # copy the NIfTI file to the LORIS BIDS import directory - file_path = self.copy_file_to_loris_bids_dir(nifti_file.path) - - # insert the file along with its information into files and parameter_file tables - echo_time = file_parameters['EchoTime'] if 'EchoTime' in file_parameters.keys() else None - echo_nb = file_parameters['EchoNumber'] if 'EchoNumber' in file_parameters.keys() else None - phase_enc_dir = file_parameters['PhaseEncodingDirection'] \ - if 'PhaseEncodingDirection' in file_parameters.keys() else None - file_info = { - 'FileType' : file_type, - 'File' : file_path, - 'SessionID' : self.session_id, - 'InsertedByUserID': getpass.getuser(), - 'CoordinateSpace' : coordinate_space, - 'OutputType' : output_type, - 'EchoTime' : echo_time, - 'PhaseEncodingDirection': phase_enc_dir, - 'EchoNumber' : echo_nb, - 'SourceFileID' : None, - 'MriScanTypeID' : scan_type_id - } - file_id = imaging.insert_imaging_file(file_info, file_parameters) - - # create the pic associated with the file - pic_rel_path = imaging.create_imaging_pic( - { - 'cand_id' : self.cand_id, - 'data_dir_path': self.data_dir, - 'file_rel_path': file_path, - 'is_4D_dataset': is_4d_dataset, - 'file_id' : file_id - } - ) - if os.path.exists(os.path.join(self.data_dir, 'pic/', pic_rel_path)): - imaging.insert_parameter_file(file_id, 'check_pic_filename', pic_rel_path) - - return {'file_id': file_id, 'file_path': file_path} - - def copy_file_to_loris_bids_dir(self, file, derivatives_path=None): - """ - Wrapper around the utilities.copy_file function that copies the file - to the LORIS BIDS import directory and returns the relative path of the - file (without the data_dir part). 
- - :param file: full path to the original file - :type file: str - :param derivatives_path: path to the derivative folder - :type derivatives_path: str - - :return: relative path to the copied file - :rtype: str - """ - - # determine the path of the copied file - copy_file = self.loris_bids_mri_rel_dir - if self.bids_ses_id: - copy_file += os.path.basename(file) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file += str.replace( - os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl - ) - if derivatives_path: - # create derivative subject/vl/modality directory - lib.utilities.create_dir( - derivatives_path + self.loris_bids_mri_rel_dir, - self.verbose - ) - copy_file = derivatives_path + copy_file - else: - copy_file = self.loris_bids_root_dir + copy_file - - # copy the file - utilities.copy_file(file, copy_file, self.verbose) - - # determine the relative path and return it - relative_path = copy_file.replace(self.data_dir, "") - - return relative_path diff --git a/python/lib/scanstsv.py b/python/lib/scanstsv.py deleted file mode 100644 index 5cb938615..000000000 --- a/python/lib/scanstsv.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Deals with sub-XXX_scans.tsv BIDS files""" - -import os - -from dateutil.parser import parse - -import lib -import lib.utilities as utilities - - -class ScansTSV: - """ - This class reads the BIDS sub-XXX_scans.tsv file that includes acquisition level information - such as scan date or age at scan... - - :Example: - from lib.scanstsv import ScansTSV - - scan_info = ScansTSV(scans_tsv_file, acquisition_file) - - acquisition_time = scan_info.get_acquisition_time() - age_at_scan = scan_info.get_age_at_scan - - """ - - def __init__(self, scans_tsv_file, acquisition_file, verbose): - """ - Constructor method for the ScansTSV class - - :param scans_tsv_file : path to the BIDS sub-XXX_scans.tsv file - :type scans_tsv_file : str - :param acquisition_file: path to the acquisition file (.nii, .set, .edf...) - :type acquisition_file: str - """ - - self.verbose = verbose - - # store files' paths - self.scans_tsv_file = scans_tsv_file - self.acquisition_file = acquisition_file - - # read the TSV file and store the header names and data - self.tsv_entries = utilities.read_tsv_file(self.scans_tsv_file) - self.tsv_headers = self.tsv_entries[0] - - # get the acquisition information for the acquisition file - self.acquisition_data = self.find_acquisition_data() - - def find_acquisition_data(self): - """ - Gets the information for the acquisition file from the TSV file. - - :return: the acquisition information found in the TSV file for the acquisition file - :rtype: list - """ - - for entry in self.tsv_entries: - if os.path.basename(self.acquisition_file) in entry['filename']: - return entry - - def get_acquisition_time(self): - """ - Get the acquisition time of the acquisition file. - - :return: acquisition time or None if not found - :rtype: str - """ - - if not self.acquisition_data: - # if no entry in self.acquisition_data, then no information available to get the acquisition time - return None - - if 'acq_time' in self.acquisition_data: - acq_time_list = [ele for ele in self.tsv_entries if ele['filename'] in self.acquisition_file] - if len(acq_time_list) == 1: - # the variable name could be mri_acq_time, but is eeg originally. 
- eeg_acq_time = acq_time_list[0]['acq_time'] - else: - print('More than one or no acquisition time has been found for ', self.acquisition_file) - exit() - - if eeg_acq_time == 'n/a': - return None - - try: - eeg_acq_time = parse(eeg_acq_time) - except ValueError as e: - message = "ERROR: could not convert acquisition time '" + \ - eeg_acq_time + \ - "' to datetime: " + str(e) - print(message) - exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - return eeg_acq_time - - return None - - def get_age_at_scan(self): - """ - Get the age at the time of acquisition. - - :return: age at acquisition time - :rtype: str - """ - - # list of possible header names containing the age information - age_header_list = ['age', 'age_at_scan', 'age_acq_time'] - - for header_name in age_header_list: - if header_name in self.tsv_headers and self.acquisition_data: - return self.acquisition_data[header_name].strip() - - return None - - def copy_scans_tsv_file_to_loris_bids_dir(self, bids_sub_id, loris_bids_root_dir, data_dir): - - original_file_path = self.scans_tsv_file - final_file_path = loris_bids_root_dir + '/sub-' + bids_sub_id + '/' + os.path.basename(self.scans_tsv_file) - - # copy the scans.tsv file to the new directory - if os.path.exists(final_file_path): - lib.utilities.append_to_tsv_file(original_file_path, final_file_path, "filename", self.verbose) - else: - lib.utilities.copy_file(original_file_path, final_file_path, self.verbose) - - # determine the relative path and return it - relative_path = final_file_path.replace(data_dir, '') - - return relative_path diff --git a/python/lib/session.py b/python/lib/session.py deleted file mode 100644 index dd34aa954..000000000 --- a/python/lib/session.py +++ /dev/null @@ -1,228 +0,0 @@ -"""This class gather functions for session handling.""" - -from typing_extensions import deprecated - -from lib.database_lib.candidate_db import CandidateDB -from lib.database_lib.project_cohort_rel import ProjectCohortRel -from lib.database_lib.session_db import SessionDB -from lib.database_lib.site import Site - - -class Session: - """ - This class gather functions that interact with the database and allow session - creation or to fetch session information directly from the database. - - :Example: - - from lib.session import Session - from lib.database import Database - - # database connection - db = Database(config.mysql, verbose) - db.connect() - - session = Session( - verbose, cand_id, visit_label, - center_id, project_id, cohort_id - ) - - # grep session information from the database - loris_vl_info = session.get_session_info_from_loris(db) - - # insert the session into the database - loris_vl_info = session.create_session(db) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, db, verbose, cand_id=None, visit_label=None, - center_id=None, project_id=None, cohort_id=None): - """ - Constructor method for the Session class. 
- - :param verbose : whether to be verbose - :type verbose : bool - :param cand_id : candidate's CandID - :type cand_id : int - :param visit_label : visit label - :type visit_label : str - :param center_id : center ID to associate with the session - :type center_id : int - :param project_id : project ID to associate with the session - :type project_id : int - :param cohort_id: cohort ID to associate with the session - :type cohort_id: int - """ - self.db = db - self.verbose = verbose - - self.proj_cohort_rel_db_obj = ProjectCohortRel(db, verbose) - self.candidate_db_obj = CandidateDB(db, verbose) - self.session_db_obj = SessionDB(db, verbose) - self.site_db_obj = Site(db, verbose) - - self.cand_id = str(cand_id) - self.visit_label = visit_label - self.center_id = center_id - self.project_id = project_id - self.cohort_id = cohort_id - - self.proj_cohort_rel_info_dict = dict() - self.session_info_dict = dict() - self.session_id = None - - def create_session(self): - """ - Creates a session using BIDS information. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. To be done in different PR though - if self.verbose: - print("Creating visit " + self.visit_label - + " for CandID " + self.cand_id) - - # fetch the candidate.ID associated to the CandID first - candidate_id = self.candidate_db_obj.get_candidate_id(self.cand_id) - column_names = ('CandidateID', 'Visit_label', 'CenterID', 'Current_stage') - values = (candidate_id, self.visit_label, str(self.center_id), 'Not Started') - - if self.project_id: - column_names = (*column_names, 'ProjectID') - values = (*values, str(self.project_id)) - - if self.cohort_id: - column_names = (*column_names, 'CohortID') - values = (*values, str(self.cohort_id)) - - self.db.insert( - table_name='session', - column_names=column_names, - values=values - ) - - loris_session_info = self.get_session_info_from_loris() - - return loris_session_info - - def get_session_info_from_loris(self): - """ - Grep session information from the session table using CandID and - Visit_label. - - :param db: database handler object - :type db: object - - :return: dictionary with session info from the session's table - :rtype: dict - """ - # TODO refactor bids_import pipeline to use same functions as dcm2bids below. To be done in different PR though - loris_session_info = self.db.pselect( - """ - SELECT PSCID, CandID, session.* - FROM session - JOIN candidate ON (candidate.ID=session.CandidateID) - WHERE CandID = %s AND Visit_label = %s - """, - (self.cand_id, self.visit_label) - ) - - return loris_session_info[0] if loris_session_info else None - - @deprecated('Use `lib.db.queries.site.try_get_site_with_psc_id_visit_label` instead') - def get_session_center_info(self, pscid, visit_label): - """ - Get the session center information based on the PSCID and visit label of a session. - - :param pscid: candidate site ID (PSCID) - :type pscid: str - :param visit_label: visit label - :type visit_label: str - - :return: dictionary of site information for the visit/candidate queried - :rtype: dict - """ - return self.session_db_obj.get_session_center_info(pscid, visit_label) - - @deprecated('Use `lib.db.queries.try_get_candidate_with_cand_id_visit_label` instead') - def create_session_dict(self, cand_id, visit_label): - """ - Creates the session information dictionary based on a candidate ID and visit label. 
This will populate - self.session_info_dict based on the result returned from the database query. - - :param cand_id: CandID - :type cand_id: int - :param visit_label: Visit label of the session - :type visit_label: str - """ - self.session_info_dict = self.session_db_obj.create_session_dict(cand_id, visit_label) - if self.session_info_dict: - self.cand_id = self.session_info_dict['CandID'] - self.visit_label = self.session_info_dict['Visit_label'] - self.center_id = self.session_info_dict['CenterID'] - self.project_id = self.session_info_dict['ProjectID'] - self.cohort_id = self.session_info_dict['CohortID'] - self.session_id = self.session_info_dict['ID'] - - @deprecated('Use `lib.db.models.session.DbSession` instead') - def insert_into_session(self, session_info_to_insert_dict): - """ - Insert a new row in the session table using fields list as column names and values as values. - - :param session_info_to_insert_dict: dictionary with the column names and values to use for insertion - :type session_info_to_insert_dict: dict - - :return: ID of the new session registered - :rtype: int - """ - self.session_id = self.session_db_obj.insert_into_session( - fields=list(session_info_to_insert_dict.keys()), - values=list(session_info_to_insert_dict.values()) - ) - - return self.session_id - - @deprecated('Use `lib.get_subject_session.get_candidate_next_visit_number` instead') - def get_next_session_site_id_and_visit_number(self, cand_id): - """ - Determines the next session site and visit number based on the last session inserted for a given candidate. - - :param cand_id: candidate ID - :type cand_id: int - - :return: a dictionary with 'newVisitNo' and 'CenterID' keys/values - :rtype: dict - """ - return self.session_db_obj.determine_next_session_site_id_and_visit_number(cand_id) - - @deprecated('Use `lib.db.queries.site.get_all_sites` instead') - def get_list_of_sites(self): - """ - Get the list of sites available in the psc table. - - :return: list of sites - :rtype: list - """ - - return self.site_db_obj.get_list_of_sites() - - @deprecated('Use `lib.db.models.project_cohort.DbProjectCohort` instead') - def create_proj_cohort_rel_info_dict(self, project_id, cohort_id): - """ - Populate self.proj_cohort_rel_info_dict with the content returned from the database for the ProjectID and - CohortID. - - :param project_id: ID of the Project - :type project_id: int - :param cohort_id: ID of the Cohort - :type cohort_id: int - """ - self.proj_cohort_rel_info_dict = self.proj_cohort_rel_db_obj.create_proj_cohort_rel_dict( - project_id, cohort_id - ) diff --git a/python/lib/util/fs.py b/python/lib/util/fs.py index 411211da5..ffed26419 100644 --- a/python/lib/util/fs.py +++ b/python/lib/util/fs.py @@ -82,37 +82,54 @@ def remove_empty_directories(dir_path: str): os.rmdir(subdir_path) -def get_file_extension(file_name: str) -> str: +def get_path_stem(path: Path) -> str: """ - Get the extension (including multiple extensions) of a file name or path without the leading - dot. + Get the stem of a path, that is, the name of the file without its extension (including multiple + extensions). """ - parts = file_name.split('.', maxsplit=1) + parts = path.name.split('.') + return parts[0] + + +def get_path_extension(path: Path) -> str: + """ + Get the extension (including multiple extensions) of a path without the leading dot. 
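+
+    For instance, for a multi-extension file name (the file name is illustrative):
+
+        get_path_extension(Path('sub-01_T1w.nii.gz'))  # -> 'nii.gz'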
+ """ + + parts = path.name.split('.', maxsplit=1) if len(parts) == 1: return '' return parts[1] -def replace_file_extension(file_name: str, extension: str) -> str: +def remove_path_extension(path: Path) -> Path: + """ + Remove the extension (including multiple extensions) of a path. + """ + + parts = path.name.split('.') + return path.parent / parts[0] + + +def replace_path_extension(path: Path, extension: str) -> Path: """ - Replace the extension (including multiple extensions) of a file name or path by another - extension. + Replace the extension (including multiple extensions) of a path by another extension. """ - parts = file_name.split('.') - return f'{parts[0]}.{extension}' + parts = path.name.split('.') + return path.parent / f'{parts[0]}.{extension}' -def search_dir_file_with_regex(dir_path: str, regex: str) -> str | None: +def search_dir_file_with_regex(dir_path: Path, regex: str) -> Path | None: """ - Search for a file within a directory whose name matches a regular expression, or return `None` - if no such file is found. + Search for a file or directory within a directory whose name matches a regular expression, or + return `None` if no such file is found. """ - for file in os.scandir(dir_path): - if re.search(regex, file.name): - return file.name + for file_path in dir_path.iterdir(): + if re.search(regex, file_path.name): + return file_path return None diff --git a/python/lib/util/iter.py b/python/lib/util/iter.py index 5b243b4e3..aae52c8b6 100644 --- a/python/lib/util/iter.py +++ b/python/lib/util/iter.py @@ -63,3 +63,17 @@ def flatten(iterables: Iterable[Iterable[T]]) -> Iterator[T]: for iterable in iterables: yield from iterable + + +def replace_or_append(elements: list[T], predicate: Callable[[T], bool], value: T) -> None: + """ + Replace the first element of a list that satisfies a predicate with a value, or append that + value to the list. + """ + + for i, element in enumerate(elements): + if predicate(element): + elements[i] = value + return + + elements.append(value) diff --git a/python/loris_bids_reader/agnostic/events.py b/python/loris_bids_reader/agnostic/events.py new file mode 100644 index 000000000..72e1c6155 --- /dev/null +++ b/python/loris_bids_reader/agnostic/events.py @@ -0,0 +1,37 @@ +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field + +from loris_bids_reader.tsv_file import BIDSTSVFile + + +class BIDSEventRow(BaseModel): + """ + Model for a BIDS events TSV file row. + + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/events.html#events + """ + + model_config = ConfigDict(extra='allow', validate_assignment=True) + + # REQUIRED fields + onset: float = Field(...) + duration: float = Field(..., ge=0) + + # OPTIONAL fields + trial_type: str | None = None + response_time: float | None = None + HED: str | None = None + stim_file: str | None = None + channel: str | None = None + + +class BIDSEventsFile(BIDSTSVFile[BIDSEventRow]): + """ + Wrapper for a BIDS events TSV file. 
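+
+    :Example: (a sketch; the path is hypothetical)
+
+        events = BIDSEventsFile(Path('sub-01/func/sub-01_task-rest_events.tsv'))
+        onsets = [row.onset for row in events.rows]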
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/events.html#events
+    """
+
+    def __init__(self, path: Path):
+        super().__init__(BIDSEventRow, path)
diff --git a/python/loris_bids_reader/dataset.py b/python/loris_bids_reader/dataset.py
new file mode 100644
index 000000000..bb0f681d1
--- /dev/null
+++ b/python/loris_bids_reader/dataset.py
@@ -0,0 +1,328 @@
+import re
+from abc import ABC, abstractmethod
+from collections.abc import Iterator, Sequence
+from functools import cached_property
+from pathlib import Path
+from typing import TYPE_CHECKING, Generic, Self, TypeVar
+
+from bids import BIDSLayout
+
+from lib.util.fs import search_dir_file_with_regex
+from lib.util.iter import find
+from loris_bids_reader.dataset_description import BIDSDatasetDescriptionFile
+from loris_bids_reader.participants import BIDSParticipantsFile
+from loris_bids_reader.scans import BIDSScansFile
+
+if TYPE_CHECKING:
+    from loris_bids_reader.eeg.data_type import BIDSEEGDataType
+    from loris_bids_reader.meg.data_type import BIDSMEGDataType
+    from loris_bids_reader.mri.data_type import BIDSMRIDataType
+
+
+PYBIDS_IGNORE = ['code', 'sourcedata', 'log', '.git']
+
+PYBIDS_FORCE = [re.compile(r"_annotations\.(tsv|json)$")]
+
+
+class BIDSDataset:
+    path: Path
+    validate: bool
+
+    def __init__(self, bids_path: Path, validate: bool):
+        self.path = bids_path
+        self.validate = validate
+
+    @property
+    def sessions(self) -> Iterator['BIDSSession']:
+        for subject in self.subjects:
+            yield from subject.sessions
+
+    @property
+    def data_types(self) -> Iterator['BIDSDataType']:
+        for session in self.sessions:
+            yield from session.data_types
+
+    @property
+    def acquisitions(self) -> Iterator['BIDSAcquisition[BIDSDataType]']:
+        for data_type in self.data_types:
+            yield from data_type.acquisitions
+
+    @cached_property
+    def subjects(self) -> list['BIDSSubject']:
+        """
+        The subject directories found in the BIDS dataset.
+        """
+
+        subjects: list[BIDSSubject] = []
+
+        for file in self.path.iterdir():
+            subject_match = re.match(r'sub-([a-zA-Z0-9]+)', file.name)
+            if subject_match is None:
+                continue
+
+            if not file.is_dir():
+                continue
+
+            subject_label = subject_match.group(1)
+            subjects.append(BIDSSubject(self, subject_label))
+
+        return subjects
+
+    def get_dataset_description(self) -> BIDSDatasetDescriptionFile | None:
+        """
+        Read the BIDS dataset description file of this BIDS dataset. Return `None` if no dataset
+        description file is present in the dataset, or raise an exception if the file is present
+        but contains incorrect data.
+        """
+
+        dataset_description_path = self.path / 'dataset_description.json'
+        if not dataset_description_path.exists():
+            return None
+
+        return BIDSDatasetDescriptionFile(dataset_description_path)
+
+    @cached_property
+    def tsv_participants(self) -> BIDSParticipantsFile | None:
+        participants_tsv_path = self.path / 'participants.tsv'
+        if not participants_tsv_path.exists():
+            return None
+
+        return BIDSParticipantsFile(participants_tsv_path)
+
+    @cached_property
+    def subject_labels(self) -> list[str]:
+        """
+        All the subject labels found in the BIDS dataset.
+        """
+
+        subject_labels = list(set(subject.label for subject in self.subjects))
+        subject_labels.sort()
+        return subject_labels
+
+    @cached_property
+    def session_labels(self) -> list[str]:
+        """
+        All the session labels found in this BIDS dataset.
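+
+        :Example: (a sketch; the dataset path and labels are hypothetical)
+
+            dataset = BIDSDataset(Path('/data/bids'), validate=False)
+            dataset.session_labels  # e.g. ['V01', 'V02']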
+        """
+
+        session_labels = list(set(session.label for session in self.sessions if session.label is not None))
+        session_labels.sort()
+        return session_labels
+
+    def get_subject(self, subject_label: str) -> 'BIDSSubject | None':
+        """
+        Get the subject directory corresponding to a subject label in this BIDS dataset or `None`
+        if it does not exist.
+        """
+
+        return find(lambda subject: subject.label == subject_label, self.subjects)
+
+    @cached_property
+    def layout(self) -> BIDSLayout:
+        """
+        Get the PyBIDS BIDSLayout for the BIDS dataset.
+        """
+
+        return BIDSLayout(
+            root = self.path,
+            ignore = PYBIDS_IGNORE,
+            force_index = PYBIDS_FORCE,
+            derivatives = True,
+            validate = self.validate
+        )
+
+
+class BIDSSubject:
+    root_dataset: BIDSDataset
+    path: Path
+    label: str
+
+    def __init__(self, root_dataset: BIDSDataset, label: str):
+        self.root_dataset = root_dataset
+        self.label = label
+        self.path = self.root_dataset.path / f'sub-{self.label}'
+
+    @property
+    def data_types(self) -> Iterator['BIDSDataType']:
+        for session in self.sessions:
+            yield from session.data_types
+
+    @property
+    def acquisitions(self) -> Iterator['BIDSAcquisition[BIDSDataType]']:
+        for data_type in self.data_types:
+            yield from data_type.acquisitions
+
+    @cached_property
+    def sessions(self) -> list['BIDSSession']:
+        """
+        The session directories found in this subject directory.
+        """
+
+        sessions: list[BIDSSession] = []
+
+        for file in self.path.iterdir():
+            if not file.is_dir():
+                continue
+
+            session_match = re.match(r'ses-([a-zA-Z0-9]+)', file.name)
+            if session_match is None:
+                continue
+
+            session_label = session_match.group(1)
+            sessions.append(BIDSSession(self, session_label))
+
+        if sessions == []:
+            sessions.append(BIDSSession(self, None))
+
+        return sessions
+
+    def get_session(self, session_label: str) -> 'BIDSSession | None':
+        """
+        Get a session directory of this subject directory or `None` if it does not exist.
+        """
+
+        return find(lambda session: session.label == session_label, self.sessions)
+
+
+class BIDSSession:
+    subject: BIDSSubject
+    path: Path
+    label: str | None
+
+    def __init__(self, subject: BIDSSubject, label: str | None):
+        self.subject = subject
+        self.label = label
+        if label is not None:
+            self.path = subject.path / f'ses-{self.label}'
+        else:
+            self.path = subject.path
+
+    @property
+    def root_dataset(self) -> BIDSDataset:
+        return self.subject.root_dataset
+
+    @property
+    def acquisitions(self) -> Iterator['BIDSAcquisition[BIDSDataType]']:
+        for data_type in self.data_types:
+            yield from data_type.acquisitions
+
+    @cached_property
+    def mri_data_types(self) -> list['BIDSMRIDataType']:
+        """
+        The MRI data type directories found in this session directory.
+        """
+
+        from loris_bids_reader.mri.data_type import BIDSMRIDataType
+
+        data_types: list[BIDSMRIDataType] = []
+
+        for data_type_name in ['anat', 'dwi', 'fmap', 'func']:
+            data_type_path = self.path / data_type_name
+            if data_type_path.is_dir():
+                data_types.append(BIDSMRIDataType(self, data_type_name))
+
+        return data_types
+
+    @cached_property
+    def eeg_data_types(self) -> list['BIDSEEGDataType']:
+        """
+        The EEG data type directories found in this session directory.
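+
+        :Example: (illustrative, assuming `session` is a loaded `BIDSSession`)
+
+            for eeg_data_type in session.eeg_data_types:
+                print(eeg_data_type.path)  # e.g. .../sub-01/ses-V01/eeg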
+ """ + + from loris_bids_reader.eeg.data_type import BIDSEEGDataType + + data_types: list[BIDSEEGDataType] = [] + + for data_type_name in ['eeg', 'ieeg']: + data_type_path = self.path / data_type_name + if data_type_path.is_dir(): + data_types.append(BIDSEEGDataType(self, data_type_name)) + + return data_types + + @property + def data_types(self) -> Iterator['BIDSDataType']: + """ + The data type directories found in this session directory. + """ + + yield from self.mri_data_types + yield from self.eeg_data_types + if self.meg is not None: + yield self.meg + + @cached_property + def meg(self) -> 'BIDSMEGDataType | None': + """ + The MEG data type directory found in this session directory, if there is one. + """ + + from loris_bids_reader.meg.data_type import BIDSMEGDataType + + meg_data_type_path = self.path / 'meg' + if not meg_data_type_path.exists(): + return None + + return BIDSMEGDataType(self, 'meg') + + @cached_property + def tsv_scans(self) -> BIDSScansFile | None: + tsv_scans_path = search_dir_file_with_regex(self.path, r'scans.tsv$') + if tsv_scans_path is None: + return None + + return BIDSScansFile(tsv_scans_path) + + +class BIDSDataType(ABC): + session: BIDSSession + path: Path + + def __init__(self, session: BIDSSession, name: str): + self.session = session + self.path = session.path / name + + @cached_property + @abstractmethod + def acquisitions(self) -> Sequence['BIDSAcquisition[Self]']: + ... + + @property + def name(self) -> str: + return self.path.name + + @property + def root_dataset(self) -> BIDSDataset: + return self.session.root_dataset + + @property + def subject(self) -> BIDSSubject: + return self.session.subject + + +T = TypeVar('T', bound=BIDSDataType, covariant=True) + + +class BIDSAcquisition(ABC, Generic[T]): + data_type: T + path: Path + + def __init__(self, data_type: T, name: str): + self.data_type = data_type + self.path = data_type.path / name + + @property + def name(self) -> str: + return self.path.name + + @property + def root_dataset(self) -> BIDSDataset: + return self.data_type.root_dataset + + @property + def subject(self) -> BIDSSubject: + return self.data_type.subject + + @property + def session(self) -> BIDSSession: + return self.data_type.session diff --git a/python/loris_bids_reader/dataset_description.py b/python/loris_bids_reader/dataset_description.py new file mode 100644 index 000000000..f0ea19035 --- /dev/null +++ b/python/loris_bids_reader/dataset_description.py @@ -0,0 +1,63 @@ +from pathlib import Path + +from pydantic import BaseModel, ConfigDict, Field + +from loris_bids_reader.tsv_file import BIDSJSONFile + + +class BIDSContainer(BaseModel): + type: str | None = None + tag: str | None = None + uri: str | None = None + + +class BIDSGeneratedByItem(BaseModel): + name: str = Field(alias='Name') + version: str | None = Field(None, alias='Version') + description: str | None = Field(None, alias='Description') + code_url: str | None = Field(None, alias='CodeURL') + container: BIDSContainer | None = Field(None, alias='Container') + + +class BIDSSourceDataset(BaseModel): + url: str | None = Field(None, alias='URL') + doi: str | None = Field(None, alias='DOI') + version: str | None = Field(None, alias='Version') + + +class BIDSDatasetDescription(BaseModel): + """ + Model for a BIDS `dataset_description.json` file. 
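+
+    :Example: (minimal sketch; the values are hypothetical and set via the JSON aliases)
+
+        description = BIDSDatasetDescription(Name='My Study', BIDSVersion='1.8.0')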
+ + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/dataset-description.html#dataset_descriptionjson + """ + + model_config = ConfigDict(extra='allow', populate_by_name=True) + + name: str = Field(alias='Name') + bids_version: str = Field(alias='BIDSVersion') + hed_version: str | list[str] | None = Field(None, alias='HEDVersion') + dataset_links: dict[str, str] | None = Field(None, alias='DatasetLinks') + dataset_type: str | None = Field(None, alias='DatasetType') + license: str | None = Field(None, alias='License') + authors: list[str] | None = Field(None, alias='Authors') + keywords: list[str] | None = Field(None, alias='Keywords') + acknowledgements: str | None = Field(None, alias='Acknowledgements') + how_to_acknowledge: str | None = Field(None, alias='HowToAcknowledge') + funding: list[str] | None = Field(None, alias='Funding') + ethics_approvals: list[str] | None = Field(None, alias='EthicsApprovals') + references_and_links: list[str] | None = Field(None, alias='ReferencesAndLinks') + dataset_doi: str | None = Field(None, alias='DatasetDOI') + generated_by: list[BIDSGeneratedByItem] | None = Field(None, alias='GeneratedBy') + source_datasets: list[BIDSSourceDataset] | None = Field(None, alias='SourceDatasets') + + +class BIDSDatasetDescriptionFile(BIDSJSONFile[BIDSDatasetDescription]): + """ + Wrapper for a BIDS `dataset_description.json` file. + + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/dataset-description.html#dataset_descriptionjson + """ + + def __init__(self, path: Path): + super().__init__(BIDSDatasetDescription, path) diff --git a/python/loris_bids_reader/eeg/data_type.py b/python/loris_bids_reader/eeg/data_type.py new file mode 100644 index 000000000..8a87639d9 --- /dev/null +++ b/python/loris_bids_reader/eeg/data_type.py @@ -0,0 +1,11 @@ +from collections.abc import Sequence +from functools import cached_property +from typing import Self + +from loris_bids_reader.dataset import BIDSAcquisition, BIDSDataType + + +class BIDSEEGDataType(BIDSDataType): + @cached_property + def acquisitions(self) -> Sequence[BIDSAcquisition[Self]]: + return [] diff --git a/python/lib/bids.py b/python/loris_bids_reader/json.py similarity index 63% rename from python/lib/bids.py rename to python/loris_bids_reader/json.py index fe616d42d..b04e57f4d 100644 --- a/python/lib/bids.py +++ b/python/loris_bids_reader/json.py @@ -1,9 +1,13 @@ +import json +from pathlib import Path from typing import Any from lib.config import get_patient_id_dicom_header_config from lib.env import Env from lib.get_session_info import SessionInfo, get_session_info from lib.imaging_lib.mri_scanner import MriScannerInfo +from lib.import_bids_dataset.imaging import map_bids_param_to_loris_param +from lib.util.crypto import compute_file_blake2b_hash def get_bids_json_scanner_info(bids_json: dict[str, Any]) -> MriScannerInfo: @@ -36,3 +40,18 @@ def get_bids_json_session_info(env: Env, bids_json: dict[str, Any]) -> SessionIn scanner_info = get_bids_json_scanner_info(bids_json) return get_session_info(env, patient_id, scanner_info) + + +def add_bids_json_file_parameters(env: Env, bids_json_path: Path, rel_json_path: Path, file_parameters: dict[str, Any]): + """ + Read a BIDS JSON sidecar file and add its parameters to a LORIS file parameters dictionary. 
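+
+    :Example: (a sketch; `env` and the paths are provided by the calling pipeline)
+
+        file_parameters: dict[str, Any] = {}
+        add_bids_json_file_parameters(env, json_path, rel_json_path, file_parameters)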
+ """ + + with open(bids_json_path) as data_file: + file_parameters.update(json.load(data_file)) + map_bids_param_to_loris_param(env, file_parameters) + + json_blake2 = compute_file_blake2b_hash(bids_json_path) + + file_parameters['bids_json_file'] = str(rel_json_path) + file_parameters['bids_json_file_blake2b_hash'] = json_blake2 diff --git a/python/loris_bids_reader/meg/channels.py b/python/loris_bids_reader/meg/channels.py new file mode 100644 index 000000000..b81edd8b6 --- /dev/null +++ b/python/loris_bids_reader/meg/channels.py @@ -0,0 +1,55 @@ +from pathlib import Path +from typing import Literal + +from pydantic import BaseModel, ConfigDict + +from loris_bids_reader.models import WithNA +from loris_bids_reader.tsv_file import BIDSTSVFile + +BIDSMEGChannelStatus = Literal['good', 'bad'] + +BIDSMEGChannelType = Literal[ + 'MEGMAG', 'MEGGRADAXIAL', 'MEGGRADPLANAR', 'MEGREFMAG', + 'MEGREFGRADAXIAL', 'MEGREFGRADPLANAR', 'MEGOTHER', 'EEG', + 'ECOG', 'SEEG', 'DBS', 'VEOG', 'HEOG', 'EOG', 'ECG', 'EMG', + 'TRIG', 'AUDIO', 'PD', 'EYEGAZE', 'PUPIL', 'MISC', 'SYSCLOCK', + 'ADC', 'DAC', 'HLU', 'FITERR', 'OTHER' +] + + +# TODO: Can the annotations of this be factorized using a type alias? + +class BIDSMEGChannelRow(BaseModel): + """ + Model for a BIDS MEG channels TSV file row. + + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetoencephalography.html#channels-description-_channelstsv + """ + + model_config = ConfigDict(extra='allow', validate_assignment=True) + + # Required fields (must appear in specific order) + name: str + type: BIDSMEGChannelType + units: WithNA[str] + + # Optional fields (can appear anywhere) + description: str | None = None + sampling_frequency: WithNA[float] = None + low_cutoff: WithNA[float] = None + high_cutoff: WithNA[float] = None + notch: WithNA[float | list[float]] = None + software_filters: WithNA[str] = None + status: WithNA[BIDSMEGChannelStatus] = None + status_description: WithNA[str] = None + + +class BIDSMEGChannelsFile(BIDSTSVFile[BIDSMEGChannelRow]): + """ + Wrapper for a BIDS channels TSV file. + + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetoencephalography.html#channels-description-_channelstsv + """ + + def __init__(self, path: Path): + super().__init__(BIDSMEGChannelRow, path) diff --git a/python/loris_bids_reader/meg/data_type.py b/python/loris_bids_reader/meg/data_type.py new file mode 100644 index 000000000..2f0e66081 --- /dev/null +++ b/python/loris_bids_reader/meg/data_type.py @@ -0,0 +1,58 @@ +import re +from collections.abc import Iterator, Sequence +from functools import cached_property +from pathlib import Path + +from loris_bids_reader.agnostic.events import BIDSEventsFile +from loris_bids_reader.dataset import BIDSAcquisition, BIDSDataType +from loris_bids_reader.meg.channels import BIDSMEGChannelsFile +from loris_bids_reader.meg.sidecar import BIDSMEGSidecarFile + + +class BIDSMEGDataType(BIDSDataType): + @cached_property + def acquisitions(self) -> Sequence['BIDSMEGAcquisition']: + """ + The MEG acquisitions found in the MEG data type. 
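+
+        :Example: (illustrative, assuming `meg` is a loaded `BIDSMEGDataType`)
+
+            for acquisition in meg.acquisitions:
+                print(acquisition.ctf_path)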
+        """
+
+        acquisitions: list[BIDSMEGAcquisition] = []
+        for acquisition_name in find_dir_meg_acquisition_names(self.path):
+            acquisitions.append(BIDSMEGAcquisition(self, acquisition_name))
+
+        return acquisitions
+
+
+class BIDSMEGAcquisition(BIDSAcquisition[BIDSMEGDataType]):
+    ctf_path: Path
+    sidecar: BIDSMEGSidecarFile
+    channels: BIDSMEGChannelsFile | None
+    events: BIDSEventsFile | None
+
+    def __init__(self, data_type: BIDSMEGDataType, name: str):
+        super().__init__(data_type, name)
+
+        self.ctf_path = self.path.with_name(f'{name}.ds')
+
+        sidecar_path = self.path.with_suffix('.json')
+        if not sidecar_path.exists():
+            raise Exception(f"No MEG JSON sidecar file found for acquisition '{name}'.")
+
+        self.sidecar = BIDSMEGSidecarFile(sidecar_path)
+
+        channels_path = self.path.parent / re.sub(r'_meg$', '_channels.tsv', self.path.name)
+        self.channels = BIDSMEGChannelsFile(channels_path) if channels_path.exists() else None
+
+        events_path = self.path.parent / re.sub(r'_meg$', '_events.tsv', self.path.name)
+        self.events = BIDSEventsFile(events_path) if events_path.exists() else None
+
+
+def find_dir_meg_acquisition_names(dir_path: Path) -> Iterator[str]:
+    """
+    Iterate over the names of the MEG acquisitions (CTF `.ds` directories) found in a directory.
+    """
+
+    for item_path in dir_path.iterdir():
+        name_match = re.search(r'(.+_meg)\.ds$', item_path.name)
+        if name_match is not None:
+            yield name_match.group(1)
diff --git a/python/loris_bids_reader/meg/sidecar.py b/python/loris_bids_reader/meg/sidecar.py
new file mode 100644
index 000000000..68681ee69
--- /dev/null
+++ b/python/loris_bids_reader/meg/sidecar.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from loris_bids_reader.tsv_file import BIDSJSONFile
+
+NA = Literal['n/a']
+RecordingType = Literal['continuous', 'epoched', 'discontinuous']
+Manufacturer = Literal['CTF', 'Neuromag/Elekta/MEGIN', 'BTi/4D', 'KIT/Yokogawa', 'ITAB', 'KRISS', 'Other']
+
+
+class BIDSMEGSidecar(BaseModel):
+    """
+    Model for the data of a BIDS MEG sidecar JSON file.
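+
+    :Example: (a sketch with hypothetical values, set via the JSON aliases)
+
+        sidecar = BIDSMEGSidecar(
+            SamplingFrequency=2400.0, PowerLineFrequency=60.0, DewarPosition='upright',
+            SoftwareFilters='n/a', DigitizedLandmarks=True, DigitizedHeadPoints=True,
+            TaskName='rest',
+        )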
+ + Documentation: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetoencephalography.html#sidecar-json-_megjson + """ + + model_config = ConfigDict( + str_strip_whitespace=True, + extra='forbid', + validate_assignment=True, + populate_by_name=True, + ) + + # REQUIRED fields + sampling_frequency : float = Field(..., gt=0, alias='SamplingFrequency') + power_line_frequency : float | NA = Field(..., alias='PowerLineFrequency') + dewar_position : str = Field(..., alias='DewarPosition') + software_filters : dict[str, dict[str, Any]] | NA = Field(..., alias='SoftwareFilters') + digitized_landmarks : bool = Field(..., alias='DigitizedLandmarks') + digitized_head_points : bool = Field(..., alias='DigitizedHeadPoints') + + # RECOMMENDED fields + meg_channel_count : int | None = Field(None, ge=0, alias='MEGChannelCount') + meg_ref_channel_count : int | None = Field(None, ge=0, alias='MEGREFChannelCount') + eeg_channel_count : int | None = Field(None, ge=0, alias='EEGChannelCount') + ecog_channel_count : int | None = Field(None, ge=0, alias='ECOGChannelCount') + seeg_channel_count : int | None = Field(None, ge=0, alias='SEEGChannelCount') + eog_channel_count : int | None = Field(None, ge=0, alias='EOGChannelCount') + ecg_channel_count : int | None = Field(None, ge=0, alias='ECGChannelCount') + emg_channel_count : int | None = Field(None, ge=0, alias='EMGChannelCount') + misc_channel_count : int | None = Field(None, ge=0, alias='MiscChannelCount') + trigger_channel_count : int | None = Field(None, ge=0, alias='TriggerChannelCount') + + # RECOMMENDED recording fields + recording_duration : float | None = Field(None, ge=0, alias='RecordingDuration') + recording_type : RecordingType | None = Field(None, alias='RecordingType') + epoch_length : float | None = Field(None, ge=0, alias='EpochLength') + continuous_head_localization : bool | None = Field(None, alias='ContinuousHeadLocalization') + head_coil_frequency : list[float] | float | None = Field(None, alias='HeadCoilFrequency') + max_movement : float | None = Field(None, ge=0, alias='MaxMovement') + subject_artefact_description : str | NA | None = Field(None, alias='SubjectArtefactDescription') + associated_empty_room : list[str] | str | None = Field(None, alias='AssociatedEmptyRoom') + hardware_filters : dict[str, dict[str, Any]] | NA | None = Field(None, alias='HardwareFilters') + + # OPTIONAL electrical stimulation fields + electrical_stimulation : bool | None = Field(None, alias='ElectricalStimulation') + electrical_stimulation_parameters : str | None = Field(None, alias='ElectricalStimulationParameters') + + # RECOMMENDED hardware information fields + manufacturer : Manufacturer | None = Field(None, alias='Manufacturer') + manufacturers_model_name : str | None = Field(None, alias='ManufacturersModelName') + software_versions : str | None = Field(None, alias='SoftwareVersions') + device_serial_number : str | None = Field(None, alias='DeviceSerialNumber') + + # REQUIRED and RECOMMENDED task information fields + task_name : str = Field(..., alias='TaskName') + task_description : str | None = Field(None, alias='TaskDescription') + instructions : str | None = Field(None, alias='Instructions') + cog_atlas_id : str | None = Field(None, alias='CogAtlasID') + cog_po_id : str | None = Field(None, alias='CogPOID') + + # RECOMMENDED institution information fields + institution_name : str | None = Field(None, alias='InstitutionName') + institution_address : str | None = Field(None, alias='InstitutionAddress') + 
institutional_department_name : str | None = Field(None, alias='InstitutionalDepartmentName')
+
+    # OPTIONAL EEG-specific fields (if recorded with MEG)
+    eeg_placement_scheme : str | None = Field(None, alias='EEGPlacementScheme')
+    cap_manufacturer : str | None = Field(None, alias='CapManufacturer')
+    cap_manufacturers_model_name : str | None = Field(None, alias='CapManufacturersModelName')
+    eeg_reference : str | None = Field(None, alias='EEGReference')
+
+
+class BIDSMEGSidecarFile(BIDSJSONFile[BIDSMEGSidecar]):
+    """
+    Wrapper for a BIDS MEG sidecar JSON file.
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetoencephalography.html#sidecar-json-_megjson
+    """
+
+    def __init__(self, path: Path):
+        super().__init__(BIDSMEGSidecar, path)
diff --git a/python/loris_bids_reader/models.py b/python/loris_bids_reader/models.py
new file mode 100644
index 000000000..37d745c50
--- /dev/null
+++ b/python/loris_bids_reader/models.py
@@ -0,0 +1,19 @@
+from typing import Annotated, TypeVar
+
+from pydantic import BeforeValidator
+
+T = TypeVar('T')
+
+
+def validate_na(value: T) -> T | None:
+    """
+    Convert a BIDS 'n/a' value to `None`; return any other value unchanged.
+    """
+
+    if value == 'n/a':
+        return None
+
+    return value
+
+
+WithNA = Annotated[T | None, BeforeValidator(validate_na)]
diff --git a/python/loris_bids_reader/mri/data_type.py b/python/loris_bids_reader/mri/data_type.py
new file mode 100644
index 000000000..63d59b6fc
--- /dev/null
+++ b/python/loris_bids_reader/mri/data_type.py
@@ -0,0 +1,43 @@
+import re
+from collections.abc import Sequence
+from functools import cached_property
+from pathlib import Path
+
+from lib.util.fs import remove_path_extension, replace_path_extension
+from loris_bids_reader.dataset import BIDSAcquisition, BIDSDataType
+
+
+class BIDSMRIDataType(BIDSDataType):
+    @cached_property
+    def acquisitions(self) -> Sequence['BIDSMRIAcquisition']:
+        acquisitions: list[BIDSMRIAcquisition] = []
+
+        for file_path in self.path.iterdir():
+            if file_path.name.endswith(('.nii', '.nii.gz')):
+                acquisitions.append(BIDSMRIAcquisition(self, file_path))
+
+        return acquisitions
+
+
+class BIDSMRIAcquisition(BIDSAcquisition[BIDSMRIDataType]):
+    nifti_path: Path
+    sidecar_path: Path | None
+    bval_path: Path | None
+    bvec_path: Path | None
+    suffix: str | None
+
+    def __init__(self, data_type: BIDSMRIDataType, nifti_path: Path):
+        super().__init__(data_type, remove_path_extension(nifti_path).name)
+        self.nifti_path = nifti_path
+
+        sidecar_path = replace_path_extension(self.path, 'json')
+        self.sidecar_path = sidecar_path if sidecar_path.exists() else None
+
+        bval_path = replace_path_extension(self.path, 'bval')
+        self.bval_path = bval_path if bval_path.exists() else None
+
+        bvec_path = replace_path_extension(self.path, 'bvec')
+        self.bvec_path = bvec_path if bvec_path.exists() else None
+
+        suffix_match = re.search(r'_([a-zA-Z0-9]+)$', self.name)
+        self.suffix = suffix_match.group(1) if suffix_match is not None else None
diff --git a/python/loris_bids_reader/participants.py b/python/loris_bids_reader/participants.py
new file mode 100644
index 000000000..7e747417a
--- /dev/null
+++ b/python/loris_bids_reader/participants.py
@@ -0,0 +1,88 @@
+from pathlib import Path
+from typing import Any, Literal
+
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, field_validator, model_validator
+
+from lib.util.iter import find, replace_or_append
+from loris_bids_reader.tsv_file import BIDSTSVFile
+
+Sex = Literal['male', 'female', 'other']
+Handedness = Literal['left', 'right', 'ambidextrous']
+
+
+class BIDSParticipantRow(BaseModel):
+    """
+    Model for a BIDS `participants.tsv` file row.
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/data-summary-files.html#participants-file
+    """
+
+    model_config = ConfigDict(extra='allow', populate_by_name=True)
+
+    # REQUIRED field
+    participant_id: str = Field(...)
+
+    # RECOMMENDED fields
+    species: str | None = None
+    age: int | float | None = None
+    sex: Sex | None = None
+    handedness: Handedness | None = None
+    strain: str | int | None = None
+    strain_rrid: str | None = None
+
+    # OPTIONAL fields
+    hed: str | None = Field(None, alias='HED')
+
+    # LORIS fields
+    birth_date: str | None = None
+    site: str | None = None
+    cohort: str | None = None
+    project: str | None = None
+
+    @field_validator('participant_id', mode='before')
+    @classmethod
+    def parse_participant_id(cls, value: Any) -> Any:
+        if isinstance(value, str):
+            return value.removeprefix('sub-')
+
+        return value
+
+    @field_serializer('participant_id')
+    def serialize_participant_id(self, v: str) -> str:
+        return f'sub-{v}'
+
+    @model_validator(mode='before')
+    @classmethod
+    def parse_cohort(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            # Use the deprecated field `subproject` as `cohort` if the latter is not present.
+            if 'subproject' in data and 'cohort' not in data:
+                data['cohort'] = data['subproject']
+
+        return data  # type: ignore
+
+
+class BIDSParticipantsFile(BIDSTSVFile[BIDSParticipantRow]):
+    """
+    Wrapper for a BIDS `participants.tsv` file.
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/data-summary-files.html#participants-file
+    """
+
+    def __init__(self, path: Path):
+        super().__init__(BIDSParticipantRow, path)
+
+    def get(self, participant_id: str) -> BIDSParticipantRow | None:
+        return find(lambda row: row.participant_id == participant_id, self.rows)
+
+    def set(self, participant: BIDSParticipantRow):
+        replace_or_append(self.rows, lambda row: row.participant_id == participant.participant_id, participant)
+
+    def merge(self, other: 'BIDSParticipantsFile'):
+        """
+        Copy another `participants.tsv` file into this file. The rows of this file are replaced by
+        those of the other file if there are duplicates.
+        """
+
+        for other_row in other.rows:
+            self.set(other_row)
diff --git a/python/loris_bids_reader/scans.py b/python/loris_bids_reader/scans.py
new file mode 100644
index 000000000..c6d6849f7
--- /dev/null
+++ b/python/loris_bids_reader/scans.py
@@ -0,0 +1,87 @@
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import dateutil.parser
+from dateutil.parser import ParserError
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+
+from lib.util.iter import find, replace_or_append
+from loris_bids_reader.tsv_file import BIDSTSVFile
+
+
+class BIDSScanRow(BaseModel):
+    """
+    Model for a BIDS `scans.tsv` file row.
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/data-summary-files.html#scans-file
+    """
+
+    model_config = ConfigDict(extra='allow', populate_by_name=True)
+
+    # REQUIRED field
+    filename: str = Field(...)
+
+    # OPTIONAL fields
+    acq_time: str | None = None
+    hed: str | None = Field(None, alias='HED')
+
+    # LORIS-specific fields
+    age_at_scan: str | None = None
+    mri_acq_time: str | None = None
+    eeg_acq_time: str | None = None
+
+    def get_acquisition_time(self) -> datetime | None:
+        """
+        Read the acquisition time field of a scan from this `scans.tsv` row.
+        """
+
+        for acq_time in [self.acq_time, self.mri_acq_time, self.eeg_acq_time]:
+            if acq_time is not None:
+                try:
+                    return dateutil.parser.parse(acq_time)
+                except ParserError:
+                    pass
+
+        return None
+
+    @model_validator(mode='before')
+    @classmethod
+    def parse_age_at_scan(cls, data: Any) -> Any:
+        if isinstance(data, dict):
+            for key in ['age', 'age_acq_time']:
+                # Use the fields `age` and `age_acq_time` as `age_at_scan` if the latter is not present.
+                if key in data and 'age_at_scan' not in data:
+                    data['age_at_scan'] = data[key]
+
+        return data
+
+
+class BIDSScansFile(BIDSTSVFile[BIDSScanRow]):
+    """
+    Wrapper for a BIDS `scans.tsv` file.
+
+    Documentation: https://bids-specification.readthedocs.io/en/stable/modality-agnostic-files/data-summary-files.html#scans-file
+    """
+
+    def __init__(self, path: Path):
+        super().__init__(BIDSScanRow, path)
+
+    def get(self, filename: str) -> BIDSScanRow | None:
+        return find(lambda row: row.filename == filename, self.rows)
+
+    def set(self, scan: BIDSScanRow):
+        replace_or_append(self.rows, lambda row: row.filename == scan.filename, scan)
+
+    def merge(self, other: 'BIDSScansFile'):
+        """
+        Copy another `scans.tsv` file into this file. The rows of this file are replaced by
+        those of the other file if there are duplicates.
+        """
+
+        for other_row in other.rows:
+            self.set(other_row)
diff --git a/python/loris_bids_reader/tsv_file.py b/python/loris_bids_reader/tsv_file.py
new file mode 100644
index 000000000..3d793cebb
--- /dev/null
+++ b/python/loris_bids_reader/tsv_file.py
@@ -0,0 +1,86 @@
+import csv
+import json
+from pathlib import Path
+from typing import Generic, TypeVar
+
+from pydantic import BaseModel
+
+T = TypeVar('T', bound=BaseModel)
+
+
+class BIDSJSONFile(Generic[T]):
+    path: Path
+    data: T
+
+    def __init__(self, model_class: type[T], path: Path):
+        self.path = path
+        with open(self.path) as file:
+            sidecar_data = json.load(file)
+            self.data = model_class(**sidecar_data)
+
+
+class BIDSTSVFile(Generic[T]):
+    """
+    Class for a BIDS TSV file.
+    """
+
+    path: Path
+    model_class: type[T]
+    rows: list[T]
+
+    def __init__(self, model_class: type[T], path: Path):
+        self.path = path
+        self.model_class = model_class
+        self.rows = []
+        with open(self.path) as file:
+            reader = csv.DictReader(file, delimiter='\t')
+            for row in reader:
+                self.rows.append(model_class(**row))
+
+    def get_field_names(self) -> list[str]:
+        """
+        Get the names of the fields of this file.
+        """
+
+        return list(self.model_class.model_fields.keys())
+
+    def get_populated_field_names(self) -> list[str]:
+        """
+        Get the names of the fields that have at least one value in one row in this file.
+        """
+
+        fields = self.get_field_names()
+        unpopulated_fields = self.get_unpopulated_field_names()
+        return [field for field in fields if field not in unpopulated_fields]
+
+    def get_unpopulated_field_names(self) -> set[str]:
+        """
+        Get the names of the fields that do not have any value in any row in this file.
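+
+        :Example: (illustrative, assuming `scans_file` is a loaded `BIDSScansFile`)
+
+            # The result could be e.g. {'hed', 'eeg_acq_time'} if no row sets those fields.
+            scans_file.get_unpopulated_field_names()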
+        """
+
+        fields = set(self.get_field_names())
+        for row in self.rows:
+            row_dict = row.model_dump()
+            for field in list(fields):
+                if row_dict.get(field) is not None:
+                    fields.remove(field)
+
+        return fields
+
+    def write(self, path: Path, fields: list[str] | None = None):
+        """
+        Write the TSV file to a path, writing either the given fields or, by default, the
+        populated fields.
+        """
+
+        if fields is None:
+            fields = self.get_populated_field_names()
+
+        with open(path, 'w', newline='') as file:
+            writer = csv.DictWriter(file, fieldnames=fields, delimiter='\t')
+            writer.writeheader()
+
+            for row in self.rows:
+                row_dict = row.model_dump()
+                filtered_row = {key: value for key, value in row_dict.items() if key in fields}
+                writer.writerow(filtered_row)
diff --git a/python/loris_bids_reader/tsv_participants.py b/python/loris_bids_reader/tsv_participants.py
new file mode 100644
index 000000000..7a0671999
--- /dev/null
+++ b/python/loris_bids_reader/tsv_participants.py
@@ -0,0 +1,113 @@
+import csv
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from dateutil.parser import ParserError, parse
+
+from loris_bids_reader.participants import BIDSParticipantsFile
+
+
+@dataclass
+class BidsTsvParticipant:
+    """
+    Information about a participant found in a row of the `participants.tsv` file of a BIDS
+    dataset.
+    """
+
+    id: str
+    birth_date: str | None = None
+    sex: str | None = None
+    age: str | None = None
+    site: str | None = None
+    cohort: str | None = None
+    project: str | None = None
+
+
+def read_bids_participants_tsv_file(participants_tsv_path: Path) -> dict[str, BidsTsvParticipant]:
+    """
+    Read the `participants.tsv` file of a BIDS dataset and get the participant rows indexed by
+    participant ID. Raise an exception if the `participants.tsv` file is incorrect.
+    """
+
+    tsv_participants: dict[str, BidsTsvParticipant] = {}
+    with open(participants_tsv_path) as participants_tsv_file:
+        reader = csv.DictReader(participants_tsv_file.readlines(), delimiter='\t')
+        if reader.fieldnames is None or 'participant_id' not in reader.fieldnames:
+            raise Exception(f"Missing 'participant_id' field in participants.tsv file '{participants_tsv_path}'.")
+
+        for tsv_participant_row in reader:
+            tsv_participant = read_bids_participants_tsv_row(tsv_participant_row, participants_tsv_path)
+            tsv_participants[tsv_participant.id] = tsv_participant
+
+    return tsv_participants
+
+
+def read_bids_participants_tsv_row(
+    tsv_participant_row: dict[str, str],
+    participants_tsv_path: Path,
+) -> BidsTsvParticipant:
+    """
+    Read a `participants.tsv` row, or raise an exception if that row is incorrect.
+    """
+
+    # Get the participant ID, removing the `sub-` prefix if it is present.
+    full_participant_id = tsv_participant_row.get('participant_id')
+    if full_participant_id is None:
+        raise Exception(f"Missing 'participant_id' value in participants.tsv file '{participants_tsv_path}'.")
+
+    participant_id = re.sub(r'^sub-', '', full_participant_id)
+
+    birth_date = _read_birth_date(tsv_participant_row)
+    cohort = _read_cohort(tsv_participant_row)
+
+    # Create the BIDS participant object.
+    return BidsTsvParticipant(
+        id         = participant_id,
+        birth_date = birth_date,
+        sex        = tsv_participant_row.get('sex'),
+        age        = tsv_participant_row.get('age'),
+        site       = tsv_participant_row.get('site'),
+        project    = tsv_participant_row.get('project'),
+        cohort     = cohort,
+    )
+
+
+def write_bids_participants_tsv_file(tsv_participants: BIDSParticipantsFile, participants_file_path: Path):
+    """
+    Write the `participants.tsv` file from a set of participant rows.
+    """
+
+    with open(participants_file_path, 'w') as participants_file:
+        writer = csv.writer(participants_file, delimiter='\t')
+        writer.writerow(['participant_id'])
+
+        for tsv_participant in sorted(tsv_participants.rows, key=lambda row: row.participant_id):
+            writer.writerow([tsv_participant.participant_id])
+
+
+def _read_birth_date(tsv_participant_row: dict[str, str]) -> str | None:
+    """
+    Read the date of birth field of a participant from a `participants.tsv` row.
+    """
+
+    for birth_date_field_name in ['date_of_birth', 'birth_date', 'dob']:
+        if birth_date_field_name in tsv_participant_row:
+            try:
+                return parse(tsv_participant_row[birth_date_field_name]).strftime('%Y-%m-%d')
+            except ParserError:
+                pass
+
+    return None
+
+
+def _read_cohort(tsv_participant_row: dict[str, str]) -> str | None:
+    """
+    Read the cohort field of a participant from a `participants.tsv` row.
+    """
+
+    for cohort_field_name in ['cohort', 'subproject']:
+        if cohort_field_name in tsv_participant_row:
+            return tsv_participant_row[cohort_field_name]
+
+    return None
diff --git a/python/loris_bids_reader/tsv_scans.py b/python/loris_bids_reader/tsv_scans.py
new file mode 100644
index 000000000..bdfe4c282
--- /dev/null
+++ b/python/loris_bids_reader/tsv_scans.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+from typing import Any
+
+from lib.util.crypto import compute_file_blake2b_hash
+from loris_bids_reader.scans import BIDSScanRow
+
+
+def add_scan_tsv_file_parameters(scan_tsv: BIDSScanRow, scans_tsv_path: Path, file_parameters: dict[str, Any]):
+    """
+    Add a scans.tsv file and row parameters to a LORIS file parameters dictionary.
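+
+    :Example: (a sketch; the row and path come from a previously parsed scans.tsv file)
+
+        file_parameters: dict[str, Any] = {}
+        add_scan_tsv_file_parameters(scan_row, scans_tsv_path, file_parameters)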
+    """
+
+    file_parameters['scan_acquisition_time'] = scan_tsv.acq_time
+    file_parameters['age_at_scan'] = scan_tsv.age_at_scan
+    file_parameters['scans_tsv_file'] = scans_tsv_path
+    file_parameters['scans_tsv_file_bake2hash'] = compute_file_blake2b_hash(scans_tsv_path)
diff --git a/python/loris_eeg_chunker/pyproject.toml b/python/loris_eeg_chunker/pyproject.toml
index e27726c05..7ce83ca9f 100644
--- a/python/loris_eeg_chunker/pyproject.toml
+++ b/python/loris_eeg_chunker/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
 ]
 
 [project.scripts]
+ctf-to-chunks = "loris_eeg_chunker.scripts.ctf_to_chunks:main"
 edf-to-chunks = "loris_eeg_chunker.scripts.edf_to_chunks:main"
 eeglab-to-chunks = "loris_eeg_chunker.scripts.eeglab_to_chunks:main"
 
diff --git a/python/loris_eeg_chunker/src/loris_eeg_chunker/chunking.py b/python/loris_eeg_chunker/src/loris_eeg_chunker/chunking.py
index a87042491..983f29ddc 100644
--- a/python/loris_eeg_chunker/src/loris_eeg_chunker/chunking.py
+++ b/python/loris_eeg_chunker/src/loris_eeg_chunker/chunking.py
@@ -181,8 +181,9 @@ def mne_file_to_chunks(path, chunk_size, loader, from_channel_name, channel_coun
     else:
         selected_channels = channel_names[from_channel_index:]
 
+    channels_count = len(selected_channels)
     for i, channel_name in enumerate(selected_channels):
-        print("Processing channel " + channel_name)
+        print(f"Processing channel {channel_name} ({i + 1} / {channels_count})")
         channel = parsed.get_data(channel_name)
         channel_min = np.amin(channel)
         channel_max = np.amax(channel)
diff --git a/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/ctf_to_chunks.py b/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/ctf_to_chunks.py
new file mode 100755
index 000000000..1d1932431
--- /dev/null
+++ b/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/ctf_to_chunks.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+from pathlib import Path
+from typing import cast
+
+from mne.io import read_raw_ctf  # type: ignore
+from mne.io.ctf import RawCTF
+
+from loris_eeg_chunker.chunking import write_chunk_directory  # type: ignore
+
+
+def load_channels(path: Path) -> RawCTF:
+    """Load CTF MEG data using MNE."""
+    # Load raw CTF data
+    raw_ctf = read_raw_ctf(path, preload=False, verbose=False)
+
+    # CTF data typically has triggers/stim channels that we might want to exclude
+    # You can modify this based on your needs
+    return raw_ctf
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Convert CTF MEG files (.ds directories) to chunks for browser-based visualisation.")
+    parser.add_argument('files', metavar='FILE', type=Path, nargs='+',
+                        help="one or more CTF .ds directories to convert to a directory of chunks")
+    parser.add_argument('--channel-index', '-i', type=int, default=0,
+                        help="Starting index of the channels to process")
+    parser.add_argument('--channel-count', '-c', type=int,
+                        help="Number of channels to process")
+    parser.add_argument('--chunk-size', '-s', type=int, default=5000,
+                        help="1-dimensional chunk size")
+    parser.add_argument('--downsamplings', '-r', type=int,
+                        help="How many downsampling levels to write to disk starting from the coarsest level.")
+    parser.add_argument('--destination', '-d', type=Path,
+                        help="optional destination for all the chunk directories")
+    parser.add_argument('--prefix', '-p', type=str,
+                        help="optional prefixing parent folder name each directory of chunks gets placed under")
+
+    args = parser.parse_args()
+
+    for path in args.files:
+        # Check if it's a CTF .ds directory
+        if not path.is_dir() or 
path.suffix != '.ds':
+            print(f"Warning: {path} doesn't appear to be a CTF .ds directory. Skipping.")
+            continue
+
+        # Load the raw_ctf data to get channel information
+        try:
+            raw_ctf = read_raw_ctf(path, preload=False, verbose=False)
+        except Exception as e:
+            print(f"Error loading {path}: {e}")
+            continue
+
+        channel_names = cast(list[str], raw_ctf.ch_names)  # type: ignore
+
+        if args.channel_index < 0:
+            print("Channel index must not be negative", file=sys.stderr)
+            sys.exit(-1)
+
+        if args.channel_index >= len(channel_names):
+            print("Channel index exceeds the number of channels", file=sys.stderr)
+            sys.exit(-1)
+
+        if args.channel_count and args.channel_count < 0:
+            print("Channel count must be a positive integer", file=sys.stderr)
+            sys.exit(-1)
+
+        print(f'Creating chunks for {path}')
+        write_chunk_directory(
+            path=path,
+            from_channel_index=args.channel_index,
+            from_channel_name=channel_names[args.channel_index],  # type: ignore
+            channel_count=args.channel_count,
+            loader=load_channels,
+            chunk_size=args.chunk_size,
+            destination=args.destination,
+            prefix=args.prefix
+        )
+
+
+if __name__ == '__main__':
+    main()
+
+# Channel filtering code; kept here for now, to be decided later whether it is needed:
+#
+# parser.add_argument('--exclude-ref', action='store_true',
+#                     help="exclude reference channels")
+# parser.add_argument('--meg-only', action='store_true',
+#                     help="only process MEG channels (exclude EEG, EOG, etc.)")
+# parser.add_argument('--grad-only', action='store_true',
+#                     help="only process gradiometer channels")
+# parser.add_argument('--mag-only', action='store_true',
+#                     help="only process magnetometer channels")
+#
+# ...
+#
+# # Get all channel names
+# all_channel_names = cast(list[str], raw_ctf.ch_names)  # type: ignore
+#
+# # Filter channels based on arguments
+# filtered_channel_names: list[str] = []
+#
+# for idx, ch_name in enumerate(all_channel_names):
+#     ch_type = raw_ctf.get_channel_types()[idx]  # type: ignore
+#
+#     # Apply filters
+#     if args.meg_only and ch_type not in ['mag', 'grad']:
+#         continue
+#     if args.grad_only and ch_type != 'grad':
+#         continue
+#     if args.mag_only and ch_type != 'mag':
+#         continue
+#     if args.exclude_ref and 'REF' in ch_name.upper():
+#         continue
+#
+#     filtered_channel_names.append(ch_name)
+#
+# # If we filtered channels, we need to update the indices
+# if filtered_channel_names != all_channel_names:
+#     # Create a mapping from filtered to original indices
+#     original_to_filtered = {}
+#     filtered_idx = 0
+#     for orig_idx, ch_name in enumerate(all_channel_names):
+#         if ch_name in filtered_channel_names:
+#             original_to_filtered[orig_idx] = filtered_idx
+#             filtered_idx += 1
+#
+#     # Adjust channel_index if needed
+#     if args.channel_index >= len(all_channel_names):
+#         print("Channel index exceeds the number of channels")
+#         sys.exit(-1)
+#
+#     # If the requested channel was filtered out, find the next available
+#     start_channel_name = all_channel_names[args.channel_index]
+#     if start_channel_name not in filtered_channel_names:
+#         # Find the first channel after the requested index that's in filtered list
+#         for i in range(args.channel_index + 1, len(all_channel_names)):
+#             if all_channel_names[i] in filtered_channel_names:
+#                 args.channel_index = i
+#                 start_channel_name = all_channel_names[i]
+#                 print(f"Note: Requested channel was filtered out. 
diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py
deleted file mode 100755
index 2902580b1..000000000
--- a/python/scripts/bids_import.py
+++ /dev/null
@@ -1,605 +0,0 @@
-#!/usr/bin/env python
-
-"""Script to import BIDS structure into LORIS."""
-
-import getopt
-import json
-import os
-import re
-import sys
-
-import lib.exitcode
-import lib.physiological
-import lib.utilities
-from lib.bidsreader import BidsReader
-from lib.candidate import Candidate
-from lib.config_file import load_config
-from lib.database import Database
-from lib.database_lib.config import Config
-from lib.eeg import Eeg
-from lib.mri import Mri
-from lib.session import Session
-from lib.util.crypto import compute_file_blake2b_hash
-
-
-def main():
-    bids_dir = ''
-    verbose = False
-    createcand = False
-    createvisit = False
-    idsvalidation = False
-    nobidsvalidation = False
-    type = None
-    profile = None
-    nocopy = False
-
-    long_options = [
-        "help", "profile=", "directory=",
-        "createcandidate", "createsession", "idsvalidation",
-        "nobidsvalidation", "nocopy", "type=",
-        "verbose"
-    ]
-    usage = (
-        '\n'
-        'usage : bids_import -d -p \n\n'
-        'options: \n'
-        '\t-p, --profile : name of the python database config file in the config directory\n'
-        '\t-d, --directory : BIDS directory to parse & insert into LORIS\n'
-        'If directory is within $data_dir/assembly_bids, no copy will be performed'
-        '\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n'
-        '\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n'
-        '\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n'
-        '\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n'
-        '\t-a, --nocopy : to disable dataset copy in data assembly_bids\n'
-        '\t-t, --type : raw | derivative. Specify the dataset type.'
-        'If not set, the pipeline will look for both raw and derivative files.\n'
-        'Required if no dataset_description.json is found.\n'
-        '\t-v, --verbose : be verbose\n'
-    )
-
-    try:
-        opts, _ = getopt.getopt(sys.argv[1:], 'hp:d:csinat:v', long_options)
-    except getopt.GetoptError:
-        print(usage)
-        sys.exit(lib.exitcode.GETOPT_FAILURE)
-
-    for opt, arg in opts:
-        if opt in ('-h', '--help'):
-            print(usage)
-            sys.exit()
-        elif opt in ('-p', '--profile'):
-            profile = arg
-        elif opt in ('-d', '--directory'):
-            bids_dir = arg
-        elif opt in ('-v', '--verbose'):
-            verbose = True
-        elif opt in ('-c', '--createcandidate'):
-            createcand = True
-        elif opt in ('-s', '--createsession'):
-            createvisit = True
-        elif opt in ('-i', '--idsvalidation'):
-            idsvalidation = True
-        elif opt in ('-n', '--nobidsvalidation'):
-            nobidsvalidation = True
-        elif opt in ('-a', '--nocopy'):
-            nocopy = True
-        elif opt in ('-t', '--type'):
-            type = arg
-
-    # input error checking and load config_file file
-    config_file = load_config(profile)
-    input_error_checking(bids_dir, usage)
-
-    dataset_json = bids_dir + "/dataset_description.json"
-    if not os.path.isfile(dataset_json) and not type:
-        print('No dataset_description.json found. Please run with the --type option.')
-        print(usage)
-        sys.exit(lib.exitcode.MISSING_ARG)
-
-    if type and type not in ('raw', 'derivative'):
-        print("--type must be one of 'raw', 'derivative'")
-        print(usage)
-        sys.exit(lib.exitcode.MISSING_ARG)
-
-    # database connection
-    db = Database(config_file.mysql, verbose)
-    db.connect()
-
-    config_obj = Config(db, verbose)
-    data_dir = config_obj.get_config('dataDirBasepath')
-    # making sure that there is a final / in data_dir
-    data_dir = data_dir if data_dir.endswith('/') else data_dir + "/"
-
-    # read and insert BIDS data
-    read_and_insert_bids(
-        bids_dir,
-        data_dir,
-        verbose,
-        createcand,
-        createvisit,
-        idsvalidation,
-        nobidsvalidation,
-        type,
-        nocopy,
-        db
-    )
-
-
-def input_error_checking(bids_dir, usage):
-    """
-    Checks whether the required inputs are set and that paths are valid.
-
-    :param bids_dir: path to the BIDS directory to parse and insert into LORIS
-    :type bids_dir: str
-    :param usage : script usage to be displayed when encountering an error
-    :type usage : st
-    """
-
-    if not bids_dir:
-        message = '\n\tERROR: you must specify a BIDS directory using -d or ' \
-                  '--directory option'
-        print(message)
-        print(usage)
-        sys.exit(lib.exitcode.MISSING_ARG)
-
-    if not os.path.isdir(bids_dir):
-        message = '\n\tERROR: you must specify a valid BIDS directory.\n' + \
-                  bids_dir + ' does not exist!'
-        print(message)
-        print(usage)
-        sys.exit(lib.exitcode.INVALID_PATH)
-
-
-def read_and_insert_bids(
-    bids_dir, data_dir, verbose, createcand, createvisit,
-    idsvalidation, nobidsvalidation, type, nocopy, db
-):
-    """
-    Read the provided BIDS structure and import it into the database.
-
-    :param bids_dir : path to the BIDS directory
-    :type bids_dir : str
-    :param data_dir : data_dir config value
-    :type data_dir : string
-    :param verbose : flag for more printing if set
-    :type verbose : bool
-    :param createcand : allow database candidate creation if it did not exist already
-    :type createcand : bool
-    :param createvisit : allow database visit creation if it did not exist already
-    :type createvisit : bool
-    :param idsvalidation : allow pscid/candid validation in the BIDS directory name
-    :type idsvalidation : bool
-    :param nobidsvalidation : disable bids dataset validation
-    :type nobidsvalidation : bool
-    :param type : raw | derivative. Type of the dataset
-    :type type : string
-    :param nocopy : disable bids dataset copy in assembly_bids
-    :type nocopy : bool
-    :param db : db object
-    :type db : object
-
-    """
-
-    # grep config settings from the Config module
-    config_obj = Config(db, verbose)
-    default_bids_vl = config_obj.get_config('default_bids_vl')
-
-    # Validate that pscid and candid matches
-    if idsvalidation:
-        validateids(bids_dir, db, verbose)
-
-    # load the BIDS directory
-    if nobidsvalidation:
-        bids_reader = BidsReader(bids_dir, verbose, False)
-    else:
-        bids_reader = BidsReader(bids_dir, verbose)
-    if not bids_reader.participants_info \
-            or not bids_reader.cand_sessions_list \
-            or not bids_reader.cand_session_modalities_list:
-        message = '\n\tERROR: could not properly parse the following' \
-                  'BIDS directory:' + bids_dir + '\n'
-        print(message)
-        sys.exit(lib.exitcode.UNREADABLE_FILE)
-
-    loris_bids_root_dir = None
-    if not nocopy:
-        # create the LORIS_BIDS directory in data_dir based on Name and BIDS version
-        loris_bids_root_dir = create_loris_bids_directory(
-            bids_reader, data_dir, verbose
-        )
-
-    # Assumption all same project (for project-wide tags)
-    single_project_id = None
-
-    # loop through subjects
-    for bids_subject_info in bids_reader.participants_info:
-
-        # greps BIDS information for the candidate
-        bids_id = bids_subject_info['participant_id']
-        bids_sessions = bids_reader.cand_sessions_list[bids_id]
-
-        # greps BIDS candidate's info from LORIS (creates the candidate if it
-        # does not exist yet in LORIS and the createcand flag is set to true)
-        loris_cand_info = grep_or_create_candidate_db_info(
-            bids_reader, bids_id, db, createcand, verbose
-        )
-
-        if not nocopy:
-            # create the candidate's directory in the LORIS BIDS import directory
-            lib.utilities.create_dir(loris_bids_root_dir + "sub-" + bids_id, verbose)
-
-        cand_id = loris_cand_info['CandID']
-        center_id = loris_cand_info['RegistrationCenterID']
-        project_id = loris_cand_info['RegistrationProjectID']
-        single_project_id = project_id
-
-        cohort_id = None
-        # TODO: change subproject -> cohort in participants.tsv?
-        if 'subproject' in bids_subject_info:
-            # TODO: change subproject -> cohort in participants.tsv?
-            cohort = bids_subject_info['subproject']
-            cohort_info = db.pselect(
-                "SELECT CohortID FROM cohort WHERE title = %s",
-                [cohort, ]
-            )
-            if len(cohort_info) > 0:
-                cohort_id = cohort_info[0]['CohortID']
-
-        # greps BIDS session's info for the candidate from LORIS (creates the
-        # session if it does not exist yet in LORIS and the createvisit is set
-        # to true. If no visit in BIDS structure, then use default visit_label
-        # stored in the Config module)
-        grep_candidate_sessions_info(
-            bids_sessions, bids_id, cand_id, loris_bids_root_dir,
-            createvisit, verbose, db, default_bids_vl,
-            center_id, project_id, cohort_id, nocopy
-        )
-
-    # Import root-level (dataset-wide) events.json
-    # Assumption: Single project for project-wide tags
-    bids_layout = bids_reader.bids_layout
-    root_event_metadata_file = bids_layout.get_nearest(
-        bids_dir,
-        return_type='tuple',
-        strict=False,
-        extension='json',
-        suffix='events',
-        all_=False,
-        subject=None,
-        session=None
-    )
-
-    dataset_tag_dict = {}
-    if not root_event_metadata_file:
-        message = '\nWARNING: no events metadata files (events.json) in ' \
-                  'root directory'
-        print(message)
-    else:
-        # copy the event file to the LORIS BIDS import directory
-        copy_file = str.replace(
-            root_event_metadata_file.path,
-            bids_layout.root,
-            ""
-        ).lstrip('/')
-
-        if not nocopy:
-            event_metadata_path = loris_bids_root_dir + copy_file
-            lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, verbose)
-
-        # TODO: Move
-        hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1'
-        hed_union = db.pselect(query=hed_query, args=())
-
-        # load json data
-        with open(root_event_metadata_file.path) as metadata_file:
-            event_metadata = json.load(metadata_file)
-        blake2 = compute_file_blake2b_hash(root_event_metadata_file.path)
-        physio = lib.physiological.Physiological(db, verbose)
-        _, dataset_tag_dict = physio.insert_event_metadata(
-            event_metadata=event_metadata,
-            event_metadata_file=event_metadata_path,
-            physiological_file_id=None,
-            project_id=single_project_id,
-            blake2=blake2,
-            project_wide=True,
-            hed_union=hed_union
-        )
-
-    # read list of modalities per session / candidate and register data
-    for row in bids_reader.cand_session_modalities_list:
-        bids_session = row['bids_ses_id']
-        visit_label = bids_session if bids_session else default_bids_vl
-        loris_bids_visit_rel_dir = 'sub-' + row['bids_sub_id'] + '/' + 'ses-' + visit_label
-
-        for modality in row['modalities']:
-            loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/'
-            if not nocopy:
-                lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose)
-
-            if modality == 'eeg' or modality == 'ieeg':
-                Eeg(
-                    bids_reader = bids_reader,
-                    bids_sub_id = row['bids_sub_id'],
-                    bids_ses_id = row['bids_ses_id'],
-                    bids_modality = modality,
-                    db = db,
-                    verbose = verbose,
-                    data_dir = data_dir,
-                    default_visit_label = default_bids_vl,
-                    loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir,
-                    loris_bids_root_dir = loris_bids_root_dir,
-                    dataset_tag_dict = dataset_tag_dict,
-                    dataset_type = type
-                )
-
-            elif modality in ['anat', 'dwi', 'fmap', 'func']:
-                Mri(
-                    bids_reader = bids_reader,
-                    bids_sub_id = row['bids_sub_id'],
-                    bids_ses_id = row['bids_ses_id'],
-                    bids_modality = modality,
-                    db = db,
-                    verbose = verbose,
-                    data_dir = data_dir,
-                    default_visit_label = default_bids_vl,
-                    loris_bids_mri_rel_dir = loris_bids_modality_rel_dir,
-                    loris_bids_root_dir = loris_bids_root_dir
-                )
-
-    # disconnect from the database
-    db.disconnect()
-
-
-def validateids(bids_dir, db, verbose):
-    """
-    Validate that pscid and candid matches
-
-    :param bids_dir : path to the BIDS directory
-    :type bids_dir : str
-    :param db : database handler object
-    :type db : object
-    :param verbose : flag for more printing if set
-    :type verbose : bool
-    """
-
-    bids_folder = bids_dir.rstrip('/').split('/')[-1]
-    bids_folder_parts = bids_folder.split('_')
-    psc_id = bids_folder_parts[0]
-    cand_id = bids_folder_parts[1]
-
-    candidate = Candidate(verbose, cand_id=cand_id)
-    loris_cand_info = candidate.get_candidate_info_from_loris(db)
-
-    if not loris_cand_info:
-        print("ERROR: could not find a candidate with cand_id " + cand_id + ".")
-        sys.exit(lib.exitcode.CANDID_NOT_FOUND)
-    if loris_cand_info['PSCID'] != psc_id:
-        print("ERROR: cand_id " + cand_id + " and psc_id " + psc_id + " do not match.")
-        sys.exit(lib.exitcode.CANDIDATE_MISMATCH)
-
-
-def create_loris_bids_directory(bids_reader, data_dir, verbose):
-    """
-    Creates the LORIS BIDS import root directory (with name and BIDS version)
-    and copy over the dataset_description.json, README and participants.tsv
-    files.
-
-    :param bids_reader: BIDS information handler object
-    :type bids_reader: object
-    :param data_dir : path of the LORIS data directory
-    :type data_dir : str
-    :param verbose : if true, prints out information while executing
-    :type verbose : bool
-
-    :return: path to the LORIS BIDS import root directory
-    :rtype: str
-    """
-
-    # making sure that there is a final / in bids_dir
-    bids_dir = bids_reader.bids_dir
-    bids_dir = bids_dir if bids_dir.endswith('/') else bids_dir + "/"
-
-    # determine the root directory of the LORIS BIDS and create it if does not exist
-    name = re.sub(r"[^0-9a-zA-Z]+", "_", bids_reader.dataset_name)  # get name of the dataset
-    version = re.sub(r"[^0-9a-zA-Z\.]+", "_", bids_reader.bids_version)  # get BIDSVersion of the dataset
-
-    # the LORIS BIDS directory will be in data_dir/BIDS/ and named with the
-    # concatenation of the dataset name and the BIDS version
-    loris_bids_dirname = lib.utilities.create_dir(
-        data_dir + "bids_imports/" + name + "_BIDSVersion_" + version + "/",
-        verbose
-    )
-
-    # copy the dataset JSON file to the new directory
-    lib.utilities.copy_file(
-        bids_dir + "dataset_description.json",
-        loris_bids_dirname + "dataset_description.json",
-        verbose
-    )
-
-    # copy the README file to the new directory
-    if os.path.isfile(bids_dir + "README"):
-        lib.utilities.copy_file(
-            bids_dir + "README",
-            loris_bids_dirname + "README",
-            verbose
-        )
-
-    # copy the participant.tsv file to the new directory
-    if os.path.exists(loris_bids_dirname + "participants.tsv"):
-        lib.utilities.append_to_tsv_file(
-            bids_dir + "participants.tsv",
-            loris_bids_dirname + "participants.tsv",
-            "participant_id",
-            verbose
-        )
-    else:
-        lib.utilities.copy_file(
-            bids_dir + "participants.tsv",
-            loris_bids_dirname + "participants.tsv",
-            verbose
-        )
-
-    return loris_bids_dirname
-
-
-def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbose):
-    """
-    Greps (or creates if candidate does not exist and createcand is true) the
-    BIDS candidate in the LORIS candidate's table and return a list of
-    candidates with their related fields from the database.
-
-    :param bids_reader : BIDS information handler object
-    :type bids_reader : object
-    :param bids_id : bids_id to be used (CandID or PSCID)
-    :type bids_id : str
-    :param db : database handler object
-    :type db : object
-    :param createcand : if true, creates the candidate in LORIS
-    :type createcand : bool
-    :param verbose : if true, prints out information while executing
-    :type verbose : bool
-
-    :return: list of candidate's dictionaries. One entry in the list holds
-             a dictionary with field's values from the candidate table
-    :rtype: list
-    """
-
-    candidate = Candidate(verbose=verbose, cand_id=bids_id)
-    loris_cand_info = candidate.get_candidate_info_from_loris(db)
-
-    if not loris_cand_info:
-        candidate = Candidate(verbose, psc_id=bids_id)
-        loris_cand_info = candidate.get_candidate_info_from_loris(db)
-
-    if not loris_cand_info and createcand:
-        loris_cand_info = candidate.create_candidate(
-            db, bids_reader.participants_info
-        )
-        if not loris_cand_info:
-            print("Creating candidate failed. Cannot importing the files.\n")
-            sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE)
-
-    if not loris_cand_info:
-        print("Candidate " + bids_id + " not found. You can retry with the --createcandidate option.\n")
-        sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND)
-
-    return loris_cand_info
-
-
-def grep_or_create_session_db_info(
-        bids_id, cand_id, visit_label,
-        db, createvisit, verbose, loris_bids_dir,
-        center_id, project_id, cohort_id, nocopy):
-    """
-    Greps (or creates if session does not exist and createvisit is true) the
-    BIDS session in the LORIS session's table and return a list of
-    sessions with their related fields from the database.
-
-    :parma bids_id : BIDS ID of the session
-    :type bids_id : str
-    :param cand_id : CandID to use to create the session
-    :type cand_id : int
-    :param visit_label : Visit label to use to create the session
-    :type visit_label : str
-    :param db : database handler object
-    :type db : object
-    :param createvisit : if true, creates the session in LORIS
-    :type createvisit : bool
-    :param verbose : if true, prints out information while executing
-    :type verbose : bool
-    :param loris_bids_dir: LORIS BIDS import root directory to copy data
-    :type loris_bids_dir: str
-    :param center_id : CenterID to use to create the session
-    :type center_id : int
-    :param project_id : ProjectID to use to create the session
-    :type project_id : int
-    :param cohort_id : CohortID to use to create the session
-    :type cohort_id : int
-    :param nocopy : if true, skip the assembly_bids dataset copy
-    :type nocopy : bool
-
-    :return: session information grepped from LORIS for cand_id and visit_label
-    :rtype: dict
-    """
-
-    session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id)
-    loris_vl_info = session.get_session_info_from_loris()
-
-    if not loris_vl_info and createvisit:
-        loris_vl_info = session.create_session()
-
-    if not nocopy:
-        # create the visit directory for in the candidate folder of the LORIS
-        # BIDS import directory
-        lib.utilities.create_dir(
-            loris_bids_dir + "sub-" + bids_id + "/ses-" + visit_label,
-            verbose
-        )
-
-    return loris_vl_info
-
-
-def grep_candidate_sessions_info(bids_ses, bids_id, cand_id, loris_bids_dir,
-                                 createvisit, verbose, db, default_vl,
-                                 center_id, project_id, cohort_id, nocopy):
-    """
-    Greps all session info dictionaries for a given candidate and aggregates
-    them into a list, with one entry per session. If the session does not
-    exist in LORIS and that createvisit is true, it will create the session
-    first.
-
-    :param bids_ses : list of BIDS sessions to grep info or insert
-    :type bids_ses : list
-    :param bids_id : BIDS ID of the candidate
-    :type bids_id : str
-    :param cand_id : candidate's CandID
-    :type cand_id : int
-    :param loris_bids_dir: LORIS BIDS import root directory to copy data
-    :type loris_bids_dir: str
-    :param createvisit : if true, creates the visits in LORIS
-    :type createvisit : bool
-    :param verbose : if true, prints out information while executing
-    :type verbose : bool
-    :param db : database handler object
-    :type db : object
-    :param default_vl : default visit label from the Config module
-    :type default_vl : str
-    :param center_id : center ID associated to the candidate and visit
-    :type center_id : int
-    :param project_id : project ID associated to the candidate and visit
-    :type project_id : int
-    :param cohort_id : cohort ID associated to the candidate and visit
-    :type cohort_id : int
-    :param nocopy : if true, skip the assembly_bids dataset copy
-    :type nocopy : bool
-
-
-
-    :return: list of all session's dictionaries for a given candidate
-    :rtype: list
-    """
-
-    loris_sessions_info = []
-
-    if not bids_ses:
-        loris_ses_info = grep_or_create_session_db_info(
-            bids_id, cand_id, default_vl, db,
-            createvisit, verbose, loris_bids_dir,
-            center_id, project_id, cohort_id, nocopy
-        )
-        loris_sessions_info.append(loris_ses_info)
-    else:
-        for visit_label in bids_ses:
-            loris_ses_info = grep_or_create_session_db_info(
-                bids_id, cand_id, visit_label, db,
-                createvisit, verbose, loris_bids_dir,
-                center_id, project_id, cohort_id, nocopy
-            )
-            loris_sessions_info.append(loris_ses_info)
-
-    return loris_sessions_info
-
-
-if __name__ == "__main__":
-    main()
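The deleted script's responsibilities now live under lib/import_bids_dataset; its CLI options are packed into an Args value by the replacement script below. The Args definition itself is not part of this diff, but judging from the pack_args call site it is presumably shaped roughly like this sketch (field names mirror the call site; the dataclass form and types are assumptions, not the actual python/lib/import_bids_dataset/args.py):

from dataclasses import dataclass
from pathlib import Path

@dataclass
class Args:
    # Field names mirror the pack_args call below; types are inferred.
    source_bids_path: Path
    type: str | None        # 'raw' | 'derivative' | None
    bids_validation: bool   # inverse of --nobidsvalidation
    create_candidate: bool
    create_session: bool
    copy: bool              # inverse of --nocopy
    verbose: bool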
diff --git a/python/scripts/import_bids_dataset.py b/python/scripts/import_bids_dataset.py
new file mode 100755
index 000000000..97917ff33
--- /dev/null
+++ b/python/scripts/import_bids_dataset.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+"""Script to import BIDS structure into LORIS."""
+
+from pathlib import Path
+from typing import Any
+
+import lib.exitcode
+from lib.import_bids_dataset.args import Args
+from lib.import_bids_dataset.main import import_bids_dataset
+from lib.logging import log_error_exit
+from lib.lorisgetopt import LorisGetOpt
+from lib.make_env import make_env
+
+
+def pack_args(options_dict: dict[str, Any]) -> Args:
+    return Args(
+        source_bids_path = Path(options_dict['directory']['value']),
+        type             = options_dict['type']['value'],
+        bids_validation  = not options_dict['nobidsvalidation']['value'],
+        create_candidate = options_dict['createcandidate']['value'],
+        create_session   = options_dict['createsession']['value'],
+        copy             = not options_dict['nocopy']['value'],
+        verbose          = options_dict['verbose']['value'],
+    )
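+
+# For example, running:
+#     import_bids_dataset.py -d /data/incoming/Face13 -t raw --nocopy
+# would (illustratively) pack into:
+#     Args(source_bids_path=Path('/data/incoming/Face13'), type='raw',
+#          bids_validation=True, create_candidate=False,
+#          create_session=False, copy=False, verbose=False)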
+
+
+# to limit the traceback when raising exceptions.
+# sys.tracebacklimit = 0
+
+def main():
+    usage = (
+        "\n"
+        "usage : import_bids_dataset -d -p \n"
+        "\n"
+        "options: \n"
+        "\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n"
+        "\t-d, --directory : BIDS directory to parse & insert into LORIS\n"
+        "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n"
+        "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n"
+        "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n"
+        "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n"
+        "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n"
+        "\t-t, --type : raw | derivative. Specify the dataset type.\n"
+        "\t If not set, the pipeline will look for both raw and derivative files.\n"
+        "\t Required if no dataset_description.json is found.\n"
+        "\t-v, --verbose : be verbose\n"
+    )
+
+    options_dict = {
+        "profile": {
+            "value": None, "required": False, "expect_arg": True, "short_opt": "p", "is_path": False
+        },
+        "directory": {
+            "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True
+        },
+        "createcandidate": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "c", "is_path": False
+        },
+        "createsession": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "s", "is_path": False
+        },
+        "nobidsvalidation": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "b", "is_path": False
+        },
+        "nocopy": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "a", "is_path": False
+        },
+        "type": {
+            "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False
+        },
+        "verbose": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False
+        },
+        "help": {
+            "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False
+        },
+    }
+
+    # Get the CLI arguments and initiate the environment.
+
+    loris_getopt_obj = LorisGetOpt(usage, options_dict, 'import_bids_dataset')
+
+    env = make_env(loris_getopt_obj)
+
+    # Check the CLI arguments.
+
+    type = loris_getopt_obj.options_dict['type']['value']
+    if type not in (None, 'raw', 'derivative'):
+        log_error_exit(
+            env,
+            f"--type must be one of 'raw', 'derivative'\n{usage}",
+            lib.exitcode.MISSING_ARG,
+        )
+
+    args = pack_args(loris_getopt_obj.options_dict)
+
+    # read and insert BIDS data
+    import_bids_dataset(
+        env,
+        args,
+        loris_getopt_obj.db,
+    )
+
+    print("Success!")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/tests/integration/scripts/test_import_bids_dataset.py b/python/tests/integration/scripts/test_import_bids_dataset.py
index 0bb12b96b..babf220b5 100644
--- a/python/tests/integration/scripts/test_import_bids_dataset.py
+++ b/python/tests/integration/scripts/test_import_bids_dataset.py
@@ -18,7 +18,7 @@ def test_import_eeg_bids_dataset():
     db.commit()
 
     process = run_integration_script([
-        'bids_import.py',
+        'import_bids_dataset.py',
         '--createcandidate', '--createsession',
         '--directory', '/data/loris/incoming/Face13',
     ])
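If further integration coverage is wanted, the same helper can exercise the new flags. A hypothetical follow-up test sketch, reusing run_integration_script and the Face13 dataset from the existing test; the flag combination and the returncode check are assumptions:

def test_import_eeg_bids_dataset_without_copy():
    # Hypothetical: exercise the --nocopy / --nobidsvalidation paths added
    # by the new script, using the same dataset as the existing test.
    process = run_integration_script([
        'import_bids_dataset.py',
        '--createcandidate', '--createsession',
        '--nocopy', '--nobidsvalidation',
        '--directory', '/data/loris/incoming/Face13',
    ])

    # Expect a clean exit; the script prints "Success!" on completion.
    assert process.returncode == 0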