# --- group_code/helper.py ---
def combine_dataframes(dfs, tags, on="subject_id"):
    """Merge dataframes on a shared key, tag-prefixing non-key columns.

    Each dataframe's columns (except ``on``) are renamed to ``{tag}_{col}``
    so that columns from different modalities cannot collide, then all
    dataframes are inner-joined on ``on`` so only IDs present in EVERY
    dataframe survive.

    Args:
        dfs: sequence of pandas DataFrames, each containing column ``on``.
        tags: one prefix string per dataframe, same length as ``dfs``.
        on: merge-key column name (default ``"subject_id"``).

    Returns:
        A single merged pandas DataFrame.

    Raises:
        ValueError: if ``dfs`` is empty or ``len(dfs) != len(tags)``.
    """
    # ValueError instead of assert: asserts are stripped under `python -O`,
    # which would silently let mismatched inputs through.
    if len(dfs) != len(tags):
        raise ValueError("Number of dataframes must match number of tags")
    if not dfs:
        raise ValueError("At least one dataframe is required")

    # Rename columns in each dataframe (except the merge key).
    renamed = [
        df.rename(columns={col: f"{tag}_{col}" for col in df.columns if col != on})
        for df, tag in zip(dfs, tags)
    ]

    # Iteratively merge with inner join.
    combined = renamed[0]
    for df in renamed[1:]:
        combined = combined.merge(df, on=on, how="inner")
    return combined


# --- group_code/mm_dataset.py ---
from group_code.helper import combine_dataframes
from group_code.uni_model.ecg import ECG_uni
from group_code.uni_model.echo import ECHO_uni
from group_code.uni_model.ehr import EHR_uni
from mmai25_hackathon.dataset import BaseDataset

# One constructor per supported modality; a dispatch table replaces the
# if/elif chain and keeps __init__ and __add__ behaviourally consistent.
_MODALITY_FACTORIES = {"ecg": ECG_uni, "ehr": EHR_uni, "echo": ECHO_uni}


class EEE_dataset(BaseDataset):
    """Multimodal (ECG / EHR / ECHO) dataset keyed by ``subject_id``.

    ``combined_records`` is the inner join of the ECG and ECHO record
    tables, so indexing only yields subjects present in both; EHR data
    is fetched on demand per subject and does not constrain the join.
    """

    def __init__(self, data_roots, data_mods):
        """Build unimodal datasets for each (root, modality) pair.

        Args:
            data_roots: filesystem roots, one per modality.
            data_mods: modality names; each must be a key of
                ``_MODALITY_FACTORIES``.

        Raises:
            ValueError: on an unsupported modality name (the original
                printed a warning and continued, silently dropping data).
        """
        self.uni_dict = {}
        self._register(data_roots, data_mods)

    def _register(self, data_roots, data_mods):
        """Instantiate unimodal datasets and rebuild the joined record table."""
        for mod, r_path in zip(data_mods, data_roots):
            try:
                self.uni_dict[mod] = _MODALITY_FACTORIES[mod](r_path)
            except KeyError:
                raise ValueError(f"Modality not supported: {mod!r}") from None
        # EHR keeps no record index (loaded per subject), so exclude it
        # from the inner join over record tables.
        keyed = {k: v for k, v in self.uni_dict.items() if k != "ehr"}
        self.combined_records = combine_dataframes(
            [v.records for v in keyed.values()],
            list(keyed),
        )

    def __len__(self) -> int:
        return len(self.combined_records)

    def __getitem__(self, idx: int):
        """Return ``{modality: unimodal_result}`` for the idx-th joined row."""
        subject_id = self.combined_records.iloc[idx]["subject_id"]
        return {key: uni.fetch(subject_id) for key, uni in self.uni_dict.items()}

    def __add__(self, data_roots, data_mods):
        """Register additional modality roots in place.

        NOTE(review): despite the name this is NOT the binary ``+``
        protocol (wrong arity for ``a + b``); the name is kept for
        backward compatibility, but it behaves like an in-place
        ``add_modalities``. Generalized from the original, which silently
        handled only ``"ecg"`` and ignored every other modality.
        """
        self._register(data_roots, data_mods)

    def get_idx_from_sub_id(self, subject_id):
        """Return the index of the first combined row for ``subject_id``.

        Raises IndexError when the subject is absent from the join.
        """
        matches = self.combined_records["subject_id"] == subject_id
        return self.combined_records.index[matches][0]


# --- group_code/uni_model/ecg.py ---
from torch.utils.data import Dataset

from mmai25_hackathon.load_data.ecg import load_ecg_record, load_mimic_iv_ecg_record_list


class ECG_uni(Dataset):
    """Unimodal ECG dataset over the MIMIC-IV ECG matched subset."""

    def __init__(self, mod_root):
        # One row per ECG; includes `subject_id` and `hea_path` columns.
        self.records = load_mimic_iv_ecg_record_list(mod_root)

    def __len__(self) -> int:
        return len(self.records)

    def __getitem__(self, idx: int):
        """Return ``{subject_id: [signal, header_fields]}`` for row ``idx``."""
        row = self.records.iloc[idx]
        sig, fields = load_ecg_record(row["hea_path"])
        return {row["subject_id"]: [sig, fields]}

    def get_idx_by_subject(self, subject_id):
        # First matching row label; raises IndexError when absent.
        return self.records.index[self.records["subject_id"] == subject_id][0]

    def fetch(self, subject_id):
        """Convenience: look up by subject_id instead of positional index."""
        return self[self.get_idx_by_subject(subject_id)]
# --- group_code/uni_model/echo.py ---
from torch.utils.data import Dataset

from mmai25_hackathon.load_data.echo import load_echo_dicom, load_mimic_iv_echo_record_list

# DICOM header attributes worth surfacing; everything else is dropped.
_ECHO_META_KEYS = ("NumberOfFrames", "Rows", "Columns", "FrameTime", "CineRate")


class ECHO_uni(Dataset):
    """Unimodal echocardiogram dataset over the MIMIC-IV ECHO subset."""

    def __init__(self, mod_root):
        # One row per echo study; includes `subject_id` and `echo_path`.
        self.records = load_mimic_iv_echo_record_list(mod_root)

    def __len__(self) -> int:
        return len(self.records)

    def __getitem__(self, idx: int):
        """Return ``{subject_id: filtered_metadata}`` for row ``idx``.

        The pixel frames are loaded but intentionally not returned
        (metadata only), matching the original behaviour.
        """
        row = self.records.iloc[idx]
        frames, meta = load_echo_dicom(row["echo_path"])
        meta_filtered = {k: meta[k] for k in _ECHO_META_KEYS if k in meta}
        return {row["subject_id"]: meta_filtered}

    def get_idx_by_subject(self, subject_id):
        # First matching row label; raises IndexError when absent.
        return self.records.index[self.records["subject_id"] == subject_id][0]

    def fetch(self, subject_id):
        """Convenience: look up by subject_id instead of positional index."""
        return self[self.get_idx_by_subject(subject_id)]


# --- group_code/uni_model/ehr.py ---
from torch.utils.data import Dataset

from mmai25_hackathon.load_data.ehr import load_mimic_iv_ehr


class EHR_uni(Dataset):
    """Unimodal EHR accessor: loads tables on demand, keyed by subject_id.

    Unlike ECG/ECHO, no record index is materialised up front, so this
    dataset has no meaningful length and no positional indexing.
    """

    def __init__(self, mod_root):
        self.root = mod_root

    def __len__(self) -> int:
        # Bug fix: the original returned len(self.records), but ``records``
        # is never created for EHR — every call raised AttributeError.
        # Fail with an explicit, actionable error instead.
        raise TypeError("EHR_uni has no static length; use fetch(subject_id)")

    def __getitem__(self, subject_id):
        # Keyed by subject_id (NOT a positional index), mirroring fetch().
        return self.fetch(subject_id)

    def fetch(self, subject_id):
        """Load and merge icustays + admissions rows for one subject.

        Returns ``{subject_id: merged_dataframe}`` with columns
        first_careunit (icustays) and admittime (admissions), inner-joined
        on (subject_id, hadm_id).
        """
        dfs_new = load_mimic_iv_ehr(
            ehr_path=self.root,
            module="both",
            tables=["icustays", "admissions"],
            index_cols=["subject_id", "hadm_id"],
            subset_cols={
                "icustays": ["first_careunit"],
                "admissions": ["admittime"],
            },
            filter_rows={"subject_id": [int(subject_id)]},
            merge=True,
            join="inner",
        )
        return {subject_id: dfs_new}
# --- group_test/test_102.py ---
from group_code.mm_dataset import EEE_dataset
from group_code.uni_model.ecg import ECG_uni
from group_code.uni_model.echo import ECHO_uni
from group_code.uni_model.ehr import EHR_uni

ecg_root = "mimic-iv/mimic-iv-ecg-diagnostic-electrocardiogram-matched-subset-1.0/"
echo_root = "mimic-iv/mimic-iv-echo-0.1.physionet.org/"
ehr_root = "mimic-iv/mimic-iv-3.1/"


def test_by_id(id):
    """Check the multimodal dataset agrees with each unimodal dataset for one subject."""
    multimodal = EEE_dataset([ecg_root, ehr_root, echo_root], ["ecg", "ehr", "echo"])
    combined = multimodal[multimodal.get_idx_from_sub_id(id)]

    # Fetch the same subject through each standalone unimodal dataset.
    ecg_res = ECG_uni(ecg_root).fetch(id)
    ehr_res = EHR_uni(ehr_root).fetch(id)
    echo_res = ECHO_uni(echo_root).fetch(id)

    # The combined view must match what each modality reports on its own.
    assert combined["ecg"][id][1]["comments"] == ecg_res[id][1]["comments"]
    assert combined["echo"][id]["Rows"] == echo_res[id]["Rows"]
    assert len(combined["ehr"][id]) == len(ehr_res[id])


if __name__ == "__main__":
    test_by_id(102)
    test_by_id(101)
"cell_type": "code", + "execution_count": 4, + "id": "2649c7ad", + "metadata": {}, + "outputs": [], + "source": [ + "from group_code.mm_dataset import *" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "221279ec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ecg': {np.int64(101): [array([[ 0.02 , 0.06 , 0.04 , ..., 0.035, 0.42 , -0.28 ],\n", + " [ 0. , 0.075, 0.075, ..., 0.045, 0.41 , -0.295],\n", + " [-0.015, 0.075, 0.09 , ..., 0.055, 0.4 , -0.3 ],\n", + " ...,\n", + " [-0.015, -0.015, 0. , ..., -0.235, 0.34 , -0.065],\n", + " [ 0. , -0.015, -0.015, ..., -0.235, 0.34 , -0.065],\n", + " [ 0. , -0.02 , -0.02 , ..., -0.225, 0.34 , -0.065]],\n", + " shape=(5000, 12)),\n", + " {'fs': 500,\n", + " 'sig_len': 5000,\n", + " 'n_sig': 12,\n", + " 'base_date': datetime.date(2180, 8, 6),\n", + " 'base_time': datetime.time(9, 7),\n", + " 'units': ['mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV'],\n", + " 'sig_name': ['I',\n", + " 'II',\n", + " 'III',\n", + " 'aVR',\n", + " 'aVF',\n", + " 'aVL',\n", + " 'V1',\n", + " 'V2',\n", + " 'V3',\n", + " 'V4',\n", + " 'V5',\n", + " 'V6'],\n", + " 'comments': [': 101']}]},\n", + " 'ehr': {np.int64(101): subject_id hadm_id admittime \\\n", + " 0 101 1 24/02/2196 14:38 \n", + " 1 101 2 17/09/2153 17:08 \n", + " 2 101 3 18/08/2134 02:02 \n", + " \n", + " first_careunit \n", + " 0 Neuro Stepdown \n", + " 1 Neuro Surgical Intensive Care Unit (Neuro SICU) \n", + " 2 Neuro Intermediate },\n", + " 'echo': {np.int64(101): {'NumberOfFrames': '58',\n", + " 'Rows': 708,\n", + " 'Columns': 1016,\n", + " 'FrameTime': '33.6842',\n", + " 'CineRate': '30'}}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = EEE_dataset(\n", + " [ecg_root, ehr_root, echo_root],\n", + " ['ecg', 'ehr', 'echo']\n", + ")\n", + "ds[4]" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "id": "7b9466a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ecg': {np.int64(101): [array([[ 0.02 , 0.06 , 0.04 , ..., 0.035, 0.42 , -0.28 ],\n", + " [ 0. , 0.075, 0.075, ..., 0.045, 0.41 , -0.295],\n", + " [-0.015, 0.075, 0.09 , ..., 0.055, 0.4 , -0.3 ],\n", + " ...,\n", + " [-0.015, -0.015, 0. , ..., -0.235, 0.34 , -0.065],\n", + " [ 0. , -0.015, -0.015, ..., -0.235, 0.34 , -0.065],\n", + " [ 0. , -0.02 , -0.02 , ..., -0.225, 0.34 , -0.065]],\n", + " shape=(5000, 12)),\n", + " {'fs': 500,\n", + " 'sig_len': 5000,\n", + " 'n_sig': 12,\n", + " 'base_date': datetime.date(2180, 8, 6),\n", + " 'base_time': datetime.time(9, 7),\n", + " 'units': ['mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV',\n", + " 'mV'],\n", + " 'sig_name': ['I',\n", + " 'II',\n", + " 'III',\n", + " 'aVR',\n", + " 'aVF',\n", + " 'aVL',\n", + " 'V1',\n", + " 'V2',\n", + " 'V3',\n", + " 'V4',\n", + " 'V5',\n", + " 'V6'],\n", + " 'comments': [': 101']}]},\n", + " 'ehr': {np.int64(101): subject_id hadm_id admittime \\\n", + " 0 101 1 24/02/2196 14:38 \n", + " 1 101 2 17/09/2153 17:08 \n", + " 2 101 3 18/08/2134 02:02 \n", + " \n", + " first_careunit \n", + " 0 Neuro Stepdown \n", + " 1 Neuro Surgical Intensive Care Unit (Neuro SICU) \n", + " 2 Neuro Intermediate },\n", + " 'echo': {np.int64(101): {'NumberOfFrames': '58',\n", + " 'Rows': 708,\n", + " 'Columns': 1016,\n", + " 'FrameTime': '33.6842',\n", + " 'CineRate': '30'}}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c0defa9b", + "metadata": {}, + "outputs": [], + "source": [ + "from group_test.test_102 import *" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "04f7829a", + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + " subject_id hadm_id admittime first_careunit\n", + "0 102 4 13/11/2111 23:39 Trauma SICU (TSICU)\n", + "1 102 5 04/08/2113 18:46 Trauma SICU (TSICU)\n", + "2\n" + ] + } + ], + "source": [ + "test_by_id(102)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mmai-hackathon", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}