-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from nriss/nr/add_em_dicom_extract
add em dicom extract
- Loading branch information
Showing
4 changed files
with
294 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b9f92452-b2df-4fa2-b014-bd95e51ae571", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"source": [ | ||
"# Extracting data from DICOM files\n", | ||
"### This notebook extract data from Dicom files using the dicomTags." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2c857a38-eabe-497a-904e-39ad862dad9c", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Installing required packages:\n", | ||
"\n", | ||
"1. Create a new environment (https://docs.python.org/3/library/venv.html)\n", | ||
"\n", | ||
"```python\n", | ||
"python3 -m venv /path/to/new/virtual/environment\n", | ||
"```\n", | ||
"2. Activate the new environment\n", | ||
"\n", | ||
"```python\n", | ||
"source env/bin/activate\n", | ||
"```\n", | ||
"\n", | ||
"3. Install required packages\n", | ||
"\n", | ||
"```python\n", | ||
"pip install -r requirements.txt\n", | ||
"```\n", | ||
"\n", | ||
"4. Run the notebook. :)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "09335bfd-7719-4c49-ac02-ad28ad21f374", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"from pydicom import dcmread\n", | ||
"import re\n", | ||
"import glob" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2409bef7-55d0-452b-a562-8903ec428e9b", | ||
"metadata": {}, | ||
"source": [ | ||
"DICOM_file: The path containing the Dicom File.\n", | ||
"tags_file_osiris: the file containing the dicomTags to be extracted.\n", | ||
"For this example the original excel file of Christophe is used." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "085dd51a-7840-4329-a47a-2e5bdc7220f3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"DICOM_file = dcmread(\"dicom_file.dcm\")\n", | ||
"DICOM_file = dcmread(\"files/1-01.dcm\")\n", | ||
"tags_file_osiris = pd.read_excel(\n", | ||
" \"modele_osiris_radiomics.xlsx\", sheet_name=\"Feuil1\", header=0\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "3b8b6f69-5c99-46a8-b933-44474b65427a", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Display tags_file_osiris and Dicom File" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "74f1435d-e7e2-4a4c-965c-1028c77f1c26", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"tags_file_osiris" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8ae59cfa-f60b-43e0-a62d-d8d1c0dab5a9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"print(DICOM_file)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "926bd5b8-176f-49d4-9d1e-b26e48af42f3", | ||
"metadata": {}, | ||
"source": [ | ||
"### Extracting the data and building a data frame" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "395c5919-c8c8-430c-9bf0-b2128437e607", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"list_tags = []\n", | ||
"dicom_df = pd.DataFrame(columns=[\"tag\", \"name\", \"value\"])\n", | ||
"\n", | ||
"# extraction of dicomTags from the excel file\n", | ||
"for j in range(len(tags_file_osiris)):\n", | ||
" try:\n", | ||
" tag_ext = re.search(r\"\\((.*?)\\)\", tags_file_osiris[\"Source\"][j]).group(1)\n", | ||
" tag_conv = \"0x\" + tag_ext[0:4] + tag_ext[5:]\n", | ||
" list_tags.append(tag_conv)\n", | ||
" except:\n", | ||
" pass\n", | ||
"\n", | ||
"# searching for the tags in the dicom file and building the dataframe\n", | ||
"for i in list_tags:\n", | ||
" try:\n", | ||
" df_tag = str(DICOM_file[i].tag)\n", | ||
" df_name = DICOM_file[i].name\n", | ||
" df_value = DICOM_file[i].value\n", | ||
"\n", | ||
" dicom_df = dicom_df.append(\n", | ||
" {\"tag\": df_tag, \"name\": df_name, \"value\": df_value}, ignore_index=True\n", | ||
" )\n", | ||
"\n", | ||
" except:\n", | ||
" pass\n", | ||
"\n", | ||
"\n", | ||
"dicom_df" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b3a305ee-0a14-424e-8683-92c1ebc1c1ff", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Exporting to CSV" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "180c8dff-aae7-490a-83fb-209e670f5958", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"dicom_df.to_csv('dicom_data-osiris.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f5d14377-3613-4e57-b5c8-8627421bc5b8", | ||
"metadata": {}, | ||
"source": [ | ||
"### Working with file lists and building a transposed dataframe" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "89e65145-147b-40cc-a26d-6ed6d5d3bf40", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"dcm_files = (glob.glob(\"files/*.dcm\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d8752cb4-9ace-4798-ade6-a23034b2fa09", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"list_tags = []\n", | ||
"\n", | ||
"\n", | ||
"# extraction of dicomTags from the excel file\n", | ||
"for j in range(len(tags_file_osiris)):\n", | ||
" try:\n", | ||
" tag_ext = re.search(r\"\\((.*?)\\)\", tags_file_osiris[\"Source\"][j]).group(1)\n", | ||
" tag_conv = \"0x\" + tag_ext[0:4] + tag_ext[5:]\n", | ||
" list_tags.append(tag_conv)\n", | ||
" except:\n", | ||
" pass\n", | ||
"\n", | ||
"dicom_df = pd.DataFrame(columns=[\"file\",\"tag\", \"value\"])\n", | ||
"dicom_dict = {}\n", | ||
"\n", | ||
"# searching for the tags in the dicom file and building the dataframe\n", | ||
"for i in dcm_files:\n", | ||
" DICOM_file = dcmread(i)\n", | ||
" dicom_dict[i] = {}\n", | ||
" for j in list_tags:\n", | ||
" try:\n", | ||
" tag = str(DICOM_file[j].tag)\n", | ||
" value = DICOM_file[j].value\n", | ||
" dicom_dict[i][tag] = value\n", | ||
" except:\n", | ||
" pass" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "71b5225c-dee9-46f5-920c-2d36ef0828d9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_dicom = pd.DataFrame.from_dict(dicom_dict, orient='index')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "03d3864d-8f63-49d1-96e0-b4b741111a25", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_dicom.head(10)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "307f6f7f-1b2e-4bd4-a7c4-b30ff5da7a7c", | ||
"metadata": {}, | ||
"source": [ | ||
"#### Just for fun!" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "225ea97e-e261-46f2-ae57-67018c62c828", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import matplotlib.pyplot as plt\n", | ||
"\n", | ||
"plt.imshow(DICOM_file.pixel_array, cmap=plt.cm.gray)\n", | ||
"plt.show()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.12" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
pandas==1.2.3 | ||
numpy==1.20.2 | ||
matplotlib==3.4.1 | ||
pydicom==2.3.0.dev0 |