# ---
# jupyter:
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %% [markdown]
# # Extracting data from DICOM files
# ### This notebook extracts data from DICOM files using the dicomTags.

# %% [markdown]
# #### Installing required packages:
#
# 1. Create a new environment (https://docs.python.org/3/library/venv.html)
#
# ```bash
# python3 -m venv /path/to/new/virtual/environment
# ```
# 2. Activate the new environment
#
# ```bash
# source /path/to/new/virtual/environment/bin/activate
# ```
#
# 3. Install required packages
#
# ```bash
# pip install -r requirements.txt
# ```
#
# 4. Run the notebook. :)

# %%
import glob
import re
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pydicom import dcmread

# %% [markdown]
# #### Configuration
#
# All paths are relative to the directory the notebook is run from.

# %%
# Single DICOM file inspected in the first half of the notebook.
# "dicom_file.dcm" (the sample stored next to the notebook) can be used instead.
DICOM_PATH = "files/1-01.dcm"
# Pattern matching every DICOM file processed in the second half.
DICOM_GLOB = "files/*.dcm"
# Excel workbook, sheet and column listing the dicomTags to extract.
TAGS_WORKBOOK = "modele_osiris_radiomics.xlsx"
TAGS_SHEET = "Feuil1"
TAGS_COLUMN = "Source"
# Destination of the single-file export.
CSV_OUTPUT = "dicom_data-osiris.csv"

# A "(gggg,eeee)" group/element pair of 4 hex digits each; whitespace around
# the digits and the comma is tolerated.
TAG_PATTERN = re.compile(r"\(\s*([0-9A-Fa-f]{4})\s*,\s*([0-9A-Fa-f]{4})\s*\)")

# %% [markdown]
# #### Helpers

# %%
def extract_tags(sources):
    """Collect the DICOM tags written as ``(gggg,eeee)`` in ``sources``.

    Parameters
    ----------
    sources : iterable
        Cell values to scan, e.g. one column of the tags workbook.

    Returns
    -------
    list of str
        One ``"0xggggeeee"`` string per cell containing a tag, in input
        order. Duplicates are kept. Cells that are not strings (such as
        empty cells) or that contain no tag are skipped.
    """
    tags = []
    for cell in sources:
        if not isinstance(cell, str):
            continue  # nothing to parse, e.g. an empty cell read as NaN
        match = TAG_PATTERN.search(cell)
        if match is None:
            continue  # free text without a tag
        tags.append("0x" + match.group(1) + match.group(2))
    return tags


def read_elements(dataset, tags):
    """Look up ``tags`` in ``dataset`` and describe the elements found.

    Parameters
    ----------
    dataset : pydicom dataset
        Dataset returned by ``dcmread``.
    tags : iterable of str
        Tags as produced by :func:`extract_tags`.

    Returns
    -------
    list of dict
        One ``{"tag", "name", "value"}`` record per tag present in
        ``dataset``, in the order of ``tags``. Absent tags are skipped
        silently; any other failure while reading an element is reported
        with a warning and that tag is skipped.
    """
    records = []
    for tag in tags:
        try:
            element = dataset[tag]
            record = {
                "tag": str(element.tag),
                "name": element.name,
                "value": element.value,
            }
        except KeyError:
            continue  # tag not present in this file
        except Exception as exc:  # keep the run best-effort, but visible
            warnings.warn(f"Skipping tag {tag}: {exc!r}")
            continue
        records.append(record)
    return records


# %% [markdown]
# `DICOM_file`: the dataset read from the DICOM file.
# `tags_file_osiris`: the table listing the dicomTags to be extracted.
# For this example the original excel file of Christophe is used.

# %%
DICOM_file = dcmread(DICOM_PATH)
tags_file_osiris = pd.read_excel(TAGS_WORKBOOK, sheet_name=TAGS_SHEET, header=0)

# %% [markdown]
# #### Display tags_file_osiris and Dicom File

# %%
tags_file_osiris

# %%
print(DICOM_file)

# %% [markdown]
# ### Extracting the data and building a data frame

# %%
# extraction of dicomTags from the excel file
list_tags = extract_tags(tags_file_osiris[TAGS_COLUMN])

# searching for the tags in the dicom file and building the dataframe
# (built once from all records instead of appending row by row)
dicom_df = pd.DataFrame(
    read_elements(DICOM_file, list_tags), columns=["tag", "name", "value"]
)

dicom_df

# %% [markdown]
# #### Exporting to CSV

# %%
dicom_df.to_csv(CSV_OUTPUT)

# %% [markdown]
# ### Working with file lists and building a transposed dataframe

# %%
# sorted so that the row order does not depend on the file system
dcm_files = sorted(glob.glob(DICOM_GLOB))

# %%
# searching for the tags in every dicom file: {file path: {tag: value}}
# A local name is used for each dataset so that `DICOM_file` keeps pointing
# at the file loaded at the top of the notebook.
dicom_dict = {}
for dcm_path in dcm_files:
    dataset = dcmread(dcm_path)
    dicom_dict[dcm_path] = {
        record["tag"]: record["value"]
        for record in read_elements(dataset, list_tags)
    }

# %%
df_dicom = pd.DataFrame.from_dict(dicom_dict, orient="index")

# %%
df_dicom.head(10)

# %% [markdown]
# #### Just for fun!

# %%
fig, ax = plt.subplots()
ax.imshow(DICOM_file.pixel_array, cmap=plt.cm.gray)
ax.set_title(DICOM_PATH)
plt.show()

# %% [markdown]
# #### Pinned dependencies (`requirements.txt`)
#
# ```
# pandas==1.2.3
# numpy==1.20.2
# matplotlib==3.4.1
# pydicom==2.3.0.dev0
# ```
#
# NOTE(review): reading the `.xlsx` tag file with pandas presumably also needs
# an Excel engine such as `openpyxl`, which is not pinned above; confirm and
# add it.
#
# NOTE(review): `2.3.0.dev0` is a development version of pydicom; confirm it
# can be installed from your package index.