Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 35 additions & 63 deletions Compound.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Ice Python binding \n",
"%pip install https://github.com/glencoesoftware/zeroc-ice-py-linux-x86_64/releases/download/20240202/zeroc_ice-3.6.5-cp310-cp310-manylinux_2_28_x86_64.whl\n",
"\n",
"%pip install idr-py"
]
},
Expand Down Expand Up @@ -103,20 +106,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up base URLS so can use shorter variable names later on"
"### Set up the base URL so we can use shorter variable names later on"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&id={compound_id}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}&case_sensitive=false\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}&case_sensitive=false\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
Expand All @@ -138,16 +137,7 @@
},
"outputs": [],
"source": [
"TYPE = \"compound\"\n",
"KEYS = {TYPE:\n",
" (\"InChIKey\",\n",
" \"PubChem InChIKey\",\n",
" \"Compound Concentration (microMolar)\",\n",
" \"Concentration (microMolar)\",\n",
" \"Dose\",\n",
" \"Compound MoA\",\n",
" \"Compound Action\")\n",
"}"
"COMPOUND_NAME = \"Compound Name\""
]
},
{
Expand All @@ -168,43 +158,34 @@
},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" plate_name = \"-\"\n",
" screen_name = name\n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': compound,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
"def parse_annotation(writer, json_data, compound):\n",
" for p in json_data:\n",
" plate_name = p['plate_name']\n",
" screen_name = p['screen_name']\n",
" c = compound.lower()\n",
" if c.startswith(\"ml\"):\n",
" c = 'ml9'\n",
" for i in session.get(url3).json()['images']:\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" row = {}\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for a in session.get(url4).json()['annotations']:\n",
" for v in a['values']:\n",
" key = str(v[0])\n",
" if key in KEYS[TYPE]:\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = v[1]\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(v[1].replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = v[1]\n",
" row.update({'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa})\n",
" writer.writerow(row)"
" image_id = p['id']\n",
" inchikey = \"unknown\"\n",
" concentration = \"unknown\"\n",
" moa = \"unknown\"\n",
" for v in p['key_values']:\n",
" key = v[\"name\"]\n",
" value = v[\"value\"]\n",
" if key in ['InChIKey', 'PubChem InChIKey']:\n",
" inchikey = value\n",
" elif key in ['Dose', 'Compound Concentration (microMolar)', 'Concentration (microMolar)']:\n",
" concentration = float(value.replace(' micromolar', ''))\n",
" elif key in ['Compound MoA', 'Compound Action']:\n",
" moa = value\n",
" row = {'Compound': c,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'InChIKey': inchikey,\n",
" 'Concentration (microMolar)': concentration,\n",
" 'MoA': moa}\n",
" writer.writerow(row)"
]
},
{
Expand Down Expand Up @@ -237,18 +218,9 @@
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" for compound in compounds:\n",
" qs1 = {'key': TYPE, 'value': compound}\n",
" url1 = URL.format(**qs1)\n",
" json_data = session.get(url1).json()\n",
" for m in json_data['maps']:\n",
" qs2 = {'key': TYPE, 'value': compound, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json_data = session.get(url2).json()\n",
" for s in json_data['screens']:\n",
" compound = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': compound, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" url = SEARCH_URL.format(**{'key': COMPOUND_NAME, 'value': compound})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], compound)\n",
"finally:\n",
" csvfile.close()"
]
Expand Down
162 changes: 58 additions & 104 deletions GenesToPhenotypes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,11 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}&case_sensitive=false&orphaned=true\"\n",
"SCREENS_PROJECTS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/?value={value}\"\n",
"PLATES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/plates/?value={value}&id={screen_id}\"\n",
"DATASETS_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/datasets/?value={value}&id={project_id}\"\n",
"IMAGES_URL = \"https://idr.openmicroscopy.org/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}\"\n",
"ATTRIBUTES_URL = \"https://idr.openmicroscopy.org/webclient/api/annotations/?type=map&image={image_id}\""
"SEARCH_URL = \"https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}\""
]
},
{
Expand All @@ -141,17 +136,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"TYPE = \"gene\"\n",
"KEYS = {\"phenotype\":\n",
" (\"Phenotype\",\n",
" \"Phenotype Term Name\",\n",
" \"Phenotype Term Accession\",\n",
" \"Phenotype Term Accession URL\")\n",
"}"
"GENE_SYMBOL = \"Gene Symbol\""
]
},
{
Expand All @@ -164,75 +153,60 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def parse_annotation(writer, json_data, name, data_type):\n",
" screen_name = \"-\"\n",
" plate_name = \"-\"\n",
" project_name = \"-\"\n",
" dataset_name = \"-\"\n",
" if data_type == 'datasets':\n",
" project_name = name\n",
" else:\n",
" screen_name = name\n",
" \n",
" for p in json_data[data_type]:\n",
" parent_id = p['id']\n",
" if data_type == 'datasets':\n",
" dataset_name = p['name']\n",
" else:\n",
" plate_name = p['name']\n",
" qs3 = {'key': TYPE, 'value': gene,\n",
" 'parent_type': data_type[:-1], 'parent_id': parent_id}\n",
" url3 = IMAGES_URL.format(**qs3)\n",
" for i in session.get(url3).json()['images']:\n",
"\n",
" image_id = i['id']\n",
" url4 = ATTRIBUTES_URL.format(**{'image_id': image_id})\n",
" for a in session.get(url4).json()['annotations']:\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in a['values']:\n",
" if str(v[0]) in KEYS['phenotype']:\n",
" if str(v[0]) in ['Phenotype']: # has phenotype\n",
" row[str(v[0])] = v[1] # so create row\n",
"def parse_annotation(writer, json_data, gene):\n",
"\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" for p in json_data:\n",
" screen_name = p[\"screen_name\"] if p[\"screen_name\"] else \"-\"\n",
" plate_name = p[\"plate_name\"] if p[\"plate_name\"] else \"-\"\n",
" project_name = p[\"project_name\"] if p[\"project_name\"] else \"-\"\n",
" dataset_name = p[\"dataset_name\"] if p[\"dataset_name\"] else \"-\"\n",
" image_id = p['id']\n",
" ontologies = [] # for ontology terms for a phenotype\n",
" row = {}\n",
" for v in p['key_values']:\n",
" key = v['name']\n",
" value = v['value']\n",
" # if there are ontology mappings for the\n",
" # phenotype, add them to the ontologies list\n",
" ontList = ['Phenotype Term Name',\n",
" 'Phenotype Term Accession',\n",
" 'Phenotype Term Accession URL']\n",
" \n",
" if key == 'Phenotype': # has phenotype\n",
" row[key] = value # so create row\n",
"\n",
" if str(v[0]) in ontList:\n",
" ontologies.extend([str(v[0]), str(v[1])])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
" elif key in ontList:\n",
" ontologies.extend([key, value])\n",
" if row:\n",
" if (len(ontologies) > 0): # 1+ ontology mapping\n",
" row.update({'Gene': gene,\n",
" 'Screen': screen_name,\n",
" 'Plate': plate_name,\n",
" 'Image': image_id,\n",
" 'Project' : project_name,\n",
" 'Dataset': dataset_name})\n",
" # we have the start of a row now\n",
" # but we want to print out as many rows\n",
" # as there are ontology mappings\n",
" # so if there is mapping to 1 ontology term\n",
" # print 1 row, if there are 2 ontology terms\n",
" # print 2 rows etc\n",
" numberOfRows = len(ontologies)/6\n",
" # this is 3 pairs of ontology values per\n",
" # mapping, add the ontology mappings and print\n",
" n = 1\n",
" while (n <= numberOfRows):\n",
" row.update({ontologies[0]: ontologies[1],\n",
" ontologies[2]: ontologies[3],\n",
" ontologies[4]: ontologies[5]})\n",
" # remove that set of ontology mappings\n",
" ontologies = ontologies[6:]\n",
" writer.writerow(row)\n",
" n = n + 1"
]
},
{
Expand All @@ -258,32 +232,12 @@
" 'Phenotype Term Accession URL']\n",
" writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
" writer.writeheader()\n",
" new_genes = []\n",
" for g in genes:\n",
" qs1 = {'key': TYPE, 'value': g}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']: \n",
" new_genes.append(m['id'])\n",
" \n",
" for gene in new_genes:\n",
" qs1 = {'key': TYPE, 'value': gene}\n",
" url1 = URL.format(**qs1)\n",
" json = session.get(url1).json()\n",
" for m in json['maps']:\n",
" qs2 = {'key': TYPE, 'value': gene, 'compound_id': m['id']}\n",
" url2 = SCREENS_PROJECTS_URL.format(**qs2)\n",
" json = session.get(url2).json()\n",
" for s in json['screens']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'screen_id': s['id']}\n",
" url3 = PLATES_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n",
" for p in json['projects']:\n",
" gene = s['extra']['value']\n",
" qs3 = {'key': TYPE, 'value': gene, 'project_id': p['id']}\n",
" url3 = DATASETS_URL.format(**qs3)\n",
" parse_annotation(writer, session.get(url3).json(), p['name'], 'datasets') \n",
" for gene in genes:\n",
" url = SEARCH_URL.format(**{'key': GENE_SYMBOL, 'value': gene})\n",
" json_data = session.get(url).json()\n",
" parse_annotation(writer, json_data['results']['results'], gene)\n",
" \n",
"finally:\n",
" csvfile.close()"
]
Expand Down
4 changes: 2 additions & 2 deletions binder/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ channels:
dependencies:
- pip
- idr-py
- scikit-image=0.17.*
- scikit-learn=0.23.*
- scikit-image
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should pin to a newer version; otherwise, this could lead to unexpected failures.

- scikit-learn
- bokeh=2.2.*
- ipywidgets
- matplotlib
Expand Down