# Query template for the IDR search engine; {key} and {value} are filled in
# once per compound by the calling cell.
# NOTE(review): {value} is substituted verbatim -- names containing characters
# such as '&' or '#' would presumably need URL-encoding first; confirm.
SEARCH_URL = "https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}"

# Annotation key used when searching images by compound.
COMPOUND_NAME = "Compound Name"

# Annotation keys that map onto each CSV column.
_INCHIKEY_KEYS = ('InChIKey', 'PubChem InChIKey')
_CONCENTRATION_KEYS = ('Dose',
                       'Compound Concentration (microMolar)',
                       'Concentration (microMolar)')
_MOA_KEYS = ('Compound MoA', 'Compound Action')

_UNIT_SUFFIX = 'micromolar'
_UNKNOWN = "unknown"
_MISSING_NAME = "-"


def _parse_concentration(value):
    """Convert an annotation value to a float, or return None if not numeric.

    A trailing ``micromolar`` unit (any letter case) is stripped before the
    conversion, so ``"10 micromolar"`` and ``"10"`` both yield ``10.0``.
    """
    text = str(value).strip()
    if text.lower().endswith(_UNIT_SUFFIX):
        text = text[:-len(_UNIT_SUFFIX)].strip()
    try:
        return float(text)
    except ValueError:
        return None


def parse_annotation(writer, json_data, compound):
    """Write one CSV row per image record found for ``compound``.

    Parameters
    ----------
    writer : object with a ``writerow(dict)`` method (e.g. ``csv.DictWriter``)
    json_data : iterable of dict
        Image records; each one is read for ``id``, ``screen_name``,
        ``plate_name`` and ``key_values`` (a list of dicts with ``name`` and
        ``value`` entries).
    compound : str
        The compound name that was searched for.

    Each row has the columns ``Compound``, ``Screen``, ``Plate``, ``Image``,
    ``InChIKey``, ``Concentration (microMolar)`` and ``MoA``.  Annotation
    columns default to ``"unknown"``; a missing screen or plate name is
    written as ``"-"``.
    """
    # The label depends only on the searched compound, so compute it once.
    label = compound.lower()
    if label.startswith("ml"):
        # Every name starting with "ml" is reported as 'ml9'.
        # NOTE(review): presumably merges spelling variants of one compound
        # -- confirm this is still wanted.
        label = 'ml9'

    for record in json_data:
        inchikey = _UNKNOWN
        concentration = _UNKNOWN
        moa = _UNKNOWN
        for pair in record.get('key_values') or []:
            key = pair["name"]
            value = pair["value"]
            if key in _INCHIKEY_KEYS:
                inchikey = value
            elif key in _CONCENTRATION_KEYS:
                parsed = _parse_concentration(value)
                # An unparseable dose must not abort the whole export; keep
                # whatever was recorded so far for this image.
                if parsed is not None:
                    concentration = parsed
            elif key in _MOA_KEYS:
                moa = value

        writer.writerow({
            'Compound': label,
            'Screen': record.get('screen_name') or _MISSING_NAME,
            'Plate': record.get('plate_name') or _MISSING_NAME,
            'Image': record['id'],
            'InChIKey': inchikey,
            'Concentration (microMolar)': concentration,
            'MoA': moa,
        })
# Query template for the IDR search engine; {key} and {value} are filled in
# once per gene by the calling cell.
SEARCH_URL = "https://idr.openmicroscopy.org/searchengine/api/v1/resources/image/search/?key={key}&value={value}"

# Annotation key used when searching images by gene.
GENE_SYMBOL = "Gene Symbol"

# Keys that together describe one ontology mapping of a phenotype.
_ONTOLOGY_KEYS = ('Phenotype Term Name',
                  'Phenotype Term Accession',
                  'Phenotype Term Accession URL')
_PAIRS_PER_MAPPING = 3
_MISSING_NAME = "-"


def parse_annotation(writer, json_data, gene):
    """Write phenotype/ontology CSV rows for the image records of ``gene``.

    Parameters
    ----------
    writer : object with a ``writerow(dict)`` method (e.g. ``csv.DictWriter``)
    json_data : iterable of dict
        Image records; each one is read for ``id``, ``screen_name``,
        ``plate_name``, ``project_name``, ``dataset_name`` and ``key_values``
        (a list of dicts with ``name`` and ``value`` entries).
    gene : str
        The gene that was searched for; copied into the ``Gene`` column.

    An image yields rows only when it has a ``Phenotype`` key and at least
    one complete ontology mapping.  One row is written per group of three
    consecutive ontology key/value pairs; a trailing incomplete group is
    ignored.  Missing container names are written as ``"-"``.
    """
    for record in json_data:
        row = {}
        ontology_pairs = []  # (key, value) pairs, in the order they appear
        # NOTE(review): all key/values of an image are scanned as one flat
        # list, so if an image carries several 'Phenotype' entries only the
        # last value is kept and every ontology mapping is written against
        # it -- confirm whether the search engine can return such records.
        for pair in record.get('key_values') or []:
            key = pair['name']
            value = pair['value']
            if key == 'Phenotype':  # has phenotype, so start a row
                row[key] = value
            elif key in _ONTOLOGY_KEYS:
                ontology_pairs.append((key, value))

        if not row or not ontology_pairs:
            continue

        row.update({'Gene': gene,
                    'Screen': record.get('screen_name') or _MISSING_NAME,
                    'Plate': record.get('plate_name') or _MISSING_NAME,
                    'Image': record['id'],
                    'Project': record.get('project_name') or _MISSING_NAME,
                    'Dataset': record.get('dataset_name') or _MISSING_NAME})

        # One row per ontology mapping: 1 mapping -> 1 row, 2 -> 2 rows, etc.
        # Pairs are grouped purely by position (three at a time).
        # NOTE(review): this assumes the three term keys of a mapping arrive
        # consecutively -- confirm against the API output.
        complete = len(ontology_pairs) // _PAIRS_PER_MAPPING
        for index in range(complete):
            start = index * _PAIRS_PER_MAPPING
            row.update(ontology_pairs[start:start + _PAIRS_PER_MAPPING])
            writer.writerow(row)
'screen_id': s['id']}\n", - " url3 = PLATES_URL.format(**qs3)\n", - " parse_annotation(writer, session.get(url3).json(), s['name'], 'plates')\n", - " for p in json['projects']:\n", - " gene = s['extra']['value']\n", - " qs3 = {'key': TYPE, 'value': gene, 'project_id': p['id']}\n", - " url3 = DATASETS_URL.format(**qs3)\n", - " parse_annotation(writer, session.get(url3).json(), p['name'], 'datasets') \n", + " for gene in genes:\n", + " url = SEARCH_URL.format(**{'key': GENE_SYMBOL, 'value': gene})\n", + " json_data = session.get(url).json()\n", + " parse_annotation(writer, json_data['results']['results'], gene)\n", + " \n", "finally:\n", " csvfile.close()" ] diff --git a/binder/environment.yml b/binder/environment.yml index f18ddeff..9fc7a827 100644 --- a/binder/environment.yml +++ b/binder/environment.yml @@ -6,8 +6,8 @@ channels: dependencies: - pip - idr-py -- scikit-image=0.17.* -- scikit-learn=0.23.* +- scikit-image +- scikit-learn - bokeh=2.2.* - ipywidgets - matplotlib