Skip to content

Commit

Permalink
Updated uniprot query to allow default empty responses.
Browse files Browse the repository at this point in the history
  • Loading branch information
pgrosjean committed Jun 15, 2024
1 parent ed97806 commit 72e0174
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 115 deletions.
11 changes: 9 additions & 2 deletions src/uniprot/functions/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,21 @@ pub async fn async_query_uniprot(
query
);
let value = Client::new()
.get(url)
.get(&url) // Updated to pass the URL by reference
.header("content-type", "application/json")
.send()
.await?
.json::<Value>()
.await?;

// Updated to check if the struct is non-empty
let info = UniprotInfo::from_value(&value, gene)?;
Ok(info)
if let Some(uniprot_info) = info {
if uniprot_info.is_non_empty() {
return Ok(Some(uniprot_info));
}
}
Ok(None) // Return None if the struct is empty
}

/// An asynchronous function which joins all the handles from `async_query_uniprot`
Expand Down
219 changes: 106 additions & 113 deletions src/uniprot/types/uniprotinfo.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::utils::{FastaRecord, FastaRecords};
use anyhow::{bail, Result};
use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::{collections::HashMap, fmt};

// A container for UniprotInfo
Expand Down Expand Up @@ -31,7 +30,7 @@ impl UniprotInfoContainer {
}

/// A structure to handle the relevant results of a `Uniprot` query.
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct UniprotInfo {
pub uniprot_id: String,
pub uniprot_entry_name: String,
Expand All @@ -48,6 +47,7 @@ pub struct UniprotInfo {
pub protein_existence: String,
pub query: String,
}

impl fmt::Display for UniprotInfo {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
Expand All @@ -57,26 +57,28 @@ impl fmt::Display for UniprotInfo {
)
}
}

impl UniprotInfo {
#[must_use]
pub fn from_value(value: &Value, query: &str) -> Result<Option<Self>> {
pub fn from_value(value: &serde_json::Value, query: &str) -> Result<Option<Self>> {
if !Self::is_valid(value) {
return Ok(None);
return Ok(Some(Self::default()));
}
let uniprot_id = Self::get_uniprot_id(value)?;
let uniprot_entry_name = Self::get_uniprot_entry_name(value)?;
let primary_gene_name = Self::get_primary_gene_name(value)?;

let uniprot_id = Self::get_uniprot_id(value).unwrap_or_default();
let uniprot_entry_name = Self::get_uniprot_entry_name(value).unwrap_or_default();
let primary_gene_name = Self::get_primary_gene_name(value).unwrap_or_default();
let uniprot_synonyms = Self::get_uniprot_synonyms(value);
let protein_name = Self::get_protein_names(value)?;
let uniprot_description = Self::get_uniprot_description(value)?;
let protein_name = Self::get_protein_names(value).unwrap_or_default();
let uniprot_description = Self::get_uniprot_description(value).unwrap_or_default();
let ncbi_id = Self::get_ncbi_id(value);
let pdb_id = Self::get_pdb_id(value);
let taxon_id = Self::get_taxon_id(value)?;
let organism_name = Self::get_organism_name(value)?;
let sequence = Self::get_protein_sequence(value)?;
let sequence_version = Self::get_sequence_version(value)?;
let protein_existence = Self::get_protein_existence(value)?;
let taxon_id = Self::get_taxon_id(value).unwrap_or_default();
let organism_name = Self::get_organism_name(value).unwrap_or_default();
let sequence = Self::get_protein_sequence(value).unwrap_or_default();
let sequence_version = Self::get_sequence_version(value).unwrap_or_default();
let protein_existence = Self::get_protein_existence(value).unwrap_or_default();
let query = query.to_string();

Ok(Some(Self {
uniprot_id,
uniprot_entry_name,
Expand All @@ -95,130 +97,121 @@ impl UniprotInfo {
}))
}

fn is_valid(value: &Value) -> bool {
/// Reports whether the response carries at least one search hit.
///
/// The check passes when `results[0]` of `value` is anything other than JSON null.
fn is_valid(value: &serde_json::Value) -> bool {
    let first_hit = &value["results"][0];
    !first_hit.is_null()
}

fn get_uniprot_id(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["primaryAccession"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not parse Uniprot ID")
}
/// Reads the primary accession of the first search hit.
///
/// # Errors
/// Fails with "Could not parse Uniprot ID" when `results[0].primaryAccession`
/// is not a JSON string.
fn get_uniprot_id(value: &serde_json::Value) -> Result<String> {
    let accession = &value["results"][0]["primaryAccession"];
    match accession.as_str() {
        Some(id) => Ok(id.to_owned()),
        None => Err(anyhow!("Could not parse Uniprot ID")),
    }
}

fn get_uniprot_entry_name(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["uniProtkbId"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not parse Uniprot Entry Name")
}
/// Reads the entry name (`uniProtkbId`) of the first search hit.
///
/// # Errors
/// Fails with "Could not parse Uniprot Entry Name" when `results[0].uniProtkbId`
/// is not a JSON string.
fn get_uniprot_entry_name(value: &serde_json::Value) -> Result<String> {
    let first_hit = &value["results"][0];
    if let Some(entry_name) = first_hit["uniProtkbId"].as_str() {
        return Ok(entry_name.to_owned());
    }
    Err(anyhow!("Could not parse Uniprot Entry Name"))
}

fn get_primary_gene_name(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["genes"][0]["geneName"]["value"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not parse primary gene name")
}
/// Reads the name of the first gene listed on the first search hit.
///
/// # Errors
/// Fails with "Could not parse primary gene name" when
/// `results[0].genes[0].geneName.value` is not a JSON string.
fn get_primary_gene_name(value: &serde_json::Value) -> Result<String> {
    let primary_gene = &value["results"][0]["genes"][0];
    let gene_name = primary_gene["geneName"]["value"].as_str();
    match gene_name {
        Some(name) => Ok(name.to_owned()),
        None => Err(anyhow!("Could not parse primary gene name")),
    }
}

fn get_protein_sequence(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["sequence"]["value"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not protein sequence")
}
/// Reads the sequence string of the first search hit.
///
/// # Errors
/// Fails with "Could not parse protein sequence" when
/// `results[0].sequence.value` is not a JSON string.
fn get_protein_sequence(value: &serde_json::Value) -> Result<String> {
    let sequence = &value["results"][0]["sequence"];
    if let Some(residues) = sequence["value"].as_str() {
        Ok(residues.to_owned())
    } else {
        Err(anyhow!("Could not parse protein sequence"))
    }
}

fn get_uniprot_synonyms(value: &Value) -> Vec<String> {
match value["results"][0]["genes"][0]["synonyms"].as_array() {
Some(values) => values
.iter()
.map(|x| x["value"].as_str().unwrap().to_string())
.collect(),
None => Vec::new(),
}
/// Collects the synonyms of the first gene listed on the first search hit.
///
/// Returns an empty vector when `results[0].genes[0].synonyms` is not an array.
/// A synonym whose `value` is not a JSON string is kept as an empty string, so
/// the output has one element per array entry.
fn get_uniprot_synonyms(value: &serde_json::Value) -> Vec<String> {
    let mut synonyms = Vec::new();
    if let Some(entries) = value["results"][0]["genes"][0]["synonyms"].as_array() {
        for entry in entries {
            let name = entry["value"].as_str().unwrap_or("");
            synonyms.push(name.to_owned());
        }
    }
    synonyms
}

fn get_protein_names(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["proteinDescription"]["recommendedName"]["fullName"]
["value"]
/// Reads the recommended full protein name of the first search hit, taken from
/// `results[0].proteinDescription.recommendedName.fullName.value`.
///
/// # Errors
/// Fails with "Could not parse protein names" when that value is not a JSON string.
fn get_protein_names(value: &serde_json::Value) -> Result<String> {
value["results"][0]["proteinDescription"]["recommendedName"]["fullName"]["value"]
.as_str()
{
Ok(s.to_string())
} else {
bail!("Could not parse protein names")
}
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("Could not parse protein names"))
}

fn get_uniprot_description(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["comments"][0]["texts"][0]["value"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not parse uniprot description")
}
/// Reads the first text of the first comment on the first search hit.
///
/// # Errors
/// Fails with "Could not parse uniprot description" when
/// `results[0].comments[0].texts[0].value` is not a JSON string.
fn get_uniprot_description(value: &serde_json::Value) -> Result<String> {
    let first_text = &value["results"][0]["comments"][0]["texts"][0];
    match first_text["value"].as_str() {
        Some(description) => Ok(description.to_owned()),
        None => Err(anyhow!("Could not parse uniprot description")),
    }
}

fn get_ncbi_id(value: &Value) -> Option<String> {
match value["results"][0]["uniProtKBCrossReferences"].as_array() {
Some(values) => {
let reference = values
.iter()
.filter(|x| x["database"] == "GeneID")
.take(1)
.next();
reference.map(|v| v["id"].as_str().unwrap().to_string())
}
None => None,
}
/// Looks up the id of the first cross-reference whose `database` is `"GeneID"`.
///
/// Returns `None` when `results[0].uniProtKBCrossReferences` is not an array,
/// when no entry matches, or when the matching entry's `id` is not a JSON string.
fn get_ncbi_id(value: &serde_json::Value) -> Option<String> {
    let cross_refs = value["results"][0]["uniProtKBCrossReferences"].as_array()?;
    let gene_ref = cross_refs
        .iter()
        .find(|xref| xref["database"] == "GeneID")?;
    gene_ref["id"].as_str().map(str::to_owned)
}
fn get_pdb_id(value: &Value) -> Option<String> {
match value["results"][0]["uniProtKBCrossReferences"].as_array() {
Some(values) => {
let reference = values
.iter()
.filter(|x| x["database"] == "PDB")
.take(1)
.next();
reference.map(|v| v["id"].as_str().unwrap().to_string())
}
None => None,
}

/// Looks up the id of the first cross-reference whose `database` is `"PDB"`.
///
/// Returns `None` when `results[0].uniProtKBCrossReferences` is not an array,
/// when no entry matches, or when the matching entry's `id` is not a JSON string.
fn get_pdb_id(value: &serde_json::Value) -> Option<String> {
    let cross_refs = value["results"][0]["uniProtKBCrossReferences"].as_array()?;
    for xref in cross_refs {
        if xref["database"] == "PDB" {
            // Only the first matching entry is consulted, even if its id is unusable.
            return xref["id"].as_str().map(|id| id.to_owned());
        }
    }
    None
}

fn get_taxon_id(value: &Value) -> Result<usize> {
if let Some(s) = value["results"][0]["organism"]["taxonId"].as_u64() {
Ok(s as usize)
} else {
bail!("Could not parse taxon id")
}
/// Reads the organism taxon id of the first search hit.
///
/// # Errors
/// Fails with "Could not parse taxon id" when `results[0].organism.taxonId`
/// is not representable as an unsigned 64-bit integer.
fn get_taxon_id(value: &serde_json::Value) -> Result<usize> {
    let organism = &value["results"][0]["organism"];
    match organism["taxonId"].as_u64() {
        Some(taxon) => Ok(taxon as usize),
        None => Err(anyhow!("Could not parse taxon id")),
    }
}

fn get_organism_name(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["organism"]["commonName"].as_str() {
Ok(s.to_string())
} else if let Some(s) = value["results"][0]["organism"]["scientificName"].as_str() {
Ok(s.to_string())
} else {
bail!("Could not parse organism name")
}
/// Reads the organism name of the first search hit, preferring `commonName`
/// and falling back to `scientificName`.
///
/// # Errors
/// Fails with "Could not parse organism name" when neither field is a JSON string.
fn get_organism_name(value: &serde_json::Value) -> Result<String> {
    let organism = &value["results"][0]["organism"];
    // Keys are tried in priority order; the first one holding a string wins.
    for key in &["commonName", "scientificName"] {
        if let Some(name) = organism[*key].as_str() {
            return Ok(name.to_owned());
        }
    }
    Err(anyhow!("Could not parse organism name"))
}

fn get_sequence_version(value: &Value) -> Result<usize> {
if let Some(s) = value["results"][0]["entryAudit"]["sequenceVersion"].as_u64() {
Ok(s as usize)
} else {
bail!("Could not parse sequence version")
}
/// Reads the sequence version recorded in the first search hit's entry audit.
///
/// # Errors
/// Fails with "Could not parse sequence version" when
/// `results[0].entryAudit.sequenceVersion` is not representable as an unsigned
/// 64-bit integer.
fn get_sequence_version(value: &serde_json::Value) -> Result<usize> {
    let audit = &value["results"][0]["entryAudit"];
    if let Some(version) = audit["sequenceVersion"].as_u64() {
        return Ok(version as usize);
    }
    Err(anyhow!("Could not parse sequence version"))
}

fn get_protein_existence(value: &Value) -> Result<String> {
if let Some(s) = value["results"][0]["proteinExistence"].as_str() {
Ok(s.to_string().chars().nth(0).unwrap().to_string())
} else {
bail!("Could not parse protein existence")
}
/// Reads the protein-existence field of the first search hit and keeps only its
/// first character.
///
/// NOTE(review): presumably the leading character is the numeric evidence level
/// of the UniProt annotation — confirm against the API response format.
///
/// # Errors
/// Fails with "Could not parse protein existence" when
/// `results[0].proteinExistence` is not a JSON string or is an empty string.
fn get_protein_existence(value: &serde_json::Value) -> Result<String> {
    value["results"][0]["proteinExistence"]
        .as_str()
        // `next()` yields `None` on an empty string, so an empty field becomes a
        // regular parse error instead of panicking as `nth(0).unwrap()` did.
        .and_then(|s| s.chars().next())
        .map(|level| level.to_string())
        .ok_or_else(|| anyhow!("Could not parse protein existence"))
}

/// Reports whether at least one field differs from its empty/zero state.
///
/// Strings and the synonym list count when they are non-empty, the optional
/// ids count when they are `Some`, and the numeric fields count when they are
/// non-zero.
pub fn is_non_empty(&self) -> bool {
    let text_fields = [
        &self.uniprot_id,
        &self.uniprot_entry_name,
        &self.primary_gene_name,
        &self.protein_name,
        &self.uniprot_description,
        &self.organism_name,
        &self.sequence,
        &self.protein_existence,
        &self.query,
    ];
    let has_text = text_fields.iter().any(|field| !field.is_empty());
    let has_cross_ref = self.ncbi_id.is_some() || self.pdb_id.is_some();
    let has_number = self.taxon_id != 0 || self.sequence_version != 0;

    has_text || !self.uniprot_synonyms.is_empty() || has_cross_ref || has_number
}

pub fn fasta_header(&self) -> String {
Expand Down

0 comments on commit 72e0174

Please sign in to comment.