Skip to content

Commit

Permalink
Merge pull request #350 from Aariq/cir_query-#289
Browse files Browse the repository at this point in the history
Closes #289.
  • Loading branch information
stitam authored Feb 19, 2022
2 parents f2fe526 + eefb613 commit 67d30b8
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 26 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Description: Chemical information from around the web. This package interacts
Flavornet, NIST Chemistry WebBook, OPSIN, PAN Pesticide Database, PubChem,
SRS, Wikidata.
Type: Package
Version: 1.1.2
Version: 1.1.2.9002
Date: 2021-12-06
License: MIT + file LICENSE
URL: https://docs.ropensci.org/webchem/, https://github.com/ropensci/webchem
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# webchem 1.1.2.9001
# webchem (development version)

## NEW FEATURES

* cir_query() now returns a tibble instead of a list to be consistent with other translator functions. This is a potentially *breaking change* for users.

## BUG FIXES

Expand Down
38 changes: 25 additions & 13 deletions R/cir.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#'
#' @import xml2
#' @importFrom utils URLencode
#' @importFrom rlang :=
#'
#' @param identifier character; chemical identifier.
#' @param representation character; what representation of the identifier should
Expand All @@ -19,7 +20,7 @@
#' @param choices deprecated. Use the \code{match} argument instead.
#' @param verbose logical; should a verbose output be printed on the console?
#' @param ... currently not used.
#' @return A list of character vectors.
#' @return A tibble with a `query` column and a column for the requested representation.
#' @details
#' CIR can resolve can be of the following \code{identifier}: Chemical Names,
#' IUPAC names,
Expand Down Expand Up @@ -112,7 +113,8 @@
#'
#'}
#' @export
cir_query <- function(identifier, representation = "smiles",
cir_query <- function(identifier,
representation = "smiles",
resolver = NULL,
match = c("all", "first", "ask", "na"),
verbose = getOption("verbose"),
Expand All @@ -124,16 +126,23 @@ cir_query <- function(identifier, representation = "smiles",
if (!missing("choices")) {
stop("`choices` is deprecated. Use `match` instead.")
}

if (length(representation) > 1 | !is.character(representation)) {
stop("`representation` must be a string. See ?cir_query for options.")
}

match <- match.arg(match)
foo <- function(identifier, representation, resolver, first, verbose) {

foo <- function(identifier, representation, resolver, match, verbose) {
na_tbl <- tibble(query = identifier, !!representation := NA)
if (is.na(identifier)) {
if (verbose) webchem_message("na")
return(NA)
return(na_tbl)
}
if (verbose) webchem_message("query", identifier, appendLF = FALSE)
identifier <- URLencode(identifier, reserved = TRUE)
id <- URLencode(identifier, reserved = TRUE)
baseurl <- "https://cactus.nci.nih.gov/chemical/structure"
qurl <- paste(baseurl, identifier, representation, 'xml', sep = '/')
qurl <- paste(baseurl, id, representation, 'xml', sep = '/')
if (!is.null(resolver)) {
qurl <- paste0(qurl, '?resolver=', resolver)
}
Expand All @@ -145,15 +154,15 @@ cir_query <- function(identifier, representation = "smiles",
quiet = TRUE), silent = TRUE)
if (inherits(h, "try-error")) {
if (verbose) webchem_message("service_down")
return(NA)
return(na_tbl)
}
if (verbose) message(httr::message_for_status(h))
if (h$status_code == 200){
tt <- read_xml(content(h, as = 'raw'))
out <- xml_text(xml_find_all(tt, '//item'))
if (length(out) == 0) {
if (verbose) webchem_message("not_found")
return(NA)
return(na_tbl)
}
out <- matcher(out, query = identifier, match = match, verbose = verbose)
if (representation %in% c('mw', 'monoisotopic_mass', 'h_bond_donor_count',
Expand All @@ -164,16 +173,19 @@ cir_query <- function(identifier, representation = "smiles",
'heavy_atom_count', 'deprotonable_group_count',
'protonable_group_count') )
out <- as.numeric(out)
return(out)

out_tbl <- tibble(query = identifier, !!representation := out)
return(out_tbl)
}
else {
return(NA)
return(na_tbl)
}
}


out <- lapply(identifier, foo, representation = representation,
resolver = resolver, first = first, verbose = verbose)
names(out) <- identifier
return(out)
resolver = resolver, match = match, verbose = verbose)
bind_rows(out)
}

#' Query Chemical Identifier Resolver Images
Expand Down
2 changes: 1 addition & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ matcher <-
} else {
choices <- x
}
pick <- menu(choices, graphics = FALSE, "Select one:")
pick <- menu(choices, graphics = FALSE, paste0("Select result for '", query, "':"))
return(x[pick])

} else if (match == "na") {
Expand Down
2 changes: 1 addition & 1 deletion man/cir_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 20 additions & 9 deletions tests/testthat/test-cir.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,45 @@ test_that("cir_query()", {
skip_on_cran()
skip_if_not(up, "CIR server is down")

expect_equal(cir_query('Triclosan', 'mw')[[1]], 289.5451)
expect_equal(cir_query('xxxxxxx', 'mw')[[1]], NA)
expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number')[[1]],
expect_equal(cir_query('Triclosan', 'mw')$mw[1], 289.5451)
expect_equal(cir_query('xxxxxxx', 'mw')$mw[1], NA)
expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number')$stdinchikey[1],
"InChIKey=XEFQLINVKFYRCS-UHFFFAOYSA-N")
expect_true(length(cir_query('Triclosan', 'cas')[[1]]) > 1)
expect_length(cir_query('Triclosan', 'cas', match = "first")[[1]], 1)
expect_length(cir_query(c('Triclosan', 'Aspirin'), 'cas'), 2)
expect_true(nrow(cir_query('Triclosan', 'cas')) > 1)
expect_true(nrow(cir_query('Triclosan', 'cas', match = "first")) == 1)
expect_true(nrow(cir_query(c('Triclosan', 'Aspirin'), 'cas', match = "first")) == 2)

expect_equal(cir_query('acetic acid', 'mw', match = "first"),
list(`acetic acid` = 60.0524))
tibble(query = "acetic acid", mw = 60.0524))

})

test_that("cir_query() doesn't mistake NA for sodium", {
skip_on_cran()
skip_if_not(up, "CIR server is down")

expect_true(is.na(cir_query(as.character(NA), 'cas')))
expect_true(is.na(cir_query(as.character(NA), 'cas')$cas))
})

test_that("cir_query() handles special characters in SMILES", {
skip_on_cran()
skip_if_not(up, "CIR server is down")

expect_equal(cir_query("C#C", representation = "inchikey")[[1]],
expect_equal(cir_query("C#C", representation = "inchikey")$inchikey,
"InChIKey=HSFWRNGVRCDJHI-UHFFFAOYNA-N")
})

test_that("cir_query() handles NA queries and queries that return NA", {
skip_on_cran()
skip_if_not(up, "CIR server is down")

expect_identical(
cir_query(c("Triclosan", "pumpkin", NA), representation = "cas",match = "first"),
tibble(query = c("Triclosan", "pumpkin", NA_character_),
cas = c("3380-34-5", NA_character_, NA_character_))
)
})

test_that("cir_img()", {
skip_on_cran()
skip_if_not(up, "CIR server is down")
Expand Down

0 comments on commit 67d30b8

Please sign in to comment.