diff --git a/DESCRIPTION b/DESCRIPTION index e5f2576b..45e9817b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Description: Chemical information from around the web. This package interacts Flavornet, NIST Chemistry WebBook, OPSIN, PAN Pesticide Database, PubChem, SRS, Wikidata. Type: Package -Version: 1.1.2 +Version: 1.1.2.9002 Date: 2021-12-06 License: MIT + file LICENSE URL: https://docs.ropensci.org/webchem/, https://github.com/ropensci/webchem diff --git a/NEWS.md b/NEWS.md index e1193baf..7c1d39d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -# webchem 1.1.2.9001 +# webchem (development version) + +## NEW FEATURES + +* cir_query() now returns a tibble instead of a list to be consistent with other translator functions. This is a potentially *breaking change* for users. ## BUG FIXES diff --git a/R/cir.R b/R/cir.R index 76909aad..674bd9ba 100644 --- a/R/cir.R +++ b/R/cir.R @@ -5,6 +5,7 @@ #' #' @import xml2 #' @importFrom utils URLencode +#' @importFrom rlang := #' #' @param identifier character; chemical identifier. #' @param representation character; what representation of the identifier should @@ -19,7 +20,7 @@ #' @param choices deprecated. Use the \code{match} argument instead. #' @param verbose logical; should a verbose output be printed on the console? #' @param ... currently not used. -#' @return A list of character vectors. +#' @return A tibble with a `query` column and a column for the requested representation. #' @details #' CIR can resolve can be of the following \code{identifier}: Chemical Names, #' IUPAC names, @@ -112,7 +113,8 @@ #' #'} #' @export -cir_query <- function(identifier, representation = "smiles", +cir_query <- function(identifier, + representation = "smiles", resolver = NULL, match = c("all", "first", "ask", "na"), verbose = getOption("verbose"), @@ -124,16 +126,23 @@ cir_query <- function(identifier, representation = "smiles", if (!missing("choices")) { stop("`choices` is deprecated. Use `match` instead.") } + + if (length(representation) > 1 | !is.character(representation)) { + stop("`representation` must be a string. See ?cir_query for options.") + } + match <- match.arg(match) - foo <- function(identifier, representation, resolver, first, verbose) { + + foo <- function(identifier, representation, resolver, match, verbose) { + na_tbl <- tibble(query = identifier, !!representation := NA) if (is.na(identifier)) { if (verbose) webchem_message("na") - return(NA) + return(na_tbl) } if (verbose) webchem_message("query", identifier, appendLF = FALSE) - identifier <- URLencode(identifier, reserved = TRUE) + id <- URLencode(identifier, reserved = TRUE) baseurl <- "https://cactus.nci.nih.gov/chemical/structure" - qurl <- paste(baseurl, identifier, representation, 'xml', sep = '/') + qurl <- paste(baseurl, id, representation, 'xml', sep = '/') if (!is.null(resolver)) { qurl <- paste0(qurl, '?resolver=', resolver) } @@ -145,7 +154,7 @@ cir_query <- function(identifier, representation = "smiles", quiet = TRUE), silent = TRUE) if (inherits(h, "try-error")) { if (verbose) webchem_message("service_down") - return(NA) + return(na_tbl) } if (verbose) message(httr::message_for_status(h)) if (h$status_code == 200){ @@ -153,7 +162,7 @@ cir_query <- function(identifier, representation = "smiles", out <- xml_text(xml_find_all(tt, '//item')) if (length(out) == 0) { if (verbose) webchem_message("not_found") - return(NA) + return(na_tbl) } out <- matcher(out, query = identifier, match = match, verbose = verbose) if (representation %in% c('mw', 'monoisotopic_mass', 'h_bond_donor_count', @@ -164,16 +173,19 @@ cir_query <- function(identifier, representation = "smiles", 'heavy_atom_count', 'deprotonable_group_count', 'protonable_group_count') ) out <- as.numeric(out) - return(out) + + out_tbl <- tibble(query = identifier, !!representation := out) + return(out_tbl) } else { - return(NA) + return(na_tbl) } } + + out <- lapply(identifier, foo, representation = representation, - resolver = resolver, first = first, verbose = verbose) - names(out) <- identifier - return(out) + resolver = resolver, match = match, verbose = verbose) + bind_rows(out) } #' Query Chemical Identifier Resolver Images diff --git a/R/utils.R b/R/utils.R index f9a402fb..e810fa8d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -502,7 +502,7 @@ matcher <- } else { choices <- x } - pick <- menu(choices, graphics = FALSE, "Select one:") + pick <- menu(choices, graphics = FALSE, paste0("Select result for '", query, "':")) return(x[pick]) } else if (match == "na") { diff --git a/man/cir_query.Rd b/man/cir_query.Rd index 723b09ad..b84c7b57 100644 --- a/man/cir_query.Rd +++ b/man/cir_query.Rd @@ -36,7 +36,7 @@ returns all matches, \code{"first"} returns only the first result, \item{...}{currently not used.} } \value{ -A list of character vectors. +A tibble with a `query` column an a column for the requested representation. } \description{ A interface to the Chemical Identifier Resolver (CIR). diff --git a/tests/testthat/test-cir.R b/tests/testthat/test-cir.R index 470e64ee..60239856 100644 --- a/tests/testthat/test-cir.R +++ b/tests/testthat/test-cir.R @@ -3,16 +3,16 @@ test_that("cir_query()", { skip_on_cran() skip_if_not(up, "CIR server is down") - expect_equal(cir_query('Triclosan', 'mw')[[1]], 289.5451) - expect_equal(cir_query('xxxxxxx', 'mw')[[1]], NA) - expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number')[[1]], + expect_equal(cir_query('Triclosan', 'mw')$mw[1], 289.5451) + expect_equal(cir_query('xxxxxxx', 'mw')$mw[1], NA) + expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number')$stdinchikey[1], "InChIKey=XEFQLINVKFYRCS-UHFFFAOYSA-N") - expect_true(length(cir_query('Triclosan', 'cas')[[1]]) > 1) - expect_length(cir_query('Triclosan', 'cas', match = "first")[[1]], 1) - expect_length(cir_query(c('Triclosan', 'Aspirin'), 'cas'), 2) + expect_true(nrow(cir_query('Triclosan', 'cas')) > 1) + expect_true(nrow(cir_query('Triclosan', 'cas', match = "first")) == 1) + expect_true(nrow(cir_query(c('Triclosan', 'Aspirin'), 'cas', match = "first")) == 2) expect_equal(cir_query('acetic acid', 'mw', match = "first"), - list(`acetic acid` = 60.0524)) + tibble(query = "acetic acid", mw = 60.0524)) }) @@ -20,17 +20,28 @@ test_that("cir_query() doesn't mistake NA for sodium", { skip_on_cran() skip_if_not(up, "CIR server is down") - expect_true(is.na(cir_query(as.character(NA), 'cas'))) + expect_true(is.na(cir_query(as.character(NA), 'cas')$cas)) }) test_that("cir_query() handles special characters in SMILES", { skip_on_cran() skip_if_not(up, "CIR server is down") - expect_equal(cir_query("C#C", representation = "inchikey")[[1]], + expect_equal(cir_query("C#C", representation = "inchikey")$inchikey, "InChIKey=HSFWRNGVRCDJHI-UHFFFAOYNA-N") }) +test_that("cir_query() handles NA queries and queries that return NA", { + skip_on_cran() + skip_if_not(up, "CIR server is down") + + expect_identical( + cir_query(c("Triclosan", "pumpkin", NA), representation = "cas",match = "first"), + tibble(query = c("Triclosan", "pumpkin", NA_character_), + cas = c("3380-34-5", NA_character_, NA_character_)) + ) +}) + test_that("cir_img()", { skip_on_cran() skip_if_not(up, "CIR server is down")