diff --git a/DESCRIPTION b/DESCRIPTION index 741c329..bd6d6ba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Description: Tools for working with 'taxonomic' databases, including utilities for downloading databases, loading them into various 'SQL' databases, cleaning up files, and providing a 'SQL' connection that can be used to do 'SQL' queries directly or used in 'dplyr'. -Version: 0.1.7.9600 +Version: 0.1.7.9610 Authors@R: c( person("Scott", "Chamberlain", email = "myrmecocystus+r@gmail.com", role = c("aut", "cre")), person("Zebulun", "Arendsee", role = "aut") @@ -31,6 +31,9 @@ Imports: hoardr (>= 0.1.0) Suggests: roxygen2 (>= 6.0.1), + taxize, testthat, - taxize + webmockr (>= 0.2.1.9222), + vcr (>= 0.0.7.9316) +Remotes: ropensci/vcr, ropensci/webmockr RoxygenNote: 6.0.1 diff --git a/tests/fixtures/vcr_cassettes/children_unambiguous.yml b/tests/fixtures/vcr_cassettes/children_unambiguous.yml new file mode 100644 index 0000000..2d01c98 --- /dev/null +++ b/tests/fixtures/vcr_cassettes/children_unambiguous.yml @@ -0,0 +1,204 @@ +http_interactions: +- request: + method: get + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1837063%2B1746102%2B1547873%2B1547872%2B1328956%2B1240361%2B869751%2B869750%2B412662%2B378006%2B347883%2B302551%2B97980%2B97979%2B81970%2B59690%2B59689%2B45251%2B45249%2B38785%2B29726%2B3702&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108 + body: + encoding: '' + string: '' + headers: + User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2 + Accept-Encoding: gzip, deflate + Accept: application/json, text/xml, application/xml, */* + response: + status: + status_code: '200' + message: OK + explanation: Request fulfilled, document follows + headers: + status: HTTP/1.1 200 OK + date: Thu, 05 Apr 2018 16:21:14 GMT + server: Finatra + strict-transport-security: max-age=31536000; includeSubDomains; preload + content-security-policy: upgrade-insecure-requests + access-control-allow-origin: '*' + cache-control: private + ncbi-phid: 6177DD555E4FF90DE0056DC9A6EF4843.1.m_1 + ncbi-sid: 9818C387B02F828E_27CBSID + content-type: text/xml; charset=UTF-8 + x-ratelimit-limit: '150' + x-ratelimit-remaining: '147' + content-encoding: gzip + set-cookie: ncbi_sid=9818C387B02F828E_27CBSID; domain=.nih.gov; path=/; expires=Fri, + 05 Apr 2019 16:21:14 GMT + x-ua-compatible: IE=Edge + x-xss-protection: 1; mode=block + transfer-encoding: chunked + body: + encoding: UTF-8 + string: "\n\n\n\n\t1837063\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis thaliana x Arabidopsis halleri\n\t\n\t1837063\n\t0\n\t\n\t\n\t\n\t2016/04/22 00:00\n\n\n\n\t1746102\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis sp. hda9-2\n\t\n\t1746102\n\t0\n\tArabidopsis\n\tsp. hda9-2\n\t\n\t2015/11/01 + 00:00\n\n\n\n\t1547873\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis sp. NH-2014a\n\t\n\t1547873\n\t0\n\tArabidopsis\n\tsp. NH-2014a\n\t\n\t2014/11/09 + 00:00\n\n\n\n\t1547872\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis umezawana\n\t\n\t1547872\n\t0\n\tArabidopsis\n\tumezawana\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t1328956\n\tactive\n\tspecies\n\teudicots\n\t(Arabidopsis thaliana x Arabidopsis arenosa) x Arabidopsis + suecica\n\t\n\t1328956\n\t0\n\t\n\t\n\t\n\t2014/01/09 00:00\n\n\n\n\t1240361\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis thaliana x Arabidopsis arenosa\n\t\n\t1240361\n\t0\n\t\n\t\n\t\n\t2012/10/20 00:00\n\n\n\n\t869751\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis thaliana x Arabidopsis halleri subsp. gemmifera\n\t\n\t869751\n\t0\n\t\n\t\n\t\n\t2010/07/29 00:00\n\n\n\n\t869750\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis thaliana x Arabidopsis lyrata\n\t\n\t869750\n\t0\n\t\n\t\n\t\n\t2010/07/29 00:00\n\n\n\n\t412662\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis pedemontana\n\t\n\t412662\n\t0\n\tArabidopsis\n\tpedemontana\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t378006\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis arenosa x Arabidopsis thaliana\n\t\n\t378006\n\t0\n\t\n\t\n\t\n\t2006/04/19 00:00\n\n\n\n\t347883\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis arenicola\n\t\n\t347883\n\t0\n\tArabidopsis\n\tarenicola\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t302551\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis petrogena\n\t\n\t302551\n\t0\n\tArabidopsis\n\tpetrogena\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t97980\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis croatica\n\t\n\t97980\n\t0\n\tArabidopsis\n\tcroatica\n\t\n\t2015/09/16 00:00\n\n\n\n\t97979\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis cebennensis\n\t\n\t97979\n\t0\n\tArabidopsis\n\tcebennensis\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t81970\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis halleri\n\t\n\t81970\n\t0\n\tArabidopsis\n\thalleri\n\t\n\t2015/09/16 00:00\n\n\n\n\t59690\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis kamchatica\n\t\n\t59690\n\t0\n\tArabidopsis\n\tkamchatica\n\t\n\t2015/09/16 + 00:00\n\n\n\n\t59689\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis lyrata\n\tlyrate + rockcress\n\t59689\n\t0\n\tArabidopsis\n\tlyrata\n\t\n\t2015/10/28 00:00\n\n\n\n\t45251\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis neglecta\n\t\n\t45251\n\t0\n\tArabidopsis\n\tneglecta\n\t\n\t2015/09/16 00:00\n\n\n\n\t45249\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis suecica\n\t\n\t45249\n\t0\n\tArabidopsis\n\tsuecica\n\t\n\t2015/09/16 00:00\n\n\n\n\t38785\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis arenosa\n\t\n\t38785\n\t0\n\tArabidopsis\n\tarenosa\n\t\n\t2015/09/16 00:00\n\n\n\n\t29726\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis sp.\n\t\n\t29726\n\t0\n\tArabidopsis\n\tsp.\n\t\n\t2005/01/19 00:00\n\n\n\n\t3702\n\tactive\n\tspecies\n\teudicots\n\tArabidopsis thaliana\n\tthale + cress\n\t3702\n\t0\n\tArabidopsis\n\tthaliana\n\t\n\t2017/06/29 00:00\n\n\n\n" + recorded_at: 2018-04-05 16:21:14 GMT + recorded_with: vcr/0.0.7.9325 diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml new file mode 100644 index 0000000..e8b8a3b --- /dev/null +++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml @@ -0,0 +1,44 @@ +http_interactions: +- request: + method: get + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=9999999999&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108 + body: + encoding: '' + string: '' + headers: + User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2 + Accept-Encoding: gzip, deflate + Accept: application/json, text/xml, application/xml, */* + response: + status: + status_code: '200' + message: OK + explanation: Request fulfilled, document follows + headers: + status: HTTP/1.1 200 OK + date: Thu, 05 Apr 2018 16:21:14 GMT + server: Finatra + strict-transport-security: max-age=31536000; includeSubDomains; preload + content-security-policy: upgrade-insecure-requests + access-control-allow-origin: '*' + cache-control: private + ncbi-phid: 644081FF892592C429149881FC4396BF.1.m_3 + ncbi-sid: 6A8B33BC89AA0BA0_61BASID + content-type: text/xml; charset=UTF-8 + x-ratelimit-limit: '150' + x-ratelimit-remaining: '146' + content-encoding: gzip + set-cookie: ncbi_sid=6A8B33BC89AA0BA0_61BASID; domain=.nih.gov; path=/; expires=Fri, + 05 Apr 2019 16:21:14 GMT + x-ua-compatible: IE=Edge + x-xss-protection: 1; mode=block + transfer-encoding: chunked + body: + encoding: UTF-8 + string: |- + + + + + recorded_at: 2018-04-05 16:21:14 GMT + recorded_with: vcr/0.0.7.9325 diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml new file mode 100644 index 0000000..c650295 --- /dev/null +++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml @@ -0,0 +1,44 @@ +http_interactions: +- request: + method: get + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=8888888888&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108 + body: + encoding: '' + string: '' + headers: + User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2 + Accept-Encoding: gzip, deflate + Accept: application/json, text/xml, application/xml, */* + response: + status: + status_code: '200' + message: OK + explanation: Request fulfilled, document follows + headers: + status: HTTP/1.1 200 OK + date: Thu, 05 Apr 2018 16:21:14 GMT + server: Finatra + strict-transport-security: max-age=31536000; includeSubDomains; preload + content-security-policy: upgrade-insecure-requests + access-control-allow-origin: '*' + cache-control: private + ncbi-phid: 0EE57C5FD6AF758EECFA3C98D3690CF8.1.m_3 + ncbi-sid: D32F37DD656D596E_C797SID + content-type: text/xml; charset=UTF-8 + x-ratelimit-limit: '150' + x-ratelimit-remaining: '145' + content-encoding: gzip + set-cookie: ncbi_sid=D32F37DD656D596E_C797SID; domain=.nih.gov; path=/; expires=Fri, + 05 Apr 2019 16:21:15 GMT + x-ua-compatible: IE=Edge + x-xss-protection: 1; mode=block + transfer-encoding: chunked + body: + encoding: UTF-8 + string: |- + + + + + recorded_at: 2018-04-05 16:21:15 GMT + recorded_with: vcr/0.0.7.9325 diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml new file mode 100644 index 0000000..c72a972 --- /dev/null +++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml @@ -0,0 +1,196 @@ +http_interactions: +- request: + method: get + uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=3702&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108 + body: + encoding: '' + string: '' + headers: + User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2 + Accept-Encoding: gzip, deflate + Accept: application/json, text/xml, application/xml, */* + response: + status: + status_code: '200' + message: OK + explanation: Request fulfilled, document follows + headers: + status: HTTP/1.1 200 OK + date: Thu, 05 Apr 2018 16:21:15 GMT + server: Finatra + strict-transport-security: max-age=31536000; includeSubDomains; preload + content-security-policy: upgrade-insecure-requests + access-control-allow-origin: '*' + cache-control: private + ncbi-phid: 6BA2F0C898D72FC7C63842EA143832F9.1.m_3 + ncbi-sid: 93374FDDBAB1BFB6_DCDCSID + content-type: text/xml; charset=UTF-8 + x-ratelimit-limit: '150' + x-ratelimit-remaining: '145' + content-encoding: gzip + set-cookie: ncbi_sid=93374FDDBAB1BFB6_DCDCSID; domain=.nih.gov; path=/; expires=Fri, + 05 Apr 2019 16:21:15 GMT + x-ua-compatible: IE=Edge + x-xss-protection: 1; mode=block + transfer-encoding: chunked + body: + encoding: UTF-8 + string: |- + + + + 3702 + Arabidopsis thaliana + + thale cress + mouse-ear cress + thale-cress + + authority + Arabidopsis thaliana (L.) Heynh. + + + misspelling + Arabidopsis thaliana (thale cress) + + + misspelling + Arabidopsis_thaliana + + + misspelling + Arbisopsis thaliana + + + misspelling + thale kress + + + 3701 + species + Plants and Fungi + + 1 + Standard + + + 1 + Standard + + cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis + + + 131567 + cellular organisms + no rank + + + 2759 + Eukaryota + superkingdom + + + 33090 + Viridiplantae + kingdom + + + 35493 + Streptophyta + phylum + + + 131221 + Streptophytina + subphylum + + + 3193 + Embryophyta + no rank + + + 58023 + Tracheophyta + no rank + + + 78536 + Euphyllophyta + no rank + + + 58024 + Spermatophyta + no rank + + + 3398 + Magnoliophyta + no rank + + + 1437183 + Mesangiospermae + no rank + + + 71240 + eudicotyledons + no rank + + + 91827 + Gunneridae + no rank + + + 1437201 + Pentapetalae + no rank + + + 71275 + rosids + subclass + + + 91836 + malvids + no rank + + + 3699 + Brassicales + order + + + 3700 + Brassicaceae + family + + + 980083 + Camelineae + tribe + + + 3701 + Arabidopsis + genus + + + + + pgcode + 11 + + + 1995/02/27 09:24:00 + 2017/07/06 17:40:43 + 1992/11/10 00:00:00 + + + + recorded_at: 2018-04-05 16:21:15 GMT + recorded_with: vcr/0.0.7.9325 diff --git a/tests/testthat/helper-taxizedb.R b/tests/testthat/helper-taxizedb.R new file mode 100644 index 0000000..0ff8305 --- /dev/null +++ b/tests/testthat/helper-taxizedb.R @@ -0,0 +1,3 @@ +# set up vcr +library("vcr") +invisible(vcr::vcr_configure(dir = "../fixtures/vcr_cassettes")) diff --git a/tests/testthat/test-children.R b/tests/testthat/test-children.R index ead2bd2..1c1f659 100644 --- a/tests/testthat/test-children.R +++ b/tests/testthat/test-children.R @@ -7,10 +7,12 @@ test_that("unambiguous children", { # taxizedb::children(3702, db='ncbi'), # taxize::children(3702, db='ncbi') # ) - expect_equal( - taxizedb::children(3701, db='ncbi'), - taxize::children(3701, db='ncbi') - ) + vcr::use_cassette("children_unambiguous", { + expect_equal( + taxizedb::children(3701, db='ncbi'), + taxize::children(3701, db='ncbi') + ) + }, preserve_exact_body_bytes = FALSE) ## TODO: these are not currently equal ## once changes in taxize are sorted out, we can restore this test @@ -23,7 +25,7 @@ test_that("unambiguous children", { # ) }) -test_that("ambiguous NCBI children", { +# test_that("ambiguous NCBI children", { ## TODO: these are not currently equal ## once changes in taxize are sorted out, we can restore this test # expect_equal( @@ -33,7 +35,7 @@ test_that("ambiguous NCBI children", { # subset(childtaxa_rank != 'species') %>% # magrittr::set_rownames(NULL) # ) -}) +# }) test_that("missing values are consistent with taxize", { empty_df <- data.frame( diff --git a/tests/testthat/test-classification.R b/tests/testthat/test-classification.R index 87ad4da..f1f6f9e 100644 --- a/tests/testthat/test-classification.R +++ b/tests/testthat/test-classification.R @@ -2,54 +2,61 @@ context("classification") library(taxize, quietly = TRUE, warn.conflicts = FALSE) -test_that("taxizedb::classification == taxize::classification", { - taxa_ids <- c(9606, 3702) - taxa_names <- c("Homo sapiens", "Arabidopsis thaliana") - taxa_names2 <- c("thale cress", "Homo_sapiens") +# test_that("taxizedb::classification == taxize::classification", { +# taxa_ids <- c(9606, 3702) +# taxa_names <- c("Homo sapiens", "Arabidopsis thaliana") +# taxa_names2 <- c("thale cress", "Homo_sapiens") - ## TODO: none of those are equivalent - ## slight differences in each - # expect_equal( - # taxize::classification(taxa_ids, db='ncbi'), - # taxizedb::classification(taxa_ids, db='ncbi') - # ) - # expect_equal( - # taxize::classification(taxa_names, db='ncbi'), - # taxizedb::classification(taxa_names, db='ncbi') - # ) - # # input names are preserved (even if incorrect) - # expect_equal( - # taxize::classification(taxa_names2, db='ncbi'), - # taxizedb::classification(taxa_names2, db='ncbi') - # ) -}) +# ## TODO: none of those are equivalent +# ## slight differences in each +# expect_equal( +# taxize::classification(taxa_ids, db='ncbi'), +# taxizedb::classification(taxa_ids, db='ncbi') +# ) +# expect_equal( +# taxize::classification(taxa_names, db='ncbi'), +# taxizedb::classification(taxa_names, db='ncbi') +# ) +# # input names are preserved (even if incorrect) +# expect_equal( +# taxize::classification(taxa_names2, db='ncbi'), +# taxizedb::classification(taxa_names2, db='ncbi') +# ) +# }) -test_that("classification is case insensitive", { - taxa_names <- c('homo sapiens', 'PIG', 'zea_mays') - ## TODO: none of those are equivalent - ## slight differences between them - # expect_equal( - # taxize::classification(taxa_names, db='ncbi'), - # taxizedb::classification(taxa_names, db='ncbi') - # ) -}) +# test_that("classification is case insensitive", { +# taxa_names <- c('homo sapiens', 'PIG', 'zea_mays') +# ## TODO: none of those are equivalent +# ## slight differences between them +# # expect_equal( +# # taxize::classification(taxa_names, db='ncbi'), +# # taxizedb::classification(taxa_names, db='ncbi') +# # ) +# }) test_that('classification handles invalid ids', { taxa_ids1 <- 9999999999 taxa_ids2 <- c(9999999999, 8888888888) taxa_ids3 <- c(8888888888, 3702) - expect_equal( - taxize::classification(taxa_ids1, db='ncbi'), - taxizedb::classification(taxa_ids1, db='ncbi') - ) - expect_equal( - taxize::classification(taxa_ids2, db='ncbi'), - taxizedb::classification(taxa_ids2, db='ncbi') - ) - expect_equal( - taxize::classification(taxa_ids3, db='ncbi'), - taxizedb::classification(taxa_ids3, db='ncbi') - ) + + vcr::use_cassette("classification_invalid_ids1", { + expect_equal( + taxize::classification(taxa_ids1, db='ncbi'), + taxizedb::classification(taxa_ids1, db='ncbi') + ) + }, preserve_exact_body_bytes = FALSE) + vcr::use_cassette("classification_invalid_ids2", { + expect_equal( + taxize::classification(taxa_ids2, db='ncbi'), + taxizedb::classification(taxa_ids2, db='ncbi') + ) + }, preserve_exact_body_bytes = FALSE) + vcr::use_cassette("classification_invalid_ids3", { + expect_equal( + taxize::classification(taxa_ids3, db='ncbi'), + taxizedb::classification(taxa_ids3, db='ncbi') + ) + }, preserve_exact_body_bytes = FALSE) }) test_that('classification handles invalid names', {