diff --git a/DESCRIPTION b/DESCRIPTION
index 741c329..bd6d6ba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,7 +5,7 @@ Description: Tools for working with 'taxonomic' databases, including
utilities for downloading databases, loading them into various
'SQL' databases, cleaning up files, and providing a 'SQL' connection
that can be used to do 'SQL' queries directly or used in 'dplyr'.
-Version: 0.1.7.9600
+Version: 0.1.7.9610
Authors@R: c(
person("Scott", "Chamberlain", email = "myrmecocystus+r@gmail.com", role = c("aut", "cre")),
person("Zebulun", "Arendsee", role = "aut")
@@ -31,6 +31,9 @@ Imports:
hoardr (>= 0.1.0)
Suggests:
roxygen2 (>= 6.0.1),
+ taxize,
testthat,
- taxize
+ webmockr (>= 0.2.1.9222),
+ vcr (>= 0.0.7.9316)
+Remotes: ropensci/vcr, ropensci/webmockr
RoxygenNote: 6.0.1
diff --git a/tests/fixtures/vcr_cassettes/children_unambiguous.yml b/tests/fixtures/vcr_cassettes/children_unambiguous.yml
new file mode 100644
index 0000000..2d01c98
--- /dev/null
+++ b/tests/fixtures/vcr_cassettes/children_unambiguous.yml
@@ -0,0 +1,204 @@
+http_interactions:
+- request:
+ method: get
+ uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id=1837063%2B1746102%2B1547873%2B1547872%2B1328956%2B1240361%2B869751%2B869750%2B412662%2B378006%2B347883%2B302551%2B97980%2B97979%2B81970%2B59690%2B59689%2B45251%2B45249%2B38785%2B29726%2B3702&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108
+ body:
+ encoding: ''
+ string: ''
+ headers:
+ User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2
+ Accept-Encoding: gzip, deflate
+ Accept: application/json, text/xml, application/xml, */*
+ response:
+ status:
+ status_code: '200'
+ message: OK
+ explanation: Request fulfilled, document follows
+ headers:
+ status: HTTP/1.1 200 OK
+ date: Thu, 05 Apr 2018 16:21:14 GMT
+ server: Finatra
+ strict-transport-security: max-age=31536000; includeSubDomains; preload
+ content-security-policy: upgrade-insecure-requests
+ access-control-allow-origin: '*'
+ cache-control: private
+ ncbi-phid: 6177DD555E4FF90DE0056DC9A6EF4843.1.m_1
+ ncbi-sid: 9818C387B02F828E_27CBSID
+ content-type: text/xml; charset=UTF-8
+ x-ratelimit-limit: '150'
+ x-ratelimit-remaining: '147'
+ content-encoding: gzip
+ set-cookie: ncbi_sid=9818C387B02F828E_27CBSID; domain=.nih.gov; path=/; expires=Fri,
+ 05 Apr 2019 16:21:14 GMT
+ x-ua-compatible: IE=Edge
+ x-xss-protection: 1; mode=block
+ transfer-encoding: chunked
+ body:
+ encoding: UTF-8
+ string: "\n\n\n\n\t1837063\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis thaliana x Arabidopsis halleri
\n\t \n\t- 1837063
\n\t- 0
\n\t \n\t \n\t \n\t- 2016/04/22 00:00
\n\n\n\n\t1746102\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis sp. hda9-2
\n\t \n\t- 1746102
\n\t- 0
\n\t- Arabidopsis
\n\t- sp. hda9-2
\n\t \n\t- 2015/11/01
+ 00:00
\n\n\n\n\t1547873\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis sp. NH-2014a
\n\t \n\t- 1547873
\n\t- 0
\n\t- Arabidopsis
\n\t- sp. NH-2014a
\n\t \n\t- 2014/11/09
+ 00:00
\n\n\n\n\t1547872\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis umezawana
\n\t \n\t- 1547872
\n\t- 0
\n\t- Arabidopsis
\n\t- umezawana
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t1328956\n\t- active
\n\t- species
\n\t- eudicots
\n\t- (Arabidopsis thaliana x Arabidopsis arenosa) x Arabidopsis
+ suecica
\n\t \n\t- 1328956
\n\t- 0
\n\t \n\t \n\t \n\t- 2014/01/09 00:00
\n\n\n\n\t1240361\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis thaliana x Arabidopsis arenosa
\n\t \n\t- 1240361
\n\t- 0
\n\t \n\t \n\t \n\t- 2012/10/20 00:00
\n\n\n\n\t869751\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis thaliana x Arabidopsis halleri subsp. gemmifera
\n\t \n\t- 869751
\n\t- 0
\n\t \n\t \n\t \n\t- 2010/07/29 00:00
\n\n\n\n\t869750\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis thaliana x Arabidopsis lyrata
\n\t \n\t- 869750
\n\t- 0
\n\t \n\t \n\t \n\t- 2010/07/29 00:00
\n\n\n\n\t412662\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis pedemontana
\n\t \n\t- 412662
\n\t- 0
\n\t- Arabidopsis
\n\t- pedemontana
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t378006\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis arenosa x Arabidopsis thaliana
\n\t \n\t- 378006
\n\t- 0
\n\t \n\t \n\t \n\t- 2006/04/19 00:00
\n\n\n\n\t347883\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis arenicola
\n\t \n\t- 347883
\n\t- 0
\n\t- Arabidopsis
\n\t- arenicola
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t302551\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis petrogena
\n\t \n\t- 302551
\n\t- 0
\n\t- Arabidopsis
\n\t- petrogena
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t97980\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis croatica
\n\t \n\t- 97980
\n\t- 0
\n\t- Arabidopsis
\n\t- croatica
\n\t \n\t- 2015/09/16 00:00
\n\n\n\n\t97979\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis cebennensis
\n\t \n\t- 97979
\n\t- 0
\n\t- Arabidopsis
\n\t- cebennensis
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t81970\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis halleri
\n\t \n\t- 81970
\n\t- 0
\n\t- Arabidopsis
\n\t- halleri
\n\t \n\t- 2015/09/16 00:00
\n\n\n\n\t59690\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis kamchatica
\n\t \n\t- 59690
\n\t- 0
\n\t- Arabidopsis
\n\t- kamchatica
\n\t \n\t- 2015/09/16
+ 00:00
\n\n\n\n\t59689\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis lyrata
\n\t- lyrate
+ rockcress
\n\t- 59689
\n\t- 0
\n\t- Arabidopsis
\n\t- lyrata
\n\t \n\t- 2015/10/28 00:00
\n\n\n\n\t45251\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis neglecta
\n\t \n\t- 45251
\n\t- 0
\n\t- Arabidopsis
\n\t- neglecta
\n\t \n\t- 2015/09/16 00:00
\n\n\n\n\t45249\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis suecica
\n\t \n\t- 45249
\n\t- 0
\n\t- Arabidopsis
\n\t- suecica
\n\t \n\t- 2015/09/16 00:00
\n\n\n\n\t38785\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis arenosa
\n\t \n\t- 38785
\n\t- 0
\n\t- Arabidopsis
\n\t- arenosa
\n\t \n\t- 2015/09/16 00:00
\n\n\n\n\t29726\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis sp.
\n\t \n\t- 29726
\n\t- 0
\n\t- Arabidopsis
\n\t- sp.
\n\t \n\t- 2005/01/19 00:00
\n\n\n\n\t3702\n\t- active
\n\t- species
\n\t- eudicots
\n\t- Arabidopsis thaliana
\n\t- thale
+ cress
\n\t- 3702
\n\t- 0
\n\t- Arabidopsis
\n\t- thaliana
\n\t \n\t- 2017/06/29 00:00
\n\n\n\n"
+ recorded_at: 2018-04-05 16:21:14 GMT
+ recorded_with: vcr/0.0.7.9325
diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml
new file mode 100644
index 0000000..e8b8a3b
--- /dev/null
+++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids1.yml
@@ -0,0 +1,44 @@
+http_interactions:
+- request:
+ method: get
+ uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=9999999999&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108
+ body:
+ encoding: ''
+ string: ''
+ headers:
+ User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2
+ Accept-Encoding: gzip, deflate
+ Accept: application/json, text/xml, application/xml, */*
+ response:
+ status:
+ status_code: '200'
+ message: OK
+ explanation: Request fulfilled, document follows
+ headers:
+ status: HTTP/1.1 200 OK
+ date: Thu, 05 Apr 2018 16:21:14 GMT
+ server: Finatra
+ strict-transport-security: max-age=31536000; includeSubDomains; preload
+ content-security-policy: upgrade-insecure-requests
+ access-control-allow-origin: '*'
+ cache-control: private
+ ncbi-phid: 644081FF892592C429149881FC4396BF.1.m_3
+ ncbi-sid: 6A8B33BC89AA0BA0_61BASID
+ content-type: text/xml; charset=UTF-8
+ x-ratelimit-limit: '150'
+ x-ratelimit-remaining: '146'
+ content-encoding: gzip
+ set-cookie: ncbi_sid=6A8B33BC89AA0BA0_61BASID; domain=.nih.gov; path=/; expires=Fri,
+ 05 Apr 2019 16:21:14 GMT
+ x-ua-compatible: IE=Edge
+ x-xss-protection: 1; mode=block
+ transfer-encoding: chunked
+ body:
+ encoding: UTF-8
+ string: |-
+
+
+
+
+ recorded_at: 2018-04-05 16:21:14 GMT
+ recorded_with: vcr/0.0.7.9325
diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml
new file mode 100644
index 0000000..c650295
--- /dev/null
+++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids2.yml
@@ -0,0 +1,44 @@
+http_interactions:
+- request:
+ method: get
+ uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=8888888888&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108
+ body:
+ encoding: ''
+ string: ''
+ headers:
+ User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2
+ Accept-Encoding: gzip, deflate
+ Accept: application/json, text/xml, application/xml, */*
+ response:
+ status:
+ status_code: '200'
+ message: OK
+ explanation: Request fulfilled, document follows
+ headers:
+ status: HTTP/1.1 200 OK
+ date: Thu, 05 Apr 2018 16:21:14 GMT
+ server: Finatra
+ strict-transport-security: max-age=31536000; includeSubDomains; preload
+ content-security-policy: upgrade-insecure-requests
+ access-control-allow-origin: '*'
+ cache-control: private
+ ncbi-phid: 0EE57C5FD6AF758EECFA3C98D3690CF8.1.m_3
+ ncbi-sid: D32F37DD656D596E_C797SID
+ content-type: text/xml; charset=UTF-8
+ x-ratelimit-limit: '150'
+ x-ratelimit-remaining: '145'
+ content-encoding: gzip
+ set-cookie: ncbi_sid=D32F37DD656D596E_C797SID; domain=.nih.gov; path=/; expires=Fri,
+ 05 Apr 2019 16:21:15 GMT
+ x-ua-compatible: IE=Edge
+ x-xss-protection: 1; mode=block
+ transfer-encoding: chunked
+ body:
+ encoding: UTF-8
+ string: |-
+
+
+
+
+ recorded_at: 2018-04-05 16:21:15 GMT
+ recorded_with: vcr/0.0.7.9325
diff --git a/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml b/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml
new file mode 100644
index 0000000..c72a972
--- /dev/null
+++ b/tests/fixtures/vcr_cassettes/classification_invalid_ids3.yml
@@ -0,0 +1,196 @@
+http_interactions:
+- request:
+ method: get
+ uri: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&ID=3702&api_key=eea6131e94f20ca3f84bb345e8a4f9f4c108
+ body:
+ encoding: ''
+ string: ''
+ headers:
+ User-Agent: libcurl/7.54.0 r-curl/3.2 crul/0.5.2
+ Accept-Encoding: gzip, deflate
+ Accept: application/json, text/xml, application/xml, */*
+ response:
+ status:
+ status_code: '200'
+ message: OK
+ explanation: Request fulfilled, document follows
+ headers:
+ status: HTTP/1.1 200 OK
+ date: Thu, 05 Apr 2018 16:21:15 GMT
+ server: Finatra
+ strict-transport-security: max-age=31536000; includeSubDomains; preload
+ content-security-policy: upgrade-insecure-requests
+ access-control-allow-origin: '*'
+ cache-control: private
+ ncbi-phid: 6BA2F0C898D72FC7C63842EA143832F9.1.m_3
+ ncbi-sid: 93374FDDBAB1BFB6_DCDCSID
+ content-type: text/xml; charset=UTF-8
+ x-ratelimit-limit: '150'
+ x-ratelimit-remaining: '145'
+ content-encoding: gzip
+ set-cookie: ncbi_sid=93374FDDBAB1BFB6_DCDCSID; domain=.nih.gov; path=/; expires=Fri,
+ 05 Apr 2019 16:21:15 GMT
+ x-ua-compatible: IE=Edge
+ x-xss-protection: 1; mode=block
+ transfer-encoding: chunked
+ body:
+ encoding: UTF-8
+ string: |-
+
+
+
+ 3702
+ Arabidopsis thaliana
+
+ thale cress
+ mouse-ear cress
+ thale-cress
+
+ authority
+ Arabidopsis thaliana (L.) Heynh.
+
+
+ misspelling
+ Arabidopsis thaliana (thale cress)
+
+
+ misspelling
+ Arabidopsis_thaliana
+
+
+ misspelling
+ Arbisopsis thaliana
+
+
+ misspelling
+ thale kress
+
+
+ 3701
+ species
+ Plants and Fungi
+
+ 1
+ Standard
+
+
+ 1
+ Standard
+
+ cellular organisms; Eukaryota; Viridiplantae; Streptophyta; Streptophytina; Embryophyta; Tracheophyta; Euphyllophyta; Spermatophyta; Magnoliophyta; Mesangiospermae; eudicotyledons; Gunneridae; Pentapetalae; rosids; malvids; Brassicales; Brassicaceae; Camelineae; Arabidopsis
+
+
+ 131567
+ cellular organisms
+ no rank
+
+
+ 2759
+ Eukaryota
+ superkingdom
+
+
+ 33090
+ Viridiplantae
+ kingdom
+
+
+ 35493
+ Streptophyta
+ phylum
+
+
+ 131221
+ Streptophytina
+ subphylum
+
+
+ 3193
+ Embryophyta
+ no rank
+
+
+ 58023
+ Tracheophyta
+ no rank
+
+
+ 78536
+ Euphyllophyta
+ no rank
+
+
+ 58024
+ Spermatophyta
+ no rank
+
+
+ 3398
+ Magnoliophyta
+ no rank
+
+
+ 1437183
+ Mesangiospermae
+ no rank
+
+
+ 71240
+ eudicotyledons
+ no rank
+
+
+ 91827
+ Gunneridae
+ no rank
+
+
+ 1437201
+ Pentapetalae
+ no rank
+
+
+ 71275
+ rosids
+ subclass
+
+
+ 91836
+ malvids
+ no rank
+
+
+ 3699
+ Brassicales
+ order
+
+
+ 3700
+ Brassicaceae
+ family
+
+
+ 980083
+ Camelineae
+ tribe
+
+
+ 3701
+ Arabidopsis
+ genus
+
+
+
+
+ pgcode
+ 11
+
+
+ 1995/02/27 09:24:00
+ 2017/07/06 17:40:43
+ 1992/11/10 00:00:00
+
+
+
+ recorded_at: 2018-04-05 16:21:15 GMT
+ recorded_with: vcr/0.0.7.9325
diff --git a/tests/testthat/helper-taxizedb.R b/tests/testthat/helper-taxizedb.R
new file mode 100644
index 0000000..0ff8305
--- /dev/null
+++ b/tests/testthat/helper-taxizedb.R
@@ -0,0 +1,3 @@
+# set up vcr: point it at the shared cassette directory (tests/fixtures/vcr_cassettes)
+library("vcr")
+invisible(vcr::vcr_configure(dir = "../fixtures/vcr_cassettes"))
diff --git a/tests/testthat/test-children.R b/tests/testthat/test-children.R
index ead2bd2..1c1f659 100644
--- a/tests/testthat/test-children.R
+++ b/tests/testthat/test-children.R
@@ -7,10 +7,12 @@ test_that("unambiguous children", {
# taxizedb::children(3702, db='ncbi'),
# taxize::children(3702, db='ncbi')
# )
- expect_equal(
- taxizedb::children(3701, db='ncbi'),
- taxize::children(3701, db='ncbi')
- )
+ vcr::use_cassette("children_unambiguous", {
+ expect_equal(
+ taxizedb::children(3701, db='ncbi'),
+ taxize::children(3701, db='ncbi')
+ )
+ }, preserve_exact_body_bytes = FALSE)
## TODO: these are not currently equal
## once changes in taxize are sorted out, we can restore this test
@@ -23,7 +25,7 @@ test_that("unambiguous children", {
# )
})
-test_that("ambiguous NCBI children", {
+# test_that("ambiguous NCBI children", {
## TODO: these are not currently equal
## once changes in taxize are sorted out, we can restore this test
# expect_equal(
@@ -33,7 +35,7 @@ test_that("ambiguous NCBI children", {
# subset(childtaxa_rank != 'species') %>%
# magrittr::set_rownames(NULL)
# )
-})
+# })
test_that("missing values are consistent with taxize", {
empty_df <- data.frame(
diff --git a/tests/testthat/test-classification.R b/tests/testthat/test-classification.R
index 87ad4da..f1f6f9e 100644
--- a/tests/testthat/test-classification.R
+++ b/tests/testthat/test-classification.R
@@ -2,54 +2,61 @@ context("classification")
library(taxize, quietly = TRUE, warn.conflicts = FALSE)
-test_that("taxizedb::classification == taxize::classification", {
- taxa_ids <- c(9606, 3702)
- taxa_names <- c("Homo sapiens", "Arabidopsis thaliana")
- taxa_names2 <- c("thale cress", "Homo_sapiens")
+# test_that("taxizedb::classification == taxize::classification", {
+# taxa_ids <- c(9606, 3702)
+# taxa_names <- c("Homo sapiens", "Arabidopsis thaliana")
+# taxa_names2 <- c("thale cress", "Homo_sapiens")
- ## TODO: none of those are equivalent
- ## slight differences in each
- # expect_equal(
- # taxize::classification(taxa_ids, db='ncbi'),
- # taxizedb::classification(taxa_ids, db='ncbi')
- # )
- # expect_equal(
- # taxize::classification(taxa_names, db='ncbi'),
- # taxizedb::classification(taxa_names, db='ncbi')
- # )
- # # input names are preserved (even if incorrect)
- # expect_equal(
- # taxize::classification(taxa_names2, db='ncbi'),
- # taxizedb::classification(taxa_names2, db='ncbi')
- # )
-})
+# ## TODO: none of the comparisons below are equal yet;
+# ## taxize and taxizedb results differ slightly in each
+# expect_equal(
+# taxize::classification(taxa_ids, db='ncbi'),
+# taxizedb::classification(taxa_ids, db='ncbi')
+# )
+# expect_equal(
+# taxize::classification(taxa_names, db='ncbi'),
+# taxizedb::classification(taxa_names, db='ncbi')
+# )
+# # input names are preserved (even if incorrect)
+# expect_equal(
+# taxize::classification(taxa_names2, db='ncbi'),
+# taxizedb::classification(taxa_names2, db='ncbi')
+# )
+# })
-test_that("classification is case insensitive", {
- taxa_names <- c('homo sapiens', 'PIG', 'zea_mays')
- ## TODO: none of those are equivalent
- ## slight differences between them
- # expect_equal(
- # taxize::classification(taxa_names, db='ncbi'),
- # taxizedb::classification(taxa_names, db='ncbi')
- # )
-})
+# test_that("classification is case insensitive", {
+# taxa_names <- c('homo sapiens', 'PIG', 'zea_mays')
+# ## TODO: the comparison below is not equal yet;
+# ## taxize and taxizedb results differ slightly
+# # expect_equal(
+# # taxize::classification(taxa_names, db='ncbi'),
+# # taxizedb::classification(taxa_names, db='ncbi')
+# # )
+# })
test_that('classification handles invalid ids', {
taxa_ids1 <- 9999999999
taxa_ids2 <- c(9999999999, 8888888888)
taxa_ids3 <- c(8888888888, 3702)
- expect_equal(
- taxize::classification(taxa_ids1, db='ncbi'),
- taxizedb::classification(taxa_ids1, db='ncbi')
- )
- expect_equal(
- taxize::classification(taxa_ids2, db='ncbi'),
- taxizedb::classification(taxa_ids2, db='ncbi')
- )
- expect_equal(
- taxize::classification(taxa_ids3, db='ncbi'),
- taxizedb::classification(taxa_ids3, db='ncbi')
- )
+
+ vcr::use_cassette("classification_invalid_ids1", {
+ expect_equal(
+ taxize::classification(taxa_ids1, db='ncbi'),
+ taxizedb::classification(taxa_ids1, db='ncbi')
+ )
+ }, preserve_exact_body_bytes = FALSE)
+ vcr::use_cassette("classification_invalid_ids2", {
+ expect_equal(
+ taxize::classification(taxa_ids2, db='ncbi'),
+ taxizedb::classification(taxa_ids2, db='ncbi')
+ )
+ }, preserve_exact_body_bytes = FALSE)
+ vcr::use_cassette("classification_invalid_ids3", {
+ expect_equal(
+ taxize::classification(taxa_ids3, db='ncbi'),
+ taxizedb::classification(taxa_ids3, db='ncbi')
+ )
+ }, preserve_exact_body_bytes = FALSE)
})
test_that('classification handles invalid names', {