Skip to content

Commit 79397ec

Browse files
authored
Merge pull request #47 from ncss-tech/WRB2022
World Reference Base for Soil Resources (4th Edition, 2022)
2 parents 6f81a38 + d381897 commit 79397ec

11 files changed

+2052
-18
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Suggests:
2222
soilDB,
2323
ape,
2424
data.tree
25-
RoxygenNote: 7.2.3
25+
RoxygenNote: 7.3.2
2626
Roxygen: list(markdown = TRUE)
2727
VignetteBuilder: knitr
2828
LazyData: false

R/data-documentation.R

+34-12
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#'
2-
#' @title Soil Taxonomy Hierarchy
2+
#' Soil Taxonomy Hierarchy
33
#'
4-
#' @description The first 4 levels of the US Soil Taxonomy hierarchy (soil order, suborder, greatgroup, subgroup), presented as a \code{data.frame} (denormalized) and a \code{list} of unique taxa.
4+
#' The first 4 levels of the US Soil Taxonomy hierarchy (soil order, suborder, greatgroup, subgroup), presented as a \code{data.frame} (denormalized) and a \code{list} of unique taxa.
55
#' @details Ordered based on the unique letter codes denoting taxa from the 13th edition of the Keys to Soil Taxonomy.
66
#' @usage data(ST)
77
#'
@@ -20,9 +20,9 @@
2020
#'
2121
"ST"
2222

23-
#' @title Family-level Classes for Soil Taxonomy
23+
#' Family-level Classes for Soil Taxonomy
2424
#'
25-
#' @description A database of family-level class names for Soil Taxonomy.
25+
#' A database of family-level class names for Soil Taxonomy.
2626
#'
2727
#' @references
2828
#' Soil Survey Staff. 2014. Keys to Soil Taxonomy, 12th ed. USDA-Natural Resources Conservation Service, Washington, DC.
@@ -34,9 +34,9 @@
3434
#'
3535
"ST_family_classes"
3636

37-
#' @title Epipedons, Diagnostic Horizons, Characteristics and Features in Soil Taxonomy
37+
#' Epipedons, Diagnostic Horizons, Characteristics and Features in Soil Taxonomy
3838
#'
39-
#' @description A `data.frame` with columns "group", "name", "chapter", "page", "description", "criteria". Currently page numbers and contents are referenced to 12th Edition Keys to Soil Taxonomy and derived from products in the ncss-tech SoilKnowledgeBase repository (https://github.com/ncss-tech/SoilKnowledgeBase).
39+
#' A `data.frame` with columns "group", "name", "chapter", "page", "description", "criteria". Currently page numbers and contents are referenced to 12th Edition Keys to Soil Taxonomy and derived from products in the ncss-tech SoilKnowledgeBase repository (https://github.com/ncss-tech/SoilKnowledgeBase).
4040
#'
4141
#' @references
4242
#' Soil Survey Staff. 2014. Keys to Soil Taxonomy, 12th ed. USDA-Natural Resources Conservation Service, Washington, DC.
@@ -48,9 +48,9 @@
4848
#'
4949
"ST_features"
5050

51-
#' @title Formative Elements used by Soil Taxonomy
51+
#' Formative Elements used by Soil Taxonomy
5252
#'
53-
#' @description A database of formative elements used by the first 4 levels of US Soil Taxonomy hierarchy (soil order, suborder, greatgroup, subgroup).
53+
#' A database of formative elements used by the first 4 levels of US Soil Taxonomy hierarchy (soil order, suborder, greatgroup, subgroup).
5454
#'
5555
#' @references
5656
#' S. W. Buol and R. C. Graham and P. A. McDaniel and R. J. Southard. Soil Genesis and Classification, 5th edition. Iowa State Press, 2003.
@@ -61,9 +61,9 @@
6161
#'
6262
"ST_formative_elements"
6363

64-
#' @title Letter Code Lookup Table for Position of Taxa within the Keys to Soil Taxonomy (12th Edition)
64+
#' Letter Code Lookup Table for Position of Taxa within the Keys to Soil Taxonomy (12th Edition)
6565
#'
66-
#' @description A lookup table mapping unique taxonomic Order, Suborder, Great Group and Subgroups to letter codes that denote their logical position within the Keys.
66+
#' A lookup table mapping unique taxonomic Order, Suborder, Great Group and Subgroups to letter codes that denote their logical position within the Keys.
6767
#'
6868
#' @details The lookup table has been corrected to reflect errata that were posted after the print publication of the 12th Edition Keys, as well as typos in the Spanish language edition.
6969
#'
@@ -81,9 +81,9 @@
8181
#'
8282
"ST_higher_taxa_codes_12th"
8383

84-
#' @title Letter Code Lookup Table for Position of Taxa within the Keys to Soil Taxonomy (13th Edition)
84+
#' Letter Code Lookup Table for Position of Taxa within the Keys to Soil Taxonomy (13th Edition)
8585
#'
86-
#' @description A lookup table mapping unique taxonomic Order, Suborder, Great Group and Subgroups to letter codes that denote their logical position within the Keys.
86+
#' A lookup table mapping unique taxonomic Order, Suborder, Great Group and Subgroups to letter codes that denote their logical position within the Keys.
8787
#'
8888
#' @references
8989
#'
@@ -95,3 +95,25 @@
9595
#' @keywords datasets
9696
#'
9797
"ST_higher_taxa_codes_13th"
98+
99+
#' World Reference Base for Soil Resources (4th Edition, 2022)
100+
#'
101+
#' A _list_ containing three _data.frame_ elements `"rsg"`, `"pq"`, and `"sq"` providing information on the 'Reference Soil Groups', 'Principal Qualifiers,' and 'Supplementary Qualifiers,' respectively.
102+
#'
103+
#' @details
104+
#'
105+
#' Each element has the column `"code"` which is a number (1-32) referring to the position in the Reference Soil Groups, and the column `"reference_soil_group"` which is the corresponding group name. The `"pq"` and `"sq"` qualifier name columns (`principal_qualifiers` and `supplementary_qualifiers`) contain individual qualifier terms. Related qualifiers are identified using the `qualifier_group` column, which holds the original group of qualifier names separated with a forward slash `" / "`.
106+
#'
107+
#' - The _data.frame_ `"rsg"` has column `"criteria"`, describing the logical criteria for each Reference Soil Group.
108+
#' - The _data.frame_ `"pq"` has qualifier names in column `"principal_qualifiers"`.
109+
#' - The _data.frame_ `"sq"` has qualifier names in column `"supplementary_qualifiers"`.
110+
#'
111+
#' @references
112+
#'
113+
#' IUSS Working Group WRB. 2022. World Reference Base for Soil Resources. International soil classification system for naming soils and creating legends for soil maps. 4th edition. International Union of Soil Sciences (IUSS), Vienna, Austria.
114+
#'
115+
#' @usage data(WRB_4th_2022)
116+
#'
117+
#' @keywords datasets
118+
#'
119+
"WRB_4th_2022"

R/higherTaxaCodes.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#' Decompose taxon letter codes
22
#'
3-
#' @description Find all codes that logically comprise the specified codes. For instance, code "ABC" ("Anhyturbels") returns "A" ("Gelisols"), "AB" ("Turbels"), "ABC" ("Anhyturbels"). Use in conjunction with a lookup table that maps Order, Suborder, Great Group and Subgroup taxa to their codes (see \code{\link{taxon_code_to_taxon}} and \code{\link{taxon_to_taxon_code}}).
3+
#' Find all codes that logically comprise the specified codes. For instance, code "ABC" ("Anhyturbels") returns "A" ("Gelisols"), "AB" ("Turbels"), "ABC" ("Anhyturbels"). Use in conjunction with a lookup table that maps Order, Suborder, Great Group and Subgroup taxa to their codes (see \code{\link{taxon_code_to_taxon}} and \code{\link{taxon_to_taxon_code}}).
44
#'
55
#' @details Accounts for Keys that run out of capital letters (more than 26 subgroups) and use lowercase letters for a unique subdivision within the "fourth character position."
66
#'
@@ -49,7 +49,7 @@ decompose_taxon_code <- function(codes) {
4949

5050
#' Get taxon codes of preceding taxa
5151
#'
52-
#' @description Find all codes that logically precede the specified codes. For instance, code "ABC" ("Anhyturbels") returns "AA" ("Histels") "ABA" ("Histoturbels") and "ABB" ("Aquiturbels"). Use in conjunction with a lookup table that maps Order, Suborder, Great Group and Subgroup taxa to their codes (see \code{\link{taxon_code_to_taxon}} and \code{\link{taxon_to_taxon_code}}).
52+
#' Find all codes that logically precede the specified codes. For instance, code "ABC" ("Anhyturbels") returns "AA" ("Histels"), "ABA" ("Histoturbels") and "ABB" ("Aquiturbels"). Use in conjunction with a lookup table that maps Order, Suborder, Great Group and Subgroup taxa to their codes (see \code{\link{taxon_code_to_taxon}} and \code{\link{taxon_to_taxon_code}}).
5353
#'
5454
#' @details Accounts for Keys that run out of capital letters (more than 26 subgroups) and use lowercase letters for a unique subdivision within the "fourth character position."
5555
#'
@@ -187,7 +187,7 @@ taxon_to_taxon_code <- function(taxon) {
187187

188188
#' Determine relative position of taxon within Keys to Soil Taxonomy (Order to Subgroup)
189189
#'
190-
#' @description The relative position of a taxon is `[number of preceding Key steps] + 1`, or `NA` if it does not exist in the lookup table.
190+
#' The relative position of a taxon is `[number of preceding Key steps] + 1`, or `NA` if it does not exist in the lookup table.
191191
#'
192192
#' @param code A character vector of taxon codes to determine the relative position of.
193193
#'

data-raw/wrb_4th_2022.R

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
## code to prepare the `WRB_4th_2022` dataset
2+
library(pdftools)
3+
4+
## SETUP
5+
##
6+
# dir.create("misc/WRB2022")
7+
# download.file("https://wrb.isric.org/files/WRB_fourth_edition_2022-12-18.pdf",
8+
# destfile = "misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
9+
10+
## does not work for RSG/qualifiers; tables used in formatting
11+
## can be used for definitions of diagnostics and qualifiers
12+
# x <- pdf_text("misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
13+
# x <- unlist(strsplit(x, "\n"))
14+
# ldx <- cumsum(grepl("Key to the Reference Soil Groups", x))
15+
# y <- split(x, ldx)
16+
# data.frame(y[[11]]) |> View()
17+
18+
## nope
19+
# x <- pdf_data("misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
20+
# y <- do.call('rbind', x)
21+
#
22+
23+
x <- readLines("misc/WRB2022/WRB_RSG.txt")
24+
x <- gsub("\u003c", "<", gsub("\u003E", ">", gsub("\u2264", "<=", gsub("\u2265", ">=", x))))
25+
n <- grep("^[A-Z]+$", x)
26+
z.names <- x[n]
27+
x <- x[-n]
28+
idx <- grep("^(Soils having|Other soils)", x)
29+
ldx <- rep(FALSE, length(x))
30+
ldx[idx] <- TRUE
31+
xx <- split(x, cumsum(ldx))
32+
z <- lapply(xx, function(y) {
33+
i <- grep("(; (and|or)|\\.|:)$", y) + 1
34+
i <- i[i < length(y)]
35+
l <- rep(FALSE, length(y))
36+
l[i] <- TRUE
37+
sapply(split(y, cumsum(l)), paste0, collapse = " ")
38+
})
39+
names(z) <- z.names
40+
41+
wrb_rsg <- do.call('rbind', lapply(seq(z), function(i) {
42+
data.frame(code = i, reference_soil_group = z.names[i], criteria = z[[z.names[i]]])
43+
}))
44+
rownames(wrb_rsg) <- NULL
45+
# View(wrb_rsg)
46+
47+
x <- readLines("misc/WRB2022/WRB_PQ.txt")
48+
n <- grep("^[A-Z]+$", x)
49+
z.names <- x[n]
50+
x <- x[-n]
51+
idx <- grep("Principal qualifiers", x)
52+
ldx <- rep(FALSE, length(x))
53+
ldx[idx] <- TRUE
54+
xx <- split(x, cumsum(ldx))
55+
z <- lapply(xx, function(y) {
56+
y <- trimws(gsub("([^ ])/ ", "\\1 / ", y))
57+
y[y != "Principal qualifiers"]
58+
})
59+
names(z) <- z.names
60+
61+
wrb_pq <- do.call('rbind', lapply(seq(z), function(i) {
62+
pq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
63+
pg <- lapply(seq(pq), function(j) rep(z[[z.names[i]]][j], length(pq[[j]])))
64+
data.frame(code = i,
65+
reference_soil_group = z.names[i],
66+
qualifier_group = unlist(pg),
67+
principal_qualifiers = unlist(pq))
68+
}))
69+
rownames(wrb_pq) <- NULL
70+
# View(wrb_pq)
71+
72+
x <- readLines("misc/WRB2022/WRB_SQ.txt")
73+
n <- grep("^[A-Z]+$", x)
74+
z.names <- x[n]
75+
x <- x[-n]
76+
idx <- grep("Supplementary qualifiers", x)
77+
ldx <- rep(FALSE, length(x))
78+
ldx[idx] <- TRUE
79+
xx <- split(x, cumsum(ldx))
80+
z <- lapply(xx, function(y) {
81+
y <- trimws(gsub("([^ ])/ ", "\\1 / ", y))
82+
y[y != "Supplementary qualifiers"]
83+
})
84+
names(z) <- z.names
85+
86+
wrb_sq <- do.call('rbind', lapply(seq(z), function(i) {
87+
sq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
88+
sg <- lapply(seq(sq), function(j) rep(z[[z.names[i]]][j], length(sq[[j]])))
89+
data.frame(code = i,
90+
reference_soil_group = z.names[i],
91+
qualifier_group = unlist(sg),
92+
supplementary_qualifiers = unlist(sq))
93+
}))
94+
rownames(wrb_sq) <- NULL
95+
# View(wrb_sq)
96+
97+
WRB_4th_2022 <- list(
98+
rsg = wrb_rsg,
99+
pq = wrb_pq,
100+
sq = wrb_sq
101+
)
102+
103+
stopifnot(all(sapply(WRB_4th_2022, function(x) max(x$code)) == 32))
104+
105+
usethis::use_data(WRB_4th_2022, overwrite = TRUE)

data/WRB_4th_2022.rda

7.77 KB
Binary file not shown.

man/SoilTaxonomy-package.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/WRB_4th_2022.Rd

+27
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

misc/.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
*.json
2-
subgroups.tgz
32
.Rproj.user
43
.Rhistory
54
*.Rproj
5+
WRB2022/WRB_fourth_edition_2022-12-18.pdf

0 commit comments

Comments
 (0)