Skip to content

Commit 64116fd

Browse files
committed
add qualifier_group column for both primary and supplementary qualifiers
- used to identify related terms
1 parent f03da16 commit 64116fd

File tree

4 files changed

+21
-11
lines changed

4 files changed

+21
-11
lines changed

R/data-documentation.R

+6-6
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,12 @@
9999
#' World Reference Base for Soil Resources (4th Edition, 2022)
100100
#'
101101
#' A _list_ containing three _data.frame_ elements `"rsg"`, `"pq"`, and `"sq"` providing information on the 'Reference Soil Groups', 'Principal Qualifiers,' and 'Supplementary Qualifiers,' respectively.
102-
#'
102+
#'
103103
#' @details
104-
#'
105-
#' Each element has the column `"code"` which is a number (1-32) referring to the position in the Reference Soil Groups, and the column `"reference_soil_group"` which is the corresponding group name.
106-
#' - The _data.frame_ `"rsg"` has column `"criteria"`, describing the logical criteria for each Reference Soil Group.
107-
#' - The _data.frame_ `"pq"` has qualifier names in column `"principal_qualifier"` and `"sq"` has column `"supplementary_qualifier"`. The `"pq"` and `"sq"` qualifier names may be single qualifiers, or several related qualifiers separated with a forward slash `" / "`
104+
#'
105+
#' Each element has the column `"code"` which is a number (1-32) referring to the position in the Reference Soil Groups, and the column `"reference_soil_group"` which is the corresponding group name.
106+
#' - The _data.frame_ `"rsg"` has column `"criteria"`, describing the logical criteria for each Reference Soil Group.
107+
#' - The _data.frame_ `"pq"` has qualifier names in column `"principal_qualifier"` and `"sq"` has column `"supplementary_qualifier"`. These qualifier name columns contain individual qualifier terms. Related qualifiers can be identified using the `"qualifier_group"` column, which contains the original entry of related qualifier names separated with a forward slash `" / "`.
108108
#'
109109
#' @references
110110
#'
@@ -114,4 +114,4 @@
114114
#'
115115
#' @keywords datasets
116116
#'
117-
"wrb_4th_2022"
117+
"wrb_4th_2022"

data-raw/wrb_4th_2022.R

+14-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
library(pdftools)
33

44
## SETUP
5-
##
5+
##
66
# dir.create("misc/WRB2022")
77
# download.file("https://wrb.isric.org/files/WRB_fourth_edition_2022-12-18.pdf",
88
# destfile = "misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
@@ -18,7 +18,7 @@ library(pdftools)
1818
## nope
1919
# x <- pdf_data("misc/WRB2022/WRB_fourth_edition_2022-12-18.pdf")
2020
# y <- do.call('rbind', x)
21-
#
21+
#
2222

2323
x <- readLines("misc/WRB2022/WRB_RSG.txt")
2424
x <- gsub("\u003c", "<", gsub("\u003E", ">", gsub("\u2264", "<=", gsub("\u2265", ">=", x))))
@@ -59,7 +59,12 @@ z <- lapply(xx, function(y) {
5959
names(z) <- z.names
6060

6161
wrb_pq <- do.call('rbind', lapply(seq(z), function(i) {
62-
data.frame(code = i, reference_soil_group = z.names[i], principal_qualifiers = z[[z.names[i]]])
62+
pq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
63+
pg <- lapply(seq(pq), function(j) rep(z[[z.names[i]]][j], length(pq[[j]])))
64+
data.frame(code = i,
65+
reference_soil_group = z.names[i],
66+
qualifier_group = unlist(pg),
67+
principal_qualifiers = unlist(pq))
6368
}))
6469
rownames(wrb_pq) <- NULL
6570
# View(wrb_pq)
@@ -79,7 +84,12 @@ z <- lapply(xx, function(y) {
7984
names(z) <- z.names
8085

8186
wrb_sq <- do.call('rbind', lapply(seq(z), function(i) {
82-
data.frame(code = i, reference_soil_group = z.names[i], supplementary_qualifiers = z[[z.names[i]]])
87+
sq <- lapply(strsplit(z[[z.names[i]]], "/"), trimws)
88+
sg <- lapply(seq(sq), function(j) rep(z[[z.names[i]]][j], length(sq[[j]])))
89+
data.frame(code = i,
90+
reference_soil_group = z.names[i],
91+
qualifier_group = unlist(sg),
92+
supplementary_qualifiers = unlist(sq))
8393
}))
8494
rownames(wrb_sq) <- NULL
8595
# View(wrb_sq)

data/wrb_4th_2022.rda

-494 Bytes
Binary file not shown.

man/wrb_4th_2022.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)