Skip to content

Commit 2a070f9

Browse files
Merge pull request #380 from ldecicco-USGS/main
New closest code
2 parents 9cc2d17 + 70fe123 commit 2a070f9

File tree

7 files changed

+591
-259
lines changed

7 files changed

+591
-259
lines changed

.Rbuildignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ vignettes/figure
1111
^\.Rproj\.user$
1212
README.Rmd
1313
README_files/
14+
^Temp$
1415
CONDUCT.md
1516
DISCLAIMER.md
1617
LICENSE.md
@@ -40,7 +41,9 @@ vignettes/pairResults2.rds
4041
vignettes/WRTDSK.Rmd
4142
vignettes/ChainBridge.TP.RData
4243
vignettes/dataPreperation.Rmd
43-
vignettes/Join_clpsest.Rmd
44+
vignettes/Join_closest.Rmd
45+
vignettes/Compare_QW_and_UV.Rmd
46+
vignettes/helper_functions.R
4447
vignettes/Method.bib
4548
vignettes/Extend_method.bib
4649
vignettes/Regional_studies.bib

.github/workflows/pkgdown.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444

4545
- name: Build site
4646
run: |
47-
install.packages('zoo') |
47+
install.packages(c('zoo', 'data.table')) |
4848
pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, dest_dir = "public") |
4949
file.copy(from = "./public/articles/logo.png",to = "./public/reference/logo.png")
5050
shell: Rscript {0}

.gitlab-ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ getready:
3939
- mkdir -p $R_LIBS_USER
4040
- mkdir -p $APT_CACHE
4141
- echo "options(Ncpus=$(nproc --all), repos=c(CRAN='$CRAN'))" >> $R_PROFILE
42-
- Rscript -e "install.packages(c('devtools', 'pkgdown', 'covr', 'connectapi', 'zoo', 'rsconnect'))"
42+
- Rscript -e "install.packages(c('devtools', 'pkgdown', 'covr', 'connectapi', 'zoo', 'rsconnect', 'data.table'))"
4343
- Rscript -e 'remotes::install_deps(dependencies=TRUE)'
4444
cache:
4545
paths:

_pkgdown.yml

+4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ navbar:
4545
href: articles/AlternativeQMethod.html
4646
- text: Annual_Hydrograph_Timing
4747
href: articles/Annual_Hydrograph_Timing.html
48+
- text: Join discrete and sensor data
49+
href: articles/Join_closest.html
50+
- text: Compare QW and UV
51+
href: articles/Compare_QW_and_UV.html
4852
- text: Custom Units
4953
href: articles/units.html
5054
- text: Bibliograpy

vignettes/Compare_QW_and_UV.Rmd

+381
Large diffs are not rendered by default.

vignettes/Join_closest.Rmd

+111-256
Large diffs are not rendered by default.

vignettes/helper_functions.R

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
join_qw_uv <- function(qw_data, # data from readWQP
3+
uv_flow_qw, # data from readNWISuv
4+
hour_threshold = 24, # hours threshold for joining
5+
join_by_qw = "ActivityStartDateTime",
6+
join_by_uv = "dateTime",
7+
qw_val = "ResultMeasureValue",
8+
qw_rmk = "ResultDetectionConditionText",
9+
qw_det_val = "DetectionQuantitationLimitMeasure.MeasureValue",
10+
qw_val_uv, # water quality value column in uv data
11+
qw_rmk_uv, # water quality remark column in uv data
12+
flow_val = "X_00060_00000", # uv flow parameter
13+
flow_rmk = "X_00060_00000_cd"){ # uv flow parameter cd
14+
15+
library(data.table)
16+
req_cols <- c(join_by_qw, qw_val, qw_rmk, qw_det_val)
17+
if(!all(req_cols %in% names(qw_data))){
18+
stop(paste('qw_data missing columns:', req_cols[!req_cols %in% names(qw_data)]))
19+
}
20+
21+
req_cols_uv <- c(join_by_uv)
22+
if(!all(req_cols_uv %in% names(uv_flow_qw))){
23+
stop(paste('uv_data missing columns:', req_cols_uv[!req_cols_uv %in% names(uv_flow_qw)]))
24+
}
25+
26+
data.table::setDT(qw_data)[, eval(parse(text = paste("join_date :=", join_by_qw)))]
27+
28+
data.table::setDT(uv_flow_qw)[, eval(parse(text = paste("join_date :=", join_by_uv)))]
29+
30+
# rolling join
31+
x <- uv_flow_qw[qw_data, on = .(join_date), roll = "nearest"]
32+
33+
setnames(x, c(qw_val, join_by_uv, join_by_qw, qw_rmk, qw_det_val),
34+
c("val_qw","uv_date", "qw_date", "qw_rmk", "qw_det_val"))
35+
36+
x <- x[order(qw_date)]
37+
38+
x_tib <- as_tibble(x)
39+
40+
if(!is.na(flow_val) | flow_val != ""){
41+
x_tib$flow_uv <- x_tib[[flow_val]]
42+
}
43+
if(!is.na(flow_rmk) | flow_rmk != ""){
44+
x_tib$flow_rmk_uv <- x_tib[[flow_rmk]]
45+
}
46+
47+
if(!is.na(qw_val_uv) | qw_val_uv != ""){
48+
x_tib$qw_val_uv <- x_tib[[qw_val_uv]]
49+
}
50+
if(!is.na(qw_rmk_uv) | qw_rmk_uv != ""){
51+
x_tib$qw_rmk_uv <- x_tib[[qw_rmk_uv]]
52+
}
53+
54+
toMatch <- c("NON-DETECT", "NON DETECT", "NOT DETECTED",
55+
"DETECTED NOT QUANTIFIED", "BELOW QUANTIFICATION LIMIT")
56+
57+
x_tib <- x_tib |>
58+
mutate(delta_time = difftime(qw_date, uv_date, units = "hours"),
59+
qw_val_uv = if_else(abs(as.numeric(delta_time)) >= hour_threshold,
60+
NA, qw_val_uv),
61+
qualifier = if_else(grepl(paste(toMatch,collapse="|"),
62+
toupper(qw_rmk)),
63+
"<", ""),
64+
value = if_else(qualifier == "<", qw_det_val, val_qw),
65+
date = as.Date(qw_date)) |>
66+
select(any_of(c("uv_date", "qw_date", "delta_time", "date",
67+
"qw_val_uv", "qw_rmk_uv",
68+
"value", "qualifier",
69+
"flow_uv", "flow_rmk_uv"))) |>
70+
rename(dateTime = qw_date)
71+
72+
73+
compressedData <- EGRET::compressData(x_tib[, c("date",
74+
"qualifier",
75+
"value")],
76+
verbose = FALSE)
77+
Sample <- EGRET::populateSampleColumns(compressedData)
78+
Sample <- Sample |>
79+
left_join(x_tib |>
80+
select(-qualifier) |>
81+
rename(qw_dateTime = dateTime,
82+
uv_dateTime = uv_date,
83+
Date = date,
84+
ConcHigh = value),
85+
by = c("Date", "ConcHigh"))
86+
87+
return(Sample)
88+
89+
}

0 commit comments

Comments
 (0)