Skip to content

Commit

Permalink
Merge pull request #109 from PIP-Technical-Team/tefera_update_branch
Browse files Browse the repository at this point in the history
Tefera update branch
  • Loading branch information
randrescastaneda authored Jan 14, 2025
2 parents cbae251 + ef16941 commit 1bb8bf2
Show file tree
Hide file tree
Showing 155 changed files with 4,852 additions and 150 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ tmp/
.vscode/
^codecov\.yml$
^data-raw$
^vignettes/articles$
12 changes: 8 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ Suggests:
testthat (>= 3.0.0),
knitr,
rmarkdown,
covr
covr,
dm,
config
Imports:
haven,
digest,
Expand All @@ -50,10 +52,12 @@ Imports:
httr,
pipfun (>= 0.0.2),
lifecycle,
collapse,
joyn,
dm,
config,
collapse
data.validator,
assertr,
blastula,
rlang
VignetteBuilder: knitr
Remotes:
github::PIP-Technical-Team/pipload@DEV_v2,
Expand Down
41 changes: 41 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,28 @@

export("%>%")
export(auto_aux_update)
export(cl_validate_raw)
export(clean_validation_report)
export(convert_df_to_base64)
export(countries_validate_output)
export(cpi_validate_output)
export(cpi_validate_raw)
export(draw_model)
export(gdm_validate_output)
export(gdm_validate_raw)
export(gdp_validate_output)
export(get_error_validation)
export(incgroup_validate_output)
export(load_aux)
export(merger_aux)
export(metadata_validate_output)
export(metadata_validate_raw)
export(mpd_validate_raw)
export(npl_validate_output)
export(npl_validate_raw)
export(pce_validate_output)
export(pfw_validate_output)
export(pfw_validate_raw)
export(pip_censoring)
export(pip_countries)
export(pip_country_list)
Expand All @@ -13,6 +32,7 @@ export(pip_cpi)
export(pip_dictionary)
export(pip_gdm)
export(pip_gdp)
export(pip_gdp_weo)
export(pip_income_groups)
export(pip_indicators)
export(pip_maddison)
Expand All @@ -24,21 +44,42 @@ export(pip_nan)
export(pip_npl)
export(pip_pce)
export(pip_pfw)
export(pip_pfw_key)
export(pip_pl)
export(pip_pl_clean)
export(pip_pop)
export(pip_ppp)
export(pip_prices)
export(pip_regions)
export(pip_sna)
export(pip_update_all_aux)
export(pip_wdi)
export(pip_wdi_update)
export(pip_weo)
export(pip_weo_clean)
export(pl_validate_output)
export(pop_validate_output)
export(pop_validate_raw)
export(popmain_validate_raw)
export(ppp_validate_output)
export(ppp_validate_raw)
export(save_aux_to_gh)
export(send_report)
export(sna_fy_validate_raw)
export(sna_validate_raw)
export(spop_validate_raw)
export(update_aux)
export(wdi_validate_raw)
export(weo_validate_output)
export(weo_validate_raw)
import(blastula)
import(collapse, except = fdroplevels)
import(data.table)
import(data.table, except = fdroplevels)
import(data.validator)
importFrom(assertr,in_set)
importFrom(assertr,is_uniq)
importFrom(assertr,not_na)
importFrom(glue,glue)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%<>%")
Expand Down
1 change: 1 addition & 0 deletions R/aaa.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.pipaux <- new.env(parent = emptyenv())
2 changes: 2 additions & 0 deletions R/auto_aux_update.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ auto_aux_update <- function(measure = NULL,
)
}

# if there is validation report in the environment - remove it
clean_validation_report()

creds <- pipfun::get_github_creds()
gh_user <- "https://raw.githubusercontent.com"
Expand Down
73 changes: 73 additions & 0 deletions R/cl_validate_raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#' Validate raw country list data
#'
#' @param cl raw country list data, as loaded via `pipfun::load_from_gh`
#' @param detail has an option TRUE/FALSE, default value is FALSE
#' @import data.validator
#' @importFrom assertr in_set not_na is_uniq
#' @keywords internal
#'
#' @export
cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){

stopifnot("Country list raw data is not loaded" = !is.null(cl))

report <- data_validation_report()

# country_list <- pipload::pip_load_aux("pfw")
country_list <- pipfun::load_from_gh(measure = "pfw",
owner = getOption("pipfun.ghowner"),
branch = "DEV",
ext = "dta")

country_list <- unique(country_list[, code])

validate(cl, name = "CL raw data validation") |>
validate_if(is.character(country_code),
description = "`country_code` should be character") |>
# validate_cols(in_set(country_list),
# country_code, description = "`country_code` values within range") |>
validate_if(is.character(country_name),
description = "`country_name` should be character") |>
validate_if(is.character(africa_split),
description = "`africa_split` should be character") |>
validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
africa_split, description = "`africa_split` values within range") |>
validate_if(is.character(africa_split_code),
description = "`africa_split_code` should be character") |>
validate_cols(in_set(c("AFE", "AFW", NA)),
africa_split_code, description = "`africa_split_code` values within range") |>
validate_if(is.character(pcn_region),
description = "`pcn_region` should be character") |>
validate_if(is.character(pcn_region_code),
description = "`pcn_region_code` should be character") |>
validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
pcn_region_code, description = "`pcn_region_code` values within range") |>
validate_if(is.character(region),
description = "`region` should be character") |>
validate_if(is.character(region_code),
description = "`region_code` should be character") |>
validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
region_code, description = "`region_code` values within range") |>
validate_if(is.character(world),
description = "`world` should be character") |>
validate_cols(in_set(c("World")),
world, description = "`world` values within range") |>
validate_if(is.character(world_code),
description = "`world_code` should be character") |>
validate_cols(in_set(c("WLD")),
world_code, description = "`world_code` values within range") |>
validate_cols(not_na, country_code,
description = "no missing values in key variables") |>
validate_if(is_uniq(country_code),
description = "no duplicate records in key variables") |>
add_results(report)

validation_record <- get_results(report, unnest = FALSE) |>
setDT()

if (any(validation_record[["type"]] == "error")){
get_error_validation(validation_record, detail)
}

}

12 changes: 12 additions & 0 deletions R/clean_validation_report.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#' Remove data validation report from .pipaux environment variable
#'
#' @export
clean_validation_report <- function(){

if (rlang::env_has(.pipaux, "validation_report")){

# rlang::env_bind(.pipaux, validation_report = rlang::zap())
rlang::env_unbind(.pipaux, "validation_report")

}
}
57 changes: 57 additions & 0 deletions R/countries_validate_output.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' Validate output countries data
#'
#' @param countries output countries data
#' @param detail has an option TRUE/FALSE, default value is FALSE
#' @import data.validator
#' @importFrom assertr in_set not_na is_uniq
#' @keywords internal
#'
#' @export
countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){

stopifnot("Countries output data is not loaded" = !is.null(countries))

report <- data_validation_report()

validate(countries, name = "countries output data validation") |>
validate_if(is.character(country_code),
description = "`country_code` should be character") |>
validate_if(is.character(country_name),
description = "`country_name` should be character") |>
validate_if(is.character(africa_split),
description = "`africa_split` should be character") |>
validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
africa_split, description = "`africa_split` values within range") |>
validate_if(is.character(africa_split_code),
description = "`africa_split_code` should be character") |>
validate_cols(in_set(c("AFE", "AFW", NA)),
africa_split_code, description = "`africa_split_code` values within range") |>
validate_if(is.character(region),
description = "`region` should be character") |>
validate_if(is.character(region_code),
description = "`region_code` should be character") |>
validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
region_code, description = "`region_code` values within range") |>
validate_if(is.character(world),
description = "`world` should be character") |>
validate_cols(in_set(c("World")),
world, description = "`world` values within range") |>
validate_if(is.character(world_code),
description = "`world_code` should be character") |>
validate_cols(in_set(c("WLD")),
world_code, description = "`world_code` values within range") |>
validate_cols(not_na, country_code,
description = "no missing values in key variables") |>
validate_if(is_uniq(country_code),
description = "no duplicate records in key variables") |>
add_results(report)

validation_record <- get_results(report, unnest = FALSE) |>
setDT()

if (any(validation_record[["type"]] == "error")){
get_error_validation(validation_record, detail)
}

}

92 changes: 92 additions & 0 deletions R/cpi_validate_output.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#' Validate clean cpi data
#'
#' @param cpi clean cpi data, output via `pip_cpi_clean`
#' @param detail has an option TRUE/FALSE, default value is FALSE
#' @import data.validator
#' @importFrom assertr in_set not_na is_uniq
#' @keywords internal
#'
#' @export
cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){

stopifnot("CPI clean data is not loaded" = !is.null(cpi))

report <- data_validation_report()

validate(cpi, name = "CPI output data validation") |>
validate_if(is.character(country_code),
description = "`country_code` should be character") |>
validate_if(is.integer(year),
description = "`year` should be integer") |>
validate_if(is.numeric(survey_year),
description = "`survey_year` should be numeric") |>
validate_if(is.numeric(cpi),
description = "`cpi` should be numeric") |>
validate_if(is.numeric(ccf),
description = "`ccf` should be numeric") |>
validate_if(is.character(survey_acronym),
description = "`survey_acronym` should be character") |>
validate_if(is.numeric(change_cpi2011),
description = "`change_cpi2011` should be numeric") |>
validate_cols(in_set(c(0, 1)), change_cpi2011,
description = "`change_cpi2011` values within range") |>
# validate_if(is.character(cpi_domain),
# description = "`cpi_domain` should be character") |>
# validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain,
# description = "`cpi_domian` values within range") |>
validate_if(is.numeric(cpi_domain_value),
description = "`cpi_domain_value` should be numeric") |>
validate_cols(in_set(c(0, 1)), cpi_domain_value,
description = "`cpi_domain_value` values within range") |>
validate_if(is.numeric(cpi2017_unadj),
description = "`cpi2017_unadj` should be numeric") |>
validate_if(is.numeric(cpi2011_unadj),
description = "`cpi2011_unadj` should be numeric") |>
validate_if(is.numeric(cpi2011),
description = "`cpi2011` should be numeric") |>
validate_if(is.numeric(cpi2017),
description = "`cpi2017` should be numeric") |>
# validate_if(is.numeric(cpi2011_SM22),
# description = "`cpi2011_SM22` should be numeric") |>
# validate_if(is.numeric(cpi2017_SM22),
# description = "`cpi2017_SM22` should be numeric") |>
validate_cols(is.logical, cpi2005,
description = "`cpi2005` should be logical") |>
validate_if(is.character(reporting_level),
description = "`reporting_level` should be character") |>
validate_cols(in_set(c("national", "rural", "urban")), reporting_level,
description = "`reporting_level` values within range") |>
# validate_if(is.numeric(cpi2011_AM23),
# description = "`cpi2011_AM23` should be numeric") |>
# validate_if(is.numeric(cpi2017_AM23),
# description = "`cpi2017_AM23` should be numeric") |>
validate_if(is.character(cpi_id),
description = "`cpi_id` should be character") |>
validate_cols(not_na, country_code, year, survey_acronym, reporting_level,
description = "no missing values in key variables") |>
validate_if(is_uniq(country_code, year, survey_acronym,
reporting_level),
description = "no duplicate records in key variables") |>
validate_if(is_uniq(country_code, year, survey_acronym,
reporting_level),
description = "no duplicate cpi values") |>
add_results(report)

num_var_list1 <- grep("cpi2011_", colnames(cpi))
num_var_list2 <- grep("cpi2017_", colnames(cpi))
num_var_list <- c(num_var_list1, num_var_list2)

for (i in 1:length(num_var_list)) {
validate(cpi, name = "CPI validation") |>
validate_cols(is.numeric, num_var_list[i],
description = "variables (with numeric var name) should be numeric") |>
add_results(report)
}

validation_record <- get_results(report, unnest = FALSE) |>
setDT()

if (any(validation_record[["type"]] == "error")){
get_error_validation(validation_record, detail)
}
}
Loading

0 comments on commit 1bb8bf2

Please sign in to comment.