Merge pull request #109 from PIP-Technical-Team/tefera_update_branch

Tefera update branch
PIP-Technical-Team · Jan 14, 2025 · 1bb8bf2 · 1bb8bf2
2 parents cbae251 + ef16941
commit 1bb8bf2
Show file tree

Hide file tree

Showing 155 changed files with 4,852 additions and 150 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@ tmp/
 .vscode/
 ^codecov\.yml$
 ^data-raw$
+^vignettes/articles$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,7 +28,9 @@ Suggests:
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,
-    covr
+    covr,
+    dm,
+    config
 Imports: 
     haven,
     digest,
@@ -50,10 +52,12 @@ Imports:
     httr,
     pipfun (>=  0.0.2),
     lifecycle,
+    collapse,
     joyn,
-    dm,
-    config,
-    collapse
+    data.validator,
+    assertr,
+    blastula,
+    rlang
 VignetteBuilder: knitr
 Remotes: 
     github::PIP-Technical-Team/pipload@DEV_v2,

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,9 +2,28 @@
 
 export("%>%")
 export(auto_aux_update)
+export(cl_validate_raw)
+export(clean_validation_report)
 export(convert_df_to_base64)
+export(countries_validate_output)
+export(cpi_validate_output)
+export(cpi_validate_raw)
 export(draw_model)
+export(gdm_validate_output)
+export(gdm_validate_raw)
+export(gdp_validate_output)
+export(get_error_validation)
+export(incgroup_validate_output)
 export(load_aux)
+export(merger_aux)
+export(metadata_validate_output)
+export(metadata_validate_raw)
+export(mpd_validate_raw)
+export(npl_validate_output)
+export(npl_validate_raw)
+export(pce_validate_output)
+export(pfw_validate_output)
+export(pfw_validate_raw)
 export(pip_censoring)
 export(pip_countries)
 export(pip_country_list)
@@ -13,6 +32,7 @@ export(pip_cpi)
 export(pip_dictionary)
 export(pip_gdm)
 export(pip_gdp)
+export(pip_gdp_weo)
 export(pip_income_groups)
 export(pip_indicators)
 export(pip_maddison)
@@ -24,21 +44,42 @@ export(pip_nan)
 export(pip_npl)
 export(pip_pce)
 export(pip_pfw)
+export(pip_pfw_key)
 export(pip_pl)
 export(pip_pl_clean)
 export(pip_pop)
 export(pip_ppp)
+export(pip_prices)
 export(pip_regions)
 export(pip_sna)
 export(pip_update_all_aux)
 export(pip_wdi)
 export(pip_wdi_update)
 export(pip_weo)
 export(pip_weo_clean)
+export(pl_validate_output)
+export(pop_validate_output)
+export(pop_validate_raw)
+export(popmain_validate_raw)
+export(ppp_validate_output)
+export(ppp_validate_raw)
+export(save_aux_to_gh)
+export(send_report)
+export(sna_fy_validate_raw)
+export(sna_validate_raw)
+export(spop_validate_raw)
 export(update_aux)
+export(wdi_validate_raw)
+export(weo_validate_output)
+export(weo_validate_raw)
+import(blastula)
 import(collapse, except = fdroplevels)
 import(data.table)
 import(data.table, except = fdroplevels)
+import(data.validator)
+importFrom(assertr,in_set)
+importFrom(assertr,is_uniq)
+importFrom(assertr,not_na)
 importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%<>%")

diff --git a/R/aaa.R b/R/aaa.R
@@ -0,0 +1 @@
+.pipaux <-  new.env(parent = emptyenv())
diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R
@@ -29,6 +29,8 @@ auto_aux_update <- function(measure = NULL,
     )
   }
 
+  # if there is validation report in the environment - remove it
+  clean_validation_report()
 
   creds <- pipfun::get_github_creds()
   gh_user   <- "https://raw.githubusercontent.com"

diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R
@@ -0,0 +1,73 @@
+#' Validate raw country list data
+#'
+#' @param cl raw country list data, as loaded via `pipfun::load_from_gh`
+#' @param detail has an option TRUE/FALSE, default value is FALSE
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#' @keywords internal
+#'
+#' @export
+cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){
+
+  stopifnot("Country list raw data is not loaded" = !is.null(cl))
+
+  report <- data_validation_report()
+
+  # country_list <- pipload::pip_load_aux("pfw")
+  country_list <- pipfun::load_from_gh(measure = "pfw",
+                                       owner   = getOption("pipfun.ghowner"),
+                                       branch  = "DEV",
+                                       ext = "dta")
+
+  country_list <- unique(country_list[, code])
+
+  validate(cl, name = "CL raw data validation") |>
+    validate_if(is.character(country_code),
+                description = "`country_code` should be character") |>
+    # validate_cols(in_set(country_list),
+    #               country_code, description = "`country_code` values within range") |>
+    validate_if(is.character(country_name),
+                description = "`country_name` should be character") |>
+    validate_if(is.character(africa_split),
+                description = "`africa_split` should be character") |>
+    validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
+                  africa_split, description = "`africa_split` values within range") |>
+    validate_if(is.character(africa_split_code),
+                description = "`africa_split_code` should be character") |>
+    validate_cols(in_set(c("AFE", "AFW", NA)),
+                  africa_split_code, description = "`africa_split_code` values within range") |>
+    validate_if(is.character(pcn_region),
+                description = "`pcn_region` should be character") |>
+    validate_if(is.character(pcn_region_code),
+                description = "`pcn_region_code` should be character") |>
+    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+                  pcn_region_code, description = "`pcn_region_code` values within range") |>
+    validate_if(is.character(region),
+                description = "`region` should be character") |>
+    validate_if(is.character(region_code),
+                description = "`region_code` should be character") |>
+    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+                  region_code, description = "`region_code` values within range") |>
+    validate_if(is.character(world),
+                description = "`world` should be character") |>
+    validate_cols(in_set(c("World")),
+                  world, description = "`world` values within range") |>
+    validate_if(is.character(world_code),
+                description = "`world_code` should be character") |>
+    validate_cols(in_set(c("WLD")),
+                  world_code, description = "`world_code` values within range") |>
+    validate_cols(not_na, country_code,
+                  description = "no missing values in key variables") |>
+    validate_if(is_uniq(country_code),
+                description = "no duplicate records in key variables") |>
+    add_results(report)
+
+  validation_record <- get_results(report, unnest = FALSE) |>
+    setDT()
+
+  if (any(validation_record[["type"]] == "error")){
+    get_error_validation(validation_record, detail)
+  }
+
+}
+
diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R
@@ -0,0 +1,12 @@
+#' Remove data validation report from .pipaux environment variable
+#'
+#' @export
+clean_validation_report <- function(){
+
+  if (rlang::env_has(.pipaux, "validation_report")){
+
+    # rlang::env_bind(.pipaux, validation_report = rlang::zap())
+    rlang::env_unbind(.pipaux, "validation_report")
+
+  }
+}
diff --git a/R/countries_validate_output.R b/R/countries_validate_output.R
@@ -0,0 +1,57 @@
+#' Validate output countries data
+#'
+#' @param countries output countries data
+#' @param detail has an option TRUE/FALSE, default value is FALSE
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#' @keywords internal
+#'
+#' @export
+countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){
+
+  stopifnot("Countries output data is not loaded" = !is.null(countries))
+
+  report <- data_validation_report()
+
+  validate(countries, name = "countries output data validation") |>
+    validate_if(is.character(country_code),
+                description = "`country_code` should be character") |>
+    validate_if(is.character(country_name),
+                description = "`country_name` should be character") |>
+    validate_if(is.character(africa_split),
+                description = "`africa_split` should be character") |>
+    validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
+                  africa_split, description = "`africa_split` values within range") |>
+    validate_if(is.character(africa_split_code),
+                description = "`africa_split_code` should be character") |>
+    validate_cols(in_set(c("AFE", "AFW", NA)),
+                  africa_split_code, description = "`africa_split_code` values within range") |>
+    validate_if(is.character(region),
+                description = "`region` should be character") |>
+    validate_if(is.character(region_code),
+                description = "`region_code` should be character") |>
+    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+                  region_code, description = "`region_code` values within range") |>
+    validate_if(is.character(world),
+                description = "`world` should be character") |>
+    validate_cols(in_set(c("World")),
+                  world, description = "`world` values within range") |>
+    validate_if(is.character(world_code),
+                description = "`world_code` should be character") |>
+    validate_cols(in_set(c("WLD")),
+                  world_code, description = "`world_code` values within range") |>
+    validate_cols(not_na, country_code,
+                  description = "no missing values in key variables") |>
+    validate_if(is_uniq(country_code),
+                description = "no duplicate records in key variables") |>
+    add_results(report)
+
+  validation_record <- get_results(report, unnest = FALSE) |>
+    setDT()
+
+  if (any(validation_record[["type"]] == "error")){
+    get_error_validation(validation_record, detail)
+  }
+
+}
+
diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R
@@ -0,0 +1,92 @@
+#' Validate clean cpi data
+#'
+#' @param cpi clean cpi data, output via `pip_cpi_clean`
+#' @param detail has an option TRUE/FALSE, default value is FALSE
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#' @keywords internal
+#'
+#' @export
+cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){
+
+  stopifnot("CPI clean data is not loaded" = !is.null(cpi))
+
+  report <- data_validation_report()
+
+  validate(cpi, name = "CPI output data validation") |>
+    validate_if(is.character(country_code),
+                description = "`country_code` should be character") |>
+    validate_if(is.integer(year),
+                description = "`year` should be integer") |>
+    validate_if(is.numeric(survey_year),
+                description = "`survey_year` should be numeric") |>
+    validate_if(is.numeric(cpi),
+                description = "`cpi` should be numeric") |>
+    validate_if(is.numeric(ccf),
+                description = "`ccf` should be numeric") |>
+    validate_if(is.character(survey_acronym),
+                description = "`survey_acronym` should be character") |>
+    validate_if(is.numeric(change_cpi2011),
+                description = "`change_cpi2011` should be numeric") |>
+    validate_cols(in_set(c(0, 1)), change_cpi2011,
+                  description = "`change_cpi2011` values within range") |>
+    # validate_if(is.character(cpi_domain),
+    #             description = "`cpi_domain` should be character") |>
+    # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain,
+    #               description = "`cpi_domian` values within range") |>
+    validate_if(is.numeric(cpi_domain_value),
+                description = "`cpi_domain_value` should be numeric") |>
+    validate_cols(in_set(c(0, 1)), cpi_domain_value,
+                  description = "`cpi_domain_value` values within range") |>
+    validate_if(is.numeric(cpi2017_unadj),
+                description = "`cpi2017_unadj` should be numeric") |>
+    validate_if(is.numeric(cpi2011_unadj),
+                description = "`cpi2011_unadj` should be numeric") |>
+    validate_if(is.numeric(cpi2011),
+                description = "`cpi2011` should be numeric") |>
+    validate_if(is.numeric(cpi2017),
+                description = "`cpi2017` should be numeric") |>
+    # validate_if(is.numeric(cpi2011_SM22),
+    #             description = "`cpi2011_SM22` should be numeric") |>
+    # validate_if(is.numeric(cpi2017_SM22),
+    #             description = "`cpi2017_SM22` should be numeric") |>
+    validate_cols(is.logical, cpi2005,
+                  description = "`cpi2005` should be logical") |>
+    validate_if(is.character(reporting_level),
+                description = "`reporting_level` should be character") |>
+    validate_cols(in_set(c("national", "rural", "urban")), reporting_level,
+                  description = "`reporting_level` values within range") |>
+    # validate_if(is.numeric(cpi2011_AM23),
+    #             description = "`cpi2011_AM23` should be numeric") |>
+    # validate_if(is.numeric(cpi2017_AM23),
+    #             description = "`cpi2017_AM23` should be numeric") |>
+    validate_if(is.character(cpi_id),
+                description = "`cpi_id` should be character") |>
+    validate_cols(not_na, country_code, year, survey_acronym, reporting_level,
+                  description = "no missing values in key variables") |>
+    validate_if(is_uniq(country_code, year, survey_acronym,
+                        reporting_level),
+                description = "no duplicate records in key variables") |>
+    validate_if(is_uniq(country_code, year, survey_acronym,
+                        reporting_level),
+                description = "no duplicate cpi values") |>
+    add_results(report)
+
+  num_var_list1 <- grep("cpi2011_", colnames(cpi))
+  num_var_list2 <- grep("cpi2017_", colnames(cpi))
+  num_var_list <- c(num_var_list1, num_var_list2)
+
+  for (i in 1:length(num_var_list)) {
+    validate(cpi, name = "CPI validation") |>
+      validate_cols(is.numeric, num_var_list[i],
+                    description = "variables (with numeric var name) should be numeric") |>
+      add_results(report)
+  }
+
+  validation_record <- get_results(report, unnest = FALSE) |>
+    setDT()
+
+  if (any(validation_record[["type"]] == "error")){
+    get_error_validation(validation_record, detail)
+  }
+}