PIP-Technical-Team · Tefera19 · Jan 29, 2024 · Jan 30, 2024 · Feb 6, 2024 · Mar 4, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -53,7 +53,12 @@ Imports:
     joyn,
     dm,
     config,
-    collapse
+    collapse,
+    covr,
+    data.validator,
+    assertr,
+    blastula,
+    rlang
 VignetteBuilder: knitr
 Remotes: 
     github::PIP-Technical-Team/pipload@ongoing,

diff --git a/Data/git_metadata.csv b/Data/git_metadata.csv
@@ -0,0 +1,25 @@
+Repo,hash,branch
+PIP-Technical-Team/aux_censoring,2d531fdd08a80ad69212c8a313f4c76ec33a8b85,DEV
+PIP-Technical-Team/aux_country_list,6d14c6c7b019dfa217c968bf8b4f5effa1548827,DEV
+PIP-Technical-Team/aux_cp,63b2cb63a461fe68f8cbafa6e3eb4640f01e3d28,DEV
+PIP-Technical-Team/aux_cpi,535de6c954ac56a24e50222564f603993ff0832a,DEV
+PIP-Technical-Team/aux_dictionary,3c597365da8a9e6f25a5f571d427b52c27ac379d,DEV
+PIP-Technical-Team/aux_dlw,c51a7a271b3b8566e3bd2a0c749179a1a2fc9e6a,DEV
+PIP-Technical-Team/aux_gdm,f43c984a0fa1cede4dc4ee847201e8f9ba1f2ce1,DEV
+PIP-Technical-Team/aux_gdp,07973496ab9fb30f80cf34d9969a13064695e676,DEV
+PIP-Technical-Team/aux_income_groups,3dbf8467aa3fb09ef8053d43f039b7f9df79156b,DEV
+PIP-Technical-Team/aux_indicators,47cc9fa6fc68c6ccc958029674c68c69e0f8daef,DEV
+PIP-Technical-Team/aux_maddison,6051166dd0e0ca9c5f4f9438d7f756359f493014,DEV
+PIP-Technical-Team/aux_metadata,328ed879227c0c7fef3f3f8a151fa0defe7c1a2a,DEV
+PIP-Technical-Team/aux_npl,ae9abc913a987d8107f7b2d10f570cd24a2bd0d0,DEV
+PIP-Technical-Team/aux_pce,6e5fa243d225112c8ff00d63b55be68ad9700886,DEV
+PIP-Technical-Team/aux_pfw,6299794192c61f4ef85af847d03833ee7d2a2b8a,DEV
+PIP-Technical-Team/aux_pl,e81910e2848e88d3b0319beec15b0b0b1a4f86d8,DEV
+PIP-Technical-Team/aux_pop,218e0e08460f52485ed9ca8a245b759cad224cb0,DEV
+PIP-Technical-Team/aux_ppp,916e11b8a30cbebbf0262a624ae92d8910343972,DEV
+PIP-Technical-Team/aux_regions,8747ef071d672ea6e7ef5bc40702e99310f04d01,DEV
+PIP-Technical-Team/aux_shp,373bd77aa1cb5f6c0d043356e9aa75f2e108ff61,DEV
+PIP-Technical-Team/aux_sna,99aa56fb8cb03428301cff1464065d660e628987,DEV
+PIP-Technical-Team/aux_sub,937dc962f7a3d39c2ae47e5b7e4d5b62c219e11f,DEV
+PIP-Technical-Team/aux_wdi,5bf6cf3c903f6223b4bedb6bb82fe5a78befec85,DEV
+PIP-Technical-Team/aux_weo,8beb3cb62860412cc3d68b69657b8c520d46c936,DEV
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,8 +2,26 @@
 
 export("%>%")
 export(auto_aux_update)
+export(cl_validate_raw)
+export(clean_validation_report)
+export(countries_validate_output)
+export(cpi_validate_output)
+export(cpi_validate_raw)
 export(draw_model)
+export(gdm_validate_output)
+export(gdm_validate_raw)
+export(gdp_validate_output)
+export(get_error_validation)
+export(incgroup_validate_output)
 export(load_aux)
+export(metadata_validate_output)
+export(metadata_validate_raw)
+export(mpd_validate_raw)
+export(npl_validate_output)
+export(npl_validate_raw)
+export(pce_validate_output)
+export(pfw_validate_output)
+export(pfw_validate_raw)
 export(pip_censoring)
 export(pip_countries)
 export(pip_country_list)
@@ -12,6 +30,7 @@ export(pip_cpi)
 export(pip_dictionary)
 export(pip_gdm)
 export(pip_gdp)
+export(pip_gdp_weo)
 export(pip_income_groups)
 export(pip_indicators)
 export(pip_maddison)
@@ -25,17 +44,36 @@ export(pip_pl)
 export(pip_pl_clean)
 export(pip_pop)
 export(pip_ppp)
+export(pip_prices)
 export(pip_regions)
 export(pip_sna)
 export(pip_update_all_aux)
 export(pip_wdi)
 export(pip_wdi_update)
 export(pip_weo)
 export(pip_weo_clean)
+export(pl_validate_output)
+export(pop_validate_output)
+export(pop_validate_raw)
+export(popmain_validate_raw)
+export(ppp_validate_output)
+export(ppp_validate_raw)
+export(send_report)
+export(sna_fy_validate_raw)
+export(sna_validate_raw)
+export(spop_validate_raw)
 export(update_aux)
+export(wdi_validate_raw)
+export(weo_validate_output)
+export(weo_validate_raw)
+import(blastula)
 import(collapse, except = fdroplevels)
 import(data.table)
 import(data.table, except = fdroplevels)
+import(data.validator)
+importFrom(assertr,in_set)
+importFrom(assertr,is_uniq)
+importFrom(assertr,not_na)
 importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%<>%")

diff --git a/R/aaa.R b/R/aaa.R
@@ -0,0 +1 @@
+# Environment with no parent (`emptyenv()`); `clean_validation_report()`
+# looks for a `validation_report` binding in it.
+.pipaux <-  new.env(parent = emptyenv())
diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R
@@ -29,6 +29,8 @@ auto_aux_update <- function(measure = NULL,
     )
   }
 
+  # if there is validation report in the environment - remove it
+  clean_validation_report()
 
   assertthat::assert_that(Sys.getenv("GITHUB_PAT") != "",
                           msg = "Enviroment variable `GITHUB_PAT` is empty.

diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R
@@ -0,0 +1,80 @@
+#' Check the raw country list against the expected schema
+#'
+#' Runs type, allowed-value, missing-value and uniqueness checks on `cl` and,
+#' if any result has type "error", passes the results to
+#' `get_error_validation()`.
+#'
+#' @param cl raw country list data, as loaded via `pipfun::load_from_gh`
+#' @param detail TRUE/FALSE flag forwarded to `get_error_validation()`; the
+#'   default is read from the `pipaux.detail.raw` option
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#'
+#' @export
+cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")) {
+
+  stopifnot("Country list raw data is not loaded" = !is.null(cl))
+
+  cl_report <- data_validation_report()
+
+  cl_checks <- validate(cl, name = "CL raw data validation") |>
+    # country identifiers
+    validate_if(
+      is.character(country_code),
+      description = "`country_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c(
+        "ABW", "AFG", "AGO", "ALB", "AND", "ARE", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE",
+        "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH", "BLR", "BLZ", "BMU", "BOL",
+        "BRA", "BRB", "BRN", "BTN", "BWA", "CAF", "CAN", "CHE", "CHI", "CHL", "CHN", "CIV", "CMR",
+        "COD", "COG", "COL", "COM", "CPV", "CRI", "CUB", "CUW", "CYM", "CYP", "CZE", "DEU", "DJI",
+        "DMA", "DNK", "DOM", "DZA", "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA",
+        "FRO", "FSM", "GAB", "GBR", "GEO", "GHA", "GIB", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD",
+        "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN", "IMN", "IND", "IRL",
+        "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN", "KAZ", "KEN", "KGZ", "KHM", "KIR",
+        "KNA", "KOR", "KWT", "LAO", "LBN", "LBR", "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX",
+        "LVA", "MAC", "MAF", "MAR", "MCO", "MDA", "MDG", "MDV", "MEX", "MHL", "MKD", "MLI", "MLT",
+        "MMR", "MNE", "MNG", "MNP", "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA",
+        "NIC", "NLD", "NOR", "NPL", "NRU", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PLW", "PNG",
+        "POL", "PRI", "PRK", "PRT", "PRY", "PSE", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU", "SDN",
+        "SEN", "SGP", "SLB", "SLE", "SLV", "SMR", "SOM", "SRB", "SSD", "STP", "SUR", "SVK", "SVN",
+        "SWE", "SWZ", "SXM", "SYC", "SYR", "TCA", "TCD", "TGO", "THA", "TJK", "TKM", "TLS", "TON",
+        "TTO", "TUN", "TUR", "TUV", "TWN", "TZA", "UGA", "UKR", "URY", "USA", "UZB", "VCT", "VEN",
+        "VGB", "VIR", "VNM", "VUT", "WSM", "XKX", "YEM", "ZAF", "ZMB", "ZWE"
+      )),
+      country_code,
+      description = "`country_code` values within range"
+    ) |>
+    validate_if(
+      is.character(country_name),
+      description = "`country_name` should be character"
+    ) |>
+    # Africa split
+    validate_if(
+      is.character(africa_split),
+      description = "`africa_split` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
+      africa_split,
+      description = "`africa_split` values within range"
+    ) |>
+    validate_if(
+      is.character(africa_split_code),
+      description = "`africa_split_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("AFE", "AFW", NA)),
+      africa_split_code,
+      description = "`africa_split_code` values within range"
+    ) |>
+    # regions
+    validate_if(
+      is.character(pcn_region),
+      description = "`pcn_region` should be character"
+    ) |>
+    validate_if(
+      is.character(pcn_region_code),
+      description = "`pcn_region_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+      pcn_region_code,
+      description = "`pcn_region_code` values within range"
+    ) |>
+    validate_if(
+      is.character(region),
+      description = "`region` should be character"
+    ) |>
+    validate_if(
+      is.character(region_code),
+      description = "`region_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+      region_code,
+      description = "`region_code` values within range"
+    ) |>
+    # world aggregate
+    validate_if(
+      is.character(world),
+      description = "`world` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("World")),
+      world,
+      description = "`world` values within range"
+    ) |>
+    validate_if(
+      is.character(world_code),
+      description = "`world_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("WLD")),
+      world_code,
+      description = "`world_code` values within range"
+    ) |>
+    # key variable: country_code
+    validate_cols(
+      not_na,
+      country_code,
+      description = "no missing values in key variables"
+    ) |>
+    validate_if(
+      is_uniq(country_code),
+      description = "no duplicate records in key variables"
+    )
+
+  add_results(cl_checks, cl_report)
+
+  outcome <- setDT(get_results(cl_report, unnest = FALSE))
+  found_errors <- any(outcome[["type"]] == "error")
+
+  if (found_errors) {
+    get_error_validation(outcome, detail)
+  }
+}
+
diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R
@@ -0,0 +1,12 @@
+#' Drop the stored validation report from the `.pipaux` environment
+#'
+#' Does nothing when `.pipaux` has no `validation_report` binding.
+#'
+#' @export
+clean_validation_report <- function() {
+  report_is_stored <- rlang::env_has(.pipaux, "validation_report")
+
+  # nothing to clean up
+  if (!report_is_stored) {
+    return(invisible(NULL))
+  }
+
+  rlang::env_unbind(.pipaux, "validation_report")
+}
diff --git a/R/countries_validate_output.R b/R/countries_validate_output.R
@@ -0,0 +1,56 @@
+#' Check the countries output data against the expected schema
+#'
+#' Runs type, allowed-value, missing-value and uniqueness checks on
+#' `countries` and, if any result has type "error", passes the results to
+#' `get_error_validation()`.
+#'
+#' @param countries output countries data
+#' @param detail TRUE/FALSE flag forwarded to `get_error_validation()`; the
+#'   default is read from the `pipaux.detail.output` option
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#'
+#' @export
+countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")) {
+
+  stopifnot("Countries output data is not loaded" = !is.null(countries))
+
+  out_report <- data_validation_report()
+
+  out_checks <- validate(countries, name = "countries output data validation") |>
+    # country identifiers
+    validate_if(
+      is.character(country_code),
+      description = "`country_code` should be character"
+    ) |>
+    validate_if(
+      is.character(country_name),
+      description = "`country_name` should be character"
+    ) |>
+    # Africa split
+    validate_if(
+      is.character(africa_split),
+      description = "`africa_split` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
+      africa_split,
+      description = "`africa_split` values within range"
+    ) |>
+    validate_if(
+      is.character(africa_split_code),
+      description = "`africa_split_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("AFE", "AFW", NA)),
+      africa_split_code,
+      description = "`africa_split_code` values within range"
+    ) |>
+    # region
+    validate_if(
+      is.character(region),
+      description = "`region` should be character"
+    ) |>
+    validate_if(
+      is.character(region_code),
+      description = "`region_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+      region_code,
+      description = "`region_code` values within range"
+    ) |>
+    # world aggregate
+    validate_if(
+      is.character(world),
+      description = "`world` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("World")),
+      world,
+      description = "`world` values within range"
+    ) |>
+    validate_if(
+      is.character(world_code),
+      description = "`world_code` should be character"
+    ) |>
+    validate_cols(
+      in_set(c("WLD")),
+      world_code,
+      description = "`world_code` values within range"
+    ) |>
+    # key variable: country_code
+    validate_cols(
+      not_na,
+      country_code,
+      description = "no missing values in key variables"
+    ) |>
+    validate_if(
+      is_uniq(country_code),
+      description = "no duplicate records in key variables"
+    )
+
+  add_results(out_checks, out_report)
+
+  outcome <- setDT(get_results(out_report, unnest = FALSE))
+  found_errors <- any(outcome[["type"]] == "error")
+
+  if (found_errors) {
+    get_error_validation(outcome, detail)
+  }
+}
+
diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R
@@ -0,0 +1,77 @@
+#' Validate clean cpi data
+#'
+#' Runs type, allowed-value, missing-value and uniqueness checks on `cpi`.
+#' When any check result has type "error", the results are passed to
+#' `get_error_validation()`.
+#'
+#' @param cpi clean cpi data, output via `pip_cpi_clean`
+#' @param detail TRUE/FALSE flag forwarded to `get_error_validation()`; the
+#'   default is read from the `pipaux.detail.output` option
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#'
+#' @export
+cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){
+
+  stopifnot("CPI clean data is not loaded" = !is.null(cpi))
+
+  report <- data_validation_report()
+
+  validate(cpi, name = "CPI output data validation") |>
+    validate_if(is.character(country_code),
+                description = "`country_code` should be character") |>
+    validate_if(is.integer(cpi_year),
+                description = "`cpi_year` should be integer") |>
+    validate_if(is.numeric(survey_year),
+                description = "`survey_year` should be numeric") |>
+    validate_if(is.numeric(cpi),
+                description = "`cpi` should be numeric") |>
+    validate_if(is.numeric(ccf),
+                description = "`ccf` should be numeric") |>
+    validate_if(is.character(survey_acronym),
+                description = "`survey_acronym` should be character") |>
+    validate_if(is.numeric(change_cpi2011),
+                description = "`change_cpi2011` should be numeric") |>
+    validate_cols(in_set(c(0, 1)), change_cpi2011,
+                  description = "`change_cpi2011` values within range") |>
+    validate_if(is.character(cpi_domain),
+                description = "`cpi_domain` should be character") |>
+    # description previously misspelled the column name as `cpi_domian`
+    validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain,
+                  description = "`cpi_domain` values within range") |>
+    validate_if(is.numeric(cpi_domain_value),
+                description = "`cpi_domain_value` should be numeric") |>
+    validate_cols(in_set(c(0, 1)), cpi_domain_value,
+                  description = "`cpi_domain_value` values within range") |>
+    validate_if(is.numeric(cpi2017_unadj),
+                description = "`cpi2017_unadj` should be numeric") |>
+    validate_if(is.numeric(cpi2011_unadj),
+                description = "`cpi2011_unadj` should be numeric") |>
+    validate_if(is.numeric(cpi2011),
+                description = "`cpi2011` should be numeric") |>
+    validate_if(is.numeric(cpi2017),
+                description = "`cpi2017` should be numeric") |>
+    validate_if(is.numeric(cpi2011_SM22),
+                description = "`cpi2011_SM22` should be numeric") |>
+    validate_if(is.numeric(cpi2017_SM22),
+                description = "`cpi2017_SM22` should be numeric") |>
+    validate_cols(is.logical, cpi2005,
+                  description = "`cpi2005` should be logical") |>
+    validate_if(is.character(cpi_data_level),
+                description = "`cpi_data_level` should be character") |>
+    validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level,
+                  description = "`cpi_data_level` values within range") |>
+    validate_if(is.numeric(cpi2011_AM23),
+                description = "`cpi2011_AM23` should be numeric") |>
+    validate_if(is.numeric(cpi2017_AM23),
+                description = "`cpi2017_AM23` should be numeric") |>
+    validate_if(is.character(cpi_id),
+                description = "`cpi_id` should be character") |>
+    # key variables: country_code, cpi_year, survey_acronym, cpi_data_level
+    validate_cols(not_na, country_code, cpi_year, survey_acronym, cpi_data_level,
+                  description = "no missing values in key variables") |>
+    validate_if(is_uniq(country_code, cpi_year, survey_acronym,
+                        cpi_data_level),
+                description = "no duplicate records in key variables") |>
+    add_results(report)
+
+  # collect the results of every check as a data.table
+  validation_record <- get_results(report, unnest = FALSE) |>
+    setDT()
+
+  if (any(validation_record[["type"]] == "error")){
+    get_error_validation(validation_record, detail)
+  }
+}