PIP-Technical-Team · Tefera19 · Jan 29, 2024 · Jan 30, 2024 · Feb 6, 2024 · Mar 4, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@ tmp/
 .vscode/
 ^codecov\.yml$
 ^data-raw$
+^vignettes/articles$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,7 +28,9 @@ Suggests:
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,
-    covr
+    covr,
+    dm,
+    config
 Imports: 
     haven,
     digest,
@@ -50,10 +52,12 @@ Imports:
     httr,
     pipfun (>= 0.0.0.9007),
     lifecycle,
+    collapse,
     joyn,
-    dm,
-    config,
-    collapse
+    data.validator,
+    assertr,
+    blastula,
+    rlang
 VignetteBuilder: knitr
 Remotes: 
     github::PIP-Technical-Team/pipload@ongoing,

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,8 +2,28 @@
 
 export("%>%")
 export(auto_aux_update)
+export(aux_data)
+export(cl_validate_raw)
+export(clean_validation_report)
+export(countries_validate_output)
+export(cpi_validate_output)
+export(cpi_validate_raw)
 export(draw_model)
+export(gdm_validate_output)
+export(gdm_validate_raw)
+export(gdp_validate_output)
+export(get_error_validation)
+export(incgroup_validate_output)
 export(load_aux)
+export(merger_aux)
+export(metadata_validate_output)
+export(metadata_validate_raw)
+export(mpd_validate_raw)
+export(npl_validate_output)
+export(npl_validate_raw)
+export(pce_validate_output)
+export(pfw_validate_output)
+export(pfw_validate_raw)
 export(pip_censoring)
 export(pip_countries)
 export(pip_country_list)
@@ -12,6 +32,7 @@ export(pip_cpi)
 export(pip_dictionary)
 export(pip_gdm)
 export(pip_gdp)
+export(pip_gdp_weo)
 export(pip_income_groups)
 export(pip_indicators)
 export(pip_maddison)
@@ -21,21 +42,41 @@ export(pip_missing_data)
 export(pip_npl)
 export(pip_pce)
 export(pip_pfw)
+export(pip_pfw_key)
 export(pip_pl)
 export(pip_pl_clean)
 export(pip_pop)
 export(pip_ppp)
+export(pip_prices)
 export(pip_regions)
 export(pip_sna)
 export(pip_update_all_aux)
 export(pip_wdi)
 export(pip_wdi_update)
 export(pip_weo)
 export(pip_weo_clean)
+export(pl_validate_output)
+export(pop_validate_output)
+export(pop_validate_raw)
+export(popmain_validate_raw)
+export(ppp_validate_output)
+export(ppp_validate_raw)
+export(send_report)
+export(sna_fy_validate_raw)
+export(sna_validate_raw)
+export(spop_validate_raw)
 export(update_aux)
+export(wdi_validate_raw)
+export(weo_validate_output)
+export(weo_validate_raw)
+import(blastula)
 import(collapse, except = fdroplevels)
 import(data.table)
 import(data.table, except = fdroplevels)
+import(data.validator)
+importFrom(assertr,in_set)
+importFrom(assertr,is_uniq)
+importFrom(assertr,not_na)
 importFrom(glue,glue)
 importFrom(lifecycle,deprecated)
 importFrom(magrittr,"%<>%")

diff --git a/R/aaa.R b/R/aaa.R
@@ -0,0 +1 @@
+.pipaux <-  new.env(parent = emptyenv()) # package-level environment; clean_validation_report() removes its `validation_report` binding
diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R
@@ -29,6 +29,8 @@ auto_aux_update <- function(measure = NULL,
     )
   }
 
+  # if there is validation report in the environment - remove it
+  clean_validation_report()
 
   assertthat::assert_that(Sys.getenv("GITHUB_PAT") != "",
                           msg = "Enviroment variable `GITHUB_PAT` is empty.

diff --git a/R/aux_data_files.R b/R/aux_data_files.R
@@ -0,0 +1,136 @@
+#' Attach key values to an auxiliary file
+#'
+#' The renaming and key columns are chosen from the *name* of the object
+#' passed as `aux_file` (e.g. `aux_data(cpi)`), so the object must be named
+#' after its auxiliary measure. Column names and keys are set by reference.
+#'
+#' @param aux_file auxiliary file (a data.table)
+#'
+#' @return data.table with key values; `aux_file` unchanged for other names
+#' @export
+aux_data <- function(aux_file){
+
+  # name of the object supplied by the caller, captured once up front so it
+  # is still available after `aux_file` is reassigned
+  aux_name <- deparse(substitute(aux_file))
+
+  # list of possible auxiliary keys --------------------------------------------
+  keycolsg0 <- c("country_code") # countries, country_list
+  keycolsg1 <- c("country_code", "surveyid_year") # maddison, weo, npl, income_group
+  keycolsg2 <- c("country_code", "surveyid_year", "reporting_level")  # gdp, pce, pop
+  keycolsg3 <- c("country_code", "surveyid_year", "reporting_level") # gdm
+  keycolsg4 <- c("country_code", "surveyid_year", "survey_acronym", "reporting_level") # cpi
+  keypfw <- c("country_code", "surveyid_year", "year", "survey_acronym",
+              "reporting_level")
+
+  # list of all the auxiliary files
+  aux_file_names <- c("pfw", "cpi", "gdp", "gdm", "pce", "pop", "ppp", "maddison",
+                      "weo", "npl", "countries", "country_list", "regions",
+                      "income_groups", "metadata")
+
+  # anything that is not a known auxiliary file is returned untouched
+  if (length(aux_name) != 1L || !(aux_name %chin% aux_file_names)) {
+    return(aux_file)
+  }
+
+  if (aux_name == "pfw"){
+
+    # pfw  ---------------------------------------------------------------------
+    # key the table that was passed in, not a global object called `pfw`
+    setkeyv(aux_file,
+            c("country_code", "survey_year", "survey_acronym", "cpi_domain"))
+
+    # generate a dataset that can be used to add reporting_level variable to pfw data
+    pfw_key <- pip_pfw_key()
+
+    aux_file <- pfw_key[aux_file] |>
+      setkeyv(keypfw)
+
+  } else if (aux_name == "ppp"){
+
+    # ppp --------------------------------------------------------------------
+    # filter ppp based on default ppp value
+    aux_file <- aux_file[ppp_default == TRUE,
+                         .(country_code, ppp_year, ppp, ppp_data_level)]
+
+    setnames(aux_file, "ppp_data_level", "reporting_level",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, c("country_code", "reporting_level"))
+
+  } else if (aux_name == "cpi"){
+
+    # cpi --------------------------------------------------------------------
+    # rename two variables cpi_year to surveyid_year and cpi_data_level to reporting_level
+    setnames(aux_file,
+             c("cpi_year", "cpi_data_level"),
+             c("surveyid_year", "reporting_level"),
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg4)
+
+  } else if (aux_name == "gdm"){
+
+    # gdm --------------------------------------------------------------------
+    # rename pop_data_level to reporting_level, which is part of the key
+    # set below
+    setnames(aux_file, "pop_data_level", "reporting_level",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg3)
+
+  } else if (aux_name == "npl"){
+
+    # npl --------------------------------------------------------------------
+    setnames(aux_file, "reporting_year", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name == "income_groups"){
+
+    # income_groups ----------------------------------------------------------
+    # rename year_data into surveyid_year
+    setnames(aux_file, "year_data", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name %chin% c("countries", "country_list")){
+
+    # countries and country_list ---------------------------------------------
+    setkeyv(aux_file, keycolsg0)
+
+  } else if (aux_name == "metadata"){
+
+    # metadata ---------------------------------------------------------------
+    # NOTE(review): `keycolsg5` is not defined in this function; unless it
+    # exists elsewhere in the package this branch stops with "object not
+    # found" - TODO define the metadata key columns
+    setkeyv(aux_file, keycolsg5)
+
+  } else if (aux_name %chin% c("maddison", "weo")){
+
+    # auxiliary datasets - group 1 (maddison and weo) ------------------------
+    setnames(aux_file, "year", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name %chin% c("gdp", "pop", "pce")){
+
+    # auxiliary datasets - group 2 (gdp, pop, pce) ---------------------------
+    aux_data_level <- paste0(aux_name, "_data_level")
+
+    setnames(aux_file,
+             c(aux_data_level, "year"),
+             c("reporting_level", "surveyid_year"),
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg2)
+
+  }
+
+  aux_file
+}
+
diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R
@@ -0,0 +1,73 @@
+#' Validate raw country list data
+#'
+#' Runs type, value-range, missing-value and uniqueness checks on `cl` and
+#' passes the results to `get_error_validation()` if any check errors.
+#' @param cl raw country list data, as loaded via `pipfun::load_from_gh`
+#' @param detail TRUE/FALSE, passed on to `get_error_validation()`; defaults
+#'   to the `pipaux.detail.raw` option
+#' @return value of `get_error_validation()` when a check errors; otherwise
+#'   `NULL`, invisibly
+#' @import data.validator
+#' @importFrom assertr in_set not_na is_uniq
+#' @keywords internal
+#'
+#' @export
+cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){
+
+  stopifnot("Country list raw data is not loaded" = !is.null(cl))
+
+  report <- data_validation_report()
+
+  # The pfw country codes are not downloaded here any more: their only use is
+  # the disabled `in_set(country_list)` check below, so the download merely
+  # added a network dependency to every validation run.
+
+  validate(cl, name = "CL raw data validation") |>
+    validate_if(is.character(country_code),
+                description = "`country_code` should be character") |>
+    # validate_cols(in_set(country_list),
+    #               country_code, description = "`country_code` values within range") |>
+    validate_if(is.character(country_name),
+                description = "`country_name` should be character") |>
+    validate_if(is.character(africa_split),
+                description = "`africa_split` should be character") |>
+    validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
+                  africa_split, description = "`africa_split` values within range") |>
+    validate_if(is.character(africa_split_code),
+                description = "`africa_split_code` should be character") |>
+    validate_cols(in_set(c("AFE", "AFW", NA)),
+                  africa_split_code, description = "`africa_split_code` values within range") |>
+    validate_if(is.character(pcn_region),
+                description = "`pcn_region` should be character") |>
+    validate_if(is.character(pcn_region_code),
+                description = "`pcn_region_code` should be character") |>
+    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+                  pcn_region_code, description = "`pcn_region_code` values within range") |>
+    validate_if(is.character(region),
+                description = "`region` should be character") |>
+    validate_if(is.character(region_code),
+                description = "`region_code` should be character") |>
+    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
+                  region_code, description = "`region_code` values within range") |>
+    validate_if(is.character(world),
+                description = "`world` should be character") |>
+    validate_cols(in_set(c("World")),
+                  world, description = "`world` values within range") |>
+    validate_if(is.character(world_code),
+                description = "`world_code` should be character") |>
+    validate_cols(in_set(c("WLD")),
+                  world_code, description = "`world_code` values within range") |>
+    validate_cols(not_na, country_code,
+                  description = "no missing values in key variables") |>
+    validate_if(is_uniq(country_code),
+                description = "no duplicate records in key variables") |>
+    add_results(report)
+
+  validation_record <- get_results(report, unnest = FALSE) |>
+    setDT()
+
+  if (any(validation_record[["type"]] == "error")){
+    get_error_validation(validation_record, detail)
+  }
+}
+
diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R
@@ -0,0 +1,12 @@
+#' Drop any stored validation report from the `.pipaux` environment
+#'
+#' @return Called for its side effect of removing the `validation_report` binding.
+#' @export
+clean_validation_report <- function() {
+  report_name <- "validation_report"
+  # nothing to clean up when no report is bound
+  if (!rlang::env_has(.pipaux, report_name)) {
+    return(invisible(NULL))
+  }
+  rlang::env_unbind(.pipaux, report_name)
+}