From 2a33ead6f0982a3c98f7fe2b281f9dfae83f005e Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 21 Nov 2024 17:00:32 -0500 Subject: [PATCH 01/20] changing prefixes part 1 --- NAMESPACE | 24 +++++----- R/{pip_censoring.R => aux_censoring.R} | 4 +- ...ist_update.R => aux_country_list_update.R} | 2 +- R/{pip_cp.R => aux_cp.R} | 6 +-- R/{pip_cp_clean.R => aux_cp_clean.R} | 2 +- R/{pip_cp_update.R => aux_cp_update.R} | 6 +-- R/{pip_cpi_clean.R => aux_cpi_clean.R} | 4 +- R/{pip_cpi_vintage.R => aux_cpi_vintage.R} | 4 +- R/{pip_dictionary.R => aux_dictionary.R} | 4 +- R/{pip_metaregion.R => aux_metaregion.R} | 4 +- R/{pip_missing_data.R => aux_missing_data.R} | 4 +- R/{pip_nan.R => aux_nan.R} | 4 +- R/{pip_pfw_clean.R => aux_pfw_clean.R} | 4 +- R/{pip_pl_clean.R => aux_pl_clean.R} | 2 +- R/{pip_ppp_clean.R => aux_ppp_clean.R} | 4 +- R/{pip_sign_save.R => aux_sign_save.R} | 4 +- R/{sna.R => aux_sna.R} | 4 +- R/aux_update_all.R | 46 +++++++++++++++++++ R/{pip_sna.R => fake_aux_sna.R} | 8 ++-- R/pip_pfw.R | 2 +- R/pip_update_all_aux.R | 46 ------------------- man/{pip_censoring.Rd => aux_censoring.Rd} | 8 ++-- ...t_update.Rd => aux_country_list_update.Rd} | 8 ++-- man/aux_cp.Rd | 27 +++++++++++ man/{pip_cp_clean.Rd => aux_cp_clean.Rd} | 8 ++-- man/{pip_cp_update.Rd => aux_cp_update.Rd} | 12 ++--- man/{pip_cpi_clean.Rd => aux_cpi_clean.Rd} | 10 ++-- ...{pip_cpi_vintage.Rd => aux_cpi_vintage.Rd} | 10 ++-- man/{pip_dictionary.Rd => aux_dictionary.Rd} | 8 ++-- man/{pip_metaregion.Rd => aux_metaregion.Rd} | 16 ++----- ...ip_missing_data.Rd => aux_missing_data.Rd} | 16 ++----- man/{pip_nan.Rd => aux_nan.Rd} | 8 ++-- man/{pip_pfw.Rd => aux_pfw.Rd} | 6 +-- man/{pip_pfw_clean.Rd => aux_pfw_clean.Rd} | 12 ++--- man/{pip_pl_clean.Rd => aux_pl_clean.Rd} | 8 ++-- man/{pip_ppp_clean.Rd => aux_ppp_clean.Rd} | 10 ++-- man/{pip_sign_save.Rd => aux_sign_save.Rd} | 10 ++-- man/{pip_sna.Rd => aux_sna.Rd} | 19 ++------ ...ip_update_all_aux.Rd => aux_update_all.Rd} | 12 ++--- 
man/clean_cp_names.Rd | 2 +- man/{pip_cp.Rd => fake_aux_sna.Rd} | 18 ++++---- man/load_aux.Rd | 2 - man/load_cpi.Rd | 2 +- man/merger_aux.Rd | 7 +-- man/pip_country_list.Rd | 7 --- man/pip_gdp.Rd | 7 --- man/pip_gdp_update.Rd | 4 -- man/pip_indicators.Rd | 7 --- man/pip_maddison.Rd | 7 --- man/pip_metadata.Rd | 15 ------ man/pip_metadata_update.Rd | 4 -- man/pip_pce.Rd | 7 --- man/pip_pce_update.Rd | 4 -- man/pip_pfw_update.Rd | 6 --- man/pip_pl.Rd | 7 --- man/pip_ppp.Rd | 7 --- man/pip_wdi.Rd | 7 --- man/pip_wdi_update.Rd | 4 -- man/pip_weo.Rd | 7 --- man/save_aux_to_gh.Rd | 41 +++++++++++++++++ 60 files changed, 250 insertions(+), 318 deletions(-) rename R/{pip_censoring.R => aux_censoring.R} (95%) rename R/{pip_country_list_update.R => aux_country_list_update.R} (99%) rename R/{pip_cp.R => aux_cp.R} (86%) rename R/{pip_cp_clean.R => aux_cp_clean.R} (99%) rename R/{pip_cp_update.R => aux_cp_update.R} (95%) rename R/{pip_cpi_clean.R => aux_cpi_clean.R} (95%) rename R/{pip_cpi_vintage.R => aux_cpi_vintage.R} (94%) rename R/{pip_dictionary.R => aux_dictionary.R} (92%) rename R/{pip_metaregion.R => aux_metaregion.R} (93%) rename R/{pip_missing_data.R => aux_missing_data.R} (98%) rename R/{pip_nan.R => aux_nan.R} (93%) rename R/{pip_pfw_clean.R => aux_pfw_clean.R} (94%) rename R/{pip_pl_clean.R => aux_pl_clean.R} (97%) rename R/{pip_ppp_clean.R => aux_ppp_clean.R} (94%) rename R/{pip_sign_save.R => aux_sign_save.R} (98%) rename R/{sna.R => aux_sna.R} (93%) create mode 100644 R/aux_update_all.R rename R/{pip_sna.R => fake_aux_sna.R} (79%) delete mode 100644 R/pip_update_all_aux.R rename man/{pip_censoring.Rd => aux_censoring.Rd} (89%) rename man/{pip_country_list_update.Rd => aux_country_list_update.Rd} (56%) create mode 100644 man/aux_cp.Rd rename man/{pip_cp_clean.Rd => aux_cp_clean.Rd} (69%) rename man/{pip_cp_update.Rd => aux_cp_update.Rd} (74%) rename man/{pip_cpi_clean.Rd => aux_cpi_clean.Rd} (77%) rename man/{pip_cpi_vintage.Rd => aux_cpi_vintage.Rd} (66%) 
rename man/{pip_dictionary.Rd => aux_dictionary.Rd} (89%) rename man/{pip_metaregion.Rd => aux_metaregion.Rd} (62%) rename man/{pip_missing_data.Rd => aux_missing_data.Rd} (65%) rename man/{pip_nan.Rd => aux_nan.Rd} (92%) rename man/{pip_pfw.Rd => aux_pfw.Rd} (96%) rename man/{pip_pfw_clean.Rd => aux_pfw_clean.Rd} (55%) rename man/{pip_pl_clean.Rd => aux_pl_clean.Rd} (73%) rename man/{pip_ppp_clean.Rd => aux_ppp_clean.Rd} (56%) rename man/{pip_sign_save.Rd => aux_sign_save.Rd} (75%) rename man/{pip_sna.Rd => aux_sna.Rd} (75%) rename man/{pip_update_all_aux.Rd => aux_update_all.Rd} (59%) rename man/{pip_cp.Rd => fake_aux_sna.Rd} (77%) create mode 100644 man/save_aux_to_gh.Rd diff --git a/NAMESPACE b/NAMESPACE index 3db08a9..5fd9f41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,14 +2,25 @@ export("%>%") export(auto_aux_update) +export(aux_censoring) +export(aux_cp) export(aux_data) +export(aux_dictionary) +export(aux_metaregion) +export(aux_missing_data) +export(aux_nan) +export(aux_pfw) +export(aux_pl_clean) +export(aux_sna) +export(aux_update_all) export(cl_validate_raw) export(clean_validation_report) +export(convert_df_to_base64) export(countries_validate_output) export(cpi_validate_output) export(cpi_validate_raw) -export(convert_df_to_base64) export(draw_model) +export(fake_aux_sna) export(gdm_validate_output) export(gdm_validate_raw) export(gdp_validate_output) @@ -25,12 +36,9 @@ export(npl_validate_raw) export(pce_validate_output) export(pfw_validate_output) export(pfw_validate_raw) -export(pip_censoring) export(pip_countries) export(pip_country_list) -export(pip_cp) export(pip_cpi) -export(pip_dictionary) export(pip_gdm) export(pip_gdp) export(pip_gdp_weo) @@ -39,21 +47,14 @@ export(pip_indicators) export(pip_maddison) export(pip_metadata) export(pip_metadata_update) -export(pip_metaregion) -export(pip_missing_data) -export(pip_nan) export(pip_npl) export(pip_pce) -export(pip_pfw) export(pip_pfw_key) export(pip_pl) -export(pip_pl_clean) export(pip_pop) 
export(pip_ppp) export(pip_prices) export(pip_regions) -export(pip_sna) -export(pip_update_all_aux) export(pip_wdi) export(pip_wdi_update) export(pip_weo) @@ -64,6 +65,7 @@ export(pop_validate_raw) export(popmain_validate_raw) export(ppp_validate_output) export(ppp_validate_raw) +export(save_aux_to_gh) export(send_report) export(sna_fy_validate_raw) export(sna_validate_raw) diff --git a/R/pip_censoring.R b/R/aux_censoring.R similarity index 95% rename from R/pip_censoring.R rename to R/aux_censoring.R index d5cc657..f239e03 100644 --- a/R/pip_censoring.R +++ b/R/aux_censoring.R @@ -3,10 +3,10 @@ #' Load or update censoring data #' #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_censoring <- function(action = c("update", "load"), +aux_censoring <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_country_list_update.R b/R/aux_country_list_update.R similarity index 99% rename from R/pip_country_list_update.R rename to R/aux_country_list_update.R index b696bc2..ff5b861 100644 --- a/R/pip_country_list_update.R +++ b/R/aux_country_list_update.R @@ -2,7 +2,7 @@ #' #' @param class_branch character: names of branch of GPID-WB/class repo. 
Default #' if master -pip_country_list_update <- +aux_country_list_update <- function(class_branch = "master") { # Check arguments diff --git a/R/pip_cp.R b/R/aux_cp.R similarity index 86% rename from R/pip_cp.R rename to R/aux_cp.R index 2a65be8..cedcc6f 100644 --- a/R/pip_cp.R +++ b/R/aux_cp.R @@ -2,10 +2,10 @@ #' #' Update a list with country profiles data #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_cp <- function(action = c("update", "load"), +aux_cp <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -16,7 +16,7 @@ pip_cp <- function(action = c("update", "load"), action <- match.arg(action) if (action == "update") { - pip_cp_update(maindir = maindir, + aux_cp_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_cp_clean.R b/R/aux_cp_clean.R similarity index 99% rename from R/pip_cp_clean.R rename to R/aux_cp_clean.R index 350ebf6..d746022 100644 --- a/R/pip_cp_clean.R +++ b/R/aux_cp_clean.R @@ -4,7 +4,7 @@ #' @param file_names character: vector with names of files #' #' @return data.table -pip_cp_clean <- function(x, +aux_cp_clean <- function(x, file_names) { # ____________________________________________________________________ diff --git a/R/pip_cp_update.R b/R/aux_cp_update.R similarity index 95% rename from R/pip_cp_update.R rename to R/aux_cp_update.R index 13d62c3..62ac0e7 100644 --- a/R/pip_cp_update.R +++ b/R/aux_cp_update.R @@ -2,9 +2,9 @@ #' #' Update a list with country profiles data #' -#' @inheritParams pip_cp +#' @inheritParams aux_cp #' @keywords internal -pip_cp_update <- function(maindir = gls$PIP_DATA_DIR, +aux_cp_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -38,7 +38,7 @@ pip_cp_update <- function(maindir = gls$PIP_DATA_DIR, }) - dl <- pip_cp_clean(raw_files, + dl <- 
aux_cp_clean(raw_files, file_names) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## download files -------- diff --git a/R/pip_cpi_clean.R b/R/aux_cpi_clean.R similarity index 95% rename from R/pip_cpi_clean.R rename to R/aux_cpi_clean.R index 17d6524..cdf4b09 100644 --- a/R/pip_cpi_clean.R +++ b/R/aux_cpi_clean.R @@ -2,13 +2,13 @@ #' #' Clean CPI data from Datalibweb to meet PIP protocols. #' -#' @param y dataset with CPI data from `pip_cpi_update()`. +#' @param y dataset with CPI data from `aux_cpi_update()`. #' @param cpivar character: CPI variable to be used as default. Currently it is #' "cpi2011". #' @inheritParams pip_cpi_update #' #' @keywords internal -pip_cpi_clean <- function(y, +aux_cpi_clean <- function(y, cpivar = getOption("pipaux.cpivar"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) { diff --git a/R/pip_cpi_vintage.R b/R/aux_cpi_vintage.R similarity index 94% rename from R/pip_cpi_vintage.R rename to R/aux_cpi_vintage.R index d761193..76d65d0 100644 --- a/R/pip_cpi_vintage.R +++ b/R/aux_cpi_vintage.R @@ -2,10 +2,10 @@ #' #' @param msrdir character: measure directory. #' @param dlwdir character: Datalibweb directory -#' @param force logical: If TRUE force update of veintage level 1. +#' @param force logical: If TRUE force update of vintage level 1. #' #' @keywords internal -pip_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), +aux_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), force = FALSE) { time <- format(Sys.time(), "%Y%m%d%H%M%S") # find a way to account for time zones diff --git a/R/pip_dictionary.R b/R/aux_dictionary.R similarity index 92% rename from R/pip_dictionary.R rename to R/aux_dictionary.R index 513ca0f..c5c129b 100644 --- a/R/pip_dictionary.R +++ b/R/aux_dictionary.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with the indicators master sheet. 
#' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_dictionary <- function(action = c("update", "load"), +aux_dictionary <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_metaregion.R b/R/aux_metaregion.R similarity index 93% rename from R/pip_metaregion.R rename to R/aux_metaregion.R index 57a6d89..e8a44b6 100644 --- a/R/pip_metaregion.R +++ b/R/aux_metaregion.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with regions. #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_metaregion <- function(action = c("update", "load"), +aux_metaregion <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/pip_missing_data.R b/R/aux_missing_data.R similarity index 98% rename from R/pip_missing_data.R rename to R/aux_missing_data.R index ce514eb..e8607da 100644 --- a/R/pip_missing_data.R +++ b/R/aux_missing_data.R @@ -1,12 +1,12 @@ #' Create table with missing countries #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' #' @return if `action = "update"` returns logical. If `action = "load"` returns #' a data.table #' @export -pip_missing_data <- function(action = c("update", "load"), +aux_missing_data <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_nan.R b/R/aux_nan.R similarity index 93% rename from R/pip_nan.R rename to R/aux_nan.R index 9b51351..7f9afa6 100644 --- a/R/pip_nan.R +++ b/R/aux_nan.R @@ -2,12 +2,12 @@ #' #' Update nowcast data #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". 
"file" #' and "gh" are synonymous #' @export -pip_nan <- function(action = c("update", "load"), +aux_nan <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/pip_pfw_clean.R b/R/aux_pfw_clean.R similarity index 94% rename from R/pip_pfw_clean.R rename to R/aux_pfw_clean.R index 25019f8..12590b6 100644 --- a/R/pip_pfw_clean.R +++ b/R/aux_pfw_clean.R @@ -2,11 +2,11 @@ #' #' Clean PFW data from Datalibweb to meet PIP protocols. #' -#' @param y dataset with PPP data from `pip_pfw_update()`. +#' @param y dataset with PPP data from `aux_pfw_update()`. #' @inheritParams load_aux #' #' @keywords internal -pip_pfw_clean <- function(y, +aux_pfw_clean <- function(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) { diff --git a/R/pip_pl_clean.R b/R/aux_pl_clean.R similarity index 97% rename from R/pip_pl_clean.R rename to R/aux_pl_clean.R index ba4cc8e..8236df7 100644 --- a/R/pip_pl_clean.R +++ b/R/aux_pl_clean.R @@ -4,7 +4,7 @@ #' #' @return data.table #' @export -pip_pl_clean <- function(l) { +aux_pl_clean <- function(l) { # ____________________________________________________________________________ diff --git a/R/pip_ppp_clean.R b/R/aux_ppp_clean.R similarity index 94% rename from R/pip_ppp_clean.R rename to R/aux_ppp_clean.R index b9efa69..6c93c70 100644 --- a/R/pip_ppp_clean.R +++ b/R/aux_ppp_clean.R @@ -1,10 +1,10 @@ #' Clean PPP data from datalibweb to meet PIP protocols #' -#' @param y dataset with PPP data from `pip_ppp_update()`. +#' @param y dataset with PPP data from `aux_ppp_update()`. #' @param default_year numeric: ICP round year. 
Default is 2011 #' #' @keywords internal -pip_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { +aux_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { x <- data.table::as.data.table(y) y <- melt(x, diff --git a/R/pip_sign_save.R b/R/aux_sign_save.R similarity index 98% rename from R/pip_sign_save.R rename to R/aux_sign_save.R index f8d97ac..a967e44 100644 --- a/R/pip_sign_save.R +++ b/R/aux_sign_save.R @@ -5,14 +5,14 @@ #' This function is deprecated because of the new, more flexible and general #' function `pipfun::pip_sign_save()` #' @param x data.frame Data frame to be signed and saved. -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @param msrdir character: Directory where the data and data signature will be #' saved. #' @param save_dta logical: If TRUE a Stata (.dta) version of the dataset is #' also saved. #' @keywords internal #' @return logical -pip_sign_save <- function(x, +aux_sign_save <- function(x, measure, msrdir, force = FALSE, diff --git a/R/sna.R b/R/aux_sna.R similarity index 93% rename from R/sna.R rename to R/aux_sna.R index 263de85..e7b2745 100644 --- a/R/sna.R +++ b/R/aux_sna.R @@ -2,12 +2,12 @@ #' #' Update special national accounts data #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". "file" #' and "gh" are synonymous #' @export -pip_sna <- function(action = c("update", "load"), +aux_sna <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/aux_update_all.R b/R/aux_update_all.R new file mode 100644 index 0000000..daa3b38 --- /dev/null +++ b/R/aux_update_all.R @@ -0,0 +1,46 @@ +#' Update all auxiliary data at once +#' +#' @inheritParams aux_cpi +#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. 
+#' @export +aux_update_all <- function(force = FALSE, + popsrc = getOption("pipaux.popsrc"), + maindir = gls$PIP_DATA_DIR) { + + # List of countries in WDI + aux_country_list(force = force, maindir = maindir) + + # PIP countries and regions + aux_countries(force = force, maindir = maindir) + aux_regions(force = force, maindir = maindir) + + # PIP Indicators + aux_indicators(force = force, maindir = maindir) + + # Poverty lines + aux_pl(force = force, maindir = maindir) + + # PFW, CPI and PPP from DLW + aux_pfw(force = force, maindir = maindir) + aux_cpi(force = force, maindir = maindir) + aux_ppp(force = force, maindir = maindir) + + # POP from Emi or WDI + aux_pop(force = force, maindir = maindir, src = popsrc) + + # GDP from WEO, Maddison and WDI (+ a few special cases) + aux_weo(force = force, maindir = maindir) + aux_maddison(force = force, maindir = maindir) + aux_gdp(force = force, maindir = maindir) + + # PCE from WDI (+ a few special cases) + aux_pce(force = force, maindir = maindir) + + # Country profiles (from Poverty GP) + aux_cp(force = force, maindir = maindir) + + # Survey metadata (from Poverty GP) + aux_metadata(force = force, maindir = maindir) + + return(invisible()) +} diff --git a/R/pip_sna.R b/R/fake_aux_sna.R similarity index 79% rename from R/pip_sna.R rename to R/fake_aux_sna.R index 37ee45a..f0da8c8 100644 --- a/R/pip_sna.R +++ b/R/fake_aux_sna.R @@ -1,16 +1,16 @@ #' Fake PIP SNA function #' -#' @inheritParams pip_gdp -#' @inheritParams pip_pfw +#' @inheritParams aux_gdp +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_sna <- function(action = c("update", "load"), +fake_aux_sna <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), tag = match.arg(branch), from = c("gh", "file", "api")) { - + return(invisible(TRUE)) } diff --git a/R/pip_pfw.R b/R/pip_pfw.R index 3c20bff..7c76a9a 100644 --- a/R/pip_pfw.R +++ 
b/R/pip_pfw.R @@ -10,7 +10,7 @@ #' @inheritParams pipfun::load_from_gh #' @export #' @import data.table -pip_pfw <- function(action = c("update", "load"), +aux_pfw <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_update_all_aux.R b/R/pip_update_all_aux.R deleted file mode 100644 index 4f42e85..0000000 --- a/R/pip_update_all_aux.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Update all auxiliary data at once -#' -#' @inheritParams pip_cpi -#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. -#' @export -pip_update_all_aux <- function(force = FALSE, - popsrc = getOption("pipaux.popsrc"), - maindir = gls$PIP_DATA_DIR) { - - # List of countries in WDI - pip_country_list(force = force, maindir = maindir) - - # PIP countries and regions - pip_countries(force = force, maindir = maindir) - pip_regions(force = force, maindir = maindir) - - # PIP Indicators - pip_indicators(force = force, maindir = maindir) - - # Poverty lines - pip_pl(force = force, maindir = maindir) - - # PFW, CPI and PPP from DLW - pip_pfw(force = force, maindir = maindir) - pip_cpi(force = force, maindir = maindir) - pip_ppp(force = force, maindir = maindir) - - # POP from Emi or WDI - pip_pop(force = force, maindir = maindir, src = popsrc) - - # GDP from WEO, Maddison and WDI (+ a few special cases) - pip_weo(force = force, maindir = maindir) - pip_maddison(force = force, maindir = maindir) - pip_gdp(force = force, maindir = maindir) - - # PCE from WDI (+ a few special cases) - pip_pce(force = force, maindir = maindir) - - # Country profiles (from Poverty GP) - pip_cp(force = force, maindir = maindir) - - # Survey metadata (from Poverty GP) - pip_metadata(force = force, maindir = maindir) - - return(invisible()) -} diff --git a/man/pip_censoring.Rd b/man/aux_censoring.Rd similarity index 89% rename from man/pip_censoring.Rd rename to man/aux_censoring.Rd index 02e7923..260c872 
100644 --- a/man/pip_censoring.Rd +++ b/man/aux_censoring.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_censoring.R -\name{pip_censoring} -\alias{pip_censoring} +% Please edit documentation in R/aux_censoring.R +\name{aux_censoring} +\alias{aux_censoring} \title{Censoring data} \usage{ -pip_censoring( +aux_censoring( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_country_list_update.Rd b/man/aux_country_list_update.Rd similarity index 56% rename from man/pip_country_list_update.Rd rename to man/aux_country_list_update.Rd index 8a1c56d..e5f4a01 100644 --- a/man/pip_country_list_update.Rd +++ b/man/aux_country_list_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_country_list_update.R -\name{pip_country_list_update} -\alias{pip_country_list_update} +% Please edit documentation in R/aux_country_list_update.R +\name{aux_country_list_update} +\alias{aux_country_list_update} \title{Update Country LIst} \usage{ -pip_country_list_update(class_branch = "master") +aux_country_list_update(class_branch = "master") } \arguments{ \item{class_branch}{character: names of branch of GPID-WB/class repo. Default diff --git a/man/aux_cp.Rd b/man/aux_cp.Rd new file mode 100644 index 0000000..27264e8 --- /dev/null +++ b/man/aux_cp.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_cp.R +\name{aux_cp} +\alias{aux_cp} +\title{Country Profiles} +\usage{ +aux_cp( + action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch) +) +} +\arguments{ +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} +} +\description{ +Update a list with country profiles data +} diff --git a/man/pip_cp_clean.Rd b/man/aux_cp_clean.Rd similarity index 69% rename from man/pip_cp_clean.Rd rename to man/aux_cp_clean.Rd index 80b0fce..ab496a3 100644 --- a/man/pip_cp_clean.Rd +++ b/man/aux_cp_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_clean.R -\name{pip_cp_clean} -\alias{pip_cp_clean} +% Please edit documentation in R/aux_cp_clean.R +\name{aux_cp_clean} +\alias{aux_cp_clean} \title{Clean country profile data} \usage{ -pip_cp_clean(x, file_names) +aux_cp_clean(x, file_names) } \arguments{ \item{x}{database from pip_cp_update} diff --git a/man/pip_cp_update.Rd b/man/aux_cp_update.Rd similarity index 74% rename from man/pip_cp_update.Rd rename to man/aux_cp_update.Rd index 6ea06c5..f91fccd 100644 --- a/man/pip_cp_update.Rd +++ b/man/aux_cp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_update.R -\name{pip_cp_update} -\alias{pip_cp_update} +% Please edit documentation in R/aux_cp_update.R +\name{aux_cp_update} +\alias{aux_cp_update} \title{Update Country Profiles} \usage{ -pip_cp_update( +aux_cp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), @@ -13,10 +13,6 @@ pip_cp_update( ) } \arguments{ -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. 
Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_cpi_clean.Rd b/man/aux_cpi_clean.Rd similarity index 77% rename from man/pip_cpi_clean.Rd rename to man/aux_cpi_clean.Rd index 063dc31..8256dc1 100644 --- a/man/pip_cpi_clean.Rd +++ b/man/aux_cpi_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_clean.R -\name{pip_cpi_clean} -\alias{pip_cpi_clean} +% Please edit documentation in R/aux_cpi_clean.R +\name{aux_cpi_clean} +\alias{aux_cpi_clean} \title{Clean CPI data} \usage{ -pip_cpi_clean( +aux_cpi_clean( y, cpivar = getOption("pipaux.cpivar"), maindir = gls$PIP_DATA_DIR, @@ -12,7 +12,7 @@ pip_cpi_clean( ) } \arguments{ -\item{y}{dataset with CPI data from \code{pip_cpi_update()}.} +\item{y}{dataset with CPI data from \code{aux_cpi_update()}.} \item{cpivar}{character: CPI variable to be used as default. Currently it is "cpi2011".} diff --git a/man/pip_cpi_vintage.Rd b/man/aux_cpi_vintage.Rd similarity index 66% rename from man/pip_cpi_vintage.Rd rename to man/aux_cpi_vintage.Rd index f3bb49a..ffec5d5 100644 --- a/man/pip_cpi_vintage.Rd +++ b/man/aux_cpi_vintage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_vintage.R -\name{pip_cpi_vintage} -\alias{pip_cpi_vintage} +% Please edit documentation in R/aux_cpi_vintage.R +\name{aux_cpi_vintage} +\alias{aux_cpi_vintage} \title{Check CPI Vintage} \usage{ -pip_cpi_vintage( +aux_cpi_vintage( msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), force = FALSE @@ -15,7 +15,7 @@ pip_cpi_vintage( \item{dlwdir}{character: Datalibweb directory} -\item{force}{logical: If TRUE force update of veintage level 1.} +\item{force}{logical: If TRUE force update of vintage level 1.} } \description{ Check CPI Vintage diff --git a/man/pip_dictionary.Rd b/man/aux_dictionary.Rd similarity index 89% rename from man/pip_dictionary.Rd rename to man/aux_dictionary.Rd 
index 1998468..f42fe32 100644 --- a/man/pip_dictionary.Rd +++ b/man/aux_dictionary.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_dictionary.R -\name{pip_dictionary} -\alias{pip_dictionary} +% Please edit documentation in R/aux_dictionary.R +\name{aux_dictionary} +\alias{aux_dictionary} \title{PIP Dictionary} \usage{ -pip_dictionary( +aux_dictionary( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_metaregion.Rd b/man/aux_metaregion.Rd similarity index 62% rename from man/pip_metaregion.Rd rename to man/aux_metaregion.Rd index df279e8..1dd3587 100644 --- a/man/pip_metaregion.Rd +++ b/man/aux_metaregion.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metaregion.R -\name{pip_metaregion} -\alias{pip_metaregion} +% Please edit documentation in R/aux_metaregion.R +\name{aux_metaregion} +\alias{aux_metaregion} \title{Metadata for PIP regions} \usage{ -pip_metaregion( +aux_metaregion( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, @@ -14,14 +14,6 @@ pip_metaregion( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in -memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - \item{owner}{character: Github repo owner. 
Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_missing_data.Rd b/man/aux_missing_data.Rd similarity index 65% rename from man/pip_missing_data.Rd rename to man/aux_missing_data.Rd index c8611b5..5bb03a7 100644 --- a/man/pip_missing_data.Rd +++ b/man/aux_missing_data.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_missing_data.R -\name{pip_missing_data} -\alias{pip_missing_data} +% Please edit documentation in R/aux_missing_data.R +\name{aux_missing_data} +\alias{aux_missing_data} \title{Create table with missing countries} \usage{ -pip_missing_data( +aux_missing_data( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), @@ -14,17 +14,9 @@ pip_missing_data( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in -memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_nan.Rd b/man/aux_nan.Rd similarity index 92% rename from man/pip_nan.Rd rename to man/aux_nan.Rd index b63772d..4c713ca 100644 --- a/man/pip_nan.Rd +++ b/man/aux_nan.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_nan.R -\name{pip_nan} -\alias{pip_nan} +% Please edit documentation in R/aux_nan.R +\name{aux_nan} +\alias{aux_nan} \title{PIP nowcast data} \usage{ -pip_nan( +aux_nan( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_pfw.Rd b/man/aux_pfw.Rd similarity index 96% rename from man/pip_pfw.Rd rename to man/aux_pfw.Rd index d2f8a6e..1c4bbc4 100644 --- a/man/pip_pfw.Rd +++ b/man/aux_pfw.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/pip_pfw.R -\name{pip_pfw} -\alias{pip_pfw} +\name{aux_pfw} +\alias{aux_pfw} \title{PIP PFW} \usage{ -pip_pfw( +aux_pfw( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pfw_clean.Rd b/man/aux_pfw_clean.Rd similarity index 55% rename from man/pip_pfw_clean.Rd rename to man/aux_pfw_clean.Rd index 05a0dee..fcb9034 100644 --- a/man/pip_pfw_clean.Rd +++ b/man/aux_pfw_clean.Rd @@ -1,15 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_clean.R -\name{pip_pfw_clean} -\alias{pip_pfw_clean} +% Please edit documentation in R/aux_pfw_clean.R +\name{aux_pfw_clean} +\alias{aux_pfw_clean} \title{Clean PFW} \usage{ -pip_pfw_clean(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) +aux_pfw_clean(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) } \arguments{ -\item{y}{dataset with PPP data from \code{pip_pfw_update()}.} - -\item{maindir}{character: Main directory of project.} +\item{y}{dataset with PPP data from \code{aux_pfw_update()}.} 
\item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_pl_clean.Rd b/man/aux_pl_clean.Rd similarity index 73% rename from man/pip_pl_clean.Rd rename to man/aux_pl_clean.Rd index a210dae..beb1116 100644 --- a/man/pip_pl_clean.Rd +++ b/man/aux_pl_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pl_clean.R -\name{pip_pl_clean} -\alias{pip_pl_clean} +% Please edit documentation in R/aux_pl_clean.R +\name{aux_pl_clean} +\alias{aux_pl_clean} \title{Build a data table for each list from yaml file with poverty lines info} \usage{ -pip_pl_clean(l) +aux_pl_clean(l) } \arguments{ \item{l}{list from yaml file} diff --git a/man/pip_ppp_clean.Rd b/man/aux_ppp_clean.Rd similarity index 56% rename from man/pip_ppp_clean.Rd rename to man/aux_ppp_clean.Rd index 8418563..9768319 100644 --- a/man/pip_ppp_clean.Rd +++ b/man/aux_ppp_clean.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp_clean.R -\name{pip_ppp_clean} -\alias{pip_ppp_clean} +% Please edit documentation in R/aux_ppp_clean.R +\name{aux_ppp_clean} +\alias{aux_ppp_clean} \title{Clean PPP data from datalibweb to meet PIP protocols} \usage{ -pip_ppp_clean(y, default_year = getOption("pipaux.pppyear")) +aux_ppp_clean(y, default_year = getOption("pipaux.pppyear")) } \arguments{ -\item{y}{dataset with PPP data from \code{pip_ppp_update()}.} +\item{y}{dataset with PPP data from \code{aux_ppp_update()}.} \item{default_year}{numeric: ICP round year. 
Default is 2011} } diff --git a/man/pip_sign_save.Rd b/man/aux_sign_save.Rd similarity index 75% rename from man/pip_sign_save.Rd rename to man/aux_sign_save.Rd index 8124995..0b56f13 100644 --- a/man/pip_sign_save.Rd +++ b/man/aux_sign_save.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_sign_save.R -\name{pip_sign_save} -\alias{pip_sign_save} +% Please edit documentation in R/aux_sign_save.R +\name{aux_sign_save} +\alias{aux_sign_save} \title{Save PIP auxiliary data} \usage{ -pip_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) +aux_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) } \arguments{ \item{x}{data.frame Data frame to be signed and saved.} @@ -12,8 +12,6 @@ pip_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) \item{msrdir}{character: Directory where the data and data signature will be saved.} -\item{force}{logical: If TRUE data will be overwritten.} - \item{save_dta}{logical: If TRUE a Stata (.dta) version of the dataset is also saved.} } diff --git a/man/pip_sna.Rd b/man/aux_sna.Rd similarity index 75% rename from man/pip_sna.Rd rename to man/aux_sna.Rd index 479197b..c215f0a 100644 --- a/man/pip_sna.Rd +++ b/man/aux_sna.Rd @@ -1,19 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_sna.R, R/sna.R -\name{pip_sna} -\alias{pip_sna} -\title{Fake PIP SNA function} +% Please edit documentation in R/aux_sna.R +\name{aux_sna} +\alias{aux_sna} +\title{PIP Special National accounts} \usage{ -pip_sna( - action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) - -pip_sna( +aux_sna( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_update_all_aux.Rd b/man/aux_update_all.Rd similarity index 59% rename from man/pip_update_all_aux.Rd rename to man/aux_update_all.Rd 
index c0ccf15..48c357f 100644 --- a/man/pip_update_all_aux.Rd +++ b/man/aux_update_all.Rd @@ -1,21 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_update_all_aux.R -\name{pip_update_all_aux} -\alias{pip_update_all_aux} +% Please edit documentation in R/aux_update_all.R +\name{aux_update_all} +\alias{aux_update_all} \title{Update all auxiliary data at once} \usage{ -pip_update_all_aux( +aux_update_all( force = FALSE, popsrc = getOption("pipaux.popsrc"), maindir = gls$PIP_DATA_DIR ) } \arguments{ -\item{force}{logical: If TRUE data will be overwritten.} - \item{popsrc}{character: Source for population data. Defaults to \code{getOption("pipaux.popsrc")}.} - -\item{maindir}{character: Main directory of project.} } \description{ Update all auxiliary data at once diff --git a/man/clean_cp_names.Rd b/man/clean_cp_names.Rd index bd71560..7ef9628 100644 --- a/man/clean_cp_names.Rd +++ b/man/clean_cp_names.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_clean.R +% Please edit documentation in R/aux_cp_clean.R \name{clean_cp_names} \alias{clean_cp_names} \title{Clean names from original CP files} diff --git a/man/pip_cp.Rd b/man/fake_aux_sna.Rd similarity index 77% rename from man/pip_cp.Rd rename to man/fake_aux_sna.Rd index 7bde8e2..05f58a5 100644 --- a/man/pip_cp.Rd +++ b/man/fake_aux_sna.Rd @@ -1,22 +1,22 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp.R -\name{pip_cp} -\alias{pip_cp} -\title{Country Profiles} +% Please edit documentation in R/fake_aux_sna.R +\name{fake_aux_sna} +\alias{fake_aux_sna} +\title{Fake PIP SNA function} \usage{ -pip_cp( +fake_aux_sna( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + from = c("gh", "file", "api") ) } \arguments{ \item{action}{character: 
Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in -memory.} +"update" data will be updated on the system. If "load" data is loaded in memory.} \item{force}{logical: If TRUE data will be overwritten.} @@ -31,5 +31,5 @@ will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} } \description{ -Update a list with country profiles data +Fake PIP SNA function } diff --git a/man/load_aux.Rd b/man/load_aux.Rd index 16180eb..c57f0ec 100644 --- a/man/load_aux.Rd +++ b/man/load_aux.Rd @@ -15,8 +15,6 @@ load_aux( \arguments{ \item{measure}{character: measure to be loaded} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/load_cpi.Rd b/man/load_cpi.Rd index fe4c0bc..cd3a807 100644 --- a/man/load_cpi.Rd +++ b/man/load_cpi.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_vintage.R +% Please edit documentation in R/aux_cpi_vintage.R \name{load_cpi} \alias{load_cpi} \title{Load cpi files and create CPI ID variable} diff --git a/man/merger_aux.Rd b/man/merger_aux.Rd index b465c44..fc0f269 100644 --- a/man/merger_aux.Rd +++ b/man/merger_aux.Rd @@ -23,12 +23,13 @@ data.table with key information Merge auxiliary datasets } \examples{ -pfw <- pip_pfw(action = "load") -ppp <- pip_ppp(action = "load") +pfw <- load_aux("pfw") +ppp <- load_aux("ppp") pfw_ppp <- merger_aux(pfw, ppp) -cpi <- pip_cpi(action = "cpi") +cpi <- load_aux("cpi") cpi <- cpi[, -c("cpi_domain")] pfw_cpi <- merger_aux(cpi, pfw, keep = "right") +cpi_pfw <- merger_aux(cpi, pfw) } diff --git a/man/pip_country_list.Rd b/man/pip_country_list.Rd index 70eb8d4..9d307a4 100644 --- a/man/pip_country_list.Rd +++ b/man/pip_country_list.Rd @@ -14,13 +14,6 @@ 
pip_country_list( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_gdp.Rd b/man/pip_gdp.Rd index 9c49cd1..ac51744 100644 --- a/man/pip_gdp.Rd +++ b/man/pip_gdp.Rd @@ -16,13 +16,6 @@ pip_gdp( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_gdp_update.Rd b/man/pip_gdp_update.Rd index 16eb090..be88c52 100644 --- a/man/pip_gdp_update.Rd +++ b/man/pip_gdp_update.Rd @@ -15,10 +15,6 @@ pip_gdp_update( ) } \arguments{ -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_indicators.Rd b/man/pip_indicators.Rd index 083f7fc..63c09fb 100644 --- a/man/pip_indicators.Rd +++ b/man/pip_indicators.Rd @@ -14,16 +14,9 @@ pip_indicators( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. 
Default is \code{getOption("pipfun.ghowner")}} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_maddison.Rd b/man/pip_maddison.Rd index 75b905a..98fbe6b 100644 --- a/man/pip_maddison.Rd +++ b/man/pip_maddison.Rd @@ -15,16 +15,9 @@ pip_maddison( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_metadata.Rd b/man/pip_metadata.Rd index 8d94cdb..bdef629 100644 --- a/man/pip_metadata.Rd +++ b/man/pip_metadata.Rd @@ -15,21 +15,6 @@ pip_metadata( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{maindir}{character: Main directory of project.} - -\item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - \item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ diff --git a/man/pip_metadata_update.Rd b/man/pip_metadata_update.Rd index 53b34bc..29d7fc0 100644 --- a/man/pip_metadata_update.Rd +++ b/man/pip_metadata_update.Rd @@ -14,10 +14,6 @@ pip_metadata_update( ) } \arguments{ -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_pce.Rd b/man/pip_pce.Rd index 92aff3a..91585d8 100644 --- a/man/pip_pce.Rd +++ b/man/pip_pce.Rd @@ -16,16 +16,9 @@ pip_pce( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_pce_update.Rd b/man/pip_pce_update.Rd index 660a846..58f027b 100644 --- a/man/pip_pce_update.Rd +++ b/man/pip_pce_update.Rd @@ -15,10 +15,6 @@ pip_pce_update( ) } \arguments{ -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. 
Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_pfw_update.Rd b/man/pip_pfw_update.Rd index 966833a..7c98a32 100644 --- a/man/pip_pfw_update.Rd +++ b/man/pip_pfw_update.Rd @@ -14,10 +14,6 @@ pip_pfw_update( ) } \arguments{ -\item{maindir}{character: Main directory of project.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} @@ -25,8 +21,6 @@ pip_pfw_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update PFW diff --git a/man/pip_pl.Rd b/man/pip_pl.Rd index 7c556e5..80cb70c 100644 --- a/man/pip_pl.Rd +++ b/man/pip_pl.Rd @@ -15,16 +15,9 @@ pip_pl( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_ppp.Rd b/man/pip_ppp.Rd index 12840dc..a92033d 100644 --- a/man/pip_ppp.Rd +++ b/man/pip_ppp.Rd @@ -16,19 +16,12 @@ pip_ppp( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{maindir}{character: Main directory of project.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} -\item{force}{logical: If TRUE data will be overwritten.} - \item{tag}{character: specific release to be used in the update.} \item{detail}{has an option TRUE/FALSE, default value is FALSE} diff --git a/man/pip_wdi.Rd b/man/pip_wdi.Rd index ea9a06d..92ca1c5 100644 --- a/man/pip_wdi.Rd +++ b/man/pip_wdi.Rd @@ -16,13 +16,6 @@ pip_wdi( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_wdi_update.Rd b/man/pip_wdi_update.Rd index 9f0bba3..307b5ac 100644 --- a/man/pip_wdi_update.Rd +++ b/man/pip_wdi_update.Rd @@ -15,10 +15,6 @@ pip_wdi_update( ) } \arguments{ -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_weo.Rd b/man/pip_weo.Rd index 2d55d0b..d89d7d6 100644 --- a/man/pip_weo.Rd +++ b/man/pip_weo.Rd @@ -15,16 +15,9 @@ pip_weo( ) } \arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} -\item{maindir}{character: Main directory of project.} - \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} diff --git a/man/save_aux_to_gh.Rd b/man/save_aux_to_gh.Rd new file mode 100644 index 0000000..57edd8d --- /dev/null +++ b/man/save_aux_to_gh.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{save_aux_to_gh} +\alias{save_aux_to_gh} +\title{SAve auxiliary file to Github Repo} +\usage{ +save_aux_to_gh( + df, + measure, + owner = getOption("pipfun.ghowner"), + repo = paste0("aux_", measure), + branch = "DEV", + tag = branch, + filename = measure, + ext = "csv", + ... +) +} +\arguments{ +\item{df}{A dataframe object} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{repo}{character: name of the repo} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{filename}{character: Name of file name without the ".csv" extension. +Default is \code{measure}} + +\item{ext}{character: Extension of \code{filename}. Default "csv"} + +\item{...}{parameters to be passed to the loading functions depending of the +extension used} +} +\description{ +Sometimes we need to save auxiliary files to Github repo. +This function allows for this. 
+} From 230f6c24a5a922a83a241c27edea2ccd63de46c2 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 22 Nov 2024 11:08:49 -0500 Subject: [PATCH 02/20] change pip prefix to aux -part2 --- R/{pip_gdp.R => aux_gdp.R} | 6 +++--- R/{pip_gdp_update.R => aux_gdp_update.R} | 4 ++-- R/{pip_gdp_weo.R => aux_gdp_weo.R} | 10 +++++----- R/{pip_indicators.R => aux_indicators.R} | 4 ++-- R/{pip_maddison.R => aux_maddison.R} | 4 ++-- R/{pip_metadata.R => aux_metadata.R} | 6 +++--- R/{pip_metadata_update.R => aux_metadata_update.R} | 4 ++-- R/{pip_npl.R => aux_npl.R} | 4 ++-- R/{pip_pce.R => aux_pce.R} | 8 ++++---- R/{pip_pce_update.R => aux_pce_update.R} | 4 ++-- R/{pip_pfw.R => aux_pfw.R} | 2 +- R/{pip_pfw_key.R => aux_pfw_key.R} | 2 +- R/{pip_pfw_update.R => aux_pfw_update.R} | 6 +++--- R/{pip_pl.R => aux_pl.R} | 4 ++-- R/{pip_pop.R => aux_pop.R} | 6 +++--- R/{pip_pop_update.R => aux_pop_update.R} | 4 ++-- R/{pip_ppp.R => aux_ppp.R} | 6 +++--- R/{pip_ppp_update.R => aux_ppp_update.R} | 4 ++-- R/{pip_prices.R => aux_prices.R} | 8 ++++---- R/{pip_regions.R => aux_regions.R} | 4 ++-- R/{pip_wdi.R => aux_wdi.R} | 6 +++--- R/{pip_wdi_update.R => aux_wdi_update.R} | 6 +++--- R/{pip_weo.R => aux_weo.R} | 6 +++--- R/{pip_weo_clean.R => aux_weo_clean.R} | 2 +- 24 files changed, 60 insertions(+), 60 deletions(-) rename R/{pip_gdp.R => aux_gdp.R} (90%) rename R/{pip_gdp_update.R => aux_gdp_update.R} (99%) rename R/{pip_gdp_weo.R => aux_gdp_weo.R} (94%) rename R/{pip_indicators.R => aux_indicators.R} (94%) rename R/{pip_maddison.R => aux_maddison.R} (94%) rename R/{pip_metadata.R => aux_metadata.R} (89%) rename R/{pip_metadata_update.R => aux_metadata_update.R} (97%) rename R/{pip_npl.R => aux_npl.R} (96%) rename R/{pip_pce.R => aux_pce.R} (87%) rename R/{pip_pce_update.R => aux_pce_update.R} (98%) rename R/{pip_pfw.R => aux_pfw.R} (97%) rename R/{pip_pfw_key.R => aux_pfw_key.R} (97%) rename R/{pip_pfw_update.R => aux_pfw_update.R} (91%) rename R/{pip_pl.R => aux_pl.R} (94%) rename 
R/{pip_pop.R => aux_pop.R} (91%) rename R/{pip_pop_update.R => aux_pop_update.R} (99%) rename R/{pip_ppp.R => aux_ppp.R} (93%) rename R/{pip_ppp_update.R => aux_ppp_update.R} (97%) rename R/{pip_prices.R => aux_prices.R} (96%) rename R/{pip_regions.R => aux_regions.R} (97%) rename R/{pip_wdi.R => aux_wdi.R} (90%) rename R/{pip_wdi_update.R => aux_wdi_update.R} (96%) rename R/{pip_weo.R => aux_weo.R} (95%) rename R/{pip_weo_clean.R => aux_weo_clean.R} (99%) diff --git a/R/pip_gdp.R b/R/aux_gdp.R similarity index 90% rename from R/pip_gdp.R rename to R/aux_gdp.R index b012729..ccac898 100644 --- a/R/pip_gdp.R +++ b/R/aux_gdp.R @@ -3,12 +3,12 @@ #' Update or load GDP data. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". "file" #' and "gh" are synonymous #' @export -pip_gdp <- function(action = c("update", "load"), +aux_gdp <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), @@ -23,7 +23,7 @@ pip_gdp <- function(action = c("update", "load"), if (action == "update") { - pip_gdp_update(maindir = maindir, + aux_gdp_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_gdp_update.R b/R/aux_gdp_update.R similarity index 99% rename from R/pip_gdp_update.R rename to R/aux_gdp_update.R index e87ac05..06e8553 100644 --- a/R/pip_gdp_update.R +++ b/R/aux_gdp_update.R @@ -2,10 +2,10 @@ #' #' Update GDP data using WDI, Maddison and Special cases. 
#' -#' @inheritParams pip_gdp +#' @inheritParams aux_gdp #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, +aux_gdp_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), diff --git a/R/pip_gdp_weo.R b/R/aux_gdp_weo.R similarity index 94% rename from R/pip_gdp_weo.R rename to R/aux_gdp_weo.R index 5b616ad..a2ff895 100644 --- a/R/pip_gdp_weo.R +++ b/R/aux_gdp_weo.R @@ -6,12 +6,12 @@ #' as an .xls file in `/_aux/weo/`. The filename should be in the #' following structure `WEO_.xls`. Due to potential file corruption #' the file must be opened and re-saved before it can be updated with -#' `pip_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file +#' `aux_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file #' that's not really xls. #' -#' @inheritParams pip_prices +#' @inheritParams aux_prices #' @export -pip_gdp_weo <- function(action = "update", +aux_gdp_weo <- function(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) { measure <- "weo" @@ -101,7 +101,7 @@ pip_gdp_weo <- function(action = "update", # ---- Merge with population ---- - pop <- pip_pop("load", maindir = maindir) + pop <- aux_pop("load", maindir = maindir) setDT(pop) pop <- pop[pop_data_level == "national", ] dt[pop, @@ -138,7 +138,7 @@ pip_gdp_weo <- function(action = "update", dt <- dt[, c("country_code", "year", "weo_gdp")] # Save dataset - pip_sign_save( + aux_sign_save( x = dt, measure = measure, msrdir = msrdir, diff --git a/R/pip_indicators.R b/R/aux_indicators.R similarity index 94% rename from R/pip_indicators.R rename to R/aux_indicators.R index a6d127a..5f2d3c1 100644 --- a/R/pip_indicators.R +++ b/R/aux_indicators.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with the indicators master sheet. 
#' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_indicators <- function(action = c("update", "load"), +aux_indicators <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_maddison.R b/R/aux_maddison.R similarity index 94% rename from R/pip_maddison.R rename to R/aux_maddison.R index dc744c9..5bb356c 100644 --- a/R/pip_maddison.R +++ b/R/aux_maddison.R @@ -3,11 +3,11 @@ #' Load or update data from the Maddison project. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export #' @import data.table -pip_maddison <- function(action = c("update", "load"), +aux_maddison <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_metadata.R b/R/aux_metadata.R similarity index 89% rename from R/pip_metadata.R rename to R/aux_metadata.R index b82b52f..1844e66 100644 --- a/R/pip_metadata.R +++ b/R/aux_metadata.R @@ -3,10 +3,10 @@ #' Update or load a dataset with survey metadata. 
#' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams load_raw_indicators #' @export -pip_metadata <- function(action = c("update", "load"), +aux_metadata <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -19,7 +19,7 @@ pip_metadata <- function(action = c("update", "load"), if (action == "update") { - pip_metadata_update( + aux_metadata_update( maindir = maindir, force = force, owner = owner, diff --git a/R/pip_metadata_update.R b/R/aux_metadata_update.R similarity index 97% rename from R/pip_metadata_update.R rename to R/aux_metadata_update.R index ed634e2..8b84b05 100644 --- a/R/pip_metadata_update.R +++ b/R/aux_metadata_update.R @@ -2,10 +2,10 @@ #' #' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh -#' @inheritParams pip_metadata +#' @inheritParams aux_metadata #' @return logical. TRUE if saved correctly. 
FALSE if error happened #' @export -pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, +aux_metadata_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), diff --git a/R/pip_npl.R b/R/aux_npl.R similarity index 96% rename from R/pip_npl.R rename to R/aux_npl.R index 53fdf2f..4c9271a 100644 --- a/R/pip_npl.R +++ b/R/aux_npl.R @@ -3,10 +3,10 @@ #' Update series of national poverty lines #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_npl <- function(action = c("update", "load"), +aux_npl <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_pce.R b/R/aux_pce.R similarity index 87% rename from R/pip_pce.R rename to R/aux_pce.R index c19d26d..b0d4563 100644 --- a/R/pip_pce.R +++ b/R/aux_pce.R @@ -3,11 +3,11 @@ #' Load or update PCE data. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_gdp -#' @inheritParams pip_pfw +#' @inheritParams aux_gdp +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_pce <- function(action = c("update", "load"), +aux_pce <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -20,7 +20,7 @@ pip_pce <- function(action = c("update", "load"), action <- match.arg(action) if (action == "update") { - pip_pce_update(maindir = maindir, + aux_pce_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_pce_update.R b/R/aux_pce_update.R similarity index 98% rename from R/pip_pce_update.R rename to R/aux_pce_update.R index f9c5b14..b2d00fa 100644 --- a/R/pip_pce_update.R +++ b/R/aux_pce_update.R @@ -2,10 +2,10 @@ #' #' Update PCE data using WDI and Special cases. 
#' -#' @inheritParams pip_gdp +#' @inheritParams aux_gdp #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, +aux_pce_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), diff --git a/R/pip_pfw.R b/R/aux_pfw.R similarity index 97% rename from R/pip_pfw.R rename to R/aux_pfw.R index 7c76a9a..92da3b3 100644 --- a/R/pip_pfw.R +++ b/R/aux_pfw.R @@ -22,7 +22,7 @@ aux_pfw <- function(action = c("update", "load"), action <- match.arg(action) if (action == "update") { - pip_pfw_update(maindir = maindir, + aux_pfw_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_pfw_key.R b/R/aux_pfw_key.R similarity index 97% rename from R/pip_pfw_key.R rename to R/aux_pfw_key.R index 955b27a..14cf94f 100644 --- a/R/pip_pfw_key.R +++ b/R/aux_pfw_key.R @@ -3,7 +3,7 @@ #' @return data.table #' @export #' -pip_pfw_key <- function(){ +aux_pfw_key <- function(){ pfw_temp <- load_aux("pfw", maindir = temp_fld) diff --git a/R/pip_pfw_update.R b/R/aux_pfw_update.R similarity index 91% rename from R/pip_pfw_update.R rename to R/aux_pfw_update.R index f499269..54ab7e7 100644 --- a/R/pip_pfw_update.R +++ b/R/aux_pfw_update.R @@ -1,9 +1,9 @@ #' Update PFW #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, +aux_pfw_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -22,7 +22,7 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, pfw_validate_raw(pfw = pfw, detail = detail) # Clean data - pfw <- pip_pfw_clean(pfw, + pfw <- aux_pfw_clean(pfw, maindir = maindir, branch = branch) diff --git a/R/pip_pl.R b/R/aux_pl.R similarity index 94% rename from R/pip_pl.R rename to R/aux_pl.R index 1b467e5..8f82cd1 100644 
--- a/R/pip_pl.R +++ b/R/aux_pl.R @@ -3,10 +3,10 @@ #' Update or load a dataset with poverty lines. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_pl <- function(action = c("update", "load"), +aux_pl <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_pop.R b/R/aux_pop.R similarity index 91% rename from R/pip_pop.R rename to R/aux_pop.R index 4b0ec7e..036fee9 100644 --- a/R/pip_pop.R +++ b/R/aux_pop.R @@ -3,11 +3,11 @@ #' Load or update population data. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @param from character: Source for population data. #' @export -pip_pop <- function(action = c("update", "load"), +aux_pop <- function(action = c("update", "load"), force = FALSE, from = c("gh", "file", "api"), maindir = gls$PIP_DATA_DIR, @@ -20,7 +20,7 @@ pip_pop <- function(action = c("update", "load"), action <- match.arg(action) if (action == "update") { - pip_pop_update( + aux_pop_update( force = force, from = from, maindir = maindir, diff --git a/R/pip_pop_update.R b/R/aux_pop_update.R similarity index 99% rename from R/pip_pop_update.R rename to R/aux_pop_update.R index 35a9c4b..4b79bab 100644 --- a/R/pip_pop_update.R +++ b/R/aux_pop_update.R @@ -3,8 +3,8 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @param from character: Source for population data. 
#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pop -pip_pop_update <- function(force = FALSE, +#' @inheritParams aux_pop +aux_pop_update <- function(force = FALSE, from = c("gh", "file", "api"), maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/pip_ppp.R b/R/aux_ppp.R similarity index 93% rename from R/pip_ppp.R rename to R/aux_ppp.R index e8fe3a7..6efdc95 100644 --- a/R/pip_ppp.R +++ b/R/aux_ppp.R @@ -3,11 +3,11 @@ #' Load or update PPP data. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export #' @import data.table -pip_ppp <- function(action = c("update", "load"), +aux_ppp <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -42,7 +42,7 @@ pip_ppp <- function(action = c("update", "load"), # ____________________________________________________________________________ # Computations #### if (action == "update") { - pip_ppp_update(maindir = maindir, + aux_ppp_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_ppp_update.R b/R/aux_ppp_update.R similarity index 97% rename from R/pip_ppp_update.R rename to R/aux_ppp_update.R index e400338..d5e23ee 100644 --- a/R/pip_ppp_update.R +++ b/R/aux_ppp_update.R @@ -2,7 +2,7 @@ #' #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, +aux_ppp_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -36,7 +36,7 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, # Clean data - ppp <- pip_ppp_clean(ppp) + ppp <- aux_ppp_clean(ppp) # Remove any non-WDI countries cl <- load_aux(maindir = maindir, diff --git a/R/pip_prices.R b/R/aux_prices.R similarity index 96% 
rename from R/pip_prices.R rename to R/aux_prices.R index 3754e27..33d8942 100644 --- a/R/pip_prices.R +++ b/R/aux_prices.R @@ -11,7 +11,7 @@ #' #' @export #' @import data.table -pip_prices <- function(measure = NULL, +aux_prices <- function(measure = NULL, action = "update", maindir = gls$PIP_DATA_DIR, dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), @@ -73,19 +73,19 @@ pip_prices <- function(measure = NULL, #--------- update --------- if (action == "update") { if (measure == "cpi") { - pip_cpi_update( + aux_cpi_update( maindir = maindir, dlwdir = dlwdir, force = force ) } else if (measure == "ppp") { - pip_ppp_update( + aux_ppp_update( maindir = maindir, dlwdir = dlwdir, force = force ) } else if (measure == "pfw") { - pip_pfw_update( + aux_pfw_update( maindir = maindir, dlwdir = dlwdir, force = force diff --git a/R/pip_regions.R b/R/aux_regions.R similarity index 97% rename from R/pip_regions.R rename to R/aux_regions.R index c8d5244..b50df1c 100644 --- a/R/pip_regions.R +++ b/R/aux_regions.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with regions. #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_regions <- function(action = c("update", "load"), +aux_regions <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/pip_wdi.R b/R/aux_wdi.R similarity index 90% rename from R/pip_wdi.R rename to R/aux_wdi.R index af5073f..e218bf5 100644 --- a/R/pip_wdi.R +++ b/R/aux_wdi.R @@ -3,12 +3,12 @@ #' Update or load wdi data. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". 
"file" #' and "gh" are synonymous #' @export -pip_wdi <- function(action = c("update", "load"), +aux_wdi <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), @@ -23,7 +23,7 @@ pip_wdi <- function(action = c("update", "load"), if (action == "update") { - pip_wdi_update(maindir = maindir, + aux_wdi_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_wdi_update.R b/R/aux_wdi_update.R similarity index 96% rename from R/pip_wdi_update.R rename to R/aux_wdi_update.R index 325cdef..36894ac 100644 --- a/R/pip_wdi_update.R +++ b/R/aux_wdi_update.R @@ -3,13 +3,13 @@ #' GDP and HFCE data from WDI. It could be either from API or from file #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_gdp +#' @inheritParams aux_gdp #' @return data.table with gdp and pce variables #' @export #' #' @examples -#' pip_wdi_update() -pip_wdi_update <- function(force = FALSE, +#' aux_wdi_update() +aux_wdi_update <- function(force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), diff --git a/R/pip_weo.R b/R/aux_weo.R similarity index 95% rename from R/pip_weo.R rename to R/aux_weo.R index 060ccd3..5e000c6 100644 --- a/R/pip_weo.R +++ b/R/aux_weo.R @@ -10,10 +10,10 @@ #' that's not really xls. 
#' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_weo <- function(action = c("update", "load"), +aux_weo <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -40,7 +40,7 @@ pip_weo <- function(action = c("update", "load"), # validate weo raw data weo_validate_raw(weo = dt, detail = detail) - dt <- pip_weo_clean(dt, + dt <- aux_weo_clean(dt, maindir = maindir, branch = branch) diff --git a/R/pip_weo_clean.R b/R/aux_weo_clean.R similarity index 99% rename from R/pip_weo_clean.R rename to R/aux_weo_clean.R index 120fa27..220da23 100644 --- a/R/pip_weo_clean.R +++ b/R/aux_weo_clean.R @@ -6,7 +6,7 @@ #' #' @return data.table #' @export -pip_weo_clean <- function(dt, +aux_weo_clean <- function(dt, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) { From 84aeca3255b704c3f625044d57ef56fcafc04952 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 22 Nov 2024 11:34:59 -0500 Subject: [PATCH 03/20] change pip to aux -part3 --- R/auto_aux_update.R | 2 +- R/{pip_countries.R => aux_countries.R} | 4 ++-- R/{pip_country_list.R => aux_country_list.R} | 6 +++--- R/{pip_cpi.R => aux_cpi.R} | 4 ++-- R/{pip_cpi_update.R => aux_cpi_update.R} | 6 +++--- R/{pip_gdm.R => aux_gdm.R} | 6 +++--- R/{pip_gdm_update.R => aux_gdm_update.R} | 6 +++--- R/{pip_income_groups.R => aux_income_groups.R} | 4 ++-- R/{pip_aux_labels.R => aux_labels_pip.R} | 4 ++-- R/cpi_validate_output.R | 2 +- R/load_aux.R | 2 +- R/merger_aux.R | 2 +- R/pfw_validate_output.R | 2 +- R/update_aux.R | 10 +++++----- 14 files changed, 30 insertions(+), 30 deletions(-) rename R/{pip_countries.R => aux_countries.R} (95%) rename R/{pip_country_list.R => aux_country_list.R} (94%) rename R/{pip_cpi.R => aux_cpi.R} (95%) rename R/{pip_cpi_update.R => aux_cpi_update.R} (94%) rename R/{pip_gdm.R => aux_gdm.R} (91%) rename 
R/{pip_gdm_update.R => aux_gdm_update.R} (98%) rename R/{pip_income_groups.R => aux_income_groups.R} (96%) rename R/{pip_aux_labels.R => aux_labels_pip.R} (98%) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index b2bfcd1..2194e7c 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -2,7 +2,7 @@ #' #' @param measure character: measure to be updated, if NULL will update all of #' them -#' @inheritParams pip_pop_update +#' @inheritParams aux_pop_update #' @export auto_aux_update <- function(measure = NULL, force = FALSE, diff --git a/R/pip_countries.R b/R/aux_countries.R similarity index 95% rename from R/pip_countries.R rename to R/aux_countries.R index 009f7a9..02d1b59 100644 --- a/R/pip_countries.R +++ b/R/aux_countries.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with countries. #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_countries <- function(action = c("update", "load"), +aux_countries <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_country_list.R b/R/aux_country_list.R similarity index 94% rename from R/pip_country_list.R rename to R/aux_country_list.R index bd9266a..439bc98 100644 --- a/R/pip_country_list.R +++ b/R/aux_country_list.R @@ -9,11 +9,11 @@ #' The dependency on the PCN Masterfile should be changed in the future. 
#' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export #' @return logical if `action = "update"` or data.table if `action = "load"` -pip_country_list <- function(action = c("update", "load"), +aux_country_list <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, branch = c("DEV", "PROD", "main"), @@ -27,7 +27,7 @@ pip_country_list <- function(action = c("update", "load"), if (action == "update") { ## Special national accounts -------- - cl <- pip_country_list_update(class_branch = class_branch) + cl <- aux_country_list_update(class_branch = class_branch) # validate country list raw data cl_validate_raw(cl, detail = detail) diff --git a/R/pip_cpi.R b/R/aux_cpi.R similarity index 95% rename from R/pip_cpi.R rename to R/aux_cpi.R index 6303ed2..87c5a47 100644 --- a/R/pip_cpi.R +++ b/R/aux_cpi.R @@ -12,7 +12,7 @@ #' #' @export #' @import data.table -pip_cpi <- function(action = c("update", "load"), +aux_cpi <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), @@ -46,7 +46,7 @@ pip_cpi <- function(action = c("update", "load"), # ____________________________________________________________________________ # Computations #### if (action == "update") { - pip_cpi_update(maindir = maindir, + aux_cpi_update(maindir = maindir, force = force, owner = owner, branch = branch, diff --git a/R/pip_cpi_update.R b/R/aux_cpi_update.R similarity index 94% rename from R/pip_cpi_update.R rename to R/aux_cpi_update.R index ea7afb3..11d8db1 100644 --- a/R/pip_cpi_update.R +++ b/R/aux_cpi_update.R @@ -1,8 +1,8 @@ #' Update CPI #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @keywords internal -pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, +aux_cpi_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = 
c("DEV", "PROD", "main"), @@ -34,7 +34,7 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, # Cleaning #### # Clean data - cpi <- pip_cpi_clean(cpi, + cpi <- aux_cpi_clean(cpi, maindir = maindir, branch = branch) diff --git a/R/pip_gdm.R b/R/aux_gdm.R similarity index 91% rename from R/pip_gdm.R rename to R/aux_gdm.R index 3fc7dfc..c49fb76 100644 --- a/R/pip_gdm.R +++ b/R/aux_gdm.R @@ -9,11 +9,11 @@ #' #' The dependency on the PCN Masterfile should be changed in the future. #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh #' @export -pip_gdm <- function(action = c("update", "load"), +aux_gdm <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -27,7 +27,7 @@ pip_gdm <- function(action = c("update", "load"), if (action == "update") { - pip_gdm_update(force = force, + aux_gdm_update(force = force, maindir = maindir, owner = owner, branch = branch, diff --git a/R/pip_gdm_update.R b/R/aux_gdm_update.R similarity index 98% rename from R/pip_gdm_update.R rename to R/aux_gdm_update.R index 72f1e49..4621d79 100644 --- a/R/pip_gdm_update.R +++ b/R/aux_gdm_update.R @@ -2,9 +2,9 @@ #' #' Update GDM data using the PovcalNet Masterfile. #' -#' @inheritParams pip_gdm +#' @inheritParams aux_gdm #' @keywords internal -pip_gdm_update <- function(force = FALSE, +aux_gdm_update <- function(force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), @@ -193,7 +193,7 @@ pip_gdm_update <- function(force = FALSE, ## ............................................................................ 
## Remove any non-WDI countries #### - pip_country_list(maindir = maindir, + aux_country_list(maindir = maindir, force = force, branch = branch) diff --git a/R/pip_income_groups.R b/R/aux_income_groups.R similarity index 96% rename from R/pip_income_groups.R rename to R/aux_income_groups.R index 5b43c71..8e521ec 100644 --- a/R/pip_income_groups.R +++ b/R/aux_income_groups.R @@ -4,10 +4,10 @@ #' available in the PIP-Technical-Team group but in the Povcalnet-team group. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_income_groups <- function(action = c("update", "load"), +aux_income_groups <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_aux_labels.R b/R/aux_labels_pip.R similarity index 98% rename from R/pip_aux_labels.R rename to R/aux_labels_pip.R index c757424..0e43e86 100644 --- a/R/pip_aux_labels.R +++ b/R/aux_labels_pip.R @@ -1,10 +1,10 @@ -#' pip_aux_labels +#' PIP Auxiliary Labels #' #' @param x Data frame to be labeled. #' @param measure type of data frame, e.g., "cpi" or "PPP". 
#' #' @keywords internal -pip_aux_labels <- function(x, measure) { +aux_labels_pip <- function(x, measure) { if (measure == "cpi") { # Label variables diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 2c2102b..c9c7895 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -1,6 +1,6 @@ #' Validate clean cpi data #' -#' @param cpi clean cpi data, output via `pip_cpi_clean` +#' @param cpi clean cpi data, output via `aux_cpi_clean` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq diff --git a/R/load_aux.R b/R/load_aux.R index 8f858b9..9d8d712 100644 --- a/R/load_aux.R +++ b/R/load_aux.R @@ -1,6 +1,6 @@ #' Load any auxiliary data #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param apply_label logical: If TRUE, predefined labels will applied. #' @param ppp_defaults logical: If TRUE, wider format ppp data will be returned diff --git a/R/merger_aux.R b/R/merger_aux.R index 06ffe4f..1693e3f 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -41,7 +41,7 @@ merger_aux <- function(aux_data1, if (measure1 == "pfw" || measure2 == "pfw"){ # generate a dataset that can be used to add reporting_level variable to pfw data - pfw_key <- pip_pfw_key() + pfw_key <- aux_pfw_key() pfw <- pfw_key[pfw, on = .(country_code, survey_year, survey_acronym, cpi_domain_var)] diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R index b7af2f6..42721a5 100644 --- a/R/pfw_validate_output.R +++ b/R/pfw_validate_output.R @@ -1,6 +1,6 @@ #' Validate clean pfw data #' -#' @param pfw clean pfw data, output via `pip_pfw_clean` +#' @param pfw clean pfw data, output via `aux_pfw_clean` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq diff --git a/R/update_aux.R b/R/update_aux.R index c888d48..4a0f999 100644 --- a/R/update_aux.R +++ 
b/R/update_aux.R @@ -1,7 +1,7 @@ #' Update Auxiliary data. Wrapper of measure-specific functions. #' -#' @inheritParams pip_aux_labels -#' @inheritParams pip_cpi +#' @inheritParams aux_labels_pip +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @param verbose logical : Do you want verbose output? #' @export @@ -32,9 +32,9 @@ update_aux <- function(measure, if ("all" %in% tolower(measure)) { measure <- lsf.str("package:pipaux", - pattern = "^pip_[a-z]+$") |> + pattern = "^aux_[a-z]+$") |> as.character() |> - {\(.) gsub("^pip_", "", .)}() |> + {\(.) gsub("^aux_", "", .)}() |> sort() } @@ -43,7 +43,7 @@ update_aux <- function(measure, al$verbose <- NULL # build function name - fun_name <- glue("pip_{measure}") + fun_name <- glue("aux_{measure}") rs <- lapply(fun_name, \(.x) { From 8420b40d182251f88ce2e2890ec6fe4ff6fca71d Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 22 Nov 2024 16:20:10 -0500 Subject: [PATCH 04/20] documentation --- NAMESPACE | 46 ++++++------ R/aux_cpi_clean.R | 2 +- R/aux_data_files.R | 1 - man/{pip_countries.Rd => aux_countries.Rd} | 8 +- ...ip_country_list.Rd => aux_country_list.Rd} | 15 +++- man/aux_cp.Rd | 8 ++ man/aux_cp_update.Rd | 4 + man/{pip_cpi.Rd => aux_cpi.Rd} | 8 +- man/{pip_cpi_update.Rd => aux_cpi_update.Rd} | 8 +- man/{pip_gdm.Rd => aux_gdm.Rd} | 8 +- man/{pip_gdm_update.Rd => aux_gdm_update.Rd} | 8 +- man/{pip_gdp.Rd => aux_gdp.Rd} | 75 ++++++++++--------- man/{pip_gdp_update.Rd => aux_gdp_update.Rd} | 12 ++- man/{pip_gdp_weo.Rd => aux_gdp_weo.Rd} | 10 +-- ..._income_groups.Rd => aux_income_groups.Rd} | 8 +- man/aux_indicators.Rd | 34 +++++++++ man/{pip_aux_labels.Rd => aux_labels_pip.Rd} | 12 +-- man/aux_maddison.Rd | 37 +++++++++ man/aux_metadata.Rd | 37 +++++++++ ...adata_update.Rd => aux_metadata_update.Rd} | 12 ++- man/aux_metaregion.Rd | 8 ++ man/aux_missing_data.Rd | 8 ++ man/{pip_npl.Rd => aux_npl.Rd} | 8 +- man/{pip_pce.Rd => aux_pce.Rd} | 15 +++- man/{pip_pce_update.Rd => aux_pce_update.Rd} | 12 
++- man/aux_pfw.Rd | 2 +- man/aux_pfw_clean.Rd | 2 + man/{pip_pfw_key.Rd => aux_pfw_key.Rd} | 8 +- man/aux_pfw_update.Rd | 34 +++++++++ man/aux_pl.Rd | 37 +++++++++ man/{pip_pop.Rd => aux_pop.Rd} | 8 +- man/{pip_pop_update.Rd => aux_pop_update.Rd} | 8 +- man/aux_ppp.Rd | 38 ++++++++++ man/{pip_ppp_update.Rd => aux_ppp_update.Rd} | 8 +- man/{pip_prices.Rd => aux_prices.Rd} | 8 +- man/{pip_regions.Rd => aux_regions.Rd} | 8 +- man/aux_sign_save.Rd | 2 + man/aux_update_all.Rd | 4 + man/{pip_wdi.Rd => aux_wdi.Rd} | 15 +++- man/{pip_wdi_update.Rd => aux_wdi_update.Rd} | 14 ++-- man/{pip_weo.Rd => aux_weo.Rd} | 15 +++- man/{pip_weo_clean.Rd => aux_weo_clean.Rd} | 8 +- man/clean_from_wide.Rd | 2 +- man/clean_names_from_wide.Rd | 2 +- man/cpi_validate_output.Rd | 2 +- man/fake_aux_sna.Rd | 3 + man/load_aux.Rd | 2 + man/pfw_validate_output.Rd | 2 +- man/pip_indicators.Rd | 27 ------- man/pip_maddison.Rd | 30 -------- man/pip_metadata.Rd | 22 ------ man/pip_pfw_update.Rd | 28 ------- man/pip_pl.Rd | 30 -------- man/pip_ppp.Rd | 31 -------- 54 files changed, 472 insertions(+), 332 deletions(-) rename man/{pip_countries.Rd => aux_countries.Rd} (89%) rename man/{pip_country_list.Rd => aux_country_list.Rd} (71%) rename man/{pip_cpi.Rd => aux_cpi.Rd} (92%) rename man/{pip_cpi_update.Rd => aux_cpi_update.Rd} (87%) rename man/{pip_gdm.Rd => aux_gdm.Rd} (94%) rename man/{pip_gdm_update.Rd => aux_gdm_update.Rd} (88%) rename man/{pip_gdp.Rd => aux_gdp.Rd} (70%) rename man/{pip_gdp_update.Rd => aux_gdp_update.Rd} (79%) rename man/{pip_gdp_weo.Rd => aux_gdp_weo.Rd} (79%) rename man/{pip_income_groups.Rd => aux_income_groups.Rd} (89%) create mode 100644 man/aux_indicators.Rd rename man/{pip_aux_labels.Rd => aux_labels_pip.Rd} (53%) create mode 100644 man/aux_maddison.Rd create mode 100644 man/aux_metadata.Rd rename man/{pip_metadata_update.Rd => aux_metadata_update.Rd} (75%) rename man/{pip_npl.Rd => aux_npl.Rd} (92%) rename man/{pip_pce.Rd => aux_pce.Rd} (70%) rename 
man/{pip_pce_update.Rd => aux_pce_update.Rd} (79%) rename man/{pip_pfw_key.Rd => aux_pfw_key.Rd} (64%) create mode 100644 man/aux_pfw_update.Rd create mode 100644 man/aux_pl.Rd rename man/{pip_pop.Rd => aux_pop.Rd} (92%) rename man/{pip_pop_update.Rd => aux_pop_update.Rd} (88%) create mode 100644 man/aux_ppp.Rd rename man/{pip_ppp_update.Rd => aux_ppp_update.Rd} (85%) rename man/{pip_prices.Rd => aux_prices.Rd} (87%) rename man/{pip_regions.Rd => aux_regions.Rd} (90%) rename man/{pip_wdi.Rd => aux_wdi.Rd} (70%) rename man/{pip_wdi_update.Rd => aux_wdi_update.Rd} (79%) rename man/{pip_weo.Rd => aux_weo.Rd} (77%) rename man/{pip_weo_clean.Rd => aux_weo_clean.Rd} (77%) delete mode 100644 man/pip_indicators.Rd delete mode 100644 man/pip_maddison.Rd delete mode 100644 man/pip_metadata.Rd delete mode 100644 man/pip_pfw_update.Rd delete mode 100644 man/pip_pl.Rd delete mode 100644 man/pip_ppp.Rd diff --git a/NAMESPACE b/NAMESPACE index 5fd9f41..a6eca3b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,16 +3,39 @@ export("%>%") export(auto_aux_update) export(aux_censoring) +export(aux_countries) +export(aux_country_list) export(aux_cp) +export(aux_cpi) export(aux_data) export(aux_dictionary) +export(aux_gdm) +export(aux_gdp) +export(aux_gdp_weo) +export(aux_income_groups) +export(aux_indicators) +export(aux_maddison) +export(aux_metadata) +export(aux_metadata_update) export(aux_metaregion) export(aux_missing_data) export(aux_nan) +export(aux_npl) +export(aux_pce) export(aux_pfw) +export(aux_pfw_key) +export(aux_pl) export(aux_pl_clean) +export(aux_pop) +export(aux_ppp) +export(aux_prices) +export(aux_regions) export(aux_sna) export(aux_update_all) +export(aux_wdi) +export(aux_wdi_update) +export(aux_weo) +export(aux_weo_clean) export(cl_validate_raw) export(clean_validation_report) export(convert_df_to_base64) @@ -36,29 +59,6 @@ export(npl_validate_raw) export(pce_validate_output) export(pfw_validate_output) export(pfw_validate_raw) -export(pip_countries) 
-export(pip_country_list) -export(pip_cpi) -export(pip_gdm) -export(pip_gdp) -export(pip_gdp_weo) -export(pip_income_groups) -export(pip_indicators) -export(pip_maddison) -export(pip_metadata) -export(pip_metadata_update) -export(pip_npl) -export(pip_pce) -export(pip_pfw_key) -export(pip_pl) -export(pip_pop) -export(pip_ppp) -export(pip_prices) -export(pip_regions) -export(pip_wdi) -export(pip_wdi_update) -export(pip_weo) -export(pip_weo_clean) export(pl_validate_output) export(pop_validate_output) export(pop_validate_raw) diff --git a/R/aux_cpi_clean.R b/R/aux_cpi_clean.R index cdf4b09..3e40af2 100644 --- a/R/aux_cpi_clean.R +++ b/R/aux_cpi_clean.R @@ -5,7 +5,7 @@ #' @param y dataset with CPI data from `aux_cpi_update()`. #' @param cpivar character: CPI variable to be used as default. Currently it is #' "cpi2011". -#' @inheritParams pip_cpi_update +#' @inheritParams aux_cpi_update #' #' @keywords internal aux_cpi_clean <- function(y, diff --git a/R/aux_data_files.R b/R/aux_data_files.R index 415aa56..cdf08a7 100644 --- a/R/aux_data_files.R +++ b/R/aux_data_files.R @@ -5,7 +5,6 @@ #' @return data.table with key values #' @export #' -#' @examples aux_data <- function(aux_file){ # list of possible auxiliary keys -------------------------------------------- diff --git a/man/pip_countries.Rd b/man/aux_countries.Rd similarity index 89% rename from man/pip_countries.Rd rename to man/aux_countries.Rd index df445f8..101329b 100644 --- a/man/pip_countries.Rd +++ b/man/aux_countries.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_countries.R -\name{pip_countries} -\alias{pip_countries} +% Please edit documentation in R/aux_countries.R +\name{aux_countries} +\alias{aux_countries} \title{PIP Countries} \usage{ -pip_countries( +aux_countries( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_country_list.Rd b/man/aux_country_list.Rd similarity index 71% rename from 
man/pip_country_list.Rd rename to man/aux_country_list.Rd index 9d307a4..f5f4900 100644 --- a/man/pip_country_list.Rd +++ b/man/aux_country_list.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_country_list.R -\name{pip_country_list} -\alias{pip_country_list} +% Please edit documentation in R/aux_country_list.R +\name{aux_country_list} +\alias{aux_country_list} \title{List of countries} \usage{ -pip_country_list( +aux_country_list( action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, @@ -14,6 +14,13 @@ pip_country_list( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/aux_cp.Rd b/man/aux_cp.Rd index 27264e8..f78c6a6 100644 --- a/man/aux_cp.Rd +++ b/man/aux_cp.Rd @@ -14,9 +14,17 @@ aux_cp( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in +memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} diff --git a/man/aux_cp_update.Rd b/man/aux_cp_update.Rd index f91fccd..4d292a8 100644 --- a/man/aux_cp_update.Rd +++ b/man/aux_cp_update.Rd @@ -13,6 +13,10 @@ aux_cp_update( ) } \arguments{ +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_cpi.Rd b/man/aux_cpi.Rd similarity index 92% rename from man/pip_cpi.Rd rename to man/aux_cpi.Rd index 806c297..742e849 100644 --- a/man/pip_cpi.Rd +++ b/man/aux_cpi.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi.R -\name{pip_cpi} -\alias{pip_cpi} +% Please edit documentation in R/aux_cpi.R +\name{aux_cpi} +\alias{aux_cpi} \title{PIP CPI} \usage{ -pip_cpi( +aux_cpi( action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, diff --git a/man/pip_cpi_update.Rd b/man/aux_cpi_update.Rd similarity index 87% rename from man/pip_cpi_update.Rd rename to man/aux_cpi_update.Rd index 0e9254d..0660607 100644 --- a/man/pip_cpi_update.Rd +++ b/man/aux_cpi_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_update.R -\name{pip_cpi_update} -\alias{pip_cpi_update} +% Please edit documentation in R/aux_cpi_update.R +\name{aux_cpi_update} +\alias{aux_cpi_update} \title{Update CPI} \usage{ -pip_cpi_update( +aux_cpi_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_gdm.Rd b/man/aux_gdm.Rd similarity index 94% rename from man/pip_gdm.Rd rename to man/aux_gdm.Rd index 232b43b..fa8b072 100644 --- a/man/pip_gdm.Rd +++ b/man/aux_gdm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdm.R -\name{pip_gdm} -\alias{pip_gdm} +% Please edit 
documentation in R/aux_gdm.R +\name{aux_gdm} +\alias{aux_gdm} \title{PIP GDM} \usage{ -pip_gdm( +aux_gdm( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_gdm_update.Rd b/man/aux_gdm_update.Rd similarity index 88% rename from man/pip_gdm_update.Rd rename to man/aux_gdm_update.Rd index 7fda4b1..df9d148 100644 --- a/man/pip_gdm_update.Rd +++ b/man/aux_gdm_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdm_update.R -\name{pip_gdm_update} -\alias{pip_gdm_update} +% Please edit documentation in R/aux_gdm_update.R +\name{aux_gdm_update} +\alias{aux_gdm_update} \title{Update GDM} \usage{ -pip_gdm_update( +aux_gdm_update( force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_gdp.Rd b/man/aux_gdp.Rd similarity index 70% rename from man/pip_gdp.Rd rename to man/aux_gdp.Rd index ac51744..3e237ed 100644 --- a/man/pip_gdp.Rd +++ b/man/aux_gdp.Rd @@ -1,34 +1,41 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp.R -\name{pip_gdp} -\alias{pip_gdp} -\title{PIP GDP} -\usage{ -pip_gdp( - action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = "file", - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" -and "gh" are synonymous} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Update or load GDP data. 
-} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_gdp.R +\name{aux_gdp} +\alias{aux_gdp} +\title{PIP GDP} +\usage{ +aux_gdp( + action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = "file", + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" +and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Update or load GDP data. 
+} diff --git a/man/pip_gdp_update.Rd b/man/aux_gdp_update.Rd similarity index 79% rename from man/pip_gdp_update.Rd rename to man/aux_gdp_update.Rd index be88c52..04e8f7b 100644 --- a/man/pip_gdp_update.Rd +++ b/man/aux_gdp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp_update.R -\name{pip_gdp_update} -\alias{pip_gdp_update} +% Please edit documentation in R/aux_gdp_update.R +\name{aux_gdp_update} +\alias{aux_gdp_update} \title{Update GDP} \usage{ -pip_gdp_update( +aux_gdp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), @@ -15,6 +15,10 @@ pip_gdp_update( ) } \arguments{ +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_gdp_weo.Rd b/man/aux_gdp_weo.Rd similarity index 79% rename from man/pip_gdp_weo.Rd rename to man/aux_gdp_weo.Rd index 0f1bd98..1a573a2 100644 --- a/man/pip_gdp_weo.Rd +++ b/man/aux_gdp_weo.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp_weo.R -\name{pip_gdp_weo} -\alias{pip_gdp_weo} +% Please edit documentation in R/aux_gdp_weo.R +\name{aux_gdp_weo} +\alias{aux_gdp_weo} \title{Fetch GDP data from WEO} \usage{ -pip_gdp_weo(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) +aux_gdp_weo(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) } \arguments{ \item{action}{character: Either "load" or "update". Default is "update". If @@ -22,6 +22,6 @@ Note that the most recent version most be downloaded from imf.org and saved as an .xls file in \verb{/_aux/weo/}. The filename should be in the following structure \verb{WEO_.xls}. Due to potential file corruption the file must be opened and re-saved before it can be updated with -\code{pip_gdp_weo()}. 
Hopefully in the future IMF will stop using an \code{.xls} file +\code{aux_gdp_weo()}. Hopefully in the future IMF will stop using an \code{.xls} file that's not really xls. } diff --git a/man/pip_income_groups.Rd b/man/aux_income_groups.Rd similarity index 89% rename from man/pip_income_groups.Rd rename to man/aux_income_groups.Rd index 100ad5e..75829fd 100644 --- a/man/pip_income_groups.Rd +++ b/man/aux_income_groups.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_income_groups.R -\name{pip_income_groups} -\alias{pip_income_groups} +% Please edit documentation in R/aux_income_groups.R +\name{aux_income_groups} +\alias{aux_income_groups} \title{PIP series of income group} \usage{ -pip_income_groups( +aux_income_groups( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/aux_indicators.Rd b/man/aux_indicators.Rd new file mode 100644 index 0000000..fd6c9bc --- /dev/null +++ b/man/aux_indicators.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_indicators.R +\name{aux_indicators} +\alias{aux_indicators} +\title{PIP Indicators} +\usage{ +aux_indicators( + action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch) +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{maindir}{character: Main directory of project.} + +\item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} +} +\description{ +Update or load a dataset with the indicators master sheet. +} diff --git a/man/pip_aux_labels.Rd b/man/aux_labels_pip.Rd similarity index 53% rename from man/pip_aux_labels.Rd rename to man/aux_labels_pip.Rd index 4f7525e..1d0190b 100644 --- a/man/pip_aux_labels.Rd +++ b/man/aux_labels_pip.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_aux_labels.R -\name{pip_aux_labels} -\alias{pip_aux_labels} -\title{pip_aux_labels} +% Please edit documentation in R/aux_labels_pip.R +\name{aux_labels_pip} +\alias{aux_labels_pip} +\title{PIP Auxiliary Labels} \usage{ -pip_aux_labels(x, measure) +aux_labels_pip(x, measure) } \arguments{ \item{x}{Data frame to be labeled.} @@ -12,6 +12,6 @@ pip_aux_labels(x, measure) \item{measure}{type of data frame, e.g., "cpi" or "PPP".} } \description{ -pip_aux_labels +PIP Auxiliary Labels } \keyword{internal} diff --git a/man/aux_maddison.Rd b/man/aux_maddison.Rd new file mode 100644 index 0000000..e63e31f --- /dev/null +++ b/man/aux_maddison.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_maddison.R +\name{aux_maddison} +\alias{aux_maddison} +\title{Maddison data} +\usage{ +aux_maddison( + action = c("update", "load"), + owner = getOption("pipfun.ghowner"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{owner}{character: Github repo owner. 
Default is +\code{getOption("pipfun.ghowner")}} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Load or update data from the Maddison project. +} diff --git a/man/aux_metadata.Rd b/man/aux_metadata.Rd new file mode 100644 index 0000000..748a91e --- /dev/null +++ b/man/aux_metadata.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_metadata.R +\name{aux_metadata} +\alias{aux_metadata} +\title{PIP Survey Metadata} +\usage{ +aux_metadata( + action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{maindir}{character: Main directory of project.} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Update or load a dataset with survey metadata. 
+} diff --git a/man/pip_metadata_update.Rd b/man/aux_metadata_update.Rd similarity index 75% rename from man/pip_metadata_update.Rd rename to man/aux_metadata_update.Rd index 29d7fc0..8e255a2 100644 --- a/man/pip_metadata_update.Rd +++ b/man/aux_metadata_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metadata_update.R -\name{pip_metadata_update} -\alias{pip_metadata_update} +% Please edit documentation in R/aux_metadata_update.R +\name{aux_metadata_update} +\alias{aux_metadata_update} \title{Update metadata file} \usage{ -pip_metadata_update( +aux_metadata_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), @@ -14,6 +14,10 @@ pip_metadata_update( ) } \arguments{ +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/aux_metaregion.Rd b/man/aux_metaregion.Rd index 1dd3587..891e4a5 100644 --- a/man/aux_metaregion.Rd +++ b/man/aux_metaregion.Rd @@ -14,6 +14,14 @@ aux_metaregion( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in +memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/aux_missing_data.Rd b/man/aux_missing_data.Rd index 5bb03a7..06d054a 100644 --- a/man/aux_missing_data.Rd +++ b/man/aux_missing_data.Rd @@ -14,9 +14,17 @@ aux_missing_data( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. 
If "load" data is loaded in +memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_npl.Rd b/man/aux_npl.Rd similarity index 92% rename from man/pip_npl.Rd rename to man/aux_npl.Rd index d73dd03..d796196 100644 --- a/man/pip_npl.Rd +++ b/man/aux_npl.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_npl.R -\name{pip_npl} -\alias{pip_npl} +% Please edit documentation in R/aux_npl.R +\name{aux_npl} +\alias{aux_npl} \title{National Poverty headcount} \usage{ -pip_npl( +aux_npl( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pce.Rd b/man/aux_pce.Rd similarity index 70% rename from man/pip_pce.Rd rename to man/aux_pce.Rd index 91585d8..59d9bdd 100644 --- a/man/pip_pce.Rd +++ b/man/aux_pce.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pce.R -\name{pip_pce} -\alias{pip_pce} +% Please edit documentation in R/aux_pce.R +\name{aux_pce} +\alias{aux_pce} \title{PIP PCE} \usage{ -pip_pce( +aux_pce( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), @@ -16,9 +16,16 @@ pip_pce( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_pce_update.Rd b/man/aux_pce_update.Rd similarity index 79% rename from man/pip_pce_update.Rd rename to man/aux_pce_update.Rd index 58f027b..6190a2b 100644 --- a/man/pip_pce_update.Rd +++ b/man/aux_pce_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pce_update.R -\name{pip_pce_update} -\alias{pip_pce_update} +% Please edit documentation in R/aux_pce_update.R +\name{aux_pce_update} +\alias{aux_pce_update} \title{Update PCE} \usage{ -pip_pce_update( +aux_pce_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), @@ -15,6 +15,10 @@ pip_pce_update( ) } \arguments{ +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/aux_pfw.Rd b/man/aux_pfw.Rd index 1c4bbc4..5606c17 100644 --- a/man/aux_pfw.Rd +++ b/man/aux_pfw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw.R +% Please edit documentation in R/aux_pfw.R \name{aux_pfw} \alias{aux_pfw} \title{PIP PFW} diff --git a/man/aux_pfw_clean.Rd b/man/aux_pfw_clean.Rd index fcb9034..d0bd814 100644 --- a/man/aux_pfw_clean.Rd +++ b/man/aux_pfw_clean.Rd @@ -9,6 +9,8 @@ aux_pfw_clean(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) \arguments{ \item{y}{dataset with PPP data from \code{aux_pfw_update()}.} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} } diff --git a/man/pip_pfw_key.Rd b/man/aux_pfw_key.Rd similarity index 64% rename from man/pip_pfw_key.Rd rename to man/aux_pfw_key.Rd index 5afcf90..d5f9d5e 100644 --- a/man/pip_pfw_key.Rd +++ b/man/aux_pfw_key.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_key.R -\name{pip_pfw_key} -\alias{pip_pfw_key} +% Please edit documentation in R/aux_pfw_key.R +\name{aux_pfw_key} +\alias{aux_pfw_key} \title{Generate a dataset that contains pfw keys} \usage{ -pip_pfw_key() +aux_pfw_key() } \value{ data.table diff --git a/man/aux_pfw_update.Rd b/man/aux_pfw_update.Rd new file mode 100644 index 0000000..ef99acd --- /dev/null +++ b/man/aux_pfw_update.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_pfw_update.R +\name{aux_pfw_update} +\alias{aux_pfw_update} +\title{Update PFW} +\usage{ +aux_pfw_update( + maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{maindir}{character: Main directory of project.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Update PFW +} +\keyword{internal} diff --git a/man/aux_pl.Rd b/man/aux_pl.Rd new file mode 100644 index 0000000..d1c300e --- /dev/null +++ b/man/aux_pl.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_pl.R +\name{aux_pl} +\alias{aux_pl} +\title{Poverty lines} +\usage{ +aux_pl( + action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{maindir}{character: Main directory of project.} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Update or load a dataset with poverty lines. 
+} diff --git a/man/pip_pop.Rd b/man/aux_pop.Rd similarity index 92% rename from man/pip_pop.Rd rename to man/aux_pop.Rd index a20a57a..f541001 100644 --- a/man/pip_pop.Rd +++ b/man/aux_pop.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop.R -\name{pip_pop} -\alias{pip_pop} +% Please edit documentation in R/aux_pop.R +\name{aux_pop} +\alias{aux_pop} \title{PIP POP} \usage{ -pip_pop( +aux_pop( action = c("update", "load"), force = FALSE, from = c("gh", "file", "api"), diff --git a/man/pip_pop_update.Rd b/man/aux_pop_update.Rd similarity index 88% rename from man/pip_pop_update.Rd rename to man/aux_pop_update.Rd index 054bd38..c1fca11 100644 --- a/man/pip_pop_update.Rd +++ b/man/aux_pop_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R -\name{pip_pop_update} -\alias{pip_pop_update} +% Please edit documentation in R/aux_pop_update.R +\name{aux_pop_update} +\alias{aux_pop_update} \title{Update POP} \usage{ -pip_pop_update( +aux_pop_update( force = FALSE, from = c("gh", "file", "api"), maindir = gls$PIP_DATA_DIR, diff --git a/man/aux_ppp.Rd b/man/aux_ppp.Rd new file mode 100644 index 0000000..e585f4f --- /dev/null +++ b/man/aux_ppp.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_ppp.R +\name{aux_ppp} +\alias{aux_ppp} +\title{PIP PPP} +\usage{ +aux_ppp( + action = c("update", "load"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + force = FALSE, + tag = branch, + detail = getOption("pipaux.detail.raw"), + ppp_defaults = TRUE +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{maindir}{character: Main directory of project.} + +\item{owner}{character: Github repo owner. 
Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Load or update PPP data. +} diff --git a/man/pip_ppp_update.Rd b/man/aux_ppp_update.Rd similarity index 85% rename from man/pip_ppp_update.Rd rename to man/aux_ppp_update.Rd index 2faedb9..e5f21d9 100644 --- a/man/pip_ppp_update.Rd +++ b/man/aux_ppp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp_update.R -\name{pip_ppp_update} -\alias{pip_ppp_update} +% Please edit documentation in R/aux_ppp_update.R +\name{aux_ppp_update} +\alias{aux_ppp_update} \title{Update PPP} \usage{ -pip_ppp_update( +aux_ppp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_prices.Rd b/man/aux_prices.Rd similarity index 87% rename from man/pip_prices.Rd rename to man/aux_prices.Rd index b103845..fbea4cb 100644 --- a/man/pip_prices.Rd +++ b/man/aux_prices.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_prices.R -\name{pip_prices} -\alias{pip_prices} +% Please edit documentation in R/aux_prices.R +\name{aux_prices} +\alias{aux_prices} \title{PIP Prices} \usage{ -pip_prices( +aux_prices( measure = NULL, action = "update", maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_regions.Rd b/man/aux_regions.Rd similarity index 90% rename from man/pip_regions.Rd rename to man/aux_regions.Rd index 8b91baf..5c70ac7 100644 --- a/man/pip_regions.Rd +++ b/man/aux_regions.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_regions.R -\name{pip_regions} -\alias{pip_regions} +% 
Please edit documentation in R/aux_regions.R +\name{aux_regions} +\alias{aux_regions} \title{PIP Regions} \usage{ -pip_regions( +aux_regions( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/aux_sign_save.Rd b/man/aux_sign_save.Rd index 0b56f13..d5d3f50 100644 --- a/man/aux_sign_save.Rd +++ b/man/aux_sign_save.Rd @@ -12,6 +12,8 @@ aux_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) \item{msrdir}{character: Directory where the data and data signature will be saved.} +\item{force}{logical: If TRUE data will be overwritten.} + \item{save_dta}{logical: If TRUE a Stata (.dta) version of the dataset is also saved.} } diff --git a/man/aux_update_all.Rd b/man/aux_update_all.Rd index 48c357f..fff2d92 100644 --- a/man/aux_update_all.Rd +++ b/man/aux_update_all.Rd @@ -11,7 +11,11 @@ aux_update_all( ) } \arguments{ +\item{force}{logical: If TRUE data will be overwritten.} + \item{popsrc}{character: Source for population data. Defaults to \code{getOption("pipaux.popsrc")}.} + +\item{maindir}{character: Main directory of project.} } \description{ Update all auxiliary data at once diff --git a/man/pip_wdi.Rd b/man/aux_wdi.Rd similarity index 70% rename from man/pip_wdi.Rd rename to man/aux_wdi.Rd index 92ca1c5..530f088 100644 --- a/man/pip_wdi.Rd +++ b/man/aux_wdi.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_wdi.R -\name{pip_wdi} -\alias{pip_wdi} +% Please edit documentation in R/aux_wdi.R +\name{aux_wdi} +\alias{aux_wdi} \title{PIP wdi} \usage{ -pip_wdi( +aux_wdi( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, @@ -16,6 +16,13 @@ pip_wdi( ) } \arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. 
If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} diff --git a/man/pip_wdi_update.Rd b/man/aux_wdi_update.Rd similarity index 79% rename from man/pip_wdi_update.Rd rename to man/aux_wdi_update.Rd index 307b5ac..e24963a 100644 --- a/man/pip_wdi_update.Rd +++ b/man/aux_wdi_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_wdi_update.R -\name{pip_wdi_update} -\alias{pip_wdi_update} +% Please edit documentation in R/aux_wdi_update.R +\name{aux_wdi_update} +\alias{aux_wdi_update} \title{Update National accounts data from WDI} \usage{ -pip_wdi_update( +aux_wdi_update( force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), @@ -15,6 +15,10 @@ pip_wdi_update( ) } \arguments{ +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} @@ -35,5 +39,5 @@ data.table with gdp and pce variables GDP and HFCE data from WDI. It could be either from API or from file } \examples{ -pip_wdi_update() +aux_wdi_update() } diff --git a/man/pip_weo.Rd b/man/aux_weo.Rd similarity index 77% rename from man/pip_weo.Rd rename to man/aux_weo.Rd index d89d7d6..47f4e68 100644 --- a/man/pip_weo.Rd +++ b/man/aux_weo.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_weo.R -\name{pip_weo} -\alias{pip_weo} +% Please edit documentation in R/aux_weo.R +\name{aux_weo} +\alias{aux_weo} \title{Fetch GDP data from WEO} \usage{ -pip_weo( +aux_weo( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), @@ -15,9 +15,16 @@ pip_weo( ) } \arguments{ +\item{action}{character: Either "load" or "update". 
Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + \item{owner}{character: Github repo owner. Default is \code{getOption("pipfun.ghowner")}} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pip_weo_clean.Rd b/man/aux_weo_clean.Rd similarity index 77% rename from man/pip_weo_clean.Rd rename to man/aux_weo_clean.Rd index 4fa165a..18c68fc 100644 --- a/man/pip_weo_clean.Rd +++ b/man/aux_weo_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_weo_clean.R -\name{pip_weo_clean} -\alias{pip_weo_clean} +% Please edit documentation in R/aux_weo_clean.R +\name{aux_weo_clean} +\alias{aux_weo_clean} \title{Clean WEO data} \usage{ -pip_weo_clean( +aux_weo_clean( dt, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main") diff --git a/man/clean_from_wide.Rd b/man/clean_from_wide.Rd index 22819be..8794185 100644 --- a/man/clean_from_wide.Rd +++ b/man/clean_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R +% Please edit documentation in R/aux_pop_update.R \name{clean_from_wide} \alias{clean_from_wide} \title{Clean from WDI format} diff --git a/man/clean_names_from_wide.Rd b/man/clean_names_from_wide.Rd index fb138f9..7a73217 100644 --- a/man/clean_names_from_wide.Rd +++ b/man/clean_names_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R +% Please edit documentation in R/aux_pop_update.R \name{clean_names_from_wide} \alias{clean_names_from_wide} \title{Clean names from wide WDI format} diff --git a/man/cpi_validate_output.Rd b/man/cpi_validate_output.Rd index ca705b9..09767fc 100644 --- 
a/man/cpi_validate_output.Rd +++ b/man/cpi_validate_output.Rd @@ -7,7 +7,7 @@ cpi_validate_output(cpi, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{cpi}{clean cpi data, output via \code{pip_cpi_clean}} +\item{cpi}{clean cpi data, output via \code{aux_cpi_clean}} \item{detail}{has an option TRUE/FALSE, default value is FALSE} } diff --git a/man/fake_aux_sna.Rd b/man/fake_aux_sna.Rd index 05f58a5..a0b593d 100644 --- a/man/fake_aux_sna.Rd +++ b/man/fake_aux_sna.Rd @@ -29,6 +29,9 @@ fake_aux_sna( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" +and "gh" are synonymous} } \description{ Fake PIP SNA function diff --git a/man/load_aux.Rd b/man/load_aux.Rd index c57f0ec..16180eb 100644 --- a/man/load_aux.Rd +++ b/man/load_aux.Rd @@ -15,6 +15,8 @@ load_aux( \arguments{ \item{measure}{character: measure to be loaded} +\item{maindir}{character: Main directory of project.} + \item{branch}{character: either "DEV" or "PROD". 
Refers to the branch that will be used to update either the development server or production.} diff --git a/man/pfw_validate_output.Rd b/man/pfw_validate_output.Rd index c2c29b1..2d32ac2 100644 --- a/man/pfw_validate_output.Rd +++ b/man/pfw_validate_output.Rd @@ -7,7 +7,7 @@ pfw_validate_output(pfw, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{pfw}{clean pfw data, output via \code{pip_pfw_clean}} +\item{pfw}{clean pfw data, output via \code{aux_pfw_clean}} \item{detail}{has an option TRUE/FALSE, default value is FALSE} } diff --git a/man/pip_indicators.Rd b/man/pip_indicators.Rd deleted file mode 100644 index 63c09fb..0000000 --- a/man/pip_indicators.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_indicators.R -\name{pip_indicators} -\alias{pip_indicators} -\title{PIP Indicators} -\usage{ -pip_indicators( - action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} -} -\description{ -Update or load a dataset with the indicators master sheet. 
-} diff --git a/man/pip_maddison.Rd b/man/pip_maddison.Rd deleted file mode 100644 index 98fbe6b..0000000 --- a/man/pip_maddison.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_maddison.R -\name{pip_maddison} -\alias{pip_maddison} -\title{Maddison data} -\usage{ -pip_maddison( - action = c("update", "load"), - owner = getOption("pipfun.ghowner"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Load or update data from the Maddison project. -} diff --git a/man/pip_metadata.Rd b/man/pip_metadata.Rd deleted file mode 100644 index bdef629..0000000 --- a/man/pip_metadata.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metadata.R -\name{pip_metadata} -\alias{pip_metadata} -\title{PIP Survey Metadata} -\usage{ -pip_metadata( - action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Update or load a dataset with survey metadata. 
-} diff --git a/man/pip_pfw_update.Rd b/man/pip_pfw_update.Rd deleted file mode 100644 index 7c98a32..0000000 --- a/man/pip_pfw_update.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_update.R -\name{pip_pfw_update} -\alias{pip_pfw_update} -\title{Update PFW} -\usage{ -pip_pfw_update( - maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} -} -\description{ -Update PFW -} -\keyword{internal} diff --git a/man/pip_pl.Rd b/man/pip_pl.Rd deleted file mode 100644 index 80cb70c..0000000 --- a/man/pip_pl.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pl.R -\name{pip_pl} -\alias{pip_pl} -\title{Poverty lines} -\usage{ -pip_pl( - action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Update or load a dataset with poverty lines. 
-} diff --git a/man/pip_ppp.Rd b/man/pip_ppp.Rd deleted file mode 100644 index a92033d..0000000 --- a/man/pip_ppp.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp.R -\name{pip_ppp} -\alias{pip_ppp} -\title{PIP PPP} -\usage{ -pip_ppp( - action = c("update", "load"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - force = FALSE, - tag = branch, - detail = getOption("pipaux.detail.raw"), - ppp_defaults = TRUE -) -} -\arguments{ -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Load or update PPP data. -} From 3a34565402c8591526dbe4979ce1b6225526bb6b Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 22 Nov 2024 18:03:20 -0500 Subject: [PATCH 05/20] try gen function to call any aux fun --- R/utils.R | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/R/utils.R b/R/utils.R index 5375b09..ad37509 100644 --- a/R/utils.R +++ b/R/utils.R @@ -409,12 +409,12 @@ get_gh <- function(owner, } #' SAve auxiliary file to Github Repo -#' -#' Sometimes we need to save auxiliary files to Github repo. +#' +#' Sometimes we need to save auxiliary files to Github repo. #' This function allows for this. #' #' @inheritParams pipfun::save_to_gh -#' @export +#' @export #' @return NULL save_aux_to_gh <- function(df, measure, @@ -426,13 +426,36 @@ save_aux_to_gh <- function(df, ext = "csv", ... ) { - + pipfun::save_to_gh(df = df, - repo = repo, + repo = repo, owner = owner, branch = branch, tag = tag, filename = filename, ext = ext, ...) 
-} \ No newline at end of file +} + +#' Call auxiliary function for a specified measure +#' @param measure character: name of auxiliary data measure +#' @param ... additional arguments to pass +#' +call_aux_function <- function(measure, + package_name = "pipaux", + ...) { + + stopifnot(exprs = exists(function_name, envir = asNamespace(package_name))) + function_name <- paste0("aux_", measure) + + func <- get(function_name, envir = asNamespace(package_name)) + + return(func(...)) +} + + + + + + + From ff1e0c6202a169367f9a051a21c802c1de6fef2c Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 12 Dec 2024 11:13:10 -0500 Subject: [PATCH 06/20] fix to gen function --- R/utils.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index ad37509..3f4ac3f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -445,12 +445,14 @@ call_aux_function <- function(measure, package_name = "pipaux", ...) { - stopifnot(exprs = exists(function_name, envir = asNamespace(package_name))) function_name <- paste0("aux_", measure) + stopifnot(exprs = exists(function_name, + envir = asNamespace(package_name))) + func <- get(function_name, envir = asNamespace(package_name)) - return(func(...)) + return(func) } From 336fdf7698db22a65958bc528b66b438e561206d Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 12 Dec 2024 13:58:56 -0500 Subject: [PATCH 07/20] fix again --- R/utils.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 3f4ac3f..0adcf51 100644 --- a/R/utils.R +++ b/R/utils.R @@ -447,12 +447,17 @@ call_aux_function <- function(measure, function_name <- paste0("aux_", measure) - stopifnot(exprs = exists(function_name, - envir = asNamespace(package_name))) + if (!exists(function_name, + envir = asNamespace(package_name))) { + cli::cli_abort(paste0("Function '", function_name, "' does not exist in the '", package_name, "' package.")) + } - func <- get(function_name, envir = asNamespace(package_name)) 
+ # Get the function + func <- get(function_name, + envir = asNamespace(package_name)) - return(func) + # Call the function with additional arguments + func(...) } From b2e1a920734f945f7c7a7d6611beebfc126803e0 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 12 Dec 2024 14:01:05 -0500 Subject: [PATCH 08/20] documentation --- R/utils.R | 9 +++++++-- man/call_aux_function.Rd | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 man/call_aux_function.Rd diff --git a/R/utils.R b/R/utils.R index 0adcf51..871850d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -438,9 +438,14 @@ save_aux_to_gh <- function(df, } #' Call auxiliary function for a specified measure -#' @param measure character: name of auxiliary data measure -#' @param ... additional arguments to pass #' +#' @param measure character: Name of the auxiliary data measure (e.g., "ppp"). +#' @param package_name character: Name of the package containing the auxiliary function (default is "pipaux"). +#' @param ... additional arguments to pass to the auxiliary function. +#' +#' @return The result of the auxiliary function call. +#' @examples +#' \donotrun{call_aux_function("ppp")} call_aux_function <- function(measure, package_name = "pipaux", ...) { diff --git a/man/call_aux_function.Rd b/man/call_aux_function.Rd new file mode 100644 index 0000000..c74dd4e --- /dev/null +++ b/man/call_aux_function.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{call_aux_function} +\alias{call_aux_function} +\title{Call auxiliary function for a specified measure} +\usage{ +call_aux_function(measure, package_name = "pipaux", ...) 
+} +\arguments{ +\item{measure}{character: Name of the auxiliary data measure (e.g., "ppp").} + +\item{package_name}{character: Name of the package containing the auxiliary function (default is "pipaux").} + +\item{...}{additional arguments to pass to the auxiliary function.} +} +\value{ +The result of the auxiliary function call. +} +\description{ +Call auxiliary function for a specified measure +} +\examples{ +\donotrun{call_aux_function("ppp")} +} From 4b4b15703d4bfe697832a9fdecde28c32a5b4a3d Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 13 Dec 2024 10:47:00 -0500 Subject: [PATCH 09/20] documentation --- R/utils.R | 8 +++----- man/call_aux_function.Rd | 9 +++------ 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/R/utils.R b/R/utils.R index 871850d..0ba377b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -439,13 +439,11 @@ save_aux_to_gh <- function(df, #' Call auxiliary function for a specified measure #' -#' @param measure character: Name of the auxiliary data measure (e.g., "ppp"). -#' @param package_name character: Name of the package containing the auxiliary function (default is "pipaux"). -#' @param ... additional arguments to pass to the auxiliary function. +#' @param measure character: Name of the auxiliary data measure (e.g., "ppp") +#' @param package_name character: Name of the package containing the auxiliary function (default is "pipaux") +#' @param ... additional arguments to pass to the auxiliary function #' #' @return The result of the auxiliary function call. -#' @examples -#' \donotrun{call_aux_function("ppp")} call_aux_function <- function(measure, package_name = "pipaux", ...) { diff --git a/man/call_aux_function.Rd b/man/call_aux_function.Rd index c74dd4e..ae2c356 100644 --- a/man/call_aux_function.Rd +++ b/man/call_aux_function.Rd @@ -7,11 +7,11 @@ call_aux_function(measure, package_name = "pipaux", ...) 
} \arguments{ -\item{measure}{character: Name of the auxiliary data measure (e.g., "ppp").} +\item{measure}{character: Name of the auxiliary data measure (e.g., "ppp")} -\item{package_name}{character: Name of the package containing the auxiliary function (default is "pipaux").} +\item{package_name}{character: Name of the package containing the auxiliary function (default is "pipaux")} -\item{...}{additional arguments to pass to the auxiliary function.} +\item{...}{additional arguments to pass to the auxiliary function} } \value{ The result of the auxiliary function call. @@ -19,6 +19,3 @@ The result of the auxiliary function call. \description{ Call auxiliary function for a specified measure } -\examples{ -\donotrun{call_aux_function("ppp")} -} From 2712432888b0be8f09f319fc1be801ed898ae84a Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 13 Dec 2024 11:29:04 -0500 Subject: [PATCH 10/20] initialize new vignette --- vignettes/managing_release_branches.Rmd | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 vignettes/managing_release_branches.Rmd diff --git a/vignettes/managing_release_branches.Rmd b/vignettes/managing_release_branches.Rmd new file mode 100644 index 0000000..4525530 --- /dev/null +++ b/vignettes/managing_release_branches.Rmd @@ -0,0 +1,21 @@ +--- +title: "Managing Branches of Aux Repos" +subtitle: "test" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Managing Branches of Aux Repos} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +#library(pipaux) +devtools::load_all(".") +``` From 845b019a7cbfc28cc067d27e746eb405a2d5c9dc Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Fri, 13 Dec 2024 13:00:48 -0500 Subject: [PATCH 11/20] version one of vignette, to be checked --- vignettes/managing_release_branches.Rmd | 76 +++++++++++++++++++++++++ 1 file changed, 76 insertions(+) 
diff --git a/vignettes/managing_release_branches.Rmd b/vignettes/managing_release_branches.Rmd index 4525530..f83fd48 100644 --- a/vignettes/managing_release_branches.Rmd +++ b/vignettes/managing_release_branches.Rmd @@ -19,3 +19,79 @@ knitr::opts_chunk$set( #library(pipaux) devtools::load_all(".") ``` + +The {pipaux} package manages auxiliary data used across PIP. It includes functions like aux_ppp() for PPP data and aux_gdp() for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions both save the updated dataset locally and push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. + +To ensure that the version of auxiliary data used for each release is properly tracked, 2 steps are necessary: + +- Each auxiliary data repository should have a dedicated release branch. +- The release branch must be periodically updated with changes from the DEV branch. +To facilitate the management of release branches and synchronization with the DEV branch, the {pipfun} package provides functions designed for this purpose. + +Specifically, + +### 1. Check if release branch is there, and if not, create it + +```{r setup} +# install.packages("pak") +pak::pak("PIP-Technical-Team/pipfun@DEV_v2") +library("pipfun") + + +``` + + +```{r} + +# Get branches in repo + +repo_branches <- pipfun::get_repo_branches(repo = "aux_test", owner = getOption("pipfun.ghowner")) +repo_branches + +# Example with ppp repo + +# pipfun::get_repo_branches(repo = "aux_ppp", owner = getOption("pipfun.ghowner")) + + +# As no release branch is found, create it from DEV +create_new_branch(new_branch = "20241005", + ref_branch = "DEV", + repo = "aux_test") + +``` + + +### 2. 
Check that release branch is updated with DEV +```{r} + +pipfun::compare_branch_content(repo = "aux_test", + branch1 = "DEV", + branch2 = "20241005") + +``` + +### 3. Update release branch if needed + +In this case, given that the release branch has just been created, it is up to date with DEV. However, had `same_content' been FALSE, we need to update release branch accordingly as follows: + + +```{r} + +# Option 1: let release branch point to the latest commit as in DEV +pipfun::update_branches(repo = "aux_test", + branch1 = "DEV", + branch2 = "20241005") + +# Option 2: Merge DEV into release branch + +pipfun::merge_branch_into(repo = "aux_test", + source_branch = "DEV", + target_branch = "20241005") +``` + + + + + + + From ee87ec8a4f77892ce965def186f5721a1d4b9239 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Wed, 18 Dec 2024 10:53:41 -0500 Subject: [PATCH 12/20] finalize vignette --- R/utils.R | 3 +- vignettes/managing_release_branches.Rmd | 43 +++++++++++++------------ 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/R/utils.R b/R/utils.R index 0ba377b..c68e2e5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -448,7 +448,8 @@ call_aux_function <- function(measure, package_name = "pipaux", ...) 
{ - function_name <- paste0("aux_", measure) + function_name <- paste0("aux_", + measure) if (!exists(function_name, envir = asNamespace(package_name))) { diff --git a/vignettes/managing_release_branches.Rmd b/vignettes/managing_release_branches.Rmd index f83fd48..0577c15 100644 --- a/vignettes/managing_release_branches.Rmd +++ b/vignettes/managing_release_branches.Rmd @@ -1,9 +1,9 @@ --- -title: "Managing Branches of Aux Repos" -subtitle: "test" +title: "Auxiliary Data - Version Control" +subtitle: "Release branch management" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Managing Branches of Aux Repos} + %\VignetteIndexEntry{Auxiliary Data - Version Control} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -20,23 +20,23 @@ knitr::opts_chunk$set( devtools::load_all(".") ``` -The {pipaux} package manages auxiliary data used across PIP. It includes functions like aux_ppp() for PPP data and aux_gdp() for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions both save the updated dataset locally and push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. +The {pipaux} package manages auxiliary data used across PIP. It includes functions like `aux_ppp()` for PPP data and `aux_gdp()` for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions both save the updated dataset locally and push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. 
To ensure that the version of auxiliary data used for each release is properly tracked, 2 steps are necessary: -- Each auxiliary data repository should have a dedicated release branch. -- The release branch must be periodically updated with changes from the DEV branch. +- Each auxiliary data repository should have a dedicated **release branch**. +- The release branch must be periodically **updated** with changes from the **DEV branch**. To facilitate the management of release branches and synchronization with the DEV branch, the {pipfun} package provides functions designed for this purpose. Specifically, ### 1. Check if release branch is there, and if not, create it -```{r setup} +```{r initial-setup} # install.packages("pak") -pak::pak("PIP-Technical-Team/pipfun@DEV_v2") -library("pipfun") +#pak::pak("PIP-Technical-Team/pipfun@DEV_v2") +library("pipfun") ``` @@ -45,18 +45,21 @@ library("pipfun") # Get branches in repo -repo_branches <- pipfun::get_repo_branches(repo = "aux_test", owner = getOption("pipfun.ghowner")) +# Example 1: TEST repo +repo_branches <- pipfun::get_repo_branches(repo = "aux_test", + owner = getOption("pipfun.ghowner")) repo_branches -# Example with ppp repo +# Example 2: PPP repo -# pipfun::get_repo_branches(repo = "aux_ppp", owner = getOption("pipfun.ghowner")) +pipfun::get_repo_branches(repo = "aux_ppp", + owner = getOption("pipfun.ghowner")) # As no release branch is found, create it from DEV -create_new_branch(new_branch = "20241005", - ref_branch = "DEV", - repo = "aux_test") +# create_new_branch(new_branch = "20241005", +# ref_branch = "DEV", +# repo = "aux_ppp") ``` @@ -64,7 +67,7 @@ create_new_branch(new_branch = "20241005", ### 2. Check that release branch is updated with DEV ```{r} -pipfun::compare_branch_content(repo = "aux_test", +pipfun::compare_branch_content(repo = "aux_test", branch1 = "DEV", branch2 = "20241005") @@ -72,19 +75,19 @@ pipfun::compare_branch_content(repo = "aux_test", ### 3. 
Update release branch if needed -In this case, given that the release branch has just been created, it is up to date with DEV. However, had `same_content' been FALSE, we need to update release branch accordingly as follows: +In this case, given that the release branch has just been created, it is up to date with DEV. However, had "same_content" been `FALSE`, we need to update release branch accordingly as follows: ```{r} -# Option 1: let release branch point to the latest commit as in DEV -pipfun::update_branches(repo = "aux_test", +# Option 1: Make release branch point to the latest commit as in DEV +pipfun::update_branches(repo = "aux_test", branch1 = "DEV", branch2 = "20241005") # Option 2: Merge DEV into release branch -pipfun::merge_branch_into(repo = "aux_test", +pipfun::merge_branch_into(repo = "aux_test", source_branch = "DEV", target_branch = "20241005") ``` From c3cbad1f1ef69efa3deb38157e2615bd00d00d15 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Mon, 23 Dec 2024 12:46:41 -0500 Subject: [PATCH 13/20] fix vignette --- vignettes/managing_release_branches.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/managing_release_branches.Rmd b/vignettes/managing_release_branches.Rmd index 0577c15..8589217 100644 --- a/vignettes/managing_release_branches.Rmd +++ b/vignettes/managing_release_branches.Rmd @@ -20,7 +20,7 @@ knitr::opts_chunk$set( devtools::load_all(".") ``` -The {pipaux} package manages auxiliary data used across PIP. It includes functions like `aux_ppp()` for PPP data and `aux_gdp()` for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions both save the updated dataset locally and push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. 
+The {pipaux} package manages auxiliary data used across PIP. It includes functions like `aux_ppp()` for PPP data and `aux_gdp()` for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. To ensure that the version of auxiliary data used for each release is properly tracked, 2 steps are necessary: From cb5f36c22b78af2482fefc27fa8f4116be48619a Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 14 Jan 2025 16:32:04 -0500 Subject: [PATCH 14/20] small fixes --- R/load_aux.R | 2 +- R/utils.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/load_aux.R b/R/load_aux.R index 9d8d712..7601f30 100644 --- a/R/load_aux.R +++ b/R/load_aux.R @@ -38,7 +38,7 @@ load_aux <- function(measure, if (apply_label) { - df <- pip_aux_labels(df, measure = measure) + df <- aux_labels_pip(df, measure = measure) } if (inherits(df, "data.frame")) { diff --git a/R/utils.R b/R/utils.R index c68e2e5..8a74461 100644 --- a/R/utils.R +++ b/R/utils.R @@ -445,7 +445,6 @@ save_aux_to_gh <- function(df, #' #' @return The result of the auxiliary function call. call_aux_function <- function(measure, - package_name = "pipaux", ...) { function_name <- paste0("aux_", From 570fae8eb04928400dd806d512d7929878ec9eea Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Wed, 15 Jan 2025 12:12:20 -0500 Subject: [PATCH 15/20] more fixes --- R/utils.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 8a74461..04f2227 100644 --- a/R/utils.R +++ b/R/utils.R @@ -444,20 +444,20 @@ save_aux_to_gh <- function(df, #' @param ... 
additional arguments to pass to the auxiliary function #' #' @return The result of the auxiliary function call. -call_aux_function <- function(measure, +aux <- function(measure, ...) { function_name <- paste0("aux_", measure) if (!exists(function_name, - envir = asNamespace(package_name))) { - cli::cli_abort(paste0("Function '", function_name, "' does not exist in the '", package_name, "' package.")) + envir = asNamespace("pipaux"))) { + cli::cli_abort(paste0("Function '", function_name, "' does not exist in the '", "pipaux package.")) } # Get the function func <- get(function_name, - envir = asNamespace(package_name)) + envir = asNamespace("pipaux")) # Call the function with additional arguments func(...) From 21437adecd3fe14c371e21a62ac27d316dcc4c7e Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Wed, 15 Jan 2025 17:50:50 -0500 Subject: [PATCH 16/20] more fixes:change name, rm arg, document, add comments --- R/aux_cpi_update.R | 2 +- R/utils.R | 12 ++++++++---- man/{call_aux_function.Rd => aux_f.Rd} | 8 +++----- 3 files changed, 12 insertions(+), 10 deletions(-) rename man/{call_aux_function.Rd => aux_f.Rd} (66%) diff --git a/R/aux_cpi_update.R b/R/aux_cpi_update.R index 11d8db1..741bd98 100644 --- a/R/aux_cpi_update.R +++ b/R/aux_cpi_update.R @@ -38,7 +38,7 @@ aux_cpi_update <- function(maindir = gls$PIP_DATA_DIR, maindir = maindir, branch = branch) - # changae cpi_year and cpi_data_level to year and reporting_level + # change cpi_year and cpi_data_level to year and reporting_level cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), c("year", "reporting_level"), skip_absent=TRUE) diff --git a/R/utils.R b/R/utils.R index 04f2227..e29b60e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -440,22 +440,26 @@ save_aux_to_gh <- function(df, #' Call auxiliary function for a specified measure #' #' @param measure character: Name of the auxiliary data measure (e.g., "ppp") -#' @param package_name character: Name of the package containing the auxiliary function (default is 
"pipaux") #' @param ... additional arguments to pass to the auxiliary function #' #' @return The result of the auxiliary function call. -aux <- function(measure, - ...) { +aux_f <- function(measure, + ...) { function_name <- paste0("aux_", measure) + # This checks if the function exists in the {pipaux} package namespace + # -- The namespace corresponds to the version currently loaded in the R session + # -- (development version loaded via devtools::load_all() or the installed version) + if (!exists(function_name, envir = asNamespace("pipaux"))) { cli::cli_abort(paste0("Function '", function_name, "' does not exist in the '", "pipaux package.")) } - # Get the function + # Retrieve the function from the {pipaux} namespace + # -- The namespace refers to the version currently loaded in the R session func <- get(function_name, envir = asNamespace("pipaux")) diff --git a/man/call_aux_function.Rd b/man/aux_f.Rd similarity index 66% rename from man/call_aux_function.Rd rename to man/aux_f.Rd index ae2c356..01db382 100644 --- a/man/call_aux_function.Rd +++ b/man/aux_f.Rd @@ -1,16 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R -\name{call_aux_function} -\alias{call_aux_function} +\name{aux_f} +\alias{aux_f} \title{Call auxiliary function for a specified measure} \usage{ -call_aux_function(measure, package_name = "pipaux", ...) +aux_f(measure, ...) 
} \arguments{ \item{measure}{character: Name of the auxiliary data measure (e.g., "ppp")} -\item{package_name}{character: Name of the package containing the auxiliary function (default is "pipaux")} - \item{...}{additional arguments to pass to the auxiliary function} } \value{ From 569bf259352eba00ff928dfbaf4b42a8a74d41dc Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Thu, 16 Jan 2025 11:12:26 -0500 Subject: [PATCH 17/20] remove aux function --- R/utils.R | 32 +------------------------------- man/aux_f.Rd | 19 ------------------- 2 files changed, 1 insertion(+), 50 deletions(-) delete mode 100644 man/aux_f.Rd diff --git a/R/utils.R b/R/utils.R index e29b60e..a5ad1b2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -436,37 +436,7 @@ save_aux_to_gh <- function(df, ext = ext, ...) } - -#' Call auxiliary function for a specified measure -#' -#' @param measure character: Name of the auxiliary data measure (e.g., "ppp") -#' @param ... additional arguments to pass to the auxiliary function -#' -#' @return The result of the auxiliary function call. -aux_f <- function(measure, - ...) { - - function_name <- paste0("aux_", - measure) - - # This checks if the function exists in the {pipaux} package namespace - # -- The namespace corresponds to the version currently loaded in the R session - # -- (development version loaded via devtools::load_all() or the installed version) - - if (!exists(function_name, - envir = asNamespace("pipaux"))) { - cli::cli_abort(paste0("Function '", function_name, "' does not exist in the '", "pipaux package.")) - } - - # Retrieve the function from the {pipaux} namespace - # -- The namespace refers to the version currently loaded in the R session - func <- get(function_name, - envir = asNamespace("pipaux")) - - # Call the function with additional arguments - func(...) 
-} - +# diff --git a/man/aux_f.Rd b/man/aux_f.Rd deleted file mode 100644 index 01db382..0000000 --- a/man/aux_f.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{aux_f} -\alias{aux_f} -\title{Call auxiliary function for a specified measure} -\usage{ -aux_f(measure, ...) -} -\arguments{ -\item{measure}{character: Name of the auxiliary data measure (e.g., "ppp")} - -\item{...}{additional arguments to pass to the auxiliary function} -} -\value{ -The result of the auxiliary function call. -} -\description{ -Call auxiliary function for a specified measure -} From 5e2f1218febbc4aa79b5bde1767cae4ee0a450e7 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 21 Jan 2025 16:09:59 -0500 Subject: [PATCH 18/20] reorganizing files --- R/auto_aux_update.R | 249 -------------- R/aux_countries.R | 58 ++++ R/aux_country_list.R | 333 ++++++++++++++++++ R/aux_country_list_update.R | 257 -------------- R/aux_cp.R | 399 ++++++++++++++++++++++ R/aux_cp_clean.R | 314 ----------------- R/aux_cp_update.R | 82 ----- R/aux_cpi.R | 429 +++++++++++++++++++++++ R/aux_cpi_clean.R | 71 ---- R/aux_cpi_update.R | 72 ---- R/aux_cpi_vintage.R | 92 ----- R/aux_gdm.R | 356 +++++++++++++++++++ R/aux_gdm_update.R | 232 ------------- R/aux_gdp.R | 568 ++++++++++++++++++++++++++++++- R/aux_gdp_update.R | 357 ------------------- R/aux_gdp_weo.R | 158 --------- R/aux_income_groups.R | 52 +++ R/aux_maddison.R | 38 +++ R/aux_metadata.R | 327 ++++++++++++++++++ R/aux_metadata_update.R | 135 -------- R/aux_metaregion.R | 54 --- R/aux_npl.R | 88 +++++ R/aux_pce.R | 308 +++++++++++++++++ R/aux_pce_update.R | 261 -------------- R/aux_pfw.R | 546 +++++++++++++++++++++++++++++ R/aux_pfw_clean.R | 85 ----- R/aux_pfw_key.R | 31 -- R/aux_pfw_update.R | 47 --- R/aux_pl.R | 88 +++++ R/aux_pl_clean.R | 44 --- R/aux_pop.R | 473 +++++++++++++++++++++++++ R/aux_pop_update.R | 288 ---------------- R/aux_ppp.R | 350 +++++++++++++++++++ 
R/aux_ppp_clean.R | 93 ----- R/aux_ppp_update.R | 121 ------- R/aux_sna.R | 112 +++++- R/aux_update_all.R | 46 --- R/aux_wdi.R | 122 +++++++ R/aux_wdi_update.R | 81 ----- R/aux_weo.R | 237 +++++++++++++ R/aux_weo_clean.R | 135 -------- R/cl_validate_raw.R | 73 ---- R/clean_validation_report.R | 12 - R/countries_validate_output.R | 57 ---- R/cpi_validate_output.R | 92 ----- R/cpi_validate_raw.R | 96 ------ R/fake_aux_sna.R | 16 - R/gdm_validate_output.R | 57 ---- R/gdm_validate_raw.R | 64 ---- R/gdp_validate_output.R | 44 --- R/get_error_validation.R | 42 --- R/incgroup_validate_output.R | 50 --- R/load_aux.R | 172 ++++++++++ R/load_raw_aux.R | 170 --------- R/metadata_validate_output.R | 54 --- R/metadata_validate_raw.R | 81 ----- R/mpd_validate_raw.R | 36 -- R/npl_validate_output.R | 40 --- R/npl_validate_raw.R | 46 --- R/pce_validate_output.R | 44 --- R/pfw_validate_output.R | 190 ----------- R/pfw_validate_raw.R | 191 ----------- R/pl_validate_output.R | 40 --- R/pop_validate_output.R | 44 --- R/pop_validate_raw.R | 54 --- R/popmain_validate_raw.R | 40 --- R/ppp_validate_output.R | 54 --- R/ppp_validate_raw.R | 78 ----- R/send_report.R | 34 -- R/sna_fy_validate_raw.R | 41 --- R/sna_validate_raw.R | 49 --- R/spop_validate_raw.R | 40 --- R/update_aux.R | 298 ++++++++++++++++ R/utils-data-table.R | 12 - R/utils-pipe.R | 14 - R/utils.R | 28 ++ R/validation_report.R | 90 +++++ R/wdi_validate_raw.R | 38 --- R/weo_validate_output.R | 36 -- R/weo_validate_raw.R | 62 ---- man/auto_aux_update.Rd | 2 +- man/aux_country_list_update.Rd | 2 +- man/aux_cp_clean.Rd | 2 +- man/aux_cp_update.Rd | 2 +- man/aux_cpi_clean.Rd | 2 +- man/aux_cpi_update.Rd | 2 +- man/aux_cpi_vintage.Rd | 2 +- man/aux_gdm_update.Rd | 2 +- man/aux_gdp_update.Rd | 2 +- man/aux_gdp_weo.Rd | 2 +- man/aux_metadata_update.Rd | 2 +- man/aux_metaregion.Rd | 2 +- man/aux_pce_update.Rd | 2 +- man/aux_pfw_clean.Rd | 2 +- man/aux_pfw_key.Rd | 2 +- man/aux_pfw_update.Rd | 2 +- man/aux_pl_clean.Rd | 2 +- 
man/aux_pop_update.Rd | 2 +- man/aux_ppp_clean.Rd | 2 +- man/aux_ppp_update.Rd | 2 +- man/aux_update_all.Rd | 2 +- man/aux_wdi_update.Rd | 2 +- man/aux_weo_clean.Rd | 2 +- man/cl_validate_raw.Rd | 2 +- man/clean_cp_names.Rd | 2 +- man/clean_from_wide.Rd | 2 +- man/clean_names_from_wide.Rd | 2 +- man/clean_validation_report.Rd | 2 +- man/convert_df_to_base64.Rd | 2 +- man/countries_validate_output.Rd | 2 +- man/cpi_validate_output.Rd | 2 +- man/cpi_validate_raw.Rd | 2 +- man/fake_aux_sna.Rd | 2 +- man/gdm_validate_output.Rd | 2 +- man/gdm_validate_raw.Rd | 2 +- man/gdp_validate_output.Rd | 2 +- man/get_error_validation.Rd | 2 +- man/incgroup_validate_output.Rd | 2 +- man/load_cpi.Rd | 2 +- man/load_raw_aux.Rd | 2 +- man/metadata_validate_output.Rd | 2 +- man/metadata_validate_raw.Rd | 2 +- man/mpd_validate_raw.Rd | 2 +- man/npl_validate_output.Rd | 2 +- man/npl_validate_raw.Rd | 2 +- man/pce_validate_output.Rd | 2 +- man/pfw_validate_output.Rd | 2 +- man/pfw_validate_raw.Rd | 2 +- man/pipe.Rd | 2 +- man/pl_validate_output.Rd | 2 +- man/pop_validate_output.Rd | 2 +- man/pop_validate_raw.Rd | 2 +- man/popmain_validate_raw.Rd | 2 +- man/ppp_validate_output.Rd | 2 +- man/ppp_validate_raw.Rd | 2 +- man/send_report.Rd | 2 +- man/sna_fy_validate_raw.Rd | 2 +- man/sna_validate_raw.Rd | 2 +- man/spop_validate_raw.Rd | 2 +- man/wdi_validate_raw.Rd | 2 +- man/weo_validate_output.Rd | 2 +- man/weo_validate_raw.Rd | 2 +- 142 files changed, 5532 insertions(+), 5460 deletions(-) delete mode 100644 R/auto_aux_update.R delete mode 100644 R/aux_country_list_update.R delete mode 100644 R/aux_cp_clean.R delete mode 100644 R/aux_cp_update.R delete mode 100644 R/aux_cpi_clean.R delete mode 100644 R/aux_cpi_update.R delete mode 100644 R/aux_cpi_vintage.R delete mode 100644 R/aux_gdm_update.R delete mode 100644 R/aux_gdp_update.R delete mode 100644 R/aux_gdp_weo.R delete mode 100644 R/aux_metadata_update.R delete mode 100644 R/aux_metaregion.R delete mode 100644 R/aux_pce_update.R delete 
mode 100644 R/aux_pfw_clean.R delete mode 100644 R/aux_pfw_key.R delete mode 100644 R/aux_pfw_update.R delete mode 100644 R/aux_pl_clean.R delete mode 100644 R/aux_pop_update.R delete mode 100644 R/aux_ppp_clean.R delete mode 100644 R/aux_ppp_update.R delete mode 100644 R/aux_update_all.R delete mode 100644 R/aux_wdi_update.R delete mode 100644 R/aux_weo_clean.R delete mode 100644 R/cl_validate_raw.R delete mode 100644 R/clean_validation_report.R delete mode 100644 R/countries_validate_output.R delete mode 100644 R/cpi_validate_output.R delete mode 100644 R/cpi_validate_raw.R delete mode 100644 R/fake_aux_sna.R delete mode 100644 R/gdm_validate_output.R delete mode 100644 R/gdm_validate_raw.R delete mode 100644 R/gdp_validate_output.R delete mode 100644 R/get_error_validation.R delete mode 100644 R/incgroup_validate_output.R delete mode 100644 R/load_raw_aux.R delete mode 100644 R/metadata_validate_output.R delete mode 100644 R/metadata_validate_raw.R delete mode 100644 R/mpd_validate_raw.R delete mode 100644 R/npl_validate_output.R delete mode 100644 R/npl_validate_raw.R delete mode 100644 R/pce_validate_output.R delete mode 100644 R/pfw_validate_output.R delete mode 100644 R/pfw_validate_raw.R delete mode 100644 R/pl_validate_output.R delete mode 100644 R/pop_validate_output.R delete mode 100644 R/pop_validate_raw.R delete mode 100644 R/popmain_validate_raw.R delete mode 100644 R/ppp_validate_output.R delete mode 100644 R/ppp_validate_raw.R delete mode 100644 R/send_report.R delete mode 100644 R/sna_fy_validate_raw.R delete mode 100644 R/sna_validate_raw.R delete mode 100644 R/spop_validate_raw.R delete mode 100644 R/utils-data-table.R delete mode 100644 R/utils-pipe.R create mode 100644 R/validation_report.R delete mode 100644 R/wdi_validate_raw.R delete mode 100644 R/weo_validate_output.R delete mode 100644 R/weo_validate_raw.R diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R deleted file mode 100644 index 2194e7c..0000000 --- a/R/auto_aux_update.R +++ 
/dev/null @@ -1,249 +0,0 @@ -#' Update the measure along with it's dependencies automatically. -#' -#' @param measure character: measure to be updated, if NULL will update all of -#' them -#' @inheritParams aux_pop_update -#' @export -auto_aux_update <- function(measure = NULL, - force = FALSE, - from = c("gh", "file", "api"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - pipfun::check_pkg_active("pipaux") - - branch <- match.arg(branch) - from <- match.arg(from) - files_changed <- FALSE - - isgls <- ls(sys.frame(), pattern = "^gls$") |> - length() > 0 - - if (isFALSE(isgls)) { - cli::cli_abort( - "object {.var gls} is not available in Globel env. - Run {.code gls <- pipfun::pip_create_globals()} first", - wrap = TRUE - ) - } - - # if there is validation report in the environment - remove it - clean_validation_report() - - creds <- pipfun::get_github_creds() - gh_user <- "https://raw.githubusercontent.com" - org_data <- paste(gh_user, - owner, - "pipaux/metadata/Data/git_metadata.csv", - sep = "/") |> - readr::read_csv(show_col_types = FALSE) - - - dependencies <- read_dependencies(gh_user, owner) - # Get all repositories under PIP-Technical-Team - all_repos <- gh::gh("GET /users/{username}/repos", - username = owner) |> - vapply("[[", "", "name") |> - #Keep only those repos that start with "aux_" - grep("^aux_", x = _, value = TRUE) - - if (!is.null(measure)) { - all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")] - } - # get hashs - hash <- - purrr::map(all_repos, - .f = ~ { - gh::gh( - "GET /repos/{owner}/{repo}/commits/{branch}", - owner = owner, - repo = .x, - branch = branch - ) - }) |> - purrr::map_chr( ~ .x[["sha"]]) - - # Get the latest hash of the repo - all_data <- - dplyr::tibble( - Repo = glue::glue("{owner}/{all_repos}"), - hash = hash, - branch = branch - ) - - old_data <- org_data %>% - dplyr::filter(.data$branch == branch) %>% - 
dplyr::rename(hash_original = hash) - - old_data <- old_data %>% - dplyr::inner_join(all_data, by = c("Repo", "branch")) - - cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") - cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") - cli::cli_alert_info("Both the numbers above should be equal or else some - debugging is required.", wrap = TRUE) - - new_data <- old_data %>% - dplyr::filter(.data$hash != .data$hash_original | - is.na(.data$hash_original) | - is.na(.data$hash)) - - # all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) - - - - # Remove everything till the last underscore so - # PIP-Technical-Team/aux_ppp changes to ppp - aux_fns <- sub(".*_", "", new_data$Repo) |> - # Keep only those whose dependencies we know - intersect(names(dependencies)) - - # For each auxiliary data to be updated - cli::cli_alert_info("Updating data for {length(aux_fns)} files.") - for (aux in aux_fns) { - # Find the corresponding functions to be run - # Add pip_ suffix so that it becomes function name - list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) - - for (fn in list_of_funcs) { - - aux_file <- sub("pip_", "", fn) - cli::cli_alert_info("Running function {fn} for aux file {aux}.") - - before_hash <- read_signature_file(aux_file, maindir, branch) - # Run the pip_.* function - match.fun(fn)(maindir = maindir, branch = branch) |> - suppressMessages() - after_hash <- read_signature_file(aux_file, maindir, branch) - - if (before_hash != after_hash) { - - cli::cli_alert_info("Updating csv for {fn}") - files_changed <- TRUE - - # find rows of of org to be modified - aux_row_org <- org_data$Repo |> - fs::path_file() |> - sub('aux_', '', x = _) %in% aux_file & - org_data$branch == branch - - # find rows in new that will be copied to org - aux_row_new <- new_data$Repo |> - fs::path_file() |> - sub('aux_', '', x = _) %in% aux_file & - new_data$branch == branch - - org_data$hash[aux_row_org] <- 
new_data$hash[aux_row_new] - - } # end of before_hash condition - - } # end of list_of_funcs loop - } # end of aux_fns loop - last_updated_time <- - aux_file_last_updated(maindir, names(dependencies), branch) - if (length(aux_fns) > 0 && files_changed) { - # Write the latest auxiliary file and corresponding hash to csv - # Always save at the end. - # sha - hash object of current csv file in Data/git_metadata.csv - # content - base64 of changed data - out <- gh::gh( - "GET /repos/{owner}/{repo}/contents/{file_path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - file_path = "Data/git_metadata.csv", - .params = list(ref = "metadata") - ) - # There is no way to update only the lines which has changed using Github API - # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 - res <- gh::gh( - "PUT /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - path = "Data/git_metadata.csv", - .params = list( - branch = "metadata", - message = "updating csv file", - sha = out$sha, - content = convert_df_to_base64(org_data) - ), - .token = creds$password - ) - } - cli::cli_h2("File updated status.") - knitr::kable(last_updated_time) -} - - - -return_value <- function(aux, dependencies) { - val <- dependencies[[aux]] - if (length(val) > 0) { - for (i in val) { - val <- c(return_value(i, dependencies), val) - } - } - return(unique(c(val, aux))) -} - -#' Function to write dataframe to GitHub -#' -#' @param df A dataframe -#' -#' @return base64 encoded dataframe -#' @export -#' -#' @examples -#' \dontrun { -#' convert_df_to_base64(mtcars) -#' } -convert_df_to_base64 <- function(df) { - df |> - write.table(quote = FALSE, - row.names = FALSE, - sep = ",") |> - capture.output() |> - paste(collapse = "\n") |> - charToRaw() |> - base64enc::base64encode() -} - -aux_file_last_updated <- function(data_dir, aux_files, branch) { - filenames <- - 
glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") - data <- sapply(filenames, function(x) - qs::qattributes(x)$datetime) - data.frame( - filename = basename(names(data)), - time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), - row.names = NULL - ) |> - dplyr::arrange(desc(time_last_update)) - -} - -read_dependencies <- function(gh_user, owner) { - dependencies <- paste(gh_user, - owner, - "pipaux/metadata/Data/dependency.yml", - sep = "/") |> - yaml::read_yaml() - - sapply(dependencies, \(x) if (length(x)) - strsplit(x, ",\\s+")[[1]] - else - character()) -} - -read_signature_file <- function(aux_file, maindir, branch) { - # Construct the path to data signature aux file - data_signature_path <- - fs::path(maindir, - "_aux", - branch, - aux_file, - glue::glue("{aux_file}_datasignature.txt")) - signature_hash <- readr::read_lines(data_signature_path) - return(signature_hash) -} diff --git a/R/aux_countries.R b/R/aux_countries.R index 02d1b59..dd1e274 100644 --- a/R/aux_countries.R +++ b/R/aux_countries.R @@ -66,3 +66,61 @@ aux_countries <- function(action = c("update", "load"), return(df) } } + +#' Validate output countries data +#' +#' @param countries output countries data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){ + + stopifnot("Countries output data is not loaded" = !is.null(countries)) + + report <- data_validation_report() + + validate(countries, name = "countries output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.character(africa_split), + description = "`africa_split` should be character") |> + 
validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), + africa_split, description = "`africa_split` values within range") |> + validate_if(is.character(africa_split_code), + description = "`africa_split_code` should be character") |> + validate_cols(in_set(c("AFE", "AFW", NA)), + africa_split_code, description = "`africa_split_code` values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(region_code), + description = "`region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + region_code, description = "`region_code` values within range") |> + validate_if(is.character(world), + description = "`world` should be character") |> + validate_cols(in_set(c("World")), + world, description = "`world` values within range") |> + validate_if(is.character(world_code), + description = "`world_code` should be character") |> + validate_cols(in_set(c("WLD")), + world_code, description = "`world_code` values within range") |> + validate_cols(not_na, country_code, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_country_list.R b/R/aux_country_list.R index 439bc98..2f811de 100644 --- a/R/aux_country_list.R +++ b/R/aux_country_list.R @@ -86,3 +86,336 @@ aux_country_list <- function(action = c("update", "load"), return(df) } } + +#' Update Country LIst +#' +#' @param class_branch character: names of branch of GPID-WB/class repo. 
Default +#' if master +aux_country_list_update <- + function(class_branch = "master") { + + # Check arguments + measure <- "country_list" + + # ____________________________________________________________________________ + # Read Data from WDI #### + + wdi <- + wbstats::wb_countries() |> + as.data.table() |> + { + \(.) { + + # clean data + + iso2 <- grep("_iso2c", names(.), value = TRUE) + x <- .[, !..iso2] + + iso3 <- grep("_iso3c", names(x), value = TRUE) + + withiso <- + gsub("_iso3c", "", iso3) |> + paste0(collapse = "|") |> + grep(names(x), value = TRUE) + + tokeep <- c("country", "iso3c", withiso) + + x[region != "Aggregates" + ][, + ..tokeep + ] + } + }() + + + # rename iso3c + owdi <- names(wdi) + nwdi <- + gsub("iso3c", "code", names(wdi)) + + setnames(wdi, owdi, nwdi) + + + # Add "(excluding high income)" to South Asia + wdi[, admin_region := fifelse(test = grepl("income", admin_region) | is.na(admin_region), + yes = admin_region , + no = paste(admin_region , "(excluding high income)"))] + + # ____________________________________________________________________________ + # Read data from CLASS.dta file #### + + ## Special national accounts -------- + byv <- + c( + "code", + "region_SSA", + "fcv_current", + "region_pip") + + dt <- pipfun::load_from_gh( + measure = measure, + owner = "GPID-WB", + repo = "Class", + branch = class_branch, + filename = "OutputData/CLASS", + ext = "dta" + ) |> + as.data.table() |> + unique(by = byv) |> + (\(.){.[, ..byv]})() # select just these variables + + + dt_o <- names(dt) + dt_n <- gsub("_current", "", dt_o) + + setnames(dt, dt_o, dt_n) + setnames(dt, + old = c("region_SSA", "region_pip"), + new = c("africa_split_code", "pip_region_code")) + + # ____________________________________________________________________________ + # Merge wdi and CLASS #### + + + rg <- + joyn::joyn(dt, wdi, + by = "code", + match_type = "1:1", + reportvar = FALSE, + verbose = FALSE) + + + # 
____________________________________________________________________________ + # Clean Data #### + + # PIP region + + rg[, pip_region := fifelse(pip_region_code == "OHI", + yes = "Other High Income Countries", + no = region) + ] + + + + # East and West Africa + + rg[, + africa_split := fcase( + africa_split_code == "", "", + africa_split_code == "AFE", "Eastern and Southern Africa", + africa_split_code == "AFW", "Western and Central Africa", + default = "") + ][, + africa_split_code := fifelse(test = africa_split_code == "", + yes = "", + no = africa_split_code) + ] + + # Fragile countries + + rg[, + fcv_code := fifelse(fcv == "Yes", "FCVT", "FCVF") + ][, + fcv := fifelse(fcv == "Yes", "Fragile", "Not-fragile")] + + ## Admin regions + + rg[, + admin_region_code := fifelse( + admin_region_code == "" | is.na(admin_region_code), + NA_character_, + paste0(admin_region_code, "-AD"))] + + + # Add PCN region temporarilly + + rg[, + `:=`( + pcn_region = pip_region, + pcn_region_code = pip_region_code + )] + + # ff <- copy(rg) + # rg <- copy(ff) + + # Convert empty strings to NA + vars <- names(rg) + names(vars) <- vars + rg[, (vars) := lapply(.SD, + \(x) { + fifelse(x == "" | is.na(x), NA_character_, x) + } + ) + ] + + + # fix "Not classified" + # ff <- copy(rg) + + # rg <- copy(ff) + + # not_class <- function(x) { + # y <- deparse(substitute(x)) + # fifelse(test = grepl("classified", x), + # paste(x, "by", y), + # x) + # } + # + # rg[, (vars) := lapply(.SD,not_class), .SDcols = vars] + # + # + # rg[, (vars) := lapply(.SD, + # \(x){ + # y <- deparse(substitute(x)) + # # y <- ..x + # fifelse(test = grepl("classified", x), + # paste(x, "by", y), + # x) + # })] + # + # + # rg[lending_type_code == "LNX", unique(lending_type)] + # + + + + rg[, lending_type := fifelse(grepl("classified", lending_type), + paste(lending_type, "by", "lending type"), + lending_type)] + + + + rg[, income_level := fifelse(grepl("classified", income_level), + paste(income_level, "by", "income level"), 
+ income_level)] + + + # Create the World + + rg[, `:=`( + world = "World", + world_code = "WLD" + )] + + + + # janitor::tabyl(rg, region_code, admin_region_code) + # janitor::tabyl(rg, region, admin_region) + # janitor::tabyl(rg, region, pip_region) + + # ____________________________________________________________________________ + # Clean and Save #### + + + rg[, + c( "region_code", "region") := NULL] + + + setnames(x = rg, + old = c("code", "country", "pip_region", "pip_region_code"), + new = c("country_code", "country_name", "region", "region_code") ) + + + + ## Order columns alphabetically ------------ + varn <- names(rg) + setcolorder(rg, sort(varn)) + setcolorder(rg, c("country_code", "country_name")) + + + ## Remove categoeries that we don't need --------- + + rm_agg <- c("fcv", "lending_type", "admin_region") + rm_agg <- c("fcv", "income_level", "lending_type", "admin_region") + + to_rm <- + rm_agg |> + paste0("_code") |> + c(rm_agg) + + rg[, (to_rm) := NULL] + + + # hardcode fixing of TWN's name + rg[country_code == "TWN", + country_name := "Taiwan, China"] + + + rg + + } + +#' Validate raw country list data +#' +#' @param cl raw country list data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("Country list raw data is not loaded" = !is.null(cl)) + + report <- data_validation_report() + + # country_list <- pipload::pip_load_aux("pfw") + country_list <- pipfun::load_from_gh(measure = "pfw", + owner = getOption("pipfun.ghowner"), + branch = "DEV", + ext = "dta") + + country_list <- unique(country_list[, code]) + + validate(cl, name = "CL raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + # validate_cols(in_set(country_list), + # 
country_code, description = "`country_code` values within range") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.character(africa_split), + description = "`africa_split` should be character") |> + validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), + africa_split, description = "`africa_split` values within range") |> + validate_if(is.character(africa_split_code), + description = "`africa_split_code` should be character") |> + validate_cols(in_set(c("AFE", "AFW", NA)), + africa_split_code, description = "`africa_split_code` values within range") |> + validate_if(is.character(pcn_region), + description = "`pcn_region` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(region_code), + description = "`region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + region_code, description = "`region_code` values within range") |> + validate_if(is.character(world), + description = "`world` should be character") |> + validate_cols(in_set(c("World")), + world, description = "`world` values within range") |> + validate_if(is.character(world_code), + description = "`world_code` should be character") |> + validate_cols(in_set(c("WLD")), + world_code, description = "`world_code` values within range") |> + validate_cols(not_na, country_code, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> 
+ setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_country_list_update.R b/R/aux_country_list_update.R deleted file mode 100644 index ff5b861..0000000 --- a/R/aux_country_list_update.R +++ /dev/null @@ -1,257 +0,0 @@ -#' Update Country LIst -#' -#' @param class_branch character: names of branch of GPID-WB/class repo. Default -#' if master -aux_country_list_update <- - function(class_branch = "master") { - - # Check arguments - measure <- "country_list" - - # ____________________________________________________________________________ - # Read Data from WDI #### - - wdi <- - wbstats::wb_countries() |> - as.data.table() |> - { - \(.) { - - # clean data - - iso2 <- grep("_iso2c", names(.), value = TRUE) - x <- .[, !..iso2] - - iso3 <- grep("_iso3c", names(x), value = TRUE) - - withiso <- - gsub("_iso3c", "", iso3) |> - paste0(collapse = "|") |> - grep(names(x), value = TRUE) - - tokeep <- c("country", "iso3c", withiso) - - x[region != "Aggregates" - ][, - ..tokeep - ] - } - }() - - - # rename iso3c - owdi <- names(wdi) - nwdi <- - gsub("iso3c", "code", names(wdi)) - - setnames(wdi, owdi, nwdi) - - - # Add "(excluding high income)" to South Asia - wdi[, admin_region := fifelse(test = grepl("income", admin_region) | is.na(admin_region), - yes = admin_region , - no = paste(admin_region , "(excluding high income)"))] - - # ____________________________________________________________________________ - # Read data from CLASS.dta file #### - - ## Special national accounts -------- - byv <- - c( - "code", - "region_SSA", - "fcv_current", - "region_pip") - - dt <- pipfun::load_from_gh( - measure = measure, - owner = "GPID-WB", - repo = "Class", - branch = class_branch, - filename = "OutputData/CLASS", - ext = "dta" - ) |> - as.data.table() |> - unique(by = byv) |> - (\(.){.[, ..byv]})() # select just these variables - - - dt_o <- names(dt) - dt_n <- gsub("_current", "", dt_o) - - 
setnames(dt, dt_o, dt_n) - setnames(dt, - old = c("region_SSA", "region_pip"), - new = c("africa_split_code", "pip_region_code")) - - # ____________________________________________________________________________ - # Merge wdi and CLASS #### - - - rg <- - joyn::joyn(dt, wdi, - by = "code", - match_type = "1:1", - reportvar = FALSE, - verbose = FALSE) - - - # ____________________________________________________________________________ - # Clean Data #### - - # PIP region - - rg[, pip_region := fifelse(pip_region_code == "OHI", - yes = "Other High Income Countries", - no = region) - ] - - - - # East and West Africa - - rg[, - africa_split := fcase( - africa_split_code == "", "", - africa_split_code == "AFE", "Eastern and Southern Africa", - africa_split_code == "AFW", "Western and Central Africa", - default = "") - ][, - africa_split_code := fifelse(test = africa_split_code == "", - yes = "", - no = africa_split_code) - ] - - # Fragile countries - - rg[, - fcv_code := fifelse(fcv == "Yes", "FCVT", "FCVF") - ][, - fcv := fifelse(fcv == "Yes", "Fragile", "Not-fragile")] - - ## Admin regions - - rg[, - admin_region_code := fifelse( - admin_region_code == "" | is.na(admin_region_code), - NA_character_, - paste0(admin_region_code, "-AD"))] - - - # Add PCN region temporarilly - - rg[, - `:=`( - pcn_region = pip_region, - pcn_region_code = pip_region_code - )] - - # ff <- copy(rg) - # rg <- copy(ff) - - # Convert empty strings to NA - vars <- names(rg) - names(vars) <- vars - rg[, (vars) := lapply(.SD, - \(x) { - fifelse(x == "" | is.na(x), NA_character_, x) - } - ) - ] - - - # fix "Not classified" - # ff <- copy(rg) - - # rg <- copy(ff) - - # not_class <- function(x) { - # y <- deparse(substitute(x)) - # fifelse(test = grepl("classified", x), - # paste(x, "by", y), - # x) - # } - # - # rg[, (vars) := lapply(.SD,not_class), .SDcols = vars] - # - # - # rg[, (vars) := lapply(.SD, - # \(x){ - # y <- deparse(substitute(x)) - # # y <- ..x - # fifelse(test = grepl("classified", 
x), - # paste(x, "by", y), - # x) - # })] - # - # - # rg[lending_type_code == "LNX", unique(lending_type)] - # - - - - rg[, lending_type := fifelse(grepl("classified", lending_type), - paste(lending_type, "by", "lending type"), - lending_type)] - - - - rg[, income_level := fifelse(grepl("classified", income_level), - paste(income_level, "by", "income level"), - income_level)] - - - # Create the World - - rg[, `:=`( - world = "World", - world_code = "WLD" - )] - - - - # janitor::tabyl(rg, region_code, admin_region_code) - # janitor::tabyl(rg, region, admin_region) - # janitor::tabyl(rg, region, pip_region) - - # ____________________________________________________________________________ - # Clean and Save #### - - - rg[, - c( "region_code", "region") := NULL] - - - setnames(x = rg, - old = c("code", "country", "pip_region", "pip_region_code"), - new = c("country_code", "country_name", "region", "region_code") ) - - - - ## Order columns alphabetically ------------ - varn <- names(rg) - setcolorder(rg, sort(varn)) - setcolorder(rg, c("country_code", "country_name")) - - - ## Remove categoeries that we don't need --------- - - rm_agg <- c("fcv", "lending_type", "admin_region") - rm_agg <- c("fcv", "income_level", "lending_type", "admin_region") - - to_rm <- - rm_agg |> - paste0("_code") |> - c(rm_agg) - - rg[, (to_rm) := NULL] - - - # hardcode fixing of TWN's name - rg[country_code == "TWN", - country_name := "Taiwan, China"] - - - rg - -} diff --git a/R/aux_cp.R b/R/aux_cp.R index cedcc6f..1db5cb1 100644 --- a/R/aux_cp.R +++ b/R/aux_cp.R @@ -31,3 +31,402 @@ aux_cp <- function(action = c("update", "load"), return(dl) } } + +#' Clean country profile data +#' +#' @param x database from pip_cp_update +#' @param file_names character: vector with names of files +#' +#' @return data.table +aux_cp_clean <- function(x, + file_names) { + + # ____________________________________________________________________ + # Computations #### + + ## cleanup names ----------- + + dl <- 
purrr::map(x, clean_cp_names) + names(dl) <- gsub("(indicator_values_country_)(.*)", "\\2", file_names) + + ## Key Indicators ---------- + + # Create list of key indicators datasets + key_indicators <- merge(dl$KI1, dl$KI5_KI6_KI7, + all = TRUE, + by = c("country_code", "reporting_year") + ) + + key_indicators <- merge( + x = key_indicators, + y = dl$chart5[, + c("country_code", + "reporting_year", + "mpm_headcount", + "ppp_year") + ], + all = TRUE, + by = c("country_code", "reporting_year") + ) + + key_indicators <- list( + headcount_national = key_indicators[, + c("country_code", + "reporting_year", + "headcount_national", + "ppp_year")], + mpm_headcount = key_indicators[, + c("country_code", + "reporting_year", + "mpm_headcount", + "ppp_year")], + + reporting_pop = key_indicators[, + c("country_code", + "reporting_year", + "reporting_pop")], + + gni = key_indicators[, c("country_code", + "reporting_year", + "gni")], + + gdp_growth = key_indicators[, c("country_code", + "reporting_year", + "gdp_growth")] + ) + + # kg1 <- c("headcount_national", "mpm_headcount", "reporting_pop") + + # + # for (i in seq_along(kg1)) { + # + # var <- kg1[i] + # + # key_indicators[[var]] <- + # key_indicators[[var]] %>% + # # ff <- key_indicators[[var]] %>% + # dplyr::filter(!is.na(.data[[var]])) %>% + # dplyr::group_by(country_code, ppp_year) %>% + # dplyr::filter(reporting_year == max(reporting_year)) %>% + # dplyr::ungroup() %>% + # data.table::as.data.table() + # + # } + + kg1 <- c("headcount_national", "mpm_headcount") + for (i in seq_along(kg1)) { + + var <- kg1[i] + + key_indicators[[var]] <- + key_indicators[[var]][ + !is.na(get(var)) & !is.na(ppp_year) + ][, + .SD[which.max(reporting_year)], + by = c("country_code", "ppp_year") + ] + + } + + key_indicators$reporting_pop <- + key_indicators[["reporting_pop"]][ + !is.na(reporting_pop) + ][, + .SD[which.max(reporting_year)], + by = c("country_code") + ] + + key_indicators[4:5] <- lapply(key_indicators[4:5], function(x) { + x <- 
x %>% + dplyr::filter(!is.na(x[, 3])) %>% + dplyr::group_by(country_code) %>% + dplyr::slice_tail(n = 2) %>% + dplyr::mutate( + latest = + dplyr::if_else(reporting_year == max(reporting_year), + TRUE, FALSE + ) + ) %>% + dplyr::ungroup() %>% + data.table::as.data.table() + }) + + + ## Additional charts ---------- + + + ### Merge chart1_chart2_KI2 ID variables --------------- + + ab <- + joyn::merge(x = dl$chart1_chart2_KI2_data, + y = dl$chart1_chart2_KI2_ID, + by = "id", + match_type = "m:1", + reportvar = FALSE, + verbose = FALSE) + ab[, id := NULL] + + dl$chart1_chart2_KI2 <- ab + dl$chart1_chart2_KI2_data <- NULL + dl$chart1_chart2_KI2_ID <- NULL + rm(ab) + + ### chart6 ------------ + ki4 <- dl$chart6_KI4[, c("country_code", + "year_range", + "distribution", + "shared_prosperity", + "ppp_year")] + + ki4$year1 <- sapply(strsplit(ki4$year_range, "-"), \(x) x[[1]]) + ki4$year2 <- sapply(strsplit(ki4$year_range, "-"), \(x) x[[2]]) + ki4 <- ki4 %>% + dplyr::group_by(country_code, ppp_year) %>% + dplyr::filter(distribution %in% c("b40", "tot")) %>% + dplyr::filter(year2 == max(year2)) %>% + dplyr::filter(year1 == max(year1)) %>% + dplyr::ungroup() %>% + dplyr::select(country_code, + year_range, + distribution, + shared_prosperity, + ppp_year) %>% + data.table::as.data.table() %>% + data.table::dcast(country_code + ppp_year + year_range ~ distribution, + value.var = "shared_prosperity" + ) + + setnames(x = ki4, + old = c("b40", "tot"), + new = c("share_below_40", "share_total")) + + key_indicators <- append(key_indicators, list(shared_prosperity = ki4)) + + # Create list of charts datasets + mpm_cols <- grep("mpm_", names(dl$chart5), value = TRUE) + + charts <- list( + ineq_trend = + dl$chart3[, c( + "country_code", + "reporting_year", + "survey_acronym", + "welfare_type", + "survey_comparability", + "comparable_spell", + "gini", + "theil", + "reporting_level", + "ppp_year" + )], + ineq_bar = + dl$chart4[, c( + "country_code", + "reporting_year", + "welfare_type", + 
"survey_coverage", + "gender", + "agegroup", + "education", + "distribution", + "poverty_share_by_group", + "reporting_level", + "ppp_year" + )][, + agegroup_label := fcase( + agegroup == "0-14", "0 to 14 years old", + agegroup == "15-64", "15 to 64 years old", + agegroup == ">65", "65 and older", + default = "" + )][, + `:=`( + education_label = education, + gender_label = gender + ) + ], + mpm = + dl$chart5[, c( + "country_code", + "reporting_year", + "welfare_type", + "mpm_education_attainment", + "mpm_education_enrollment", + "mpm_electricity", + "mpm_sanitation", + "mpm_water", + "mpm_monetary", + "mpm_headcount", + "ppp_year", + "reporting_level" + )], + sp = + dl$chart6_KI4[, c("country_code", + "year_range", + "welfare_type", + "distribution", + "shared_prosperity", + "ppp_year", + "reporting_level") + ] + ) ## end of chart lists + + cp <- list(key_indicators = key_indicators, charts = charts) +} + + + + + +#' Clean names from original CP files +#' +#' @param x data.table +#' +#' @return data.table with names clenad +clean_cp_names <- function(x) { + + # ____________________________________________________________________________ + # Computations #### + + names(x) <- tolower(names(x)) + names(x) <- tolower(sub("xyzd[mcp]xyz", "", names(x))) + + # rename variables + x <- setnames( + x, + skip_absent = TRUE, + c( + "country", "requestyear", "datayear", "welfaretype", + "coverage", "interpolation", "survname", "comparability", + "comparable_spell", "povertyline", "yearrange", "si_pov_all_poor", + "sp_pop_totl", "si_pov_nahc", "ny_gnp_pcap_cd", "ny_gdp_mktp_kd_zg", + "si_pov_gini", "si_pov_theil", "si_pov_all", "si_pov_share_all", + "si_mpm_educ", "si_mpm_edue", "si_mpm_elec", "si_mpm_imps", + "si_mpm_impw", "si_mpm_mdhc", "si_mpm_poor", "si_spr_pcap_zg", "pppyear" + ), + c( + "country_code", "reporting_year", "survey_year", "welfare_type", + "survey_coverage", "is_interpolated", "survey_acronym", + "survey_comparability", "comparable_spell", + "poverty_line", 
"year_range", "pop_in_poverty", + "reporting_pop", "headcount_national", "gni", "gdp_growth", + "gini", "theil", "headcount", "poverty_share_by_group", + "mpm_education_attainment", "mpm_education_enrollment", + "mpm_electricity", "mpm_sanitation", "mpm_water", + "mpm_headcount", "mpm_monetary", "shared_prosperity", "ppp_year" + ) + ) + + if (any(grepl("welfare_type", names(x)))) { + + x[, + welfare_type := fifelse(welfare_type == "CONS", + "consumption", "income")] + } + + if (any(grepl("survey_coverage", names(x)))) { + # Recode survey coverage + x[, + `:=`(survey_coverage = fcase( + survey_coverage == "N", "national", + survey_coverage == "R", "rural", + survey_coverage == "U", "urban", + default = "" + ) + )] + + x[, + reporting_level := ifelse(survey_coverage == "", + "national", + survey_coverage) + ] + } + + # ____________________________________________________________________________ + # Return #### + return(x) + +} + +#' Update Country Profiles +#' +#' Update a list with country profiles data +#' +#' @inheritParams aux_cp +#' @keywords internal +aux_cp_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + measure <- "cp" + branch <- match.arg(branch) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## chart files -------- + + file_names <- + c( + "indicator_values_country_chart4", + "indicator_values_country_KI1", + "indicator_values_country_chart1_chart2_KI2_data", + "indicator_values_country_chart1_chart2_KI2_ID", + "indicator_values_country_chart5", + "indicator_values_country_chart3", + "indicator_values_country_chart6_KI4", + "indicator_values_country_KI5_KI6_KI7" + ) + + + raw_files <- purrr::map(.x = file_names, + .f = ~{ + pipfun::load_from_gh( + measure = "cp", + owner = owner, + branch = branch, + filename = .x) + }) + + + dl <- aux_cp_clean(raw_files, + file_names) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## download files -------- + 
fl_files <- c("flat_cp", "flat_shp") + + raw_fl <- purrr::map(.x = fl_files, + .f = ~{ + x <- pipfun::load_from_gh( + measure = "cp", + owner = owner, + branch = branch, + filename = .x, + ext = "dta") + setnames(x, "year", "reporting_year", + skip_absent=TRUE) + }) + names(raw_fl) <- fl_files + dl <- append(dl, list(flat = raw_fl)) + + + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## save -------- + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = dl, + measure = measure, + msrdir = msrdir, + force = force + ) + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## return -------- + + return(invisible(saved)) +} + diff --git a/R/aux_cp_clean.R b/R/aux_cp_clean.R deleted file mode 100644 index d746022..0000000 --- a/R/aux_cp_clean.R +++ /dev/null @@ -1,314 +0,0 @@ -#' Clean country profile data -#' -#' @param x database from pip_cp_update -#' @param file_names character: vector with names of files -#' -#' @return data.table -aux_cp_clean <- function(x, - file_names) { - - # ____________________________________________________________________ - # Computations #### - - ## cleanup names ----------- - - dl <- purrr::map(x, clean_cp_names) - names(dl) <- gsub("(indicator_values_country_)(.*)", "\\2", file_names) - - ## Key Indicators ---------- - - # Create list of key indicators datasets - key_indicators <- merge(dl$KI1, dl$KI5_KI6_KI7, - all = TRUE, - by = c("country_code", "reporting_year") - ) - - key_indicators <- merge( - x = key_indicators, - y = dl$chart5[, - c("country_code", - "reporting_year", - "mpm_headcount", - "ppp_year") - ], - all = TRUE, - by = c("country_code", "reporting_year") - ) - - key_indicators <- list( - headcount_national = key_indicators[, - c("country_code", - "reporting_year", - "headcount_national", - "ppp_year")], - mpm_headcount = key_indicators[, - c("country_code", - "reporting_year", - "mpm_headcount", - "ppp_year")], - - reporting_pop = 
key_indicators[, - c("country_code", - "reporting_year", - "reporting_pop")], - - gni = key_indicators[, c("country_code", - "reporting_year", - "gni")], - - gdp_growth = key_indicators[, c("country_code", - "reporting_year", - "gdp_growth")] - ) - - # kg1 <- c("headcount_national", "mpm_headcount", "reporting_pop") - - # - # for (i in seq_along(kg1)) { - # - # var <- kg1[i] - # - # key_indicators[[var]] <- - # key_indicators[[var]] %>% - # # ff <- key_indicators[[var]] %>% - # dplyr::filter(!is.na(.data[[var]])) %>% - # dplyr::group_by(country_code, ppp_year) %>% - # dplyr::filter(reporting_year == max(reporting_year)) %>% - # dplyr::ungroup() %>% - # data.table::as.data.table() - # - # } - - kg1 <- c("headcount_national", "mpm_headcount") - for (i in seq_along(kg1)) { - - var <- kg1[i] - - key_indicators[[var]] <- - key_indicators[[var]][ - !is.na(get(var)) & !is.na(ppp_year) - ][, - .SD[which.max(reporting_year)], - by = c("country_code", "ppp_year") - ] - - } - - key_indicators$reporting_pop <- - key_indicators[["reporting_pop"]][ - !is.na(reporting_pop) - ][, - .SD[which.max(reporting_year)], - by = c("country_code") - ] - - key_indicators[4:5] <- lapply(key_indicators[4:5], function(x) { - x <- x %>% - dplyr::filter(!is.na(x[, 3])) %>% - dplyr::group_by(country_code) %>% - dplyr::slice_tail(n = 2) %>% - dplyr::mutate( - latest = - dplyr::if_else(reporting_year == max(reporting_year), - TRUE, FALSE - ) - ) %>% - dplyr::ungroup() %>% - data.table::as.data.table() - }) - - - ## Additional charts ---------- - - - ### Merge chart1_chart2_KI2 ID variables --------------- - - ab <- - joyn::merge(x = dl$chart1_chart2_KI2_data, - y = dl$chart1_chart2_KI2_ID, - by = "id", - match_type = "m:1", - reportvar = FALSE, - verbose = FALSE) - ab[, id := NULL] - - dl$chart1_chart2_KI2 <- ab - dl$chart1_chart2_KI2_data <- NULL - dl$chart1_chart2_KI2_ID <- NULL - rm(ab) - - ### chart6 ------------ - ki4 <- dl$chart6_KI4[, c("country_code", - "year_range", - "distribution", - 
"shared_prosperity", - "ppp_year")] - - ki4$year1 <- sapply(strsplit(ki4$year_range, "-"), \(x) x[[1]]) - ki4$year2 <- sapply(strsplit(ki4$year_range, "-"), \(x) x[[2]]) - ki4 <- ki4 %>% - dplyr::group_by(country_code, ppp_year) %>% - dplyr::filter(distribution %in% c("b40", "tot")) %>% - dplyr::filter(year2 == max(year2)) %>% - dplyr::filter(year1 == max(year1)) %>% - dplyr::ungroup() %>% - dplyr::select(country_code, - year_range, - distribution, - shared_prosperity, - ppp_year) %>% - data.table::as.data.table() %>% - data.table::dcast(country_code + ppp_year + year_range ~ distribution, - value.var = "shared_prosperity" - ) - - setnames(x = ki4, - old = c("b40", "tot"), - new = c("share_below_40", "share_total")) - - key_indicators <- append(key_indicators, list(shared_prosperity = ki4)) - - # Create list of charts datasets - mpm_cols <- grep("mpm_", names(dl$chart5), value = TRUE) - - charts <- list( - ineq_trend = - dl$chart3[, c( - "country_code", - "reporting_year", - "survey_acronym", - "welfare_type", - "survey_comparability", - "comparable_spell", - "gini", - "theil", - "reporting_level", - "ppp_year" - )], - ineq_bar = - dl$chart4[, c( - "country_code", - "reporting_year", - "welfare_type", - "survey_coverage", - "gender", - "agegroup", - "education", - "distribution", - "poverty_share_by_group", - "reporting_level", - "ppp_year" - )][, - agegroup_label := fcase( - agegroup == "0-14", "0 to 14 years old", - agegroup == "15-64", "15 to 64 years old", - agegroup == ">65", "65 and older", - default = "" - )][, - `:=`( - education_label = education, - gender_label = gender - ) - ], - mpm = - dl$chart5[, c( - "country_code", - "reporting_year", - "welfare_type", - "mpm_education_attainment", - "mpm_education_enrollment", - "mpm_electricity", - "mpm_sanitation", - "mpm_water", - "mpm_monetary", - "mpm_headcount", - "ppp_year", - "reporting_level" - )], - sp = - dl$chart6_KI4[, c("country_code", - "year_range", - "welfare_type", - "distribution", - 
"shared_prosperity", - "ppp_year", - "reporting_level") - ] - ) ## end of chart lists - - cp <- list(key_indicators = key_indicators, charts = charts) -} - - - - - -#' Clean names from original CP files -#' -#' @param x data.table -#' -#' @return data.table with names clenad -clean_cp_names <- function(x) { - - # ____________________________________________________________________________ - # Computations #### - - names(x) <- tolower(names(x)) - names(x) <- tolower(sub("xyzd[mcp]xyz", "", names(x))) - - # rename variables - x <- setnames( - x, - skip_absent = TRUE, - c( - "country", "requestyear", "datayear", "welfaretype", - "coverage", "interpolation", "survname", "comparability", - "comparable_spell", "povertyline", "yearrange", "si_pov_all_poor", - "sp_pop_totl", "si_pov_nahc", "ny_gnp_pcap_cd", "ny_gdp_mktp_kd_zg", - "si_pov_gini", "si_pov_theil", "si_pov_all", "si_pov_share_all", - "si_mpm_educ", "si_mpm_edue", "si_mpm_elec", "si_mpm_imps", - "si_mpm_impw", "si_mpm_mdhc", "si_mpm_poor", "si_spr_pcap_zg", "pppyear" - ), - c( - "country_code", "reporting_year", "survey_year", "welfare_type", - "survey_coverage", "is_interpolated", "survey_acronym", - "survey_comparability", "comparable_spell", - "poverty_line", "year_range", "pop_in_poverty", - "reporting_pop", "headcount_national", "gni", "gdp_growth", - "gini", "theil", "headcount", "poverty_share_by_group", - "mpm_education_attainment", "mpm_education_enrollment", - "mpm_electricity", "mpm_sanitation", "mpm_water", - "mpm_headcount", "mpm_monetary", "shared_prosperity", "ppp_year" - ) - ) - - if (any(grepl("welfare_type", names(x)))) { - - x[, - welfare_type := fifelse(welfare_type == "CONS", - "consumption", "income")] - } - - if (any(grepl("survey_coverage", names(x)))) { - # Recode survey coverage - x[, - `:=`(survey_coverage = fcase( - survey_coverage == "N", "national", - survey_coverage == "R", "rural", - survey_coverage == "U", "urban", - default = "" - ) - )] - - x[, - reporting_level := 
ifelse(survey_coverage == "", - "national", - survey_coverage) - ] - } - - # ____________________________________________________________________________ - # Return #### - return(x) - -} diff --git a/R/aux_cp_update.R b/R/aux_cp_update.R deleted file mode 100644 index 62ac0e7..0000000 --- a/R/aux_cp_update.R +++ /dev/null @@ -1,82 +0,0 @@ -#' Update Country Profiles -#' -#' Update a list with country profiles data -#' -#' @inheritParams aux_cp -#' @keywords internal -aux_cp_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - measure <- "cp" - branch <- match.arg(branch) -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## chart files -------- - - file_names <- - c( - "indicator_values_country_chart4", - "indicator_values_country_KI1", - "indicator_values_country_chart1_chart2_KI2_data", - "indicator_values_country_chart1_chart2_KI2_ID", - "indicator_values_country_chart5", - "indicator_values_country_chart3", - "indicator_values_country_chart6_KI4", - "indicator_values_country_KI5_KI6_KI7" - ) - - - raw_files <- purrr::map(.x = file_names, - .f = ~{ - pipfun::load_from_gh( - measure = "cp", - owner = owner, - branch = branch, - filename = .x) - }) - - - dl <- aux_cp_clean(raw_files, - file_names) -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## download files -------- - fl_files <- c("flat_cp", "flat_shp") - - raw_fl <- purrr::map(.x = fl_files, - .f = ~{ - x <- pipfun::load_from_gh( - measure = "cp", - owner = owner, - branch = branch, - filename = .x, - ext = "dta") - setnames(x, "year", "reporting_year", - skip_absent=TRUE) - }) - names(raw_fl) <- fl_files - dl <- append(dl, list(flat = raw_fl)) - - - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## save -------- - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = dl, - measure = measure, - msrdir = msrdir, - 
force = force - ) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## return -------- - - return(invisible(saved)) -} diff --git a/R/aux_cpi.R b/R/aux_cpi.R index 87c5a47..324816c 100644 --- a/R/aux_cpi.R +++ b/R/aux_cpi.R @@ -65,4 +65,433 @@ aux_cpi <- function(action = c("update", "load"), } +#' Clean CPI data +#' +#' Clean CPI data from Datalibweb to meet PIP protocols. +#' +#' @param y dataset with CPI data from `aux_cpi_update()`. +#' @param cpivar character: CPI variable to be used as default. Currently it is +#' "cpi2011". +#' @inheritParams aux_cpi_update +#' +#' @keywords internal +aux_cpi_clean <- function(y, + cpivar = getOption("pipaux.cpivar"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + x <- data.table::as.data.table(y) + + # vars to keep + keep_vars <- c( + "country_code", "cpi_year", "survey_year", + "cpi", "ccf", "survey_acronym", "change_cpi2011", + grep("^cpi", names(x), value = TRUE) + ) + + # modifications to the database + x[ + , + c("cur_adj", "ccf") + := { + cur_adj <- ifelse(is.na(cur_adj), 1, cur_adj) + ccf <- 1 / cur_adj + + list(cur_adj, ccf) + } + ][ + , + `:=`( + country_code = code, + cpi_year = as.integer(year), + survey_year = round(ref_year, 2), + cpi = get(cpivar), + survey_acronym = survname, + cpi_domain = as.character(cpi_domain), + cpi_data_level = as.character(cpi_data_level) + ) + ][ + , + # This part should not exist if the raw data + # had been created properly + cpi_data_level := fcase( + tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "0", "rural", + tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "1", "urban", + tolower(cpi_domain) %chin% c("national", "1") & cpi_data_level %chin% c("2", "", NA_character_), "national", + default = "" + ) + ] + # keep final vars + x <- x[, ..keep_vars ] + + x <- unique(x) # remove duplicates + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + x <- 
x[country_code %in% cl$country_code] + + + return(x) +} + +#' Update CPI +#' +#' @inheritParams aux_cpi +#' @keywords internal +aux_cpi_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + # ____________________________________________________________________________ + # Set up #### + + measure <- "cpi" + branch <- match.arg(branch) + + + # ____________________________________________________________________________ + # load raw data #### + + cpi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) + + # validate cpi raw data + cpi_validate_raw(cpi, detail = detail) + + # ____________________________________________________________________________ + # Cleaning #### + + # Clean data + cpi <- aux_cpi_clean(cpi, + maindir = maindir, + branch = branch) + + # drop cpi_domain + cpi <- cpi[, -c("cpi_domain")] + + # changae cpi_year and cpi_data_level to year and reporting_level + cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(cpi, "aux_name", "cpi") + setattr(cpi, + "aux_key", + c("country_code", "year", "reporting_level", "survey_acronym")) + + # validate cpi clean data before saving it + cpi_validate_output(cpi, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = cpi, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) +} + +#' Check CPI Vintage +#' +#' @param msrdir character: measure directory. +#' @param dlwdir character: Datalibweb directory +#' @param force logical: If TRUE force update of vintage level 1. 
+#' +#' @keywords internal +aux_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), + dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), + force = FALSE) { + time <- format(Sys.time(), "%Y%m%d%H%M%S") # find a way to account for time zones + measure <- "cpi" + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + #--------- Prepar3 date --------- + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # get directories + cpi_files <- fs::dir_ls(dlwdir, regexp = "GMD_CPI\\.dta$", recurse = TRUE, type = "file") + + # load data + last_file <- max(cpi_files) + vintage <- load_cpi(last_file) + + tokeep <- names(vintage) |> + {\(.) grep("^cpi[0-9]{4}", ., value = TRUE)}() |> + c("code", "year", "survname", "cpi_data_level", "cpi_ppp_id") + + vintage <- vintage[, ..tokeep] + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + #--------- check version and save --------- + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + # save file + sfile <- fs::path(msrdir, "cpi_vintage.rds") + + equal_vintage <- TRUE + if (fs::file_exists(sfile)) { + + cfile <- readr::read_rds(sfile) + attr(cfile, "time") <- NULL # remove attributes + attr(cfile, "user") <- NULL # remove attributes + cf_vt <- all.equal(cfile, vintage) + + if (inherits(cf_vt, "character")) { + equal_vintage <- FALSE + } + } else { + equal_vintage <- FALSE + } + + if (equal_vintage == FALSE || force == TRUE) { + attr(vintage, "time") <- time + attr(vintage, "user") <- Sys.info()[8] + + readr::write_rds( + x = vintage, + file = sfile + ) + } + + return(!equal_vintage) +} # end of vintage_level_2 + +#' Load cpi files and create CPI ID variable +#' @param x character: cpi file name +#' @return data frame +load_cpi <- function(x) { + cpi_ppp_id <- gsub("(.*/Support_2005_)([^/]+)(_CPI\\.dta$)", "\\2", x) + df <- haven::read_dta(x) + df$cpi_ppp_id <- cpi_ppp_id + + to_keep <- c("label") + + to_keep_regx <- paste(to_keep, collapse = "|") + + nn <- names(df) + 
for (x in seq_along(nn)) { + ats <- attributes(df[[x]]) + atsn <- names(ats) + to_remove <- atsn[!grepl(to_keep_regx, atsn)] + + for (i in seq_along(to_remove)) { + attr(df[[x]], to_remove[i]) <- NULL + } + } + + data.table::setDT(df) + return(df) +} + +#' Validate raw cpi data +#' +#' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("CPI raw data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.character(cpi_domain), + description = "`cpi_domain` should be character") |> + validate_cols(in_set(c("National", "Urban/Rural")), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") |> + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011), + description = "`cpi201`1 should 
be numeric") |> + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") |> + validate_if(is.character(version), + description = "`version` should be character") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.numeric(cur_adj), + description = "`cur_adj` should be numeric") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U", NA)), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.numeric(cpi2011_SM22), + description = "`cpi2011_SM22` should be numeric") |> + validate_if(is.numeric(comparable), + description = "`comparable` should be numeric") |> + validate_if(is.numeric(cpi2017_SM22), + description = "`cpi2017_SM22` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> + validate_if(is.numeric(cpi_data_level), + description = "`cpi_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + cpi_data_level, description = "`cpi_data_level` values within range") |> + validate_if(is.numeric(ref_year_SM24), + description = "`ref_year_SM24` should be numeric") |> + validate_if(is.numeric(cpi2011_SM24), + description = "`cpi2011_SM24` should be numeric") |> + validate_if(is.numeric(cpi2017_SM24), + description = "`cpi2011_SM24` should be numeric") |> + validate_if(is.numeric(change_cpi2017), + description = "`change_cpi2017` should be numeric") |> + validate_if(is.numeric(change_icp2017), + description = "`change_icp2017` should be numeric") |> + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") |> + validate_if(is.numeric(change_icp2011), + description = "`change_icp2011` should be numeric") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") |> + validate_cols(not_na, code, year, survname, 
cpi_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname, cpi_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean cpi data +#' +#' @param cpi clean cpi data, output via `aux_cpi_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ + + stopifnot("CPI clean data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.numeric(cpi), + description = "`cpi` should be numeric") |> + validate_if(is.numeric(ccf), + description = "`ccf` should be numeric") |> + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") |> + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") |> + validate_cols(in_set(c(0, 1)), change_cpi2011, + description = "`change_cpi2011` values within range") |> + # validate_if(is.character(cpi_domain), + # description = "`cpi_domain` should be character") |> + # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, + # description = "`cpi_domian` values within range") |> + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") |> + 
validate_cols(in_set(c(0, 1)), cpi_domain_value, + description = "`cpi_domain_value` values within range") |> + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011), + description = "`cpi2011` should be numeric") |> + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") |> + # validate_if(is.numeric(cpi2011_SM22), + # description = "`cpi2011_SM22` should be numeric") |> + # validate_if(is.numeric(cpi2017_SM22), + # description = "`cpi2017_SM22` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> + # validate_if(is.numeric(cpi2011_AM23), + # description = "`cpi2011_AM23` should be numeric") |> + # validate_if(is.numeric(cpi2017_AM23), + # description = "`cpi2017_AM23` should be numeric") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") |> + validate_cols(not_na, country_code, year, survey_acronym, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), + description = "no duplicate records in key variables") |> + validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), + description = "no duplicate cpi values") |> + add_results(report) + + num_var_list1 <- grep("cpi2011_", colnames(cpi)) + num_var_list2 <- grep("cpi2017_", colnames(cpi)) + num_var_list <- c(num_var_list1, num_var_list2) + + for (i in 1:length(num_var_list)) { + validate(cpi, name = "CPI validation") |> + validate_cols(is.numeric, num_var_list[i], + description = 
"variables (with numeric var name) should be numeric") |> + add_results(report) + } + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } +} + + + diff --git a/R/aux_cpi_clean.R b/R/aux_cpi_clean.R deleted file mode 100644 index 3e40af2..0000000 --- a/R/aux_cpi_clean.R +++ /dev/null @@ -1,71 +0,0 @@ -#' Clean CPI data -#' -#' Clean CPI data from Datalibweb to meet PIP protocols. -#' -#' @param y dataset with CPI data from `aux_cpi_update()`. -#' @param cpivar character: CPI variable to be used as default. Currently it is -#' "cpi2011". -#' @inheritParams aux_cpi_update -#' -#' @keywords internal -aux_cpi_clean <- function(y, - cpivar = getOption("pipaux.cpivar"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - x <- data.table::as.data.table(y) - - # vars to keep - keep_vars <- c( - "country_code", "cpi_year", "survey_year", - "cpi", "ccf", "survey_acronym", "change_cpi2011", - grep("^cpi", names(x), value = TRUE) - ) - - # modifications to the database - x[ - , - c("cur_adj", "ccf") - := { - cur_adj <- ifelse(is.na(cur_adj), 1, cur_adj) - ccf <- 1 / cur_adj - - list(cur_adj, ccf) - } - ][ - , - `:=`( - country_code = code, - cpi_year = as.integer(year), - survey_year = round(ref_year, 2), - cpi = get(cpivar), - survey_acronym = survname, - cpi_domain = as.character(cpi_domain), - cpi_data_level = as.character(cpi_data_level) - ) - ][ - , - # This part should not exist if the raw data - # had been created properly - cpi_data_level := fcase( - tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "0", "rural", - tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "1", "urban", - tolower(cpi_domain) %chin% c("national", "1") & cpi_data_level %chin% c("2", "", NA_character_), "national", - default = "" - ) - ] - # keep final vars - x <- x[, ..keep_vars ] - - x <- unique(x) # remove 
duplicates - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - x <- x[country_code %in% cl$country_code] - - - return(x) -} diff --git a/R/aux_cpi_update.R b/R/aux_cpi_update.R deleted file mode 100644 index c3f9f74..0000000 --- a/R/aux_cpi_update.R +++ /dev/null @@ -1,72 +0,0 @@ -#' Update CPI -#' -#' @inheritParams aux_cpi -#' @keywords internal -aux_cpi_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - -# ____________________________________________________________________________ -# Set up #### - - measure <- "cpi" - branch <- match.arg(branch) - - -# ____________________________________________________________________________ -# load raw data #### - - cpi <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) - - # validate cpi raw data - cpi_validate_raw(cpi, detail = detail) - -# ____________________________________________________________________________ -# Cleaning #### - - # Clean data - cpi <- aux_cpi_clean(cpi, - maindir = maindir, - branch = branch) - - # drop cpi_domain - cpi <- cpi[, -c("cpi_domain")] - - # changae cpi_year and cpi_data_level to year and reporting_level - cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), - c("year", "reporting_level"), - skip_absent=TRUE) - - setattr(cpi, "aux_name", "cpi") - setattr(cpi, - "aux_key", - c("country_code", "year", "reporting_level", "survey_acronym")) - - # validate cpi clean data before saving it - cpi_validate_output(cpi, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = cpi, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) -} - diff 
--git a/R/aux_cpi_vintage.R b/R/aux_cpi_vintage.R deleted file mode 100644 index 76d65d0..0000000 --- a/R/aux_cpi_vintage.R +++ /dev/null @@ -1,92 +0,0 @@ -#' Check CPI Vintage -#' -#' @param msrdir character: measure directory. -#' @param dlwdir character: Datalibweb directory -#' @param force logical: If TRUE force update of vintage level 1. -#' -#' @keywords internal -aux_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), - dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), - force = FALSE) { - time <- format(Sys.time(), "%Y%m%d%H%M%S") # find a way to account for time zones - measure <- "cpi" - - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #--------- Prepar3 date --------- - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # get directories - cpi_files <- fs::dir_ls(dlwdir, regexp = "GMD_CPI\\.dta$", recurse = TRUE, type = "file") - - # load data - last_file <- max(cpi_files) - vintage <- load_cpi(last_file) - - tokeep <- names(vintage) |> - {\(.) 
grep("^cpi[0-9]{4}", ., value = TRUE)}() |> - c("code", "year", "survname", "cpi_data_level", "cpi_ppp_id") - - vintage <- vintage[, ..tokeep] - - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #--------- check version and save --------- - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - # save file - sfile <- fs::path(msrdir, "cpi_vintage.rds") - - equal_vintage <- TRUE - if (fs::file_exists(sfile)) { - - cfile <- readr::read_rds(sfile) - attr(cfile, "time") <- NULL # remove attributes - attr(cfile, "user") <- NULL # remove attributes - cf_vt <- all.equal(cfile, vintage) - - if (inherits(cf_vt, "character")) { - equal_vintage <- FALSE - } - } else { - equal_vintage <- FALSE - } - - if (equal_vintage == FALSE || force == TRUE) { - attr(vintage, "time") <- time - attr(vintage, "user") <- Sys.info()[8] - - readr::write_rds( - x = vintage, - file = sfile - ) - } - - return(!equal_vintage) -} # end of vintage_level_2 - -#' Load cpi files and create CPI ID variable -#' @param x character: cpi file name -#' @return data frame -load_cpi <- function(x) { - cpi_ppp_id <- gsub("(.*/Support_2005_)([^/]+)(_CPI\\.dta$)", "\\2", x) - df <- haven::read_dta(x) - df$cpi_ppp_id <- cpi_ppp_id - - to_keep <- c("label") - - to_keep_regx <- paste(to_keep, collapse = "|") - - nn <- names(df) - for (x in seq_along(nn)) { - ats <- attributes(df[[x]]) - atsn <- names(ats) - to_remove <- atsn[!grepl(to_keep_regx, atsn)] - - for (i in seq_along(to_remove)) { - attr(df[[x]], to_remove[i]) <- NULL - } - } - - data.table::setDT(df) - return(df) -} diff --git a/R/aux_gdm.R b/R/aux_gdm.R index c49fb76..77e7786 100644 --- a/R/aux_gdm.R +++ b/R/aux_gdm.R @@ -43,3 +43,359 @@ aux_gdm <- function(action = c("update", "load"), return(dt) } } + +#' Update GDM +#' +#' Update GDM data using the PovcalNet Masterfile. 
+#' +#' @inheritParams aux_gdm +#' @keywords internal +aux_gdm_update <- function(force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "gdm" + branch <- match.arg(branch) + + # _________________________________________________________ + # Load raw file #### + + df <- pipfun::load_from_gh(measure = "gdm", + owner = owner, + branch = branch, + tag = tag, + ext = "csv") + + # validate gdm raw data + gdm_validate_raw(gdm = df, detail = detail) + + # ____________________________________________________________________________ + # Transform dataset #### + + # Select for grouped data surveys + df <- df[grepl("[.]T0[1,2,5]$", + df$DistributionFileName, + ignore.case = TRUE), ] + + # Select and rename columns + old_nms <- c( + "CountryCode", + "SurveyTime", + "DataType", + "Coverage", + "SurveyMean_LCU", + "DistributionFileName", + "SurveyID" + ) + + new_nms <- c( + "country_code", + "survey_year", + "welfare_type", + "pop_data_level", + "survey_mean_lcu", + "pcn_source_file", + "pcn_survey_id" + ) + + setnames(df, old_nms, new_nms) + + df <- df[, ..new_nms] + + # Recode columns + df[, + c("pop_data_level", "welfare_type", "survey_coverage") := + { + x <- tolower(pop_data_level) + + y <- tolower(welfare_type) + y <- fifelse(y == "x", "consumption", "income") + + z <- fifelse(country_code %in% c("CHN", "IDN", "IND"), + "national", pop_data_level) + + list(x, y, z) + } + ] + + + df[, + distribution_type := fifelse(pop_data_level == "national", + "group", + "aggregate") + ][, + gd_type := sub(".*[.]", "", pcn_source_file) + ] + + + ## ............................................................................ 
+ ## Merge with PFW #### + + # pip_pfw(maindir = maindir, + # force = force, + # owner = owner, + # branch = branch, + # tag = tag) + + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + # Subset columns + pfw <- + pfw[, c( + "country_code", + "welfare_type", + "surveyid_year", + "survey_year", + "survey_acronym", + "inpovcal" + )] + + # Merge to add surveyid_year + tmp <- pfw[, c("country_code", "surveyid_year", "survey_year")] + df <- merge(df, tmp, + all.x = TRUE, + by = c("country_code", "survey_year") + ) + + # Merge to add survey_acronym and inpovcal + df <- merge(df, pfw, + all.x = TRUE, + by = c( + "country_code", "surveyid_year", + "survey_year", "welfare_type" + ) + ) + + # Filter to select surveys in PovcalNet + df <- df[inpovcal == 1] + df <- na.omit(df, "inpovcal") + + + ## ............................................................................ + ## Merge with inventory #### + + inv <- fst::read_fst(fs::path(maindir, "_inventory/inventory.fst"), + as.data.table = TRUE) + + # Create survey_id column + inv[, + survey_id := sub("[.]dta", "", filename) + ][, + surveyid_year := as.numeric(surveyid_year) + ] + + # Subset GD rows + inv <- inv[module == "PC-GROUP"] + + # Subset columns + inv <- inv[, c("country_code", + "surveyid_year", + "survey_acronym", + "survey_id")] + + # Merge to add PIP survey_id + df <- merge(df, inv, + all.x = TRUE, + by = c( + "country_code", "surveyid_year", + "survey_acronym" + ) + ) + + + # ---- Finalize table ---- + + # Select columns + df <- df[, c( + "country_code", + "surveyid_year", + "survey_year", + "welfare_type", + "survey_mean_lcu", + "distribution_type", + "gd_type", + "pop_data_level", + "pcn_source_file", + "pcn_survey_id", + "survey_id" + )] + + df[, survey_id := toupper(survey_id)] + + # Convert LCU means to daily values + # df$survey_mean_lcu <- df$survey_mean_lcu * (12/365) + + # Sort rows + setorder(df, country_code, surveyid_year, pop_data_level) + + # Sort columns + setcolorder(df, 
"survey_id") + + + + ## ............................................................................ + ## Remove any non-WDI countries #### + + aux_country_list(maindir = maindir, + force = force, + branch = branch) + + cl <- load_aux(measure = "country_list", + maindir = maindir, + branch = branch) + + df <- df[country_code %in% cl$country_code] + + + # ---- Save and sign ---- + df <- df |> setnames(c("surveyid_year", "pop_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(df, "aux_name", "gdm") + setattr(df, + "aux_key", + c("country_code", "year", "reporting_level", "welfare_type")) + + # validate gdm output data + gdm_validate_output(gdm = df, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = df, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) +} + +#' Validate raw gdm data +#' +#' @param gdm raw gdm data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ + + stopifnot("GDM raw data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM raw data validation") |> + validate_if(is.character(Region), + description = "`Region` should be character") |> + validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), + Region, description = "`Region` values within range") |> + validate_if(is.character(countryName), + description = "`countryName` should be character") |> + validate_if(is.character(Coverage), + description = "`Coverage` should be character") |> + validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), + Coverage, description = "`Coverage` 
values within range") |> + validate_if(is.character(CountryCode), + description = "`CountryCode` should be character") |> + validate_if(is.numeric(SurveyTime), + description = "`SurveyTime` should be numeric") |> + validate_if(is.numeric(CPI_Time), + description = "`CPI_Time` should be numeric") |> + validate_if(is.character(DataType), + description = "`DataType` should be character") |> + validate_cols(in_set(c("x", "X", "y", "Y")), + DataType, description = "`DataType` values within range") |> + validate_if(is.numeric(SurveyMean_LCU), + description = "`SurveyMean_LCU` should be numeric") |> + validate_if(is.numeric(currency), + description = "`currency` should be numeric") |> + validate_if(is.character(source), + description = "`source` should be character") |> + validate_if(is.character(SurveyID), + description = "`SurveyID` should be character") |> + validate_if(is.numeric(SurveyMean_PPP), + description = "`SurveyMean_PPP` should be numeric") |> + validate_if(is.character(DistributionFileName), + description = "`DistributionFileName` should be character") |> + validate_cols(is.logical, Comment, description = "Comment should be logical") |> + validate_cols(not_na, CountryCode, Coverage, SurveyTime, DataType, + description = "no missing values in key variables") |> + validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean gdm data +#' +#' @param gdm clean gdm data, output via `pipfun::pip_gdm_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ + + 
stopifnot("GDM output data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM output data validation") |> + validate_if(is.character(survey_id), + description = "`survey_id` should be character") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), welfare_type, + description = "`welfare_type` values within range") |> + validate_if(is.numeric(survey_mean_lcu), + description = "`survey_mean_lcu` should be numeric") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregate", "group")), distribution_type, + description = "`distribution_type` values within range") |> + validate_if(is.character(gd_type), + description = "`gd_type` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> + validate_if(is.character(pcn_source_file), + description = "`pcn_source_file` should be character") |> + validate_if(is.character(pcn_survey_id), + description = "`pcn_survey_id` should be character") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == 
"error")){ + get_error_validation(validation_record, detail) + } +} + diff --git a/R/aux_gdm_update.R b/R/aux_gdm_update.R deleted file mode 100644 index 4621d79..0000000 --- a/R/aux_gdm_update.R +++ /dev/null @@ -1,232 +0,0 @@ -#' Update GDM -#' -#' Update GDM data using the PovcalNet Masterfile. -#' -#' @inheritParams aux_gdm -#' @keywords internal -aux_gdm_update <- function(force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "gdm" - branch <- match.arg(branch) - -# _________________________________________________________ -# Load raw file #### - - df <- pipfun::load_from_gh(measure = "gdm", - owner = owner, - branch = branch, - tag = tag, - ext = "csv") - - # validate gdm raw data - gdm_validate_raw(gdm = df, detail = detail) - -# ____________________________________________________________________________ -# Transform dataset #### - - # Select for grouped data surveys - df <- df[grepl("[.]T0[1,2,5]$", - df$DistributionFileName, - ignore.case = TRUE), ] - - # Select and rename columns - old_nms <- c( - "CountryCode", - "SurveyTime", - "DataType", - "Coverage", - "SurveyMean_LCU", - "DistributionFileName", - "SurveyID" - ) - - new_nms <- c( - "country_code", - "survey_year", - "welfare_type", - "pop_data_level", - "survey_mean_lcu", - "pcn_source_file", - "pcn_survey_id" - ) - - setnames(df, old_nms, new_nms) - - df <- df[, ..new_nms] - - # Recode columns - df[, - c("pop_data_level", "welfare_type", "survey_coverage") := - { - x <- tolower(pop_data_level) - - y <- tolower(welfare_type) - y <- fifelse(y == "x", "consumption", "income") - - z <- fifelse(country_code %in% c("CHN", "IDN", "IND"), - "national", pop_data_level) - - list(x, y, z) - } - ] - - - df[, - distribution_type := fifelse(pop_data_level == "national", - "group", - "aggregate") - ][, - gd_type := sub(".*[.]", "", pcn_source_file) - ] - - -## 
............................................................................ -## Merge with PFW #### - - # pip_pfw(maindir = maindir, - # force = force, - # owner = owner, - # branch = branch, - # tag = tag) - - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - # Subset columns - pfw <- - pfw[, c( - "country_code", - "welfare_type", - "surveyid_year", - "survey_year", - "survey_acronym", - "inpovcal" - )] - - # Merge to add surveyid_year - tmp <- pfw[, c("country_code", "surveyid_year", "survey_year")] - df <- merge(df, tmp, - all.x = TRUE, - by = c("country_code", "survey_year") - ) - - # Merge to add survey_acronym and inpovcal - df <- merge(df, pfw, - all.x = TRUE, - by = c( - "country_code", "surveyid_year", - "survey_year", "welfare_type" - ) - ) - - # Filter to select surveys in PovcalNet - df <- df[inpovcal == 1] - df <- na.omit(df, "inpovcal") - - -## ............................................................................ -## Merge with inventory #### - - inv <- fst::read_fst(fs::path(maindir, "_inventory/inventory.fst"), - as.data.table = TRUE) - - # Create survey_id column - inv[, - survey_id := sub("[.]dta", "", filename) - ][, - surveyid_year := as.numeric(surveyid_year) - ] - - # Subset GD rows - inv <- inv[module == "PC-GROUP"] - - # Subset columns - inv <- inv[, c("country_code", - "surveyid_year", - "survey_acronym", - "survey_id")] - - # Merge to add PIP survey_id - df <- merge(df, inv, - all.x = TRUE, - by = c( - "country_code", "surveyid_year", - "survey_acronym" - ) - ) - - - # ---- Finalize table ---- - - # Select columns - df <- df[, c( - "country_code", - "surveyid_year", - "survey_year", - "welfare_type", - "survey_mean_lcu", - "distribution_type", - "gd_type", - "pop_data_level", - "pcn_source_file", - "pcn_survey_id", - "survey_id" - )] - - df[, survey_id := toupper(survey_id)] - - # Convert LCU means to daily values - # df$survey_mean_lcu <- df$survey_mean_lcu * (12/365) - - # Sort rows - setorder(df, 
country_code, surveyid_year, pop_data_level) - - # Sort columns - setcolorder(df, "survey_id") - - - -## ............................................................................ -## Remove any non-WDI countries #### - - aux_country_list(maindir = maindir, - force = force, - branch = branch) - - cl <- load_aux(measure = "country_list", - maindir = maindir, - branch = branch) - - df <- df[country_code %in% cl$country_code] - - - # ---- Save and sign ---- - df <- df |> setnames(c("surveyid_year", "pop_data_level"), - c("year", "reporting_level"), - skip_absent=TRUE) - - setattr(df, "aux_name", "gdm") - setattr(df, - "aux_key", - c("country_code", "year", "reporting_level", "welfare_type")) - - # validate gdm output data - gdm_validate_output(gdm = df, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = df, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) -} diff --git a/R/aux_gdp.R b/R/aux_gdp.R index ccac898..f36f4bc 100644 --- a/R/aux_gdp.R +++ b/R/aux_gdp.R @@ -39,4 +39,570 @@ aux_gdp <- function(action = c("update", "load"), ) return(dt) } -} # end of pip_gdp +} # end of aux_gdp + +#' Fetch GDP data from WEO +#' +#' Create a dataset with GDP data from World Economic Outlook. +#' +#' Note that the most recent version must be downloaded from imf.org and saved +#' as an .xls file in `/_aux/weo/`. The filename should be in the +#' following structure `WEO_.xls`. Due to potential file corruption +#' the file must be opened and re-saved before it can be updated with +#' `aux_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file +#' that's not really xls. 
+#' +#' @inheritParams aux_prices +#' @export +aux_gdp_weo <- function(action = "update", + force = FALSE, + maindir = gls$PIP_DATA_DIR) { + measure <- "weo" + msrdir <- fs::path(maindir, "_aux/", measure) # measure dir + + if (action == "update") { + + # ---- Load data from disk ---- + + # Get latest version of file (in case there are more); raw files sit in msrdir + dir <- as.character(msrdir) + weo_files <- list.files(dir, pattern = "^WEO_.*[.]xls$") + weo_latest <- weo_files %>% + gsub("^WEO_|[.]xls$", "", .) %>% + as.POSIXlt() %>% + max() %>% + as.character() %>% + sprintf("%s/WEO_%s.xls", dir, .) + + # Read data + dt <- readxl::read_xls( + weo_latest, + sheet = 1, na = "n/a", + col_types = "text" + ) + dt <- setDT(dt) + + # Clean column names + dt <- janitor::clean_names(dt) + + # ---- Data transformations ---- + + # Select rows w/ data on real gdp per capita + dt <- dt[weo_subject_code %in% + c("NGDPRPC", "NGDPRPPPPC", "NGDP_R")] + + # Fix country codes + dt[ + , + iso := fifelse( + iso == "WBG", "PSE", iso # West Bank & Gaza + ) + ] + dt[ + , + iso := fifelse( + iso == "UVK", "XKX", iso # Kosovo + ) + ] + + # Replace subject codes + dt[ + , + subject_code := fcase( + weo_subject_code == "NGDPRPC", "weo_gdp_lcu", + weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017", + weo_subject_code == "NGDP_R", "weo_gdp_lcu_notpc" + ) + ] + + # Reshape to long format + dt <- dt %>% + melt( + id.vars = c("iso", "subject_code"), + measure.vars = names(dt)[grepl("\\d{4}", names(dt))], + value.name = "weo_gdp", variable.name = "year" + ) + setnames(dt, "iso", "country_code") + + # Convert year and GDP to numeric + dt$year <- sub("x", "", dt$year) %>% as.numeric() + dt$weo_gdp <- suppressWarnings(as.numeric(dt$weo_gdp)) + + # Remove rows w/ missing GDP + dt <- dt[!is.na(dt$weo_gdp)] + + # Remove current year and future years (numeric comparison, not string) + current_year <- as.numeric(format(Sys.Date(), "%Y")) + dt <- dt[dt$year < current_year] + + # Reshape to wide for GDP columns + dt <- dt %>% + dcast( + formula = 
country_code + year ~ subject_code, + value.var = "weo_gdp" + ) + + # ---- Merge with population ---- + + pop <- aux_pop("load", maindir = maindir) + setDT(pop) + pop <- pop[pop_data_level == "national", ] + dt[pop, + on = .(country_code, year), + `:=`( + pop = i.pop + ) + ] + + # Calculate per capita value for NGDP_R + dt[ + , + weo_gdp_lcu := fifelse( + is.na(weo_gdp_lcu), weo_gdp_lcu_notpc / pop, weo_gdp_lcu + ) + ] + + + # ---- Chain PPP and LCU GDP columns ---- + + # Chain LCU on PPP column + dt <- chain_values( + dt, + base_var = "weo_gdp_ppp2017", + replacement_var = "weo_gdp_lcu", + new_name = "weo_gdp", + by = "country_code" + ) + + + # --- Sign and save ---- + + # Select final columns + dt <- dt[, c("country_code", "year", "weo_gdp")] + + # Save dataset + aux_sign_save( + x = dt, + measure = measure, + msrdir = msrdir, + force = force + ) + } else if (action == "load") { + dt <- load_aux( + maindir = maindir, + measure = measure + ) + return(dt) + } else { + rlang::abort(c("`action` must be `update` or `load`", + x = paste0("you provided `", action, "`") + )) + } +} + + + + + + +#' Update GDP +#' +#' Update GDP data using WDI, Maddison and Special cases. 
+#' +#' @inheritParams aux_gdp +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_gdp_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + + branch <- match.arg(branch) + measure <- "gdp" + + + # _________________________________________ + # Update data #### + + # # Update Maddison Project Data + # pip_maddison(force = force, + # maindir = maindir, + # branch = branch) + # + # # Update WEO data + # + # pip_weo(force = force, + # maindir = maindir, + # branch = branch) + # + # # Update WDI + # pip_wdi_update(maindir = maindir, + # from = from, + # force = force, + # branch = branch) + # + + # ____________________________________________________________________________ + # Load Data #### + + madd <- load_aux(measure = "maddison", + maindir = maindir, + branch = branch) + + + weo <- load_aux(measure = "weo", + maindir = maindir, + branch = branch) + + + wgdp <- load_aux(measure = "wdi", + maindir = maindir, + branch = branch) + + setnames(wgdp, "NY.GDP.PCAP.KD", "wdi_gdp") + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## Special national accounts -------- + sna <- pipfun::load_from_gh( + measure = "sna", + owner = owner, + branch = branch, + ext = "csv" + ) + # validate sna data + sna_validate_raw(sna, detail = detail) + + sna_fy <- pipfun::load_from_gh( + measure = "sna", + owner = owner, + branch = branch, + filename = "sna_metadata", + ext = "csv" + ) + + # load nowcast growth rates + nan <- pipfun::load_from_gh( + measure = "nan", + owner = owner, + branch = branch, + ext = "csv" + ) + + + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + + # ____________________________________________________________________________ + # Clean data #### + + ##--------- Clean GDP from WDI --------- + # Keep relevant variables + wgdp <- 
wgdp[, .(country_code, year, wdi_gdp)] + + # ---- Adjust FY to CY ---- + + # Merge WDI with special FY cases + sna_fy <- sna_fy[, c("Code", "Month", "Day")] + names(sna_fy) <- c("country_code", "fy_month", "fy_day") + wgdp <- merge(wgdp, sna_fy, by = "country_code", all.x = TRUE) + + # Calculate alpha + wgdp[, max_days := days_in_month(fy_month, year)] + wgdp[, month_num := get_month_number(fy_month)] + wgdp[, alpha := ((month_num - 1) + fy_day / max_days) / 12] + + # Create lead/lag vars + wgdp[, + wdi_gdp_lag := shift(wdi_gdp), + by = country_code] + wgdp[, + wdi_gdp_lead := shift(wdi_gdp, type = "lead"), + by = country_code] + + # Calculate adjusted GDP for calendar year + wgdp[, + wdi_gdp_cy := fifelse(!is.na(alpha), + fifelse(alpha < 0.5 , + alpha * wdi_gdp_lag + (1 - alpha) * wdi_gdp, + alpha * wdi_gdp + ( 1 - alpha) * wdi_gdp_lead), + NA_real_) + ] + wgdp[, + wdi_gdp_tmp := fifelse(!is.na(alpha), wdi_gdp_cy, wdi_gdp) + ] + wgdp[, + wdi_gdp := + # Egypt should only be adjusted after 1980 + fifelse(country_code == "EGY" & year < 1980, + wdi_gdp, wdi_gdp_tmp) + + ] + + # Keep relevant variables + wgdp <- wgdp[, .(country_code, year, wdi_gdp)] + + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Merge WDI GDP data with other sources ------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + # Merge Maddison and WDI (full join) + gdp <- merge.data.table( + wgdp, madd, + by = c("country_code", "year"), + all = TRUE + ) + + # Merge WEO and WDI (full join) + gdp <- merge.data.table( + gdp, weo, + by = c("country_code", "year"), + all = TRUE + ) + + # Chain in following order 1) WDI, 2) WEO, 3) Maddison + + # Chain WEO on WDI + gdp[, new_gdp := chain_val(ori_var = wdi_gdp, + rep_var = weo_gdp), + by = country_code] + + + # gdp <- chain_values( + # gdp, + # base_var = "wdi_gdp", + # replacement_var = "weo_gdp", + # new_name = "new_gdp", + # by = "country_code" + # ) + + # Chain Maddison on new GDP column + gdp[, gdp := 
chain_val(ori_var = new_gdp, + rep_var = mpd_gdp), + by = country_code] + + # gdp <- chain_values( + # gdp, + # base_var = "new_gdp", + # replacement_var = "mpd_gdp", + # new_name = "gdp", + # by = "country_code" + # ) + + # Select columns + gdp <- gdp[, c("country_code", "year", "gdp")] + + # ---- Hard-coded custom modifications ---- + + # Remove observations for Venezuela after 2014 + gdp[ + , + gdp := fifelse(country_code == "VEN" & year > 2014, NA_real_, gdp) + ] + + # Syria should be replaced with country specific-sources from 2010 + + # Merge with sna + sna <- na.omit(sna, "GDP") + + + # If there are special countries + if (nrow(sna) > 0) { + # Join with Special National Accounts data. + setnames(sna, "countrycode", "country_code") + + gdp[sna, + on = .(country_code, year), + `:=`( + sna_gdp = i.GDP + ) + ] + + gdp[, + gdp := fifelse(is.na(sna_gdp),gdp, sna_gdp) + ] + # remove extra variables + gdp[, + sna_gdp := NULL] + } + + # ---- Expand for special cases with U/R levels ---- + + # Special cases for IND, IDN, and CHN + sp <- gdp[country_code %chin% c("IND", "IDN", "CHN")] + + # Expand two time these cases using cross-join. 
+ sp <- sp[CJ( + gdp_data_level = c(0, 1), + country_code = country_code, + year = year, + unique = TRUE + ), + on = .(country_code, year) + ] + + # Add data level national to main dataset + gdp[, gdp_data_level := 2] + + # Append + gdp <- rbindlist(list(gdp, sp)) + + # Add domain column + gdp[ + , + gdp_domain := fifelse(gdp_data_level == 2, 1, 2) + ] + + # Sort + setorder(gdp, country_code, year, gdp_data_level) + + # ---- Finalize table ---- + + # Remove rows with missing GDP + gdp <- gdp[!is.na(gdp) & !is.infinite(gdp)] + + # Recode domain and data_level variables + cols <- c("gdp_domain", "gdp_data_level") + gdp[, + (cols) := lapply(.SD, as.character), + .SDcols = cols + ][ + , # recode domain + gdp_domain := fcase( + gdp_domain == "1", "national", + gdp_domain == "2", "urban/rural", + gdp_domain == "3", "subnational region" + ) + ][ # Recode data_level only for those that are national or urban/rural + gdp_domain %in% c("national", "urban/rural"), + gdp_data_level := fcase( + gdp_data_level == "0", "rural", + gdp_data_level == "1", "urban", + gdp_data_level == "2", "national" + ) + ] + + + # add nowcast growth rates ---------- + byvars <- c("country_code", "gdp_data_level") + + # Find the latest GDP data year for each country + latest_gdp <- gdp[, .(last_year = max(year), + last_gdp = gdp[which.max(year)]), + by = c(byvars, "gdp_domain")] + + # Join this with growth rates or years after the last available GDP year + dt_growth <- joyn::joyn(nan, latest_gdp, + by = byvars, + match_type = "m:1", + keep = "left", + reportvar = FALSE) |> + fsubset(year > last_year) + + # Prepare for cumulative growth calculation + dt_growth[, c("initial_year", "initial_gdp") := .(last_year[1], last_gdp[1]), + by = byvars] + + # Calculate projected GDP + # Calculate cumulative GDP projections + dt_growth[, cum_growth := cumprod(1 + gdppc_growth), + by = byvars + ][, projected_GDP := last_gdp * cum_growth + ] + + # Select the relevant columns for the result + gdp <- dt_growth |> + 
fselect(country_code, gdp_data_level, gdp_domain, year, gdp = projected_GDP) |> + # append to actual GDP + rowbind(gdp, fill = TRUE) |> + setorder(country_code, gdp_data_level, year) + + # Remove any non-WDI countries + gdp <- gdp[country_code %in% cl$country_code] + + # drop gdp_domain + gdp <- gdp[, -c("gdp_domain")] + + # ---- Save and sign ---- + gdp <- gdp |> setnames("gdp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(gdp, "aux_name", "gdp") + setattr(gdp, + "aux_key", + c("country_code", "year", "reporting_level")) + + # validate gdp output data + gdp_validate_output(gdp = gdp, detail = detail) + + if (branch == "main") { + branch <- "" + } + + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = gdp, + measure = measure, + msrdir = msrdir, + force = force + ) + # Push data (gdp) to GitHub as gdp.csv + save_aux_to_gh(gdp, + repo = paste0("aux_", measure), + branch = branch, + filename = measure) + # All aux files that depend on gdp will be loaded from Github + return(invisible(saved)) + +} + +#' Validate output gdp data +#' +#' @param gdp output gdp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ + + stopifnot("GDP output data is not loaded" = !is.null(gdp)) + + report <- data_validation_report() + + validate(gdp, name = "GDP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + 
validate_if(is.numeric(gdp), + description = "`gdp` should be numeric") |> + # validate_if(is.character(gdp_domain), + # description = "`gdp_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # gdp_domain, description = "`gdp_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + }} + diff --git a/R/aux_gdp_update.R b/R/aux_gdp_update.R deleted file mode 100644 index 649a93b..0000000 --- a/R/aux_gdp_update.R +++ /dev/null @@ -1,357 +0,0 @@ -#' Update GDP -#' -#' Update GDP data using WDI, Maddison and Special cases. -#' -#' @inheritParams aux_gdp -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -aux_gdp_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - - branch <- match.arg(branch) - measure <- "gdp" - - -# _________________________________________ -# Update data #### - - # # Update Maddison Project Data - # pip_maddison(force = force, - # maindir = maindir, - # branch = branch) - # - # # Update WEO data - # - # pip_weo(force = force, - # maindir = maindir, - # branch = branch) - # - # # Update WDI - # pip_wdi_update(maindir = maindir, - # from = from, - # force = force, - # branch = branch) - # - -# ____________________________________________________________________________ -# Load Data #### - - madd <- load_aux(measure = "maddison", - maindir = maindir, - branch = branch) - - - weo <- load_aux(measure = 
"weo", - maindir = maindir, - branch = branch) - - - wgdp <- load_aux(measure = "wdi", - maindir = maindir, - branch = branch) - - setnames(wgdp, "NY.GDP.PCAP.KD", "wdi_gdp") - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## Special national accounts -------- - sna <- pipfun::load_from_gh( - measure = "sna", - owner = owner, - branch = branch, - ext = "csv" - ) - # validate sna data - sna_validate_raw(sna, detail = detail) - - sna_fy <- pipfun::load_from_gh( - measure = "sna", - owner = owner, - branch = branch, - filename = "sna_metadata", - ext = "csv" - ) - - # load nowcast growth rates - nan <- pipfun::load_from_gh( - measure = "nan", - owner = owner, - branch = branch, - ext = "csv" - ) - - - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - -# ____________________________________________________________________________ -# Clean data #### - -##--------- Clean GDP from WDI --------- - # Keep relevant variables - wgdp <- wgdp[, .(country_code, year, wdi_gdp)] - - # ---- Adjust FY to CY ---- - - # Merge WDI with special FY cases - sna_fy <- sna_fy[, c("Code", "Month", "Day")] - names(sna_fy) <- c("country_code", "fy_month", "fy_day") - wgdp <- merge(wgdp, sna_fy, by = "country_code", all.x = TRUE) - - # Calculate alpha - wgdp[, max_days := days_in_month(fy_month, year)] - wgdp[, month_num := get_month_number(fy_month)] - wgdp[, alpha := ((month_num - 1) + fy_day / max_days) / 12] - - # Create lead/lag vars - wgdp[, - wdi_gdp_lag := shift(wdi_gdp), - by = country_code] - wgdp[, - wdi_gdp_lead := shift(wdi_gdp, type = "lead"), - by = country_code] - - # Calculate adjusted GDP for calendar year - wgdp[, - wdi_gdp_cy := fifelse(!is.na(alpha), - fifelse(alpha < 0.5 , - alpha * wdi_gdp_lag + (1 - alpha) * wdi_gdp, - alpha * wdi_gdp + ( 1 - alpha) * wdi_gdp_lead), - NA_real_) - ] - wgdp[, - wdi_gdp_tmp := fifelse(!is.na(alpha), wdi_gdp_cy, wdi_gdp) - ] - wgdp[, - wdi_gdp := - # Egypt should only be adjusted after 1980 - 
fifelse(country_code == "EGY" & year < 1980, - wdi_gdp, wdi_gdp_tmp) - - ] - - # Keep relevant variables - wgdp <- wgdp[, .(country_code, year, wdi_gdp)] - - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Merge WDI GDP data with other sources ------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - # Merge Maddison and WDI (full join) - gdp <- merge.data.table( - wgdp, madd, - by = c("country_code", "year"), - all = TRUE - ) - - # Merge WEO and WDI (full join) - gdp <- merge.data.table( - gdp, weo, - by = c("country_code", "year"), - all = TRUE - ) - - # Chain in following order 1) WDI, 2) WEO, 3) Maddison - - # Chain WEO on WDI - gdp[, new_gdp := chain_val(ori_var = wdi_gdp, - rep_var = weo_gdp), - by = country_code] - - - # gdp <- chain_values( - # gdp, - # base_var = "wdi_gdp", - # replacement_var = "weo_gdp", - # new_name = "new_gdp", - # by = "country_code" - # ) - - # Chain Maddison on new GDP column - gdp[, gdp := chain_val(ori_var = new_gdp, - rep_var = mpd_gdp), - by = country_code] - - # gdp <- chain_values( - # gdp, - # base_var = "new_gdp", - # replacement_var = "mpd_gdp", - # new_name = "gdp", - # by = "country_code" - # ) - - # Select columns - gdp <- gdp[, c("country_code", "year", "gdp")] - - # ---- Hard-coded custom modifications ---- - - # Remove observations for Venezuela after 2014 - gdp[ - , - gdp := fifelse(country_code == "VEN" & year > 2014, NA_real_, gdp) - ] - - # Syria should be replaced with country specific-sources from 2010 - - # Merge with sna - sna <- na.omit(sna, "GDP") - - - # If there are special countries - if (nrow(sna) > 0) { - # Join with Special National Accounts data. 
- setnames(sna, "countrycode", "country_code") - - gdp[sna, - on = .(country_code, year), - `:=`( - sna_gdp = i.GDP - ) - ] - - gdp[, - gdp := fifelse(is.na(sna_gdp),gdp, sna_gdp) - ] - # remove extra variables - gdp[, - sna_gdp := NULL] - } - - # ---- Expand for special cases with U/R levels ---- - - # Special cases for IND, IDN, and CHN - sp <- gdp[country_code %chin% c("IND", "IDN", "CHN")] - - # Expand two time these cases using cross-join. - sp <- sp[CJ( - gdp_data_level = c(0, 1), - country_code = country_code, - year = year, - unique = TRUE - ), - on = .(country_code, year) - ] - - # Add data level national to main dataset - gdp[, gdp_data_level := 2] - - # Append - gdp <- rbindlist(list(gdp, sp)) - - # Add domain column - gdp[ - , - gdp_domain := fifelse(gdp_data_level == 2, 1, 2) - ] - - # Sort - setorder(gdp, country_code, year, gdp_data_level) - - # ---- Finalize table ---- - - # Remove rows with missing GDP - gdp <- gdp[!is.na(gdp) & !is.infinite(gdp)] - - # Recode domain and data_level variables - cols <- c("gdp_domain", "gdp_data_level") - gdp[, - (cols) := lapply(.SD, as.character), - .SDcols = cols - ][ - , # recode domain - gdp_domain := fcase( - gdp_domain == "1", "national", - gdp_domain == "2", "urban/rural", - gdp_domain == "3", "subnational region" - ) - ][ # Recode data_level only for those that are national or urban/rural - gdp_domain %in% c("national", "urban/rural"), - gdp_data_level := fcase( - gdp_data_level == "0", "rural", - gdp_data_level == "1", "urban", - gdp_data_level == "2", "national" - ) - ] - - - # add nowcast growth rates ---------- - byvars <- c("country_code", "gdp_data_level") - - # Find the latest GDP data year for each country - latest_gdp <- gdp[, .(last_year = max(year), - last_gdp = gdp[which.max(year)]), - by = c(byvars, "gdp_domain")] - - # Join this with growth rates or years after the last available GDP year - dt_growth <- joyn::joyn(nan, latest_gdp, - by = byvars, - match_type = "m:1", - keep = "left", - 
reportvar = FALSE) |> - fsubset(year > last_year) - - # Prepare for cumulative growth calculation - dt_growth[, c("initial_year", "initial_gdp") := .(last_year[1], last_gdp[1]), - by = byvars] - - # Calculate projected GDP - # Calculate cumulative GDP projections - dt_growth[, cum_growth := cumprod(1 + gdppc_growth), - by = byvars - ][, projected_GDP := last_gdp * cum_growth - ] - - # Select the relevant columns for the result - gdp <- dt_growth |> - fselect(country_code, gdp_data_level, gdp_domain, year, gdp = projected_GDP) |> - # append to actual GDP - rowbind(gdp, fill = TRUE) |> - setorder(country_code, gdp_data_level, year) - - # Remove any non-WDI countries - gdp <- gdp[country_code %in% cl$country_code] - - # drop gdp_domain - gdp <- gdp[, -c("gdp_domain")] - - # ---- Save and sign ---- - gdp <- gdp |> setnames("gdp_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(gdp, "aux_name", "gdp") - setattr(gdp, - "aux_key", - c("country_code", "year", "reporting_level")) - - # validate gdp output data - gdp_validate_output(gdp = gdp, detail = detail) - - if (branch == "main") { - branch <- "" - } - - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = gdp, - measure = measure, - msrdir = msrdir, - force = force - ) - # Push data (gdp) to GitHub as gdp.csv - save_aux_to_gh(gdp, - repo = paste0("aux_", measure), - branch = branch, - filename = measure) - # All aux files that depend on gdp will be loaded from Github - return(invisible(saved)) - -} diff --git a/R/aux_gdp_weo.R b/R/aux_gdp_weo.R deleted file mode 100644 index a2ff895..0000000 --- a/R/aux_gdp_weo.R +++ /dev/null @@ -1,158 +0,0 @@ -#' Fetch GDP data from WEO -#' -#' Create a dataset with GDP data from World Economic Outlook. -#' -#' Note that the most recent version most be downloaded from imf.org and saved -#' as an .xls file in `/_aux/weo/`. The filename should be in the -#' following structure `WEO_.xls`. 
Due to potential file corruption -#' the file must be opened and re-saved before it can be updated with -#' `aux_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file -#' that's not really xls. -#' -#' @inheritParams aux_prices -#' @export -aux_gdp_weo <- function(action = "update", - force = FALSE, - maindir = gls$PIP_DATA_DIR) { - measure <- "weo" - msrdir <- fs::path(maindir, "_aux/", measure) # measure dir - - if (action == "update") { - - # ---- Load data from disk ---- - - # Get latest version of file (in case there are more) - dir <- sprintf("%s_aux/weo/", maindir) - weo_files <- list.files(dir, pattern = "WEO_.*[.]xls") - weo_latest <- weo_files %>% - gsub("WEO_|.xls", "", .) %>% - as.POSIXlt() %>% - max() %>% - as.character() %>% - sprintf("%s_aux/weo/WEO_%s.xls", maindir, .) - - # Read data - dt <- readxl::read_xls( - weo_latest, - sheet = 1, na = "n/a", - col_types = "text" - ) - dt <- setDT(dt) - - # Clean column names - dt <- janitor::clean_names(dt) - - # ---- Data transformations ---- - - # Select rows w/ data on real gdp per capita - dt <- dt[weo_subject_code %in% - c("NGDPRPC", "NGDPRPPPPC", "NGDP_R")] - - # Fix country codes - dt[ - , - iso := fifelse( - iso == "WBG", "PSE", iso # West Bank & Gaza - ) - ] - dt[ - , - iso := fifelse( - iso == "UVK", "XKX", iso # Kosovo - ) - ] - - # Replace subject codes - dt[ - , - subject_code := fcase( - weo_subject_code == "NGDPRPC", "weo_gdp_lcu", - weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017", - weo_subject_code == "NGDP_R", "weo_gdp_lcu_notpc" - ) - ] - - # Reshape to long format - dt <- dt %>% - melt( - id.vars = c("iso", "subject_code"), - measure.vars = names(dt)[grepl("\\d{4}", names(dt))], - value.name = "weo_gdp", variable.name = "year" - ) - setnames(dt, "iso", "country_code") - - # Convert year and GDP to numeric - dt$year <- sub("x", "", dt$year) %>% as.numeric() - dt$weo_gdp <- suppressWarnings(as.numeric(dt$weo_gdp)) - - # Remove rows w/ missing GDP - dt <- 
dt[!is.na(dt$weo_gdp)] - - # Remove current year and future years - current_year <- format(Sys.Date(), "%Y") - dt <- dt[dt$year < current_year] - - # Reshape to wide for GDP columns - dt <- dt %>% - dcast( - formula = country_code + year ~ subject_code, - value.var = "weo_gdp" - ) - - # ---- Merge with population ---- - - pop <- aux_pop("load", maindir = maindir) - setDT(pop) - pop <- pop[pop_data_level == "national", ] - dt[pop, - on = .(country_code, year), - `:=`( - pop = i.pop - ) - ] - - # Calculate per capita value for NGDP_R - dt[ - , - weo_gdp_lcu := fifelse( - is.na(weo_gdp_lcu), weo_gdp_lcu_notpc / pop, weo_gdp_lcu - ) - ] - - - # ---- Chain PPP and LCU GDP columns ---- - - # Chain LCU on PPP column - dt <- chain_values( - dt, - base_var = "weo_gdp_ppp2017", - replacement_var = "weo_gdp_lcu", - new_name = "weo_gdp", - by = "country_code" - ) - - - # --- Sign and save ---- - - # Select final columns - dt <- dt[, c("country_code", "year", "weo_gdp")] - - # Save dataset - aux_sign_save( - x = dt, - measure = measure, - msrdir = msrdir, - force = force - ) - } else if (action == "load") { - dt <- load_aux( - maindir = maindir, - measure = measure - ) - return(dt) - } else { - rlang::abort(c("`action` must be `update` or `load`", - x = paste0("you provided `", action, "`") - )) - } -} diff --git a/R/aux_income_groups.R b/R/aux_income_groups.R index 8e521ec..9b2b29c 100644 --- a/R/aux_income_groups.R +++ b/R/aux_income_groups.R @@ -86,3 +86,55 @@ aux_income_groups <- function(action = c("update", "load"), } } + +#' Validate income group output data +#' +#' @param incgroup income group output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ + + stopifnot("Income group output data is not loaded" = !is.null(incgroup)) + + report <- 
data_validation_report() + + validate(incgroup, name = "Income group output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year_data), + description = "`year_data` should be numeric") |> + validate_if(is.character(income_group), + description = "`income_group` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + income_group, description = "`income_group` values within range") |> + validate_if(is.character(income_group_code), + description = "`income_group_code` should be character") |> + validate_cols(in_set(c("HIC", "LIC", "LMIC", "UMIC")), + income_group_code, description = "`income_group_code` values within range") |> + validate_if(is.character(incgroup_historical), + description = "`incgroup_historical` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + incgroup_historical, description = "`incgroup_historical` values within range") |> + validate_if(is.character(fcv_historical), + description = "`fcv_historical` should be character") |> + validate_if(is.character(ssa_subregion_code), + description = "`ssa_subregion_code` should be character") |> + validate_cols(not_na, country_code, year_data, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year_data), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_maddison.R b/R/aux_maddison.R index 5bb356c..d70e6c7 100644 --- a/R/aux_maddison.R +++ b/R/aux_maddison.R @@ -60,3 +60,41 @@ aux_maddison <- function(action = c("update", "load"), return(df) } } + +#' Validate raw maddison data +#' +#' 
@param mpd raw mpd data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ + + stopifnot("mpd/ maddison raw data is not loaded" = !is.null(mpd)) + + report <- data_validation_report() + + validate(mpd, name = "mpd raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(mpd_gdp), + description = "`mpd_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_metadata.R b/R/aux_metadata.R index 1844e66..d985254 100644 --- a/R/aux_metadata.R +++ b/R/aux_metadata.R @@ -39,3 +39,330 @@ aux_metadata <- function(action = c("update", "load"), } } +#' Update metadata file +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams pipfun::load_from_gh +#' @inheritParams aux_metadata +#' @return logical. TRUE if saved correctly.
FALSE if error happened +#' @export +aux_metadata_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + measure <- "metadata" + branch <- match.arg(branch) + # ____________________________________________________________________________ + # Computations #### + + df <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv") + + # validate raw metadata + metadata_validate_raw(metadata = df, detail = detail) + + # Load pfw + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + + + + # Create distribution type column (data type) + + pfw[, + domain_check := (gdp_domain == 2 | pce_domain == 2 | + pop_domain == 2 | cpi_domain == 2 | + ppp_domain == 2)] + + # order matters here + pfw[, + distribution_type := fcase( + use_imputed == 1, "micro, imputed", + use_microdata == 1, "micro", + use_groupdata == 1 & domain_check, "aggregated", + use_groupdata == 1, "group", + default = NA_character_ + ) + ] + + # Merge datasets (right join: keep all pfw rows) + df <- + merge(df, + pfw[, c("country_code", "ctryname", "surveyid_year", "survey_acronym", + "welfare_type", "reporting_year", "distribution_type", + "surv_producer","survey_coverage", "surv_title", + "link", "survey_year")], + by = "link", all.y = TRUE + ) + + # Recode colnames + setnames(x = df, + old = c("title", "surv_producer", "ctryname"), + new = c("survey_title", "survey_conductor", "country_name")) + df[, + survey_title := fifelse(is.na(survey_title), surv_title, survey_title) + ] + + # Select columns + df <- df[, + c( + "country_code", "country_name", "reporting_year", + "surveyid_year", "survey_year", "survey_acronym", + "survey_conductor", "survey_coverage", + "welfare_type", "distribution_type", + "survey_title", "year_start", "year_end", + "authoring_entity_name", "abstract", +
"collection_dates_cycle", "collection_dates_start", + "collection_dates_end", + "sampling_procedure", "collection_mode", + "coll_situation", "weight", "cleaning_operations" + ) + ] + + # Create nested table + + df <- df[, .(.(.SD)), + keyby = .( + country_code, + country_name, + reporting_year, + survey_year, + surveyid_year, + survey_title, + survey_conductor, + survey_coverage, + welfare_type, + distribution_type + ) + ] + + setnames(df, old = "V1", new = "metadata") + + ## ............................................................................ + ## Save #### + df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) + + setattr(df, "aux_name", "metadata") + setattr(df, + "aux_key", + c("country_code", "year", "welfare_type")) + + # validate raw metdata data + metadata_validate_output(metadata = df, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = df, + measure = measure, + msrdir = msrdir, + force = force + ) + + # ____________________________________________________________________________ + # Return #### + return(invisible(saved)) + +} + +#' Metadata for PIP regions +#' +#' Update or load a dataset with regions. +#' +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +aux_metaregion <- function(action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch) +) { + measure <- "metaregion" + action <- match.arg(action) + branch <- match.arg(branch) + + if (action == "update") { + mr <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch) + + + ## ............................................................................ 
+ ## Save data #### + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + saved <- pipfun::pip_sign_save( + x = mr, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) + + + } else { + df <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(df) + } + +} # end of function + +#' Validate raw metadata data +#' +#' @param metadata raw metadata data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +metadata_validate_raw <- function(metadata, detail = getOption("pipaux.detail.raw")){ + + stopifnot("metadata raw data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "metadata raw data validation") |> + validate_if(is.character(status), + description = "`status` should be character") |> + validate_if(is.character(reg), + description = "`reg` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAR", "SSA")), + reg, description = "`reg` values within range") |> + validate_if(is.numeric(id), + description = "`id` should be numeric") |> + validate_if(is.character(svy_id), + description = "`svy_id` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(title), + description = "`title` should be character") |> + validate_if(is.character(data_access), + description = "`data_access` should be character") |> + validate_if(is.numeric(year_start), + description = "`year_start` should be numeric") |> + validate_if(is.numeric(year_end), + description = "`year_end` should be numeric") |> + validate_if(is.character(authoring_entity_name), + description = "`authoring_entity_name` should be character") |> + 
validate_if(is.character(authoring_entity_affiliation), + description = "`authoring_entity_affiliation` should be character") |> + validate_if(is.character(contact_email), + description = "`contact_email` should be character") |> + validate_if(is.character(contact_uri), + description = "`contact_uri` should be character") |> + validate_if(is.character(abstract), + description = "`abstract` should be character") |> + validate_if(is.character(collection_dates_cycle), + description = "`collection_dates_cycle` should be character") |> + validate_if(is.character(collection_dates_start), + description = "`collection_dates_start` should be character") |> + validate_if(is.character(collection_dates_end), + description = "`collection_dates_end` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_if(is.character(sampling_procedure), + description = "`sampling_procedure` should be character") |> + validate_if(is.character(collection_mode), + description = "`collection_mode` should be character") |> + validate_if(is.character(coll_situation), + description = "`coll_situation` should be character") |> + validate_if(is.character(weight), + description = "`weight` should be character") |> + validate_if(is.character(cleaning_operations), + description = "`cleaning_operations` should be character") |> + validate_if(is.character(coverage_notes), + description = "`coverage_notes` should be character") |> + validate_cols(not_na, svy_id, + description = "no missing values in key variables") |> + validate_if(is_uniq(svy_id), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate output metadata data +#' +#' @param metadata metadata data +#' @param detail has an option TRUE/FALSE, default
value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ + + stopifnot("Metadata data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "Metadata output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(survey_title), + description = "`survey_title` should be character") |> + validate_if(is.character(survey_conductor), + description = "`survey_conductor` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), + distribution_type, description = "`distribution_type` values within range") |> + validate_cols(not_na, country_code, year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, 
unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + + + + diff --git a/R/aux_metadata_update.R b/R/aux_metadata_update.R deleted file mode 100644 index 8b84b05..0000000 --- a/R/aux_metadata_update.R +++ /dev/null @@ -1,135 +0,0 @@ -#' Update metadata file -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pipfun::load_from_gh -#' @inheritParams aux_metadata -#' @return logical. TRUE if saved correctly. FALSE if error happened -#' @export -aux_metadata_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "metadata" - branch <- match.arg(branch) - # ____________________________________________________________________________ - # Computations #### - - df <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv") - - # validate raw metdata data - metadata_validate_raw(metadata = df, detail = detail) - - # Load pfw - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - - - - # Create distribution type column (data type) - - pfw[, - domain_check := (gdp_domain == 2 | pce_domain == 2 | - pop_domain == 2 | cpi_domain == 2 | - ppp_domain == 2)] - - # order matters here - pfw[, - distribution_type := fcase( - use_imputed == 1, "micro, imputed", - use_microdata == 1, "micro", - use_groupdata == 1 & domain_check, "aggregated", - use_groupdata == 1, "group", - default = NA_character_ - ) - ] - - # Merge datasets (inner join) - df <- - merge(df, - pfw[, c("country_code", "ctryname", "surveyid_year", "survey_acronym", - "welfare_type", "reporting_year", "distribution_type", - "surv_producer","survey_coverage", "surv_title", - "link", "survey_year")], - by = "link", all.y = TRUE - ) - - # Recode colnames - 
setnames(x = df, - old = c("title", "surv_producer", "ctryname"), - new = c("survey_title", "survey_conductor", "country_name")) - df[, - survey_title := fifelse(is.na(survey_title), surv_title, survey_title) - ] - - # Select columns - df <- df[, - c( - "country_code", "country_name", "reporting_year", - "surveyid_year", "survey_year", "survey_acronym", - "survey_conductor", "survey_coverage", - "welfare_type", "distribution_type", - "survey_title", "year_start", "year_end", - "authoring_entity_name", "abstract", - "collection_dates_cycle", "collection_dates_start", - "collection_dates_end", - "sampling_procedure", "collection_mode", - "coll_situation", "weight", "cleaning_operations" - ) - ] - - # Create nested table - - df <- df[, .(.(.SD)), - keyby = .( - country_code, - country_name, - reporting_year, - survey_year, - surveyid_year, - survey_title, - survey_conductor, - survey_coverage, - welfare_type, - distribution_type - ) - ] - - setnames(df, old = "V1", new = "metadata") - -## ............................................................................ -## Save #### - df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) - - setattr(df, "aux_name", "metadata") - setattr(df, - "aux_key", - c("country_code", "year", "welfare_type")) - - # validate raw metdata data - metadata_validate_output(metadata = df, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = df, - measure = measure, - msrdir = msrdir, - force = force - ) - - # ____________________________________________________________________________ - # Return #### - return(invisible(saved)) - -} diff --git a/R/aux_metaregion.R b/R/aux_metaregion.R deleted file mode 100644 index e8a44b6..0000000 --- a/R/aux_metaregion.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Metadata for PIP regions -#' -#' Update or load a dataset with regions. 
-#' -#' @inheritParams aux_cpi -#' @inheritParams pipfun::load_from_gh -#' @export -aux_metaregion <- function(action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) { - measure <- "metaregion" - action <- match.arg(action) - branch <- match.arg(branch) - - if (action == "update") { - mr <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch) - - - ## ............................................................................ - ## Save data #### - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - saved <- pipfun::pip_sign_save( - x = mr, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) - - - } else { - df <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(df) - } - -} # end of function - - - - diff --git a/R/aux_npl.R b/R/aux_npl.R index 4c9271a..d37ae48 100644 --- a/R/aux_npl.R +++ b/R/aux_npl.R @@ -81,3 +81,91 @@ aux_npl <- function(action = c("update", "load"), } } + +#' Validate npl raw data +#' +#' @param npl raw npl data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("NPL raw data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + # validate_cols(in_set(c("AFE", "AFW", "EAP", "ECA", "LAC", "MNA", "SAR")), + # region, description = "`region` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") 
|> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc_nc), + description = "`vsi_pov_nahc_nc` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc), + description = "`vsi_pov_nahc` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate npl output data +#' +#' @param npl output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ + + stopifnot("NPL output data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(nat_headcount), + description = "`nat_headcount` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate 
records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/aux_pce.R b/R/aux_pce.R index b0d4563..2cb80ec 100644 --- a/R/aux_pce.R +++ b/R/aux_pce.R @@ -37,3 +37,311 @@ aux_pce <- function(action = c("update", "load"), return(dt) } } + +#' Update PCE +#' +#' Update PCE data using WDI and Special cases. +#' +#' @inheritParams aux_gdp +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_pce_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + measure <- "pce" + branch <- match.arg(branch) + from <- match.arg(from) + + # ________________________________________________________________ + # Load data #### + + # Update WDI + # pip_wdi_update(maindir = maindir, + # from = from, + # force = force, + # branch = branch) + # + + wpce <- load_aux(measure = "wdi", + maindir = maindir, + branch = branch) + + setnames(wpce, "NE.CON.PRVT.PC.KD", "wdi_pce") + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## Special national accounts -------- + sna <- pipfun::load_from_gh( + measure = "sna", + owner = owner, + branch = branch, + ext = "csv" + ) + + # validate sna data + sna_validate_raw(sna = sna, detail = detail) + + sna_fy <- pipfun::load_from_gh( + measure = "sna", + owner = owner, + branch = branch, + filename = "sna_metadata", + ext = "csv" + ) + # validate sna_fy data + sna_fy_validate_raw(sna_fy = sna_fy, detail = detail) + # ____________________________________________________________________________ + # Clean PCE from WDI #### + + # Keep relevant variables + wpce <- wpce[, .(country_code, year, wdi_pce)] + + ## ---- Adjust FY to CY ---- + + # Merge WDI with special FY cases + 
sna_fy <- sna_fy[, c("Code", "Month", "Day")] + names(sna_fy) <- c("country_code", "fy_month", "fy_day") + wpce <- merge(wpce, sna_fy, by = "country_code", all.x = TRUE) + + # Calculate alpha + wpce[, max_days := days_in_month(fy_month, year)] + wpce[, month_num := get_month_number(fy_month)] + wpce[, alpha := ((month_num - 1) + fy_day / max_days) / 12] + + # Create lead/lag vars + wpce[, wdi_pce_lag := shift(wdi_pce), by = country_code] + wpce[, wdi_pce_lead := shift(wdi_pce, type = "lead"), by = country_code] + + # Calculate adjusted GDP for calendar year + wpce[, + wdi_pce_cy := fifelse(!is.na(alpha), + fifelse(alpha < 0.5 , + alpha * wdi_pce_lag + (1 - alpha) * wdi_pce, + alpha * wdi_pce + ( 1 - alpha) * wdi_pce_lead), + NA_real_) + ] + wpce[, + wdi_pce_tmp := fifelse(!is.na(alpha), wdi_pce_cy, wdi_pce) + ] + wpce[, + wdi_pce := + fifelse(country_code == "EGY" & year < 1980, # Egypt should only be adjusted after 1980 + wdi_pce, wdi_pce_tmp) + + ] + + # Keep relevant variables + pce <- wpce[, .(country_code, year, wdi_pce)] + + + # ____________________________________________________________________________ + # Special cases #### + + ## ---- Expand for special cases with U/R levels ---- + + # Special cases for IND, IDN, and CHN + sp <- pce[country_code %in% c("IND", "IDN", "CHN")] + + # Expand two time these cases using cross-join. 
+ sp <- sp[CJ( + pce_data_level = c(0, 1), + country_code = country_code, + year = year, + unique = TRUE + ), + on = .(country_code, year) + ] + + # Add data level national to main dataset + pce[, pce_data_level := 2] + + # Append + pce <- rbindlist(list(pce, sp)) + + # Add domain column + pce[, + pce_domain := fifelse(pce_data_level == 2, 1, 2) + ] + + # Sort + setorder(pce, country_code, year, pce_data_level) + + ## ---- Recode domain and data level ---- + + # Recode domain and data_level variables + cols <- c("pce_domain", "pce_data_level") + pce[, + (cols) := lapply(.SD, as.character), + .SDcols = cols + ][ + , # recode domain + pce_domain := fcase( + pce_domain == "1", "national", + pce_domain == "2", "urban/rural", + pce_domain == "3", "subnational region" + ) + ][ # Recode data_level only for those that are national or urban/rural + pce_domain %in% c("national", "urban/rural"), + pce_data_level := fcase( + pce_data_level == "0", "rural", + pce_data_level == "1", "urban", + pce_data_level == "2", "national" + ) + ] + + + ## ---- Hard-coded custom modifications ---- + # get survey years where only PCE is present + sna <- sna[!is.na(PCE) + ][, # lower case coverage + coverage := tolower(coverage) + ] + + # If there are special countries + if (nrow(sna) > 0) { + # Join with Special National Accounts data. 
+ setnames(x = sna, + old = c("countrycode", "coverage"), + new = c("country_code", "pce_data_level") + ) + + pce[sna, + on = .(country_code, year, pce_data_level), + `:=`( + sna_pce = i.PCE + ) + ] + + pce[, + pce := fifelse(is.na(sna_pce),wdi_pce, sna_pce) + ] + # remove extra variables + pce[, + c("sna_pce", "wdi_pce") := NULL] + + } else { + # If there are no special countries + setnames(pce, "wdi_pce", "pce") + } + + + # _______________________________________________________________________ + # Hard-coded countries #### + + # Remove observations for Venezuela after 2014 + pce[ + , + pce := fifelse(country_code == "VEN" & year > 2014, NA_real_, pce) + ] + + # Remove observations for Belize before 1992 + # See issue PIP-Technical-Team/pipaux#41 + pce[ + , + pce := fifelse(country_code == "BLZ" & year < 1992, NA_real_, pce) + ] + + # Remove all observations for Iraq + # See issue PIP-Technical-Team/pipaux#43 + pce[ + , + pce := fifelse(country_code == "IRQ", NA_real_, pce) + ] + + + # __________________________________________________________________ + # Finalize table #### + + + # Remove rows with missing PCE + pce <- na.omit(pce, "pce") + pce <- pce[!is.infinite(pce)] + + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + + pce <- pce[country_code %in% cl$country_code] + + # drop pce_domain + pce <- pce[, -c("pce_domain")] + + ## ---- Sign and save ---- + pce <- pce |> setnames("pce_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pce, "aux_name", "pce") + setattr(pce, + "aux_key", + c("country_code", "year", "reporting_level")) + + # validate pce output data + pce_validate_output(pce = pce, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = pce, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) + +} + +#' Validate
output pce data +#' +#' @param pce output pce data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ + + stopifnot("PCE clean data is not loaded" = !is.null(pce)) + + report <- data_validation_report() + + validate(pce, name = "PCE output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pce), + description = "`pce` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + # validate_if(is.character(pce_domain), + # description = "`pce_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pce_domain, description = "`pce_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_pce_update.R b/R/aux_pce_update.R deleted file mode 100644 index 19cf4e0..0000000 --- a/R/aux_pce_update.R +++ /dev/null @@ -1,261 +0,0 @@ -#' Update PCE -#' -#' Update PCE data using WDI and Special cases. 
-#' -#' @inheritParams aux_gdp -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -aux_pce_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - measure <- "pce" - branch <- match.arg(branch) - from <- match.arg(from) - -# ________________________________________________________________ -# Load data #### - - # Update WDI - # pip_wdi_update(maindir = maindir, - # from = from, - # force = force, - # branch = branch) - # - - wpce <- load_aux(measure = "wdi", - maindir = maindir, - branch = branch) - - setnames(wpce, "NE.CON.PRVT.PC.KD", "wdi_pce") - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## Special national accounts -------- - sna <- pipfun::load_from_gh( - measure = "sna", - owner = owner, - branch = branch, - ext = "csv" - ) - - # validate sna data - sna_validate_raw(sna = sna, detail = detail) - - sna_fy <- pipfun::load_from_gh( - measure = "sna", - owner = owner, - branch = branch, - filename = "sna_metadata", - ext = "csv" - ) - # validate sna_fy data - sna_fy_validate_raw(sna_fy = sna_fy, detail = detail) -# ____________________________________________________________________________ -# Clean PCE from WDI #### - - # Keep relevant variables - wpce <- wpce[, .(country_code, year, wdi_pce)] - - ## ---- Adjust FY to CY ---- - - # Merge WDI with special FY cases - sna_fy <- sna_fy[, c("Code", "Month", "Day")] - names(sna_fy) <- c("country_code", "fy_month", "fy_day") - wpce <- merge(wpce, sna_fy, by = "country_code", all.x = TRUE) - - # Calculate alpha - wpce[, max_days := days_in_month(fy_month, year)] - wpce[, month_num := get_month_number(fy_month)] - wpce[, alpha := ((month_num - 1) + fy_day / max_days) / 12] - - # Create lead/lag vars - wpce[, wdi_pce_lag := shift(wdi_pce), by = country_code] - wpce[, wdi_pce_lead := shift(wdi_pce, type = "lead"), by = 
country_code] - - # Calculate adjusted GDP for calendar year - wpce[, - wdi_pce_cy := fifelse(!is.na(alpha), - fifelse(alpha < 0.5 , - alpha * wdi_pce_lag + (1 - alpha) * wdi_pce, - alpha * wdi_pce + ( 1 - alpha) * wdi_pce_lead), - NA_real_) - ] - wpce[, - wdi_pce_tmp := fifelse(!is.na(alpha), wdi_pce_cy, wdi_pce) - ] - wpce[, - wdi_pce := - fifelse(country_code == "EGY" & year < 1980, # Egypt should only be adjusted after 1980 - wdi_pce, wdi_pce_tmp) - - ] - - # Keep relevant variables - pce <- wpce[, .(country_code, year, wdi_pce)] - - -# ____________________________________________________________________________ -# Special cases #### - - ## ---- Expand for special cases with U/R levels ---- - - # Special cases for IND, IDN, and CHN - sp <- pce[country_code %in% c("IND", "IDN", "CHN")] - - # Expand two time these cases using cross-join. - sp <- sp[CJ( - pce_data_level = c(0, 1), - country_code = country_code, - year = year, - unique = TRUE - ), - on = .(country_code, year) - ] - - # Add data level national to main dataset - pce[, pce_data_level := 2] - - # Append - pce <- rbindlist(list(pce, sp)) - - # Add domain column - pce[, - pce_domain := fifelse(pce_data_level == 2, 1, 2) - ] - - # Sort - setorder(pce, country_code, year, pce_data_level) - - ## ---- Recode domain and data level ---- - - # Recode domain and data_level variables - cols <- c("pce_domain", "pce_data_level") - pce[, - (cols) := lapply(.SD, as.character), - .SDcols = cols - ][ - , # recode domain - pce_domain := fcase( - pce_domain == "1", "national", - pce_domain == "2", "urban/rural", - pce_domain == "3", "subnational region" - ) - ][ # Recode data_level only for those that are national or urban/rural - pce_domain %in% c("national", "urban/rural"), - pce_data_level := fcase( - pce_data_level == "0", "rural", - pce_data_level == "1", "urban", - pce_data_level == "2", "national" - ) - ] - - - ## ---- Hard-coded custom modifications ---- - # get survey years where only PCE is present - sna <- 
sna[!is.na(PCE) - ][, # lower case coverage - coverage := tolower(coverage) - ] - - # If there are special countries - if (nrow(sna) > 0) { - # Join with Special National Accounts data. - setnames(x = sna, - old = c("countrycode", "coverage"), - new = c("country_code", "pce_data_level") - ) - - pce[sna, - on = .(country_code, year, pce_data_level), - `:=`( - sna_pce = i.PCE - ) - ] - - pce[, - pce := fifelse(is.na(sna_pce),wdi_pce, sna_pce) - ] - # remvoe extra variables - pce[, - c("sna_pce", "wdi_pce") := NULL] - - } else { - # If there are no special countries - setnames(pce, "wdi_pce", "pce") - } - - -# _______________________________________________________________________ -# Hard-coded countries #### - - # Remove observations for Venezuela after 2014 - pce[ - , - pce := fifelse(country_code == "VEN" & year > 2014, NA_real_, pce) - ] - - # Remove observations for Belize before 1992 - # See issue PIP-Technical-Team/pipaux#41 - pce[ - , - pce := fifelse(country_code == "BLZ" & year < 1992, NA_real_, pce) - ] - - # Remove all observations for Iraq - # See issue PIP-Technical-Team/pipaux#43 - pce[ - , - pce := fifelse(country_code == "IRQ", NA_real_, pce) - ] - - -# __________________________________________________________________ -# Finalize table #### - - - # Remove rows with missing GDP\ - pce <- na.omit(pce, "pce") - pce <- pce[!is.infinite(pce)] - - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - - pce <- pce[country_code %in% cl$country_code] - - # drop pce_domain - pce <- pce[, -c("pce_domain")] - - ## ---- Sign and save ---- - pce <- pce |> setnames("pce_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(pce, "aux_name", "pce") - setattr(pce, - "aux_key", - c("country_code", "year", "reporting_level")) - - # validate pce output data - pce_validate_output(pce = pce, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", 
branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = pce, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) - -} diff --git a/R/aux_pfw.R b/R/aux_pfw.R index 92da3b3..4dd4aca 100644 --- a/R/aux_pfw.R +++ b/R/aux_pfw.R @@ -39,3 +39,549 @@ aux_pfw <- function(action = c("update", "load"), return(dt) } } +#' Clean PFW +#' +#' Clean PFW data from Datalibweb to meet PIP protocols. +#' +#' @param y dataset with PPP data from `aux_pfw_update()`. +#' @inheritParams load_aux +#' +#' @keywords internal +aux_pfw_clean <- function(y, + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + branch <- match.arg(branch) + + if (!inherits(y, "data.table")) { + x <- as.data.table(y) + } else { + x <- copy(y) + } + + # get just inpovcal data + + + # change variable names + old_var <- + c( + "region", + "reg_pcn", + "code", + "ref_year", + "survname", + "comparability", + "datatype", + "rep_year" + ) + + new_var <- + c( + "wb_region_code", + "pcn_region_code", + "country_code", + "survey_year", + "survey_acronym", + "survey_comparability", + "welfare_type", + "reporting_year" + ) + + setnames(x, + old = old_var, + new = new_var + ) + + # Recode some variables + + x[ + , + `:=`( + # Recode survey coverage + survey_coverage = fcase( + survey_coverage == "N", "national", + survey_coverage == "R", "rural", + survey_coverage == "U", "urban", + default = "" + ), + # Recode welfare type + welfare_type = fcase( + grepl("[Ii]", welfare_type), "income", + grepl("[Cc]", welfare_type), "consumption", + default = "" + ), + surveyid_year = as.integer(surveyid_year), + survey_year = round(survey_year, 2) + ) + ] + + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + x <- x[country_code %in% cl$country_code] + + x <- unique(x) # remove duplicates + return(x) +} + +#' Update PFW +#' +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_pfw_update <- 
function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + measure <- "pfw" + branch <- match.arg(branch) + + # Read data + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + # validate pfw raw data + pfw_validate_raw(pfw = pfw, detail = detail) + + # Clean data + pfw <- aux_pfw_clean(pfw, + maindir = maindir, + branch = branch) + + # validate pfw raw data + pfw_validate_output(pfw = pfw, detail = detail) + + # Save dataset + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(pfw, "aux_name", "pfw") + + saved <- pipfun::pip_sign_save( + x = pfw, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) +} + +#' Generate a dataset that contains pfw keys +#' +#' @return data.table +#' @export +#' +aux_pfw_key <- function(){ + + pfw_temp <- load_aux("pfw", maindir = temp_fld) + + pfw_key_options <- pfw_temp[, .(country_code, + survey_year, + survey_acronym, + cpi_domain_var)] + + + cpi_temp <- load_aux("cpi", maindir = temp_fld) + + cpi_temp <- cpi_temp[, cpi_domain_var := + fifelse(reporting_level == "urban" & + cpi_domain_value == 1, "urban", "")] + + cpi_temp <- cpi_temp[, .(country_code, survey_year, survey_acronym, + cpi_domain_var, reporting_level)] + + pfw_key <- cpi_temp[pfw_key_options, on = .(country_code, survey_year, + survey_acronym, cpi_domain_var)] + + any(duplicated(pfw_key, by = c("country_code", "survey_year", "survey_acronym", "cpi_domain_var"))) + + return(pfw_key) +} + +#' Validate raw pfw data +#' +#' @param pfw raw pfw data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export 
+pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PFW raw data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(reg_pcn), + description = "`reg_pcn` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + reg_pcn, description = "`reg_pcn` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be 
character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(datatype), + description = "`datatype` should be character") |> + validate_cols(in_set(c("C", "I", "c", "i")), + datatype, description = "`datatype` values within range") |> + validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(rep_year), + description = "`rep_year` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = "`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + 
validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = "`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values 
within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values 
within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, code, year, survname, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean pfw data +#' +#' @param pfw clean pfw data, output via `aux_pfw_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ + + stopifnot("PFW clean data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW output data validation") |> + validate_if(is.character(wb_region_code), + description = "`wb_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + wb_region_code, description = "`wb_region_code` values within range") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + 
validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, 
description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(reporting_year), + description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(survey_comparability), + description = "`survey_comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = "`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = "`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + 
cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be 
character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, country_code, year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/aux_pfw_clean.R b/R/aux_pfw_clean.R deleted file mode 100644 index 12590b6..0000000 --- a/R/aux_pfw_clean.R +++ /dev/null @@ -1,85 +0,0 @@ -#' Clean PFW -#' -#' Clean PFW data from Datalibweb to meet PIP protocols. -#' -#' @param y dataset with PPP data from `aux_pfw_update()`. 
-#' @inheritParams load_aux -#' -#' @keywords internal -aux_pfw_clean <- function(y, - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - branch <- match.arg(branch) - - if (!inherits(y, "data.table")) { - x <- as.data.table(y) - } else { - x <- copy(y) - } - - # get just inpovcal data - - - # change variable names - old_var <- - c( - "region", - "reg_pcn", - "code", - "ref_year", - "survname", - "comparability", - "datatype", - "rep_year" - ) - - new_var <- - c( - "wb_region_code", - "pcn_region_code", - "country_code", - "survey_year", - "survey_acronym", - "survey_comparability", - "welfare_type", - "reporting_year" - ) - - setnames(x, - old = old_var, - new = new_var - ) - - # Recode some variables - - x[ - , - `:=`( - # Recode survey coverage - survey_coverage = fcase( - survey_coverage == "N", "national", - survey_coverage == "R", "rural", - survey_coverage == "U", "urban", - default = "" - ), - # Recode welfare type - welfare_type = fcase( - grepl("[Ii]", welfare_type), "income", - grepl("[Cc]", welfare_type), "consumption", - default = "" - ), - surveyid_year = as.integer(surveyid_year), - survey_year = round(survey_year, 2) - ) - ] - - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - x <- x[country_code %in% cl$country_code] - - x <- unique(x) # remove duplicates - return(x) -} - diff --git a/R/aux_pfw_key.R b/R/aux_pfw_key.R deleted file mode 100644 index 9a9bb14..0000000 --- a/R/aux_pfw_key.R +++ /dev/null @@ -1,31 +0,0 @@ -#' Generate a dataset that contains pfw keys -#' -#' @return data.table -#' @export -#' -aux_pfw_key <- function(){ - - pfw_temp <- load_aux("pfw", maindir = temp_fld) - - pfw_key_options <- pfw_temp[, .(country_code, - survey_year, - survey_acronym, - cpi_domain_var)] - - - cpi_temp <- load_aux("cpi", maindir = temp_fld) - - cpi_temp <- cpi_temp[, cpi_domain_var := - fifelse(reporting_level == "urban" & - cpi_domain_value == 1, "urban", "")] - - cpi_temp <- cpi_temp[, 
.(country_code, survey_year, survey_acronym, - cpi_domain_var, reporting_level)] - - pfw_key <- cpi_temp[pfw_key_options, on = .(country_code, survey_year, - survey_acronym, cpi_domain_var)] - - any(duplicated(pfw_key, by = c("country_code", "survey_year", "survey_acronym", "cpi_domain_var"))) - - return(pfw_key) -} diff --git a/R/aux_pfw_update.R b/R/aux_pfw_update.R deleted file mode 100644 index 54ab7e7..0000000 --- a/R/aux_pfw_update.R +++ /dev/null @@ -1,47 +0,0 @@ -#' Update PFW -#' -#' @inheritParams aux_pfw -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -aux_pfw_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "pfw" - branch <- match.arg(branch) - - # Read data - pfw <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - ext = "dta") - # validate pfw raw data - pfw_validate_raw(pfw = pfw, detail = detail) - - # Clean data - pfw <- aux_pfw_clean(pfw, - maindir = maindir, - branch = branch) - - # validate pfw raw data - pfw_validate_output(pfw = pfw, detail = detail) - - # Save dataset - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(pfw, "aux_name", "pfw") - - saved <- pipfun::pip_sign_save( - x = pfw, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) -} diff --git a/R/aux_pl.R b/R/aux_pl.R index 8f82cd1..8eb845c 100644 --- a/R/aux_pl.R +++ b/R/aux_pl.R @@ -61,3 +61,91 @@ aux_pl <- function(action = c("update", "load"), return(df) } } + +#' Build a data table for each list from yaml file with poverty lines info +#' +#' @param l list from yaml file +#' +#' @return data.table +#' @export +aux_pl_clean <- function(l) { + + + # ____________________________________________________________________________ + # Computations #### + + pls 
<- + purrr::map(.x = l$ranges, + .f = ~{ + seq(.x$min, .x$max, .x$increment) + }) |> + unlist() + + # Create data frame + df <- data.table::data.table( + name = as.character(pls), + poverty_line = pls + ) + + + df[, + c("is_default", "is_visible", "name", "ppp_year") + := { + id <- fifelse(name == l$default, TRUE, FALSE) + + iv <- fifelse(name %in% l$visible, TRUE, FALSE) + + n <- fifelse(n_decimals(poverty_line) == 1, paste0(name, "0"), name) + n <- fifelse(n_decimals(poverty_line) == 0, paste0(n, ".00"), n) + + list(id, iv, n, l$ppp_year) + }] + + # ____________________________________________________________________________ + # Return #### + return(df) + +} + +#' Validate output pl data +#' +#' @param pl output pl data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ + + stopifnot("PL clean data is not loaded" = !is.null(pl)) + + report <- data_validation_report() + + validate(pl, name = "PL output data validation") |> + validate_if(is.character(name), + description = "`name` should be character") |> + validate_if(is.numeric(poverty_line), + description = "`poverty_line` should be numeric") |> + validate_if(is.logical(is_default), + description = "`is_default` should be logical") |> + validate_if(is.logical(is_visible), + description = "`is_visible` should be logical") |> + validate_if(is.integer(ppp_year), + description = "`ppp_year` should be numeric") |> + validate_cols(not_na, name, ppp_year, + description = "no missing values in key variables") |> + validate_if(is_uniq(name, ppp_year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} 
+ + diff --git a/R/aux_pl_clean.R b/R/aux_pl_clean.R deleted file mode 100644 index 8236df7..0000000 --- a/R/aux_pl_clean.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Build a data table for each list from yaml file with poverty lines info -#' -#' @param l list from yaml file -#' -#' @return data.table -#' @export -aux_pl_clean <- function(l) { - - - # ____________________________________________________________________________ - # Computations #### - - pls <- - purrr::map(.x = l$ranges, - .f = ~{ - seq(.x$min, .x$max, .x$increment) - }) |> - unlist() - - # Create data frame - df <- data.table::data.table( - name = as.character(pls), - poverty_line = pls - ) - - - df[, - c("is_default", "is_visible", "name", "ppp_year") - := { - id <- fifelse(name == l$default, TRUE, FALSE) - - iv <- fifelse(name %in% l$visible, TRUE, FALSE) - - n <- fifelse(n_decimals(poverty_line) == 1, paste0(name, "0"), name) - n <- fifelse(n_decimals(poverty_line) == 0, paste0(n, ".00"), n) - - list(id, iv, n, l$ppp_year) - }] - - # ____________________________________________________________________________ - # Return #### - return(df) - -} diff --git a/R/aux_pop.R b/R/aux_pop.R index 036fee9..3c37898 100644 --- a/R/aux_pop.R +++ b/R/aux_pop.R @@ -38,3 +38,476 @@ aux_pop <- function(action = c("update", "load"), return(df) } } + +#' Update POP +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @param from character: Source for population data. 
+#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pop +aux_pop_update <- function(force = FALSE, + from = c("gh", "file", "api"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + # Check arguments + from <- match.arg(from) + branch <- match.arg(branch) + measure <- "pop" + + # Get the most recent year in PFW to filter population projection + + pfw <- pipload::pip_load_aux("pfw", + branch = branch, + maindir = maindir) + # year_max <- pfw[, max(year)] + # get current year as max year + year_max <- Sys.Date() |> + format("%Y") |> + as.numeric() + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # From WDI --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if (from == "api") { + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## from API -------- + + pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") + pop <- wbstats::wb_data(indicator = pop_indicators, + country = "all", # this is new + lang = "en", + return_wide = FALSE) |> + setDT() + + # validate wb pop data + pop_validate_raw(pop = pop, detail = detail) + + # rename vars + pop <- pop[, c("iso3c", "date", "indicator_id", "value")] + + setnames(pop, + new = c("country_code", "year", "coverage", "pop")) + + + + pop[, + year := as.numeric(year) + ][, + pop_data_level := + fcase( + grepl("POP", coverage), 2, + grepl("RUR", coverage), 0, + grepl("URB", coverage), 1 + ) + ][, + coverage := NULL] + + ### Ger special cases --------- + + spop <- pipfun::load_from_gh( + measure = measure, + filename = "spop", + owner = owner, + branch = branch, + tag = tag, + ext = "csv") |> + clean_names_from_wide() |> + clean_from_wide() + + + pop <- rbindlist(list(pop, spop), + use.names = TRUE, + fill = TRUE) + + + } else { + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## from Emi's file -------- + + # Now Emi's file is 
uploaded directly to GH. So we get it from there. + # Load data + + pop_main <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "xlsx" + ) |> + clean_names_from_wide() |> + clean_from_wide() + + # validate pop main raw data + popmain_validate_raw(pop_main = pop_main, detail = detail) + + ### Ger special cases --------- + spop <- pipfun::load_from_gh( + measure = measure, + filename = "spop", + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) |> + clean_names_from_wide() |> + clean_from_wide() + + # validate special cases pop raw data + spop_validate_raw(spop = spop, detail = detail) + + pop <- joyn::joyn(pop_main, spop, + by = c("country_code", "year", "pop_data_level"), + update_values = TRUE, + reportvar = FALSE, + verbose = FALSE) + + # pop <- rbindlist(list(pop_main, spop), + # use.names = TRUE, + # fill = TRUE) + + } + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Clean data --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # Remove years prior to 1960 + pop <- pop[!is.na(pop) & year >= 1960] + pop <- pop[year <= year_max] + + # sorting + setorder(pop, country_code, year, pop_data_level) + setcolorder(pop, c("country_code", "year", "pop_data_level", "pop")) + + pop[, + pop_domain := fifelse(pop_data_level == 2, 1, 2)] + + # recode domain and data_level variables + cols <- c("pop_domain", "pop_data_level") + pop[, + (cols) := lapply(.SD, as.character), + .SDcols = cols + ][ + , # recode domain + pop_domain := fcase( + pop_domain == "1", "national", + pop_domain == "2", "urban/rural", + pop_domain == "3", "subnational region" + ) + ][ # Recode data_level only for those that are national or urban/rural + pop_domain %in% c("national", "urban/rural"), + pop_data_level := fcase( + pop_data_level == "0", "rural", + pop_data_level == "1", "urban", + pop_data_level == "2", "national" + ) + ] + + + # Remove any non-WDI countries + cl <- load_aux(maindir = 
maindir, + measure = "country_list", + branch = branch) + + setDT(cl) + pop <- pop[country_code %in% cl$country_code] |> + unique() # make sure we don't havce any duplicates + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Save data --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # drop pce_domain + pop <- pop[, -c("pop_domain")] + + pop <- pop |> setnames("pop_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pop, "aux_name", "pop") + setattr(pop, + "aux_key", + c("country_code", "year", "reporting_level")) + + # validate output pop data + pop_validate_output(pop = pop, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = pop, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) + +} + + + +#' Clean names from wide WDI format +#' +#' @param x data frame +#' +#' @return dataframe with names cleaned +#' @keywords internal +clean_names_from_wide <- function(x) { + if (!is.data.table(x)) { + setDT(x) + } + nnames <- as.character(x[2, 1:4]) + setnames(x, 1:4, nnames) + x <- x[-c(1:2)] + x +} + + +#' Clean from WDI format +#' +#' @param x data frame +#' +#' @return dataframe with names cleaned +#' @keywords internal +clean_from_wide <- function(x) { + if (!is.data.table(x)) { + setDT(x) + } + + + year_vars <- names(x)[6:ncol(x)] + x$Series_Name <- NULL + x$Time_Name <- NULL + + # Reshape to long format + pop_long <- x |> + data.table::setDT() |> + data.table::melt( + id.vars = c("Country", "Series"), + measure.vars = year_vars, + variable.name = "Year", + value.name = "Population" + ) + pop_long[, + Year := as.numeric(as.character(Year)) + ][, + Population := { + Population[Population == "."] <- NA_character_ + as.numeric(Population) + }] + + + + pop <- pop_long + # Create data_level column + pop[, + pop_data_level := + fcase( + grepl("POP", 
Series), 2, + grepl("RUR", Series), 0, + grepl("URB", Series), 1 + ) + ][, + Series := NULL] + + # Set colnames + setnames( + pop, + old = c("Country", "Year", "Population"), + new = c("country_code", "year", "pop") + ) + + return(pop) +} + +#' Validate raw main pop data +#' +#' @param pop_main raw pop main data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ + + stopifnot("POP main raw data is not loaded" = !is.null(pop_main)) + + report <- data_validation_report() + + validate(pop_main, name = "POP main raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate pop raw data download from wdi +#' +#' @param pop raw pop data, as loaded via `wbstats::wb_data` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export 
+pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ + + stopifnot("WB POP raw data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "WB POP raw data validation") |> + validate_if(is.character(indicator_id), + description = "`indicator_id` should be character") |> + validate_cols(in_set(c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL")), + indicator_id, description = "`indicator_id` values within range") |> + validate_if(is.character(indicator), + description = "`indicator` should be character") |> + validate_if(is.character(iso2c), + description = "`iso2c` should be character") |> + validate_if(is.character(iso3c), + description = "`iso3c` should be character") |> + validate_if(is.character(country), + description = "`country` should be character") |> + validate_if(is.numeric(date), + description = "`date` should be numeric") |> + validate_if(is.numeric(value), + description = "`value` should be numeric") |> + validate_if(is.character(unit), + description = "`unit` should be character") |> + validate_if(is.character(obs_status), + description = "`obs_status` should be character") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_if(is_date(last_updated), + description = "`last_updated` should be date") |> + validate_cols(not_na, indicator_id, iso3c, date, + description = "no missing values in key variables") |> + validate_if(is_uniq(indicator_id, iso3c, date), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate output pop data +#' +#' @param pop output pop data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords 
internal +#' +#' @export +pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ + + stopifnot("POP clean data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "POP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + # validate_if(is.character(pop_domain), + # description = "`pop_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pop_domain, description = "`pop_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw special cases pop data +#' +#' @param spop raw special case pop data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ + + stopifnot("Special POP raw data is not loaded" = !is.null(spop)) + + report <- data_validation_report() + + validate(spop, name = "Special POP raw data validation") |> + 
validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_pop_update.R b/R/aux_pop_update.R deleted file mode 100644 index b417137..0000000 --- a/R/aux_pop_update.R +++ /dev/null @@ -1,288 +0,0 @@ -#' Update POP -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @param from character: Source for population data. 
-#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams aux_pop -aux_pop_update <- function(force = FALSE, - from = c("gh", "file", "api"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - # Check arguments - from <- match.arg(from) - branch <- match.arg(branch) - measure <- "pop" - - # Get the most recent year in PFW to filter population projection - - pfw <- pipload::pip_load_aux("pfw", - branch = branch, - maindir = maindir) - # year_max <- pfw[, max(year)] - # get current year as max year - year_max <- Sys.Date() |> - format("%Y") |> - as.numeric() - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # From WDI --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - if (from == "api") { - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## from API -------- - - pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") - pop <- wbstats::wb_data(indicator = pop_indicators, - country = "all", # this is new - lang = "en", - return_wide = FALSE) |> - setDT() - - # validate wb pop data - pop_validate_raw(pop = pop, detail = detail) - - # rename vars - pop <- pop[, c("iso3c", "date", "indicator_id", "value")] - - setnames(pop, - new = c("country_code", "year", "coverage", "pop")) - - - - pop[, - year := as.numeric(year) - ][, - pop_data_level := - fcase( - grepl("POP", coverage), 2, - grepl("RUR", coverage), 0, - grepl("URB", coverage), 1 - ) - ][, - coverage := NULL] - - ### Ger special cases --------- - - spop <- pipfun::load_from_gh( - measure = measure, - filename = "spop", - owner = owner, - branch = branch, - tag = tag, - ext = "csv") |> - clean_names_from_wide() |> - clean_from_wide() - - - pop <- rbindlist(list(pop, spop), - use.names = TRUE, - fill = TRUE) - - - } else { - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## from Emi's file -------- - - # Now Emi's file is 
uploaded directly to GH. So we get it from there. - # Load data - - pop_main <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "xlsx" - ) |> - clean_names_from_wide() |> - clean_from_wide() - - # validate pop main raw data - popmain_validate_raw(pop_main = pop_main, detail = detail) - - ### Ger special cases --------- - spop <- pipfun::load_from_gh( - measure = measure, - filename = "spop", - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) |> - clean_names_from_wide() |> - clean_from_wide() - - # validate special cases pop raw data - spop_validate_raw(spop = spop, detail = detail) - - pop <- joyn::joyn(pop_main, spop, - by = c("country_code", "year", "pop_data_level"), - update_values = TRUE, - reportvar = FALSE, - verbose = FALSE) - - # pop <- rbindlist(list(pop_main, spop), - # use.names = TRUE, - # fill = TRUE) - - } - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Clean data --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # Remove years prior to 1960 - pop <- pop[!is.na(pop) & year >= 1960] - pop <- pop[year <= year_max] - - # sorting - setorder(pop, country_code, year, pop_data_level) - setcolorder(pop, c("country_code", "year", "pop_data_level", "pop")) - - pop[, - pop_domain := fifelse(pop_data_level == 2, 1, 2)] - - # recode domain and data_level variables - cols <- c("pop_domain", "pop_data_level") - pop[, - (cols) := lapply(.SD, as.character), - .SDcols = cols - ][ - , # recode domain - pop_domain := fcase( - pop_domain == "1", "national", - pop_domain == "2", "urban/rural", - pop_domain == "3", "subnational region" - ) - ][ # Recode data_level only for those that are national or urban/rural - pop_domain %in% c("national", "urban/rural"), - pop_data_level := fcase( - pop_data_level == "0", "rural", - pop_data_level == "1", "urban", - pop_data_level == "2", "national" - ) - ] - - - # Remove any non-WDI countries - cl <- load_aux(maindir = 
maindir, - measure = "country_list", - branch = branch) - - setDT(cl) - pop <- pop[country_code %in% cl$country_code] |> - unique() # make sure we don't havce any duplicates - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Save data --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # drop pce_domain - pop <- pop[, -c("pop_domain")] - - pop <- pop |> setnames("pop_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(pop, "aux_name", "pop") - setattr(pop, - "aux_key", - c("country_code", "year", "reporting_level")) - - # validate output pop data - pop_validate_output(pop = pop, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = pop, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) - -} - - - -#' Clean names from wide WDI format -#' -#' @param x data frame -#' -#' @return dataframe with names cleaned -#' @keywords internal -clean_names_from_wide <- function(x) { - if (!is.data.table(x)) { - setDT(x) - } - nnames <- as.character(x[2, 1:4]) - setnames(x, 1:4, nnames) - x <- x[-c(1:2)] - x -} - - -#' Clean from WDI format -#' -#' @param x data frame -#' -#' @return dataframe with names cleaned -#' @keywords internal -clean_from_wide <- function(x) { - if (!is.data.table(x)) { - setDT(x) - } - - - year_vars <- names(x)[6:ncol(x)] - x$Series_Name <- NULL - x$Time_Name <- NULL - - # Reshape to long format - pop_long <- x |> - data.table::setDT() |> - data.table::melt( - id.vars = c("Country", "Series"), - measure.vars = year_vars, - variable.name = "Year", - value.name = "Population" - ) - pop_long[, - Year := as.numeric(as.character(Year)) - ][, - Population := { - Population[Population == "."] <- NA_character_ - as.numeric(Population) - }] - - - - pop <- pop_long - # Create data_level column - pop[, - pop_data_level := - fcase( - grepl("POP", 
Series), 2, - grepl("RUR", Series), 0, - grepl("URB", Series), 1 - ) - ][, - Series := NULL] - - # Set colnames - setnames( - pop, - old = c("Country", "Year", "Population"), - new = c("country_code", "year", "pop") - ) - - return(pop) -} diff --git a/R/aux_ppp.R b/R/aux_ppp.R index 6efdc95..a1eeaff 100644 --- a/R/aux_ppp.R +++ b/R/aux_ppp.R @@ -60,3 +60,353 @@ aux_ppp <- function(action = c("update", "load"), } + +#' Clean PPP data from datalibweb to meet PIP protocols +#' +#' @param y dataset with PPP data from `aux_ppp_update()`. +#' @param default_year numeric: ICP round year. Default is 2011 +#' +#' @keywords internal +aux_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { + x <- data.table::as.data.table(y) + + y <- melt(x, + id.vars = c("code", "ppp_domain", "datalevel"), + measure.vars = patterns("^ppp_[0-9]{4}_[Vv][0-9]_[Vv][0-9]$"), + variable.name = "ver", + value.name = "ppp" + ) + + y[ + , + c("p", "ppp_year", "release_version", "adaptation_version") := tstrsplit(ver, "_") + ][ + , + `:=`( + ppp_year = as.numeric(ppp_year), + ppp_domain = as.character(ppp_domain), + datalevel = as.character(datalevel) + ) + ][ + , + # This part should not exist if the raw data + # has been properly created + ppp_data_level := fcase( + ppp_domain %chin% c("urban/rural", "2") & datalevel == "0", "rural", + ppp_domain %chin% c("urban/rural", "2") & datalevel == "1", "urban", + ppp_domain %chin% c("national", "1") & datalevel %chin% c("2", "", NA_character_), "national", + default = "" + ) + ][ + , + c("p", "ver", "datalevel") := NULL + ] + + setorder(y, code, ppp_year, release_version, adaptation_version) + + #--------- Get default version --------- + + y[ # Find Max release version + , + d1 := release_version == max(release_version), + by = .(code, ppp_year) + ][ + # Find max adaptation version of the max release + d1 == TRUE, + d2 := adaptation_version == max(adaptation_version), + by = .(code, ppp_year) + ][ + , + # get intersection + `:=`( + 
ppp_default = (d1 == TRUE & d2 == TRUE & ppp_year == (default_year)), + ppp_default_by_year = (d1 == TRUE & d2 == TRUE), + country_code = code + ) + ][ + , + # Remove unnecessary variables + c("d1", "d2", "code") := NULL + ] + + setcolorder( + y, + c( + "country_code", + "ppp_year", + "release_version", + "adaptation_version", + "ppp", + "ppp_default", + "ppp_default_by_year", + "ppp_domain", + "ppp_data_level" + ) + ) + + y <- unique(y) # remove duplicates + + # Remove non WDI countries + non_wdi <- c("BES", "EGZ", "RUT", "SDO") + if (any(y$country_code %in% non_wdi)) { + y <- y[!(country_code %in% non_wdi)] + } + + return(y) +} + +#' Update PPP +#' +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_ppp_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + + # ____________________________________________________________________________ + # set up #### + + measure <- "ppp" + branch <- match.arg(branch) + + + # ____________________________________________________________________________ + # Load raw data #### + + ppp <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) + + # validate ppp raw data + ppp_validate_raw(ppp = ppp, detail = detail) + + # ____________________________________________________________________________ + # cleaning #### + + + # Clean data + ppp <- aux_ppp_clean(ppp) + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + ppp <- ppp[country_code %in% cl$country_code] + + + ## ............................................................................ 
+ ## Special cases #### + + # Hardcode domain / data_level fix for NRU + ppp$ppp_domain <- + ifelse(ppp$country_code == "NRU" & is.na(ppp$ppp_domain), + 1, ppp$ppp_domain + ) + ppp$ppp_data_level <- + ifelse(ppp$country_code == "NRU" & ppp$ppp_data_level == "", + "national", ppp$ppp_data_level + ) + + + # ____________________________________________________________________________ + # Saving #### + + # drop ppp_domain + ppp <- ppp[, -c("ppp_domain")] + + ppp <- ppp |> setnames("ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(ppp, "aux_name", "ppp") + setattr(ppp, + "aux_key", + c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. + + # validate ppp output data + ppp_validate_output(ppp = ppp, detail = detail) + + if (branch == "main") { + branch <- "" + } + + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = ppp, + measure = measure, + msrdir = msrdir, + force = force + ) + + + # ____________________________________________________________________________ + # PPP vintages data #### + + vars <- c("ppp_year", "release_version", "adaptation_version") + ppp_vintage <- unique(ppp[, ..vars], by = vars) + + data.table::setnames(x = ppp_vintage, + old = c("release_version", "adaptation_version"), + new = c("ppp_rv", "ppp_av")) + + # ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", + # skip_absent=TRUE) + # + # setattr(ppp_vintage, "aux_name", "ppp") + # setattr(ppp_vintage, + # "aux_key", + # c("country_code", "reporting_level")) + + # Save + pipfun::pip_sign_save( + x = ppp_vintage, + measure = "ppp_vintage", + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) +} + +#' Validate output ppp data +#' +#' @param ppp output ppp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' 
@export +ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ + + stopifnot("PPP output data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(ppp_year), + description = "`ppp_year` should be character") |> + validate_if(is.character(release_version), + description = "`release_version` should be character") |> + validate_if(is.character(adaptation_version), + description = "`adaptation_version` should be character") |> + validate_if(is.numeric(ppp), + description = "`ppp` should be numeric") |> + validate_if(is.logical(ppp_default), + description = "`ppp_default` should be numeric") |> + validate_if(is.logical(ppp_default_by_year), + description = "`ppp_default_by_year` should be numeric") |> + # validate_if(is.character(ppp_domain), + # description = "`ppp_domain` should be character") |> + # validate_cols(in_set(c("1", "2")), + # ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + validate_cols(not_na, country_code, ppp_year, reporting_level, + adaptation_version, release_version, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, ppp_year, + reporting_level, adaptation_version, release_version), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw ppp data +#' +#' @param ppp raw ppp data, as loaded via 
`pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PPP raw data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP raw data validation") |> + validate_if(is.character(CountryName), + description = "`CountryName` should be character") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(CoverageType), + description = "`CoverageType` should be character") |> + validate_cols(in_set(c("National", "Rural", "Urban")), + CoverageType, description = "`CoverageType` values within range") |> + validate_if(is.numeric(ppp_2005_v1_v1), + description = "`ppp_2005_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v1), + description = "`ppp_2011_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v1), + description = "`ppp_2011_v2_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v2), + description = "`ppp_2011_v1_v2` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v2), + description = "`ppp_2011_v2_v2` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v1), + description = "`ppp_2017_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v2), + description = "`ppp_2017_v1_v2` should be numeric") |> + validate_if(is.numeric(source_ppp_2011), + description = "`source_ppp_2011` should be numeric") |> + validate_if(is.numeric(source_ppp_2005), + description = "`source_ppp_2005` should be numeric") |> + validate_if(is.numeric(datalevel), + description = "`datalevel` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + datalevel, description = "`datalevel` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should 
be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.numeric(ppp_domain_value), + description = "`ppp_domain_value` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain_value, description = "`ppp_domain_value` values within range") |> + validate_if(is.numeric(oldicp2005), + description = "`oldicp2005` should be numeric") |> + validate_if(is.numeric(oldicp2011), + description = "`oldicp2011` should be numeric") |> + validate_if(is.character(Seriesname), + description = "`Seriesname` should be character") |> + validate_if(is.character(note_may192020), + description = "`note_may192020` should be character") |> + validate_if(is.character(ppp_2017_v1_v2_note), + description = "`ppp_2017_v1_v2_note` should be character") |> + validate_cols(not_na, code, CoverageType, datalevel, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, CoverageType, datalevel), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/aux_ppp_clean.R b/R/aux_ppp_clean.R deleted file mode 100644 index 6c93c70..0000000 --- a/R/aux_ppp_clean.R +++ /dev/null @@ -1,93 +0,0 @@ -#' Clean PPP data from datalibweb to meet PIP protocols -#' -#' @param y dataset with PPP data from `aux_ppp_update()`. -#' @param default_year numeric: ICP round year. 
Default is 2011 -#' -#' @keywords internal -aux_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { - x <- data.table::as.data.table(y) - - y <- melt(x, - id.vars = c("code", "ppp_domain", "datalevel"), - measure.vars = patterns("^ppp_[0-9]{4}_[Vv][0-9]_[Vv][0-9]$"), - variable.name = "ver", - value.name = "ppp" - ) - - y[ - , - c("p", "ppp_year", "release_version", "adaptation_version") := tstrsplit(ver, "_") - ][ - , - `:=`( - ppp_year = as.numeric(ppp_year), - ppp_domain = as.character(ppp_domain), - datalevel = as.character(datalevel) - ) - ][ - , - # This part should not exist if the raw data - # has been properly created - ppp_data_level := fcase( - ppp_domain %chin% c("urban/rural", "2") & datalevel == "0", "rural", - ppp_domain %chin% c("urban/rural", "2") & datalevel == "1", "urban", - ppp_domain %chin% c("national", "1") & datalevel %chin% c("2", "", NA_character_), "national", - default = "" - ) - ][ - , - c("p", "ver", "datalevel") := NULL - ] - - setorder(y, code, ppp_year, release_version, adaptation_version) - - #--------- Get default version --------- - - y[ # Find Max release version - , - d1 := release_version == max(release_version), - by = .(code, ppp_year) - ][ - # Find max adaptation version of the max release - d1 == TRUE, - d2 := adaptation_version == max(adaptation_version), - by = .(code, ppp_year) - ][ - , - # get intersection - `:=`( - ppp_default = (d1 == TRUE & d2 == TRUE & ppp_year == (default_year)), - ppp_default_by_year = (d1 == TRUE & d2 == TRUE), - country_code = code - ) - ][ - , - # Remove unnecessary variables - c("d1", "d2", "code") := NULL - ] - - setcolorder( - y, - c( - "country_code", - "ppp_year", - "release_version", - "adaptation_version", - "ppp", - "ppp_default", - "ppp_default_by_year", - "ppp_domain", - "ppp_data_level" - ) - ) - - y <- unique(y) # remove duplicates - - # Remove non WDI countries - non_wdi <- c("BES", "EGZ", "RUT", "SDO") - if (any(y$country_code %in% non_wdi)) { - y <- 
y[!(country_code %in% non_wdi)] - } - - return(y) -} diff --git a/R/aux_ppp_update.R b/R/aux_ppp_update.R deleted file mode 100644 index 5216521..0000000 --- a/R/aux_ppp_update.R +++ /dev/null @@ -1,121 +0,0 @@ -#' Update PPP -#' -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -aux_ppp_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - -# ____________________________________________________________________________ -# set up #### - - measure <- "ppp" - branch <- match.arg(branch) - - -# ____________________________________________________________________________ -# Load raw data #### - - ppp <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) - - # validate ppp raw data - ppp_validate_raw(ppp = ppp, detail = detail) - -# ____________________________________________________________________________ -# cleaning #### - - - # Clean data - ppp <- aux_ppp_clean(ppp) - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - ppp <- ppp[country_code %in% cl$country_code] - - -## ............................................................................ 
-## Special cases #### - - # Hardcode domain / data_level fix for NRU - ppp$ppp_domain <- - ifelse(ppp$country_code == "NRU" & is.na(ppp$ppp_domain), - 1, ppp$ppp_domain - ) - ppp$ppp_data_level <- - ifelse(ppp$country_code == "NRU" & ppp$ppp_data_level == "", - "national", ppp$ppp_data_level - ) - - -# ____________________________________________________________________________ -# Saving #### - - # drop ppp_domain - ppp <- ppp[, -c("ppp_domain")] - - ppp <- ppp |> setnames("ppp_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(ppp, "aux_name", "ppp") - setattr(ppp, - "aux_key", - c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. - - # validate ppp output data - ppp_validate_output(ppp = ppp, detail = detail) - - if (branch == "main") { - branch <- "" - } - - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = ppp, - measure = measure, - msrdir = msrdir, - force = force - ) - - -# ____________________________________________________________________________ -# PPP vintages data #### - - vars <- c("ppp_year", "release_version", "adaptation_version") - ppp_vintage <- unique(ppp[, ..vars], by = vars) - - data.table::setnames(x = ppp_vintage, - old = c("release_version", "adaptation_version"), - new = c("ppp_rv", "ppp_av")) - - # ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", - # skip_absent=TRUE) - # - # setattr(ppp_vintage, "aux_name", "ppp") - # setattr(ppp_vintage, - # "aux_key", - # c("country_code", "reporting_level")) - - # Save - pipfun::pip_sign_save( - x = ppp_vintage, - measure = "ppp_vintage", - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) -} diff --git a/R/aux_sna.R b/R/aux_sna.R index e7b2745..f137499 100644 --- a/R/aux_sna.R +++ b/R/aux_sna.R @@ -46,4 +46,114 @@ aux_sna <- function(action = c("update", "load"), ) return(dt) } -} # end of pip_gdp +} # end + +#' Validate raw 
special national accounts (sna) data +#' +#' @param sna raw sna data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ + + stopifnot("SNA raw data is not loaded" = !is.null(sna)) + + report <- data_validation_report() + + validate(sna, name = "SNA raw data validation") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_cols(in_set(c("National")), + coverage, description = "`coverage` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(GDP), + description = "`GDP` should be numeric") |> + validate_if(is.logical(PCE), + description = "`PCE` should be logical") |> + validate_if(is.character(sourceGDP), + description = "`sourceGDP` should be character") |> + validate_if(is.logical(sourcePCE), + description = "`sourcePCE` should be logical") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw sna_fy data +#' +#' @param sna_fy raw sna_fy data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal 
+#' +#' @export +sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ + + stopifnot("sna_fy raw data is not loaded" = !is.null(sna_fy)) + + report <- data_validation_report() + + validate(sna_fy, name = "sna_fy raw data validation") |> + validate_if(is.character(Code), + description = "`Code` should be character") |> + validate_if(is.character(LongName), + description = "`LongName` should be character") |> + validate_if(is.character(SpecialNotes), + description = "`SpecialNotes` should be character") |> + validate_if(is.character(Month), + description = "`Month` should be character") |> + validate_if(is.numeric(Day), + description = "`Day` should be numeric") |> + validate_cols(not_na, Code, Month, Day, + description = "no missing values in key variables") |> + # validate_if(is_uniq(Code, LongName), + # description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Fake PIP SNA function +#' +#' @inheritParams aux_gdp +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +fake_aux_sna <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api")) { + + return(invisible(TRUE)) +} + + + diff --git a/R/aux_update_all.R b/R/aux_update_all.R deleted file mode 100644 index daa3b38..0000000 --- a/R/aux_update_all.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Update all auxiliary data at once -#' -#' @inheritParams aux_cpi -#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. 
-#' @export -aux_update_all <- function(force = FALSE, - popsrc = getOption("pipaux.popsrc"), - maindir = gls$PIP_DATA_DIR) { - - # List of countries in WDI - aux_country_list(force = force, maindir = maindir) - - # PIP countries and regions - aux_countries(force = force, maindir = maindir) - aux_regions(force = force, maindir = maindir) - - # PIP Indicators - aux_indicators(force = force, maindir = maindir) - - # Poverty lines - aux_pl(force = force, maindir = maindir) - - # PFW, CPI and PPP from DLW - aux_pfw(force = force, maindir = maindir) - aux_cpi(force = force, maindir = maindir) - aux_ppp(force = force, maindir = maindir) - - # POP from Emi or WDI - aux_pop(force = force, maindir = maindir, src = popsrc) - - # GDP from WEO, Maddison and WDI (+ a few special cases) - aux_weo(force = force, maindir = maindir) - aux_maddison(force = force, maindir = maindir) - aux_gdp(force = force, maindir = maindir) - - # PCE from WDI (+ a few special cases) - aux_pce(force = force, maindir = maindir) - - # Country profiles (from Poverty GP) - aux_cp(force = force, maindir = maindir) - - # Survey metadata (from Poverty GP) - aux_metadata(force = force, maindir = maindir) - - return(invisible()) -} diff --git a/R/aux_wdi.R b/R/aux_wdi.R index e218bf5..7230a09 100644 --- a/R/aux_wdi.R +++ b/R/aux_wdi.R @@ -40,3 +40,125 @@ aux_wdi <- function(action = c("update", "load"), return(dt) } } # end of pip_wdi + +#' Update National accounts data from WDI +#' +#' GDP and HFCE data from WDI. 
It could be either from API or from file +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_gdp +#' @return data.table with gdp and pce variables +#' @export +#' +#' @examples +#' aux_wdi_update() +aux_wdi_update <- function(force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + + + from <- match.arg(from) + branch <- match.arg(branch) + + # ______________________________________________________ + # Computations #### + measure <- "wdi" + + ## ............................................................... + ## From file #### + + if (from %in% c("file", "gh")) { + wdi <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "csv") + + } else { + ## ........................................................................ + ## From API #### + wdi_indicators <- c("NY.GDP.PCAP.KD", "NE.CON.PRVT.PC.KD") + wdi <- wbstats::wb_data(indicator = wdi_indicators, + lang = "en") |> + setDT() + + wdi[, + c("country", "iso2c") := NULL] + + # Rename columns + setnames(wdi, + old = c("iso3c", "date"), + new = c("country_code", "year") + ) + } + # validate wdi raw data + wdi_validate_raw(wdi = wdi, detail = detail) + + # _________________________________________________________________________ + # Save and Return #### + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(wdi, "aux_name", "wdi") + setattr(wdi, + "aux_key", + c("country_code", "year")) + + saved <- pipfun::pip_sign_save( + x = wdi, + measure = measure, + msrdir = msrdir, + force = force, + save_dta = FALSE + ) + + return(invisible(saved)) + +} + +#' Validate raw wdi data +#' +#' @param wdi raw wdi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE 
+#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WDI raw data is not loaded" = !is.null(wdi)) + + report <- data_validation_report() + + validate(wdi, name = "WDI raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(NE.CON.PRVT.PC.KD), + description = "`NE.CON.PRVT.PC.KD` should be numeric") |> + validate_if(is.numeric(NY.GDP.PCAP.KD), + description = "`NY.GDP.PCAP.KD` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_wdi_update.R b/R/aux_wdi_update.R deleted file mode 100644 index 36894ac..0000000 --- a/R/aux_wdi_update.R +++ /dev/null @@ -1,81 +0,0 @@ -#' Update National accounts data from WDI -#' -#' GDP and HFCE data from WDI. 
It could be either from API or from file -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams aux_gdp -#' @return data.table with gdp and pce variables -#' @export -#' -#' @examples -#' aux_wdi_update() -aux_wdi_update <- function(force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - - - from <- match.arg(from) - branch <- match.arg(branch) - - # ______________________________________________________ - # Computations #### - measure <- "wdi" - - ## ............................................................... - ## From file #### - - if (from %in% c("file", "gh")) { - wdi <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - ext = "csv") - - } else { - ## ........................................................................ - ## From API #### - wdi_indicators <- c("NY.GDP.PCAP.KD", "NE.CON.PRVT.PC.KD") - wdi <- wbstats::wb_data(indicator = wdi_indicators, - lang = "en") |> - setDT() - - wdi[, - c("country", "iso2c") := NULL] - - # Rename columns - setnames(wdi, - old = c("iso3c", "date"), - new = c("country_code", "year") - ) - } - # validate wdi raw data - wdi_validate_raw(wdi = wdi, detail = detail) - - # _________________________________________________________________________ - # Save and Return #### - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(wdi, "aux_name", "wdi") - setattr(wdi, - "aux_key", - c("country_code", "year")) - - saved <- pipfun::pip_sign_save( - x = wdi, - measure = measure, - msrdir = msrdir, - force = force, - save_dta = FALSE - ) - - return(invisible(saved)) - -} - diff --git a/R/aux_weo.R b/R/aux_weo.R index 5e000c6..9c98bc6 100644 --- a/R/aux_weo.R +++ b/R/aux_weo.R @@ -75,3 +75,240 @@ aux_weo <- function(action = 
c("update", "load"), return(dt) } } + +#' Clean WEO data +#' +#' @param dt database with weo raw data +#' @param maindir directory where auxiliary data is stored (to load pop) +#' @param branch character: branch to be loaded +#' +#' @return data.table +#' @export +aux_weo_clean <- function(dt, + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + + branch <- match.arg(branch) + + # _________________________________________ + # Computations #### + if (!inherits(dt, "data.table")) { + setDT(dt) + } + + # Clean column names + nn <- + names(dt) |> + tolower() |> + {\(.) gsub("[-/ ]", "_", .)}() |> + {\(.) gsub("([0-9]{4})", "x\\1", .)}() + + names(dt) <- nn + + # ---- Data transformations ---- + + # Select rows w/ data on real gdp per capita + dt <- dt[weo_subject_code %in% c("NGDPRPC", "NGDPRPPPPC")] + + # Fix country codes + dt[ + , + iso := fifelse( + iso == "WBG", "PSE", iso # West Bank & Gaza + ) + ][ + , + iso := fifelse( + iso == "UVK", "XKX", iso # Kosovo + ) + ][, + # Replace subject codes + subject_code := fcase( + weo_subject_code == "NGDPRPC", "weo_gdp_lcu", + weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017" + ) + ] + + # Reshape to long format + + years_vars <- names(dt)[grepl("\\d{4}", names(dt))] + dt <- + melt(data = dt, + id.vars = c("iso", "subject_code"), + measure.vars = years_vars, + value.name = "weo_gdp", + variable.name = "year" + ) + setnames(dt, "iso", "country_code") + + # Convert year and GDP to numeric + dt[, + c("weo_gdp", "year") := { + y <- sub("x", "", year) |> + as.numeric() + + x <- as.numeric(weo_gdp) |> + suppressWarnings() + list(x, y) + }] + + # Remove rows w/ missing GDP` + dt <- na.omit(dt, cols = "weo_gdp") + + # Remove current year and future years + current_year <- format(Sys.Date(), "%Y") + dt <- dt[year < current_year] + + # Reshape to wide for GDP columns + dt <- dcast(dt, + formula = country_code + year ~ subject_code, + value.var = "weo_gdp" + ) + + # ---- Merge with population ---- + + + pop <- 
load_aux(measure = "pop", + maindir = maindir, + branch = branch) + + setDT(pop) + pop <- pop[reporting_level == "national", ] #pop_data_level = reporting_level + dt[pop, + on = .(country_code, year), + `:=`( + pop = i.pop + ) + ] + + # ---- Chain PPP and LCU GDP columns ---- + + # Chain LCU on PPP column + + dt[, weo_gdp := chain_val(ori_var = weo_gdp_ppp2017, + rep_var = weo_gdp_lcu), + by = country_code] + # + # dt <- chain_values( + # dt, + # base_var = "weo_gdp_ppp2017", + # replacement_var = "weo_gdp_lcu", + # new_name = "weo_gdp", + # by = "country_code" + # ) + + + # --- Sign and save ---- + + # Select final columns + dt <- dt[, c("country_code", "year", "weo_gdp")] + + + + # ____________________________________________________________________________ + # Return #### + return(dt) + +} + +#' Validate clean weo data +#' +#' @param weo clean weo data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ + + stopifnot("WEO output data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + validate(weo, name = "WEO output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(weo_gdp), + description = "`weo_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw weo data +#' 
+#' @param weo raw weo data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WEO raw data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + weo <- weo[!is.na(`WEO Subject Code`), ] + + validate(weo, name = "WEO raw data validation") |> + validate_if(is.character(`WEO Country Code`), + description = "`WEO Country Code` should be character") |> + validate_if(is.character(ISO), + description = "ISO should be character") |> + validate_if(is.character(`WEO Subject Code`), + description = "`WEO Subject Code` should be character") |> + validate_if(is.character(Country), + description = "`Country` should be character") |> + validate_if(is.character(`Subject Descriptor`), + description = "`Subject Descriptor` should be character") |> + validate_if(is.character(`Subject Notes`), + description = "`Subject Notes` should be character") |> + validate_if(is.character(Units), + description = "`Units` should be character") |> + validate_if(is.character(Scale), + description = "`Scale` should be character") |> + validate_if(is.character(`Country/Series-specific Notes`), + description = "`Country/Series-specific Notes` should be character") |> + validate_if(is.numeric(`Estimates Start After`), + description = "`Estimates Start After` should be numeric") |> + validate_cols(not_na, ISO, `WEO Subject Code`, + description = "no missing values in key variables") |> + validate_if(is_uniq(ISO, `WEO Subject Code`), + description = "no duplicate records in key variables") |> + add_results(report) + + num_var_list <- grep("^[[:digit:]]", colnames(weo)) + + for (i in 1:length(num_var_list)) { + validate(weo, name = "WEO validation") |> + validate_cols(is.numeric, num_var_list[i], + description = "variables (with 
numeric var name) should be numeric") |> + add_results(report) + } + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_weo_clean.R b/R/aux_weo_clean.R deleted file mode 100644 index 220da23..0000000 --- a/R/aux_weo_clean.R +++ /dev/null @@ -1,135 +0,0 @@ -#' Clean WEO data -#' -#' @param dt database with weo raw data -#' @param maindir directory where auxiliary data is stored (to load pop) -#' @param branch character: branch to be loaded -#' -#' @return data.table -#' @export -aux_weo_clean <- function(dt, - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - - branch <- match.arg(branch) - -# _________________________________________ -# Computations #### - if (!inherits(dt, "data.table")) { - setDT(dt) - } - - # Clean column names - nn <- - names(dt) |> - tolower() |> - {\(.) gsub("[-/ ]", "_", .)}() |> - {\(.) gsub("([0-9]{4})", "x\\1", .)}() - - names(dt) <- nn - - # ---- Data transformations ---- - - # Select rows w/ data on real gdp per capita - dt <- dt[weo_subject_code %in% c("NGDPRPC", "NGDPRPPPPC")] - - # Fix country codes - dt[ - , - iso := fifelse( - iso == "WBG", "PSE", iso # West Bank & Gaza - ) - ][ - , - iso := fifelse( - iso == "UVK", "XKX", iso # Kosovo - ) - ][, - # Replace subject codes - subject_code := fcase( - weo_subject_code == "NGDPRPC", "weo_gdp_lcu", - weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017" - ) - ] - - # Reshape to long format - - years_vars <- names(dt)[grepl("\\d{4}", names(dt))] - dt <- - melt(data = dt, - id.vars = c("iso", "subject_code"), - measure.vars = years_vars, - value.name = "weo_gdp", - variable.name = "year" - ) - setnames(dt, "iso", "country_code") - - # Convert year and GDP to numeric - dt[, - c("weo_gdp", "year") := { - y <- sub("x", "", year) |> - as.numeric() - - x <- as.numeric(weo_gdp) |> - suppressWarnings() - list(x, y) - }] - - 
# Remove rows w/ missing GDP` - dt <- na.omit(dt, cols = "weo_gdp") - - # Remove current year and future years - current_year <- format(Sys.Date(), "%Y") - dt <- dt[year < current_year] - - # Reshape to wide for GDP columns - dt <- dcast(dt, - formula = country_code + year ~ subject_code, - value.var = "weo_gdp" - ) - - # ---- Merge with population ---- - - - pop <- load_aux(measure = "pop", - maindir = maindir, - branch = branch) - - setDT(pop) - pop <- pop[reporting_level == "national", ] #pop_data_level = reporting_level - dt[pop, - on = .(country_code, year), - `:=`( - pop = i.pop - ) - ] - - # ---- Chain PPP and LCU GDP columns ---- - - # Chain LCU on PPP column - - dt[, weo_gdp := chain_val(ori_var = weo_gdp_ppp2017, - rep_var = weo_gdp_lcu), - by = country_code] - # - # dt <- chain_values( - # dt, - # base_var = "weo_gdp_ppp2017", - # replacement_var = "weo_gdp_lcu", - # new_name = "weo_gdp", - # by = "country_code" - # ) - - - # --- Sign and save ---- - - # Select final columns - dt <- dt[, c("country_code", "year", "weo_gdp")] - - - -# ____________________________________________________________________________ -# Return #### - return(dt) - -} diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R deleted file mode 100644 index 3dda7ae..0000000 --- a/R/cl_validate_raw.R +++ /dev/null @@ -1,73 +0,0 @@ -#' Validate raw country list data -#' -#' @param cl raw country list data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ - - stopifnot("Country list raw data is not loaded" = !is.null(cl)) - - report <- data_validation_report() - - # country_list <- pipload::pip_load_aux("pfw") - country_list <- pipfun::load_from_gh(measure = "pfw", - owner = getOption("pipfun.ghowner"), - branch = "DEV", - ext = "dta") - - 
country_list <- unique(country_list[, code]) - - validate(cl, name = "CL raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - # validate_cols(in_set(country_list), - # country_code, description = "`country_code` values within range") |> - validate_if(is.character(country_name), - description = "`country_name` should be character") |> - validate_if(is.character(africa_split), - description = "`africa_split` should be character") |> - validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), - africa_split, description = "`africa_split` values within range") |> - validate_if(is.character(africa_split_code), - description = "`africa_split_code` should be character") |> - validate_cols(in_set(c("AFE", "AFW", NA)), - africa_split_code, description = "`africa_split_code` values within range") |> - validate_if(is.character(pcn_region), - description = "`pcn_region` should be character") |> - validate_if(is.character(pcn_region_code), - description = "`pcn_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - pcn_region_code, description = "`pcn_region_code` values within range") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_if(is.character(region_code), - description = "`region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - region_code, description = "`region_code` values within range") |> - validate_if(is.character(world), - description = "`world` should be character") |> - validate_cols(in_set(c("World")), - world, description = "`world` values within range") |> - validate_if(is.character(world_code), - description = "`world_code` should be character") |> - validate_cols(in_set(c("WLD")), - world_code, description = "`world_code` values within range") |> - validate_cols(not_na, country_code, 
- description = "no missing values in key variables") |> - validate_if(is_uniq(country_code), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R deleted file mode 100644 index e15ee53..0000000 --- a/R/clean_validation_report.R +++ /dev/null @@ -1,12 +0,0 @@ -#' Remove data validation report from .pipaux environment variable -#' -#' @export -clean_validation_report <- function(){ - - if (rlang::env_has(.pipaux, "validation_report")){ - - # rlang::env_bind(.pipaux, validation_report = rlang::zap()) - rlang::env_unbind(.pipaux, "validation_report") - - } -} diff --git a/R/countries_validate_output.R b/R/countries_validate_output.R deleted file mode 100644 index baa016f..0000000 --- a/R/countries_validate_output.R +++ /dev/null @@ -1,57 +0,0 @@ -#' Validate output countries data -#' -#' @param countries output countries data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){ - - stopifnot("Countries output data is not loaded" = !is.null(countries)) - - report <- data_validation_report() - - validate(countries, name = "countries output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.character(country_name), - description = "`country_name` should be character") |> - validate_if(is.character(africa_split), - description = "`africa_split` should be character") |> - validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), - africa_split, description 
= "`africa_split` values within range") |> - validate_if(is.character(africa_split_code), - description = "`africa_split_code` should be character") |> - validate_cols(in_set(c("AFE", "AFW", NA)), - africa_split_code, description = "`africa_split_code` values within range") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_if(is.character(region_code), - description = "`region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - region_code, description = "`region_code` values within range") |> - validate_if(is.character(world), - description = "`world` should be character") |> - validate_cols(in_set(c("World")), - world, description = "`world` values within range") |> - validate_if(is.character(world_code), - description = "`world_code` should be character") |> - validate_cols(in_set(c("WLD")), - world_code, description = "`world_code` values within range") |> - validate_cols(not_na, country_code, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R deleted file mode 100644 index 7ff5c3a..0000000 --- a/R/cpi_validate_output.R +++ /dev/null @@ -1,92 +0,0 @@ -#' Validate clean cpi data -#' -#' @param cpi clean cpi data, output via `aux_cpi_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ - - stopifnot("CPI clean data is not loaded" = !is.null(cpi)) - - report <- 
data_validation_report() - - validate(cpi, name = "CPI output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.integer(year), - description = "`year` should be integer") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.numeric(cpi), - description = "`cpi` should be numeric") |> - validate_if(is.numeric(ccf), - description = "`ccf` should be numeric") |> - validate_if(is.character(survey_acronym), - description = "`survey_acronym` should be character") |> - validate_if(is.numeric(change_cpi2011), - description = "`change_cpi2011` should be numeric") |> - validate_cols(in_set(c(0, 1)), change_cpi2011, - description = "`change_cpi2011` values within range") |> - # validate_if(is.character(cpi_domain), - # description = "`cpi_domain` should be character") |> - # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, - # description = "`cpi_domian` values within range") |> - validate_if(is.numeric(cpi_domain_value), - description = "`cpi_domain_value` should be numeric") |> - validate_cols(in_set(c(0, 1)), cpi_domain_value, - description = "`cpi_domain_value` values within range") |> - validate_if(is.numeric(cpi2017_unadj), - description = "`cpi2017_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011_unadj), - description = "`cpi2011_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011), - description = "`cpi2011` should be numeric") |> - validate_if(is.numeric(cpi2017), - description = "`cpi2017` should be numeric") |> - # validate_if(is.numeric(cpi2011_SM22), - # description = "`cpi2011_SM22` should be numeric") |> - # validate_if(is.numeric(cpi2017_SM22), - # description = "`cpi2017_SM22` should be numeric") |> - validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be 
character") |> - validate_cols(in_set(c("national", "rural", "urban")), reporting_level, - description = "`reporting_level` values within range") |> - # validate_if(is.numeric(cpi2011_AM23), - # description = "`cpi2011_AM23` should be numeric") |> - # validate_if(is.numeric(cpi2017_AM23), - # description = "`cpi2017_AM23` should be numeric") |> - validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") |> - validate_cols(not_na, country_code, year, survey_acronym, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, survey_acronym, - reporting_level), - description = "no duplicate records in key variables") |> - validate_if(is_uniq(country_code, year, survey_acronym, - reporting_level), - description = "no duplicate cpi values") |> - add_results(report) - - num_var_list1 <- grep("cpi2011_", colnames(cpi)) - num_var_list2 <- grep("cpi2017_", colnames(cpi)) - num_var_list <- c(num_var_list1, num_var_list2) - - for (i in 1:length(num_var_list)) { - validate(cpi, name = "CPI validation") |> - validate_cols(is.numeric, num_var_list[i], - description = "variables (with numeric var name) should be numeric") |> - add_results(report) - } - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } -} diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R deleted file mode 100644 index 2441e6a..0000000 --- a/R/cpi_validate_raw.R +++ /dev/null @@ -1,96 +0,0 @@ -#' Validate raw cpi data -#' -#' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ - - stopifnot("CPI raw data is not loaded" = !is.null(cpi)) - - 
report <- data_validation_report() - - validate(cpi, name = "CPI raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - region, description = "`region` values within range") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(countryname), - description = "`countryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(survname), - description = "`survname` should be character") |> - validate_if(is.numeric(ref_year), - description = "`ref_year` should be numeric") |> - validate_if(is.character(cpi_domain), - description = "`cpi_domain` should be character") |> - validate_cols(in_set(c("National", "Urban/Rural")), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.numeric(cpi_domain_value), - description = "`cpi_domain_value` should be numeric") |> - validate_if(is.numeric(cpi2017_unadj), - description = "`cpi2017_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011_unadj), - description = "`cpi2011_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011), - description = "`cpi201`1 should be numeric") |> - validate_if(is.numeric(cpi2017), - description = "`cpi2017` should be numeric") |> - validate_if(is.character(version), - description = "`version` should be character") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.numeric(cur_adj), - description = "`cur_adj` should be numeric") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("N", "R", "U", NA)), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.numeric(cpi2011_SM22), - description = 
"`cpi2011_SM22` should be numeric") |> - validate_if(is.numeric(comparable), - description = "`comparable` should be numeric") |> - validate_if(is.numeric(cpi2017_SM22), - description = "`cpi2017_SM22` should be numeric") |> - validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") |> - validate_if(is.numeric(cpi_data_level), - description = "`cpi_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - cpi_data_level, description = "`cpi_data_level` values within range") |> - validate_if(is.numeric(ref_year_SM24), - description = "`ref_year_SM24` should be numeric") |> - validate_if(is.numeric(cpi2011_SM24), - description = "`cpi2011_SM24` should be numeric") |> - validate_if(is.numeric(cpi2017_SM24), - description = "`cpi2011_SM24` should be numeric") |> - validate_if(is.numeric(change_cpi2017), - description = "`change_cpi2017` should be numeric") |> - validate_if(is.numeric(change_icp2017), - description = "`change_icp2017` should be numeric") |> - validate_if(is.numeric(change_cpi2011), - description = "`change_cpi2011` should be numeric") |> - validate_if(is.numeric(change_icp2011), - description = "`change_icp2011` should be numeric") |> - validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") |> - validate_cols(not_na, code, year, survname, cpi_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, year, survname, cpi_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/fake_aux_sna.R b/R/fake_aux_sna.R deleted file mode 100644 index f0da8c8..0000000 --- a/R/fake_aux_sna.R +++ /dev/null @@ -1,16 +0,0 @@ -#' Fake PIP SNA function -#' -#' @inheritParams aux_gdp -#' @inheritParams aux_pfw -#' @inheritParams 
pipfun::load_from_gh -#' @export -fake_aux_sna <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api")) { - - return(invisible(TRUE)) -} diff --git a/R/gdm_validate_output.R b/R/gdm_validate_output.R deleted file mode 100644 index 92f61dc..0000000 --- a/R/gdm_validate_output.R +++ /dev/null @@ -1,57 +0,0 @@ -#' Validate clean gdm data -#' -#' @param gdm clean gdm data, output via `pipfun::pip_gdm_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ - - stopifnot("GDM output data is not loaded" = !is.null(gdm)) - - report <- data_validation_report() - - validate(gdm, name = "GDM output data validation") |> - validate_if(is.character(survey_id), - description = "`survey_id` should be character") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.integer(year), - description = "`year` should be integer") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), welfare_type, - description = "`welfare_type` values within range") |> - validate_if(is.numeric(survey_mean_lcu), - description = "`survey_mean_lcu` should be numeric") |> - validate_if(is.character(distribution_type), - description = "`distribution_type` should be character") |> - validate_cols(in_set(c("aggregate", "group")), distribution_type, - description = "`distribution_type` values within range") |> - validate_if(is.character(gd_type), - description = "`gd_type` should be 
character") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), reporting_level, - description = "`reporting_level` values within range") |> - validate_if(is.character(pcn_source_file), - description = "`pcn_source_file` should be character") |> - validate_if(is.character(pcn_survey_id), - description = "`pcn_survey_id` should be character") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } -} diff --git a/R/gdm_validate_raw.R b/R/gdm_validate_raw.R deleted file mode 100644 index c841013..0000000 --- a/R/gdm_validate_raw.R +++ /dev/null @@ -1,64 +0,0 @@ -#' Validate raw gdm data -#' -#' @param gdm raw gdm data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ - - stopifnot("GDM raw data is not loaded" = !is.null(gdm)) - - report <- data_validation_report() - - validate(gdm, name = "GDM raw data validation") |> - validate_if(is.character(Region), - description = "`Region` should be character") |> - validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), - Region, description = "`Region` values within range") |> - validate_if(is.character(countryName), - description = "`countryName` should be character") |> - validate_if(is.character(Coverage), - description = "`Coverage` should be character") |> - 
validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), - Coverage, description = "`Coverage` values within range") |> - validate_if(is.character(CountryCode), - description = "`CountryCode` should be character") |> - validate_if(is.numeric(SurveyTime), - description = "`SurveyTime` should be numeric") |> - validate_if(is.numeric(CPI_Time), - description = "`CPI_Time` should be numeric") |> - validate_if(is.character(DataType), - description = "`DataType` should be character") |> - validate_cols(in_set(c("x", "X", "y", "Y")), - DataType, description = "`DataType` values within range") |> - validate_if(is.numeric(SurveyMean_LCU), - description = "`SurveyMean_LCU` should be numeric") |> - validate_if(is.numeric(currency), - description = "`currency` should be numeric") |> - validate_if(is.character(source), - description = "`source` should be character") |> - validate_if(is.character(SurveyID), - description = "`SurveyID` should be character") |> - validate_if(is.numeric(SurveyMean_PPP), - description = "`SurveyMean_PPP` should be numeric") |> - validate_if(is.character(DistributionFileName), - description = "`DistributionFileName` should be character") |> - validate_cols(is.logical, Comment, description = "Comment should be logical") |> - validate_cols(not_na, CountryCode, Coverage, SurveyTime, DataType, - description = "no missing values in key variables") |> - validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R deleted file mode 100644 index 14052a8..0000000 --- a/R/gdp_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output gdp data -#' -#' @param gdp output gdp data -#' @param 
detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ - - stopifnot("GDP output data is not loaded" = !is.null(gdp)) - - report <- data_validation_report() - - validate(gdp, name = "GDP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - validate_if(is.numeric(gdp), - description = "`gdp` should be numeric") |> - # validate_if(is.character(gdp_domain), - # description = "`gdp_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # gdp_domain, description = "`gdp_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/get_error_validation.R b/R/get_error_validation.R deleted file mode 100644 index a6de5e6..0000000 --- a/R/get_error_validation.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Get validation report data validation error report -#' -#' @param vlddata validation data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' -#' @export -get_error_validation <- function(vlddata, detail){ - - 
stopifnot("Validation data is not availabel" = !is.null(vlddata)) - - err_t <- NULL - - if (any(vlddata$type == "error")){ - - err_t <- vlddata[type == "error", - .(table_name, description, call, - message, type)] - } - - - if (isFALSE(detail)) { - - cli::cli_abort("Description of invalid cases for {unique(err_t$table_name)}, - {err_t$description}") - - } else { - - if (!rlang::env_has(.pipaux, "validation_report")){ - - rlang::env_poke(.pipaux, "validation_report", err_t) - - } else { - - compiled_result <- rbind(.pipaux$validation_report, err_t) - rlang::env_poke(.pipaux, "validation_report", compiled_result) - - } - - cli::cli_inform("Validation report ({.field validation_report}) has been added to the environment varaible ({.field .pipaux}).") - } - -} diff --git a/R/incgroup_validate_output.R b/R/incgroup_validate_output.R deleted file mode 100644 index defeebf..0000000 --- a/R/incgroup_validate_output.R +++ /dev/null @@ -1,50 +0,0 @@ -#' Validate income group output data -#' -#' @param incgroup income group output data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ - - stopifnot("Income group output data is not loaded" = !is.null(incgroup)) - - report <- data_validation_report() - - validate(incgroup, name = "Income group output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year_data), - description = "`year_data` should be numeric") |> - validate_if(is.character(income_group), - description = "`income_group` should be character") |> - validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), - income_group, description = "`income_group` values within range") |> - 
validate_if(is.character(income_group_code), - description = "`income_group_code` should be character") |> - validate_cols(in_set(c("HIC", "LIC", "LMIC", "UMIC")), - income_group_code, description = "`income_group_code` values within range") |> - validate_if(is.character(incgroup_historical), - description = "`incgroup_historical` should be character") |> - validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), - incgroup_historical, description = "`incgroup_historical` values within range") |> - validate_if(is.character(fcv_historical), - description = "`fcv_historical` should be character") |> - validate_if(is.character(ssa_subregion_code), - description = "`ssa_subregion_code` should be character") |> - validate_cols(not_na, country_code, year_data, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year_data), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/load_aux.R b/R/load_aux.R index 7601f30..eb87d9d 100644 --- a/R/load_aux.R +++ b/R/load_aux.R @@ -106,3 +106,175 @@ find_path <- function(file_paths) { } +#' Load Raw Auxiliary data +#' +#' @description `r lifecycle::badge("superseded")` +#' +#' This function is deprecated because of the new, more flexible and general +#' function `pipfun::load_from_gh()` +#' @param measure character: measure to be loaded +#' @param owner character: Github repo owner. Default is +#' `getOption("pipfun.ghowner")` +#' @param repo character: name of the repo +#' @param branch character: either "DEV" or "PROD". Refers to the branch that +#' will be used to update either the development server or production. +#' @param tag character: specific release to be used in the update. 
+#' @param filename character: Name of file name without the ".csv" extension. +#' Default is `measure` +#' @param ext character: Extension of `filename`. Default "csv" +#' @param ... parameters to be passed to the loading functions depending of the +#' extension used +#' +#' @return dataset +#' @keywords internal +load_raw_aux <- function(measure, + owner = getOption("pipfun.ghowner"), + repo = paste0("aux_", measure), + branch = c("DEV","PROD","main"), + tag = match.arg(branch), + filename = measure, + ext = "csv", + ...) { + + lifecycle::deprecate_warn("0.1.0.9003", + "load_raw_aux()", + "pipfun::load_from_gh()") + + + # ____________________________________________________________________________ + # on.exit #### + on.exit({ + + if (exists("temp_file")) { + if (fs::file_exists(temp_file)) { + unlink(temp_file) + } + } + # close(path) + + }) + + # ____________________________________________________________________________ + # Defenses #### + branch <- match.arg(branch) + stopifnot(exprs = { + + }) + + # ____________________________________________________________________________ + # Early returns #### + if (FALSE) { + return() + } + + # ____________________________________________________________________________ + # Computations #### + + path <- + glue("https://github.com/{owner}/{repo}/raw/{tag}/{filename}.{ext}") + # path <- file(path) + + tryCatch( + expr = { + # load depending of the extension + df <- suppressMessages( # suppress any loading message + + if (ext == "csv") { + + # readr::read_csv(path, ...) + readr::read_csv(path, ...) + + } else if (ext %in% c("xls", "xlsx")) { + + temp_file <- tempfile(fileext = ext) + req <- httr::GET(path, + # write result to disk + httr::write_disk(path = temp_file)) + + + readxl::read_excel(path = temp_file, ...) + + } else if (ext == "dta") { + + haven::read_dta(path, ...) + + } else if (ext == "qs") { + + qs::qread(path, ...) + + } else if (ext == "fst") { + + fst::read_fst(path, ...) 
+ + } else if (ext == "yaml") { + + yaml::read_yaml(path, ...) + + } + + ) + + if (is.data.frame(df)) { + setDT(df) + } + }, + # end of expr section + + error = function(e) { + if (tag == branch) { + + ## ............................................................................ + ## Error in branches #### + + branches <- get_gh(owner, repo, what = "branches") + + if (!(branch %in% branches)) { + msg <- c( + "{.field branch} specified ({branch}) does not exist in repo + {.file {owner}/{repo}}", + "i" = "Select one among {.field {branches}}" + ) + cli::cli_abort(msg, class = "pipaux_error") + + } else { + msg <- c("Problem loading {.file {filename}.{ext}} Correctly: + {e$message}") + cli::cli_abort(msg, class = "pipaux_error", + wrap = TRUE) + + } + + } else { + + ## ............................................................................ + ## Error in tags #### + + tags <- get_gh(owner, repo, what = "tags") + + if (!(tag %in% tags)) { + msg <- c( + "{.field tag} specified ({tag}) does not exist in repo + {.file {owner}/{repo}}", + "i" = "Select one among {.field {tags}}" + ) + cli::cli_abort(msg, class = "pipaux_error") + + } else { + msg <- c("Could not load {.file {filename}.{ext}} from Github repo: + {e$message}") + cli::cli_abort(msg, class = "pipaux_error") + + } + } + + } # end of finally section + + ) # End of trycatch + + # ____________________________________________________________________________ + # Return #### + return(df) + +} + + diff --git a/R/load_raw_aux.R b/R/load_raw_aux.R deleted file mode 100644 index 0928e5c..0000000 --- a/R/load_raw_aux.R +++ /dev/null @@ -1,170 +0,0 @@ -#' Load Raw Auxiliary data -#' -#' @description `r lifecycle::badge("superseded")` -#' -#' This function is deprecated because of the new, more flexible and general -#' function `pipfun::load_from_gh()` -#' @param measure character: measure to be loaded -#' @param owner character: Github repo owner. 
Default is -#' `getOption("pipfun.ghowner")` -#' @param repo character: name of the repo -#' @param branch character: either "DEV" or "PROD". Refers to the branch that -#' will be used to update either the development server or production. -#' @param tag character: specific release to be used in the update. -#' @param filename character: Name of file name without the ".csv" extension. -#' Default is `measure` -#' @param ext character: Extension of `filename`. Default "csv" -#' @param ... parameters to be passed to the loading functions depending of the -#' extension used -#' -#' @return dataset -#' @keywords internal -load_raw_aux <- function(measure, - owner = getOption("pipfun.ghowner"), - repo = paste0("aux_", measure), - branch = c("DEV","PROD","main"), - tag = match.arg(branch), - filename = measure, - ext = "csv", - ...) { - - lifecycle::deprecate_warn("0.1.0.9003", - "load_raw_aux()", - "pipfun::load_from_gh()") - - - # ____________________________________________________________________________ - # on.exit #### - on.exit({ - - if (exists("temp_file")) { - if (fs::file_exists(temp_file)) { - unlink(temp_file) - } - } - # close(path) - - }) - - # ____________________________________________________________________________ - # Defenses #### - branch <- match.arg(branch) - stopifnot(exprs = { - - }) - - # ____________________________________________________________________________ - # Early returns #### - if (FALSE) { - return() - } - - # ____________________________________________________________________________ - # Computations #### - - path <- - glue("https://github.com/{owner}/{repo}/raw/{tag}/{filename}.{ext}") - # path <- file(path) - - tryCatch( - expr = { - # load depending of the extension - df <- suppressMessages( # suppress any loading message - - if (ext == "csv") { - - # readr::read_csv(path, ...) - readr::read_csv(path, ...) 
- - } else if (ext %in% c("xls", "xlsx")) { - - temp_file <- tempfile(fileext = ext) - req <- httr::GET(path, - # write result to disk - httr::write_disk(path = temp_file)) - - - readxl::read_excel(path = temp_file, ...) - - } else if (ext == "dta") { - - haven::read_dta(path, ...) - - } else if (ext == "qs") { - - qs::qread(path, ...) - - } else if (ext == "fst") { - - fst::read_fst(path, ...) - - } else if (ext == "yaml") { - - yaml::read_yaml(path, ...) - - } - - ) - - if (is.data.frame(df)) { - setDT(df) - } - }, - # end of expr section - - error = function(e) { - if (tag == branch) { - - ## ............................................................................ - ## Error in branches #### - - branches <- get_gh(owner, repo, what = "branches") - - if (!(branch %in% branches)) { - msg <- c( - "{.field branch} specified ({branch}) does not exist in repo - {.file {owner}/{repo}}", - "i" = "Select one among {.field {branches}}" - ) - cli::cli_abort(msg, class = "pipaux_error") - - } else { - msg <- c("Problem loading {.file {filename}.{ext}} Correctly: - {e$message}") - cli::cli_abort(msg, class = "pipaux_error", - wrap = TRUE) - - } - - } else { - - ## ............................................................................ 
- ## Error in tags #### - - tags <- get_gh(owner, repo, what = "tags") - - if (!(tag %in% tags)) { - msg <- c( - "{.field tag} specified ({tag}) does not exist in repo - {.file {owner}/{repo}}", - "i" = "Select one among {.field {tags}}" - ) - cli::cli_abort(msg, class = "pipaux_error") - - } else { - msg <- c("Could not load {.file {filename}.{ext}} from Github repo: - {e$message}") - cli::cli_abort(msg, class = "pipaux_error") - - } - } - - } # end of finally section - - ) # End of trycatch - - # ____________________________________________________________________________ - # Return #### - return(df) - -} diff --git a/R/metadata_validate_output.R b/R/metadata_validate_output.R deleted file mode 100644 index 9976802..0000000 --- a/R/metadata_validate_output.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate output metadata data -#' -#' @param metadata metadata data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ - - stopifnot("Metadata data is not loaded" = !is.null(metadata)) - - report <- data_validation_report() - - validate(metadata, name = "Metadata output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.character(country_name), - description = "`country_name` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.character(survey_title), - description = "`survey_title` should be character") |> - validate_if(is.character(survey_conductor), - description = "`survey_conductor` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> 
- validate_cols(in_set(c("national", "rural", "urban")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), - welfare_type, description = "`welfare_type` values within range") |> - validate_if(is.character(distribution_type), - description = "`distribution_type` should be character") |> - validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), - distribution_type, description = "`distribution_type` values within range") |> - validate_cols(not_na, country_code, year, welfare_type, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, welfare_type), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/metadata_validate_raw.R b/R/metadata_validate_raw.R deleted file mode 100644 index 32331fd..0000000 --- a/R/metadata_validate_raw.R +++ /dev/null @@ -1,81 +0,0 @@ -#' Validate raw metadata data -#' -#' @param metadata raw metadata data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -metadata_validate_raw <- function(metadata, detail = getOption("pipaux.detail.raw")){ - - stopifnot("metadata raw data is not loaded" = !is.null(metadata)) - - report <- data_validation_report() - - validate(metadata, name = "metadata raw data validation") |> - validate_if(is.character(status), - description = "`status` should be character") |> - validate_if(is.character(reg), - description = "`reg` should be character") |> - validate_cols(in_set(c("EAP", 
"ECA", "LAC", "MNA", "OHI", "SAR", "SSA")), - reg, description = "`reg` values within range") |> - validate_if(is.numeric(id), - description = "`id` should be numeric") |> - validate_if(is.character(svy_id), - description = "`svy_id` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(title), - description = "`title` should be character") |> - validate_if(is.character(data_access), - description = "`data_access` should be character") |> - validate_if(is.numeric(year_start), - description = "`year_start` should be numeric") |> - validate_if(is.numeric(year_end), - description = "`year_end` should be numeric") |> - validate_if(is.character(authoring_entity_name), - description = "`authoring_entity_name` should be character") |> - validate_if(is.character(authoring_entity_affiliation), - description = "`authoring_entity_affiliation` should be character") |> - validate_if(is.character(contact_email), - description = "`contact_email` should be character") |> - validate_if(is.character(contact_uri), - description = "`contact_uri` should be character") |> - validate_if(is.character(abstract), - description = "`abstract` should be character") |> - validate_if(is.character(collection_dates_cycle), - description = "`collection_dates_cycle` should be character") |> - validate_if(is.character(collection_dates_start), - description = "`collection_dates_start` should be character") |> - validate_if(is.character(collection_dates_end), - description = "`collection_dates_end` should be character") |> - validate_if(is.character(coverage), - description = "`coverage` should be character") |> - validate_if(is.character(sampling_procedure), - description = "`sampling_procedure` should be character") |> - validate_if(is.character(collection_mode), - description = "`collection_mode` should be character") |> - validate_if(is.character(coll_situation), - description = "coll_situation` should be character") 
|> - validate_if(is.character(weight), - description = "`weight` should be character") |> - validate_if(is.character(cleaning_operations), - description = "`cleaning_operations` should be character") |> - validate_if(is.character(coverage_notes), - description = "`coverage_notes` should be character") |> - validate_cols(not_na, svy_id, - description = "no missing values in key variables") |> - validate_if(is_uniq(svy_id), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/mpd_validate_raw.R b/R/mpd_validate_raw.R deleted file mode 100644 index faa3411..0000000 --- a/R/mpd_validate_raw.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Validate raw maddison data -#' -#' @param mpd raw mpd data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ - - stopifnot("mpd/ maddison raw data is not loaded" = !is.null(mpd)) - - report <- data_validation_report() - - validate(mpd, name = "mdp raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(mpd_gdp), - description = "`mpd_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - 
get_error_validation(validation_record, detail) - } - -} diff --git a/R/npl_validate_output.R b/R/npl_validate_output.R deleted file mode 100644 index f4a3939..0000000 --- a/R/npl_validate_output.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate npl output data -#' -#' @param npl output data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ - - stopifnot("NPL output data is not loaded" = !is.null(npl)) - - report <- data_validation_report() - - validate(npl, name = "NPL output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(nat_headcount), - description = "`nat_headcount` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/npl_validate_raw.R b/R/npl_validate_raw.R deleted file mode 100644 index 0e16fcd..0000000 --- a/R/npl_validate_raw.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Validate npl raw data -#' -#' @param npl raw npl data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords 
internal -#' -#' @export -npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ - - stopifnot("NPL raw data is not loaded" = !is.null(npl)) - - report <- data_validation_report() - - validate(npl, name = "NPL raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - # validate_cols(in_set(c("AFE", "AFW", "EAP", "ECA", "LAC", "MNA", "SAR")), - # region, description = "`region` values within range") |> - validate_if(is.character(countrycode), - description = "`countrycode` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(vsi_pov_nahc_nc), - description = "`vsi_pov_nahc_nc` should be numeric") |> - validate_if(is.numeric(vsi_pov_nahc), - description = "`vsi_pov_nahc` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_cols(not_na, countrycode, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(countrycode, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R deleted file mode 100644 index dd91bad..0000000 --- a/R/pce_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output pce data -#' -#' @param pce output pce data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ - - stopifnot("PCE clean data is not loaded" = 
!is.null(pce)) - - report <- data_validation_report() - - validate(pce, name = "PCE output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(pce), - description = "`pce` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - # validate_if(is.character(pce_domain), - # description = "`pce_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # pce_domain, description = "`pce_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R deleted file mode 100644 index 42721a5..0000000 --- a/R/pfw_validate_output.R +++ /dev/null @@ -1,190 +0,0 @@ -#' Validate clean pfw data -#' -#' @param pfw clean pfw data, output via `aux_pfw_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ - - stopifnot("PFW clean data is not loaded" = !is.null(pfw)) - - report <- data_validation_report() - - validate(pfw, name = "PFW output data validation") |> - 
validate_if(is.character(wb_region_code), - description = "`wb_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - wb_region_code, description = "`wb_region_code` values within range") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.character(pcn_region_code), - description = "`pcn_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - pcn_region_code, description = "`pcn_region_code` values within range") |> - validate_if(is.character(ctryname), - description = "`ctryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(surveyid_year), - description = "`surveyid_year` should be numeric") |> - validate_if(is.numeric(timewp), - description = "`timewp` should be numeric") |> - validate_if(is.numeric(fieldwork), - description = "`fieldwork` should be numeric") |> - validate_if(is.character(survey_acronym), - description = "`survey_acronym` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(altname), - description = "`altname` should be character") |> - validate_if(is.character(survey_time), - description = "`survey_time` should be character") |> - validate_if(is.numeric(wbint_link), - description = "`wbint_link` should be numeric") |> - validate_if(is.numeric(wbext_link), - description = "`wbext_link` should be numeric") |> - validate_if(is.numeric(alt_link), - description = "`alt_link` should be numeric") |> - validate_if(is.numeric(pip_meta), - description = "`pip_meta` should be numeric") |> - validate_if(is.character(surv_title), - description = "`surv_title` should be character") |> - validate_if(is.character(surv_producer), - description = "`surv_producer` should be character") |> - 
validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), - welfare_type, description = "`welfare_type` values within range") |> - validate_if(is.numeric(use_imputed), - description = "`use_imputed` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_imputed, description = "`use_imputed` values within range") |> - validate_if(is.numeric(use_microdata), - description = "`use_microdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_microdata, description = "`use_microdata` values within range") |> - validate_if(is.numeric(use_bin), - description = "`use_bin` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_bin, description = "`use_bin` values within range") |> - validate_if(is.numeric(use_groupdata), - description = "`use_groupdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_groupdata, description = "`use_groupdata` values within range") |> - validate_if(is.numeric(reporting_year), - description = "`reporting_year` should be numeric") |> - validate_if(is.numeric(survey_comparability), - description = "`survey_comparability` should be numeric") |> - validate_if(is.character(comp_note), - description = "`comp_note` should be character") |> - validate_if(is.character(preferable), - description = "`preferable` should be character") |> - validate_if(is.numeric(display_cp), - description = "`display_cp` should be numeric") |> - validate_cols(in_set(c(0, 1)), - display_cp, description = "`display_cp` values within range") |> - validate_if(is.character(fieldwork_range), - description = "`fieldwork_range` should be character") |> - validate_if(is.numeric(survey_year), - description = 
"`survey_year` should be numeric") |> - validate_if(is.character(newref), - description = "`newref` should be character") |> - validate_if(is.numeric(ref_year_des), - description = "`ref_year_des` should be numeric") |> - validate_if(is.character(wf_baseprice), - description = "`wf_baseprice` should be character") |> - validate_if(is.character(wf_baseprice_note), - description = "`wf_baseprice_note` should be character") |> - validate_if(is.numeric(wf_baseprice_des), - description = "`wf_baseprice_des` should be numeric") |> - validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, - description = "`wf_baseprice_des` values within range") |> - validate_if(is.numeric(wf_spatial_des), - description = "`wf_spatial_des` should be numeric") |> - validate_if(is.character(wf_spatial_var), - description = "`wf_spatial_var` should be character") |> - validate_if(is.numeric(cpi_replication), - description = "`cpi_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - cpi_replication, description = "`cpi_replication` values within range") |> - validate_if(is.numeric(cpi_domain), - description = "`cpi_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.character(cpi_domain_var), - description = "`cpi_domain_var` should be character") |> - validate_if(is.numeric(wf_currency_des), - description = "`wf_currency_des` should be numeric") |> - validate_cols(in_set(c(0, 2)), - wf_currency_des, description = "`wf_currency_des` values within range") |> - validate_if(is.numeric(ppp_replication), - description = "`ppp_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - ppp_replication, description = "`ppp_replication` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - 
validate_if(is.character(ppp_domain_var), - description = "`ppp_domain_var` should be character") |> - validate_if(is.numeric(wf_add_temp_des), - description = "`wf_add_temp_des` should be numeric") |> - validate_cols(in_set(c(-9, 0)), - wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> - validate_if(is.numeric(wf_add_temp_var), - description = "`wf_add_temp_var` should be numeric") |> - validate_if(is.numeric(wf_add_spatial_des), - description = "`wf_add_spatial_des` should be numeric") |> - validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, - description = "`wf_add_spatial_des` values within range") |> - validate_if(is.numeric(wf_add_spatial_var), - description = "`wf_add_spatial_var` should be numeric") |> - validate_if(is.numeric(tosplit), - description = "`tosplit` should be numeric") |> - validate_cols(in_set(c(NA, 1)), tosplit, - description = "`tosplit` values within range") |> - validate_if(is.character(tosplit_var), - description = "`tosplit_var` should be character") |> - validate_if(is.numeric(inpovcal), - description = "`inpovcal` should be numeric") |> - validate_cols(in_set(c(1)), inpovcal, - description = "`inpovcal` values within range") |> - validate_if(is.character(oth_welfare1_type), - description = "`oth_welfare1_type` should be character") |> - validate_if(is.character(oth_welfare1_var), - description = "`oth_welfare1_var` should be character") |> - validate_if(is.numeric(gdp_domain), - description = "`gdp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), gdp_domain, - description = "`gdp_domain` values within range") |> - validate_if(is.numeric(pce_domain), - description = "`pce_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pce_domain, - description = "`pce_domain` values within range") |> - validate_if(is.numeric(pop_domain), - description = "`pop_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pop_domain, - description = "`pop_domain` values within range") |> - 
validate_if(is.character(pfw_id), - description = "`pfw_id` should be character") |> - validate_cols(not_na, country_code, year, welfare_type, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, welfare_type), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pfw_validate_raw.R b/R/pfw_validate_raw.R deleted file mode 100644 index 69b7f66..0000000 --- a/R/pfw_validate_raw.R +++ /dev/null @@ -1,191 +0,0 @@ -#' Validate raw pfw data -#' -#' @param pfw raw pfw data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ - - stopifnot("PFW raw data is not loaded" = !is.null(pfw)) - - report <- data_validation_report() - - validate(pfw, name = "PFW raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - region, description = "`region` values within range") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(reg_pcn), - description = "`reg_pcn` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - reg_pcn, description = "`reg_pcn` values within range") |> - validate_if(is.character(ctryname), - description = "`ctryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(surveyid_year), - description = "`surveyid_year` should be numeric") |> - 
validate_if(is.numeric(timewp), - description = "`timewp` should be numeric") |> - validate_if(is.numeric(fieldwork), - description = "`fieldwork` should be numeric") |> - validate_if(is.character(survname), - description = "`survname` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(altname), - description = "`altname` should be character") |> - validate_if(is.character(survey_time), - description = "`survey_time` should be character") |> - validate_if(is.numeric(wbint_link), - description = "`wbint_link` should be numeric") |> - validate_if(is.numeric(wbext_link), - description = "`wbext_link` should be numeric") |> - validate_if(is.numeric(alt_link), - description = "`alt_link` should be numeric") |> - validate_if(is.numeric(pip_meta), - description = "`pip_meta` should be numeric") |> - validate_if(is.character(surv_title), - description = "`surv_title` should be character") |> - validate_if(is.character(surv_producer), - description = "`surv_producer` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("N", "R", "U")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(datatype), - description = "`datatype` should be character") |> - validate_cols(in_set(c("C", "I", "c", "i")), - datatype, description = "`datatype` values within range") |> - validate_if(is.numeric(use_imputed), - description = "`use_imputed` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_imputed, description = "`use_imputed` values within range") |> - validate_if(is.numeric(use_microdata), - description = "`use_microdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_microdata, description = "`use_microdata` values within range") |> - validate_if(is.numeric(use_bin), - description = "`use_bin` should be numeric") |> - 
validate_cols(in_set(c(0, 1)), - use_bin, description = "`use_bin` values within range") |> - validate_if(is.numeric(use_groupdata), - description = "`use_groupdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_groupdata, description = "`use_groupdata` values within range") |> - validate_if(is.numeric(rep_year), - description = "`rep_year` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(comp_note), - description = "`comp_note` should be character") |> - validate_if(is.character(preferable), - description = "`preferable` should be character") |> - validate_if(is.numeric(display_cp), - description = "`display_cp` should be numeric") |> - validate_cols(in_set(c(0, 1)), - display_cp, description = "`display_cp` values within range") |> - validate_if(is.character(fieldwork_range), - description = "`fieldwork_range` should be character") |> - validate_if(is.numeric(ref_year), - description = "`ref_year` should be numeric") |> - validate_if(is.character(newref), - description = "`newref` should be character") |> - validate_if(is.numeric(ref_year_des), - description = "`ref_year_des` should be numeric") |> - validate_if(is.character(wf_baseprice), - description = "`wf_baseprice` should be character") |> - validate_if(is.character(wf_baseprice_note), - description = "`wf_baseprice_note` should be character") |> - validate_if(is.numeric(wf_baseprice_des), - description = "`wf_baseprice_des` should be numeric") |> - validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, - description = "`wf_baseprice_des` values within range") |> - validate_if(is.numeric(wf_spatial_des), - description = "`wf_spatial_des` should be numeric") |> - validate_if(is.character(wf_spatial_var), - description = "`wf_spatial_var` should be character") |> - validate_if(is.numeric(cpi_replication), - description = "`cpi_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - 
cpi_replication, description = "`cpi_replication` values within range") |> - validate_if(is.numeric(cpi_domain), - description = "`cpi_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.character(cpi_domain_var), - description = "`cpi_domain_var` should be character") |> - validate_if(is.numeric(wf_currency_des), - description = "`wf_currency_des` should be numeric") |> - validate_cols(in_set(c(0, 2)), - wf_currency_des, description = "`wf_currency_des` values within range") |> - validate_if(is.numeric(ppp_replication), - description = "`ppp_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - ppp_replication, description = "`ppp_replication` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(ppp_domain_var), - description = "`ppp_domain_var` should be character") |> - validate_if(is.numeric(wf_add_temp_des), - description = "`wf_add_temp_des` should be numeric") |> - validate_cols(in_set(c(-9, 0)), - wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> - validate_if(is.numeric(wf_add_temp_var), - description = "`wf_add_temp_var` should be numeric") |> - validate_if(is.numeric(wf_add_spatial_des), - description = "`wf_add_spatial_des` should be numeric") |> - validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, - description = "`wf_add_spatial_des` values within range") |> - validate_if(is.numeric(wf_add_spatial_var), - description = "`wf_add_spatial_var` should be numeric") |> - validate_if(is.numeric(tosplit), - description = "`tosplit` should be numeric") |> - validate_cols(in_set(c(NA, 1)), tosplit, - description = "`tosplit` values within range") |> - validate_if(is.character(tosplit_var), - description = "`tosplit_var` should be 
character") |> - validate_if(is.numeric(inpovcal), - description = "`inpovcal` should be numeric") |> - validate_cols(in_set(c(1)), inpovcal, - description = "`inpovcal` values within range") |> - validate_if(is.character(oth_welfare1_type), - description = "`oth_welfare1_type` should be character") |> - validate_if(is.character(oth_welfare1_var), - description = "`oth_welfare1_var` should be character") |> - validate_if(is.numeric(gdp_domain), - description = "`gdp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), gdp_domain, - description = "`gdp_domain` values within range") |> - validate_if(is.numeric(pce_domain), - description = "`pce_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pce_domain, - description = "`pce_domain` values within range") |> - validate_if(is.numeric(pop_domain), - description = "`pop_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pop_domain, - description = "`pop_domain` values within range") |> - validate_if(is.character(pfw_id), - description = "`pfw_id` should be character") |> - validate_cols(not_na, code, year, survname, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, year, survname), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/pl_validate_output.R b/R/pl_validate_output.R deleted file mode 100644 index 013483b..0000000 --- a/R/pl_validate_output.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate output pl data -#' -#' @param pl output pl data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ - - stopifnot("PL 
clean data is not loaded" = !is.null(pl)) - - report <- data_validation_report() - - validate(pl, name = "PL output data validation") |> - validate_if(is.character(name), - description = "`name` should be character") |> - validate_if(is.numeric(poverty_line), - description = "`poverty_line` should be numeric") |> - validate_if(is.logical(is_default), - description = "`is_default` should be logical") |> - validate_if(is.logical(is_visible), - description = "`is_visible` should be logical") |> - validate_if(is.integer(ppp_year), - description = "`ppp_year` should be numeric") |> - validate_cols(not_na, name, ppp_year, - description = "no missing values in key variables") |> - validate_if(is_uniq(name, ppp_year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pop_validate_output.R b/R/pop_validate_output.R deleted file mode 100644 index db0deaa..0000000 --- a/R/pop_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output pop data -#' -#' @param pop output pop data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ - - stopifnot("POP clean data is not loaded" = !is.null(pop)) - - report <- data_validation_report() - - validate(pop, name = "POP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - 
reporting_level, description = "`reporting_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - # validate_if(is.character(pop_domain), - # description = "`pop_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # pop_domain, description = "`pop_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pop_validate_raw.R b/R/pop_validate_raw.R deleted file mode 100644 index 377fc67..0000000 --- a/R/pop_validate_raw.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate pop raw data download from wdi -#' -#' @param pop raw pop data, as loaded via `wbstats::wb_data` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ - - stopifnot("WB POP raw data is not loaded" = !is.null(pop)) - - report <- data_validation_report() - - validate(pop, name = "WB POP raw data validation") |> - validate_if(is.character(indicator_id), - description = "`indicator_id` should be character") |> - validate_cols(in_set(c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL")), - indicator_id, description = "`indicator_id` values within range") |> - validate_if(is.character(indicator), - description = "`indicator` should be character") |> - validate_if(is.character(iso2c), - description = "`iso2c` should be character") |> - validate_if(is.character(iso3c), - description = "`iso3c` 
should be character") |> - validate_if(is.character(country), - description = "`country` should be character") |> - validate_if(is.numeric(date), - description = "`date` should be numeric") |> - validate_if(is.numeric(value), - description = "`value` should be numeric") |> - validate_if(is.character(unit), - description = "`unit` should be character") |> - validate_if(is.character(obs_status), - description = "`obs_status` should be character") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_if(is_date(last_updated), - description = "`last_updated` should be date") |> - validate_cols(not_na, indicator_id, iso3c, date, - description = "no missing values in key variables") |> - validate_if(is_uniq(indicator_id, iso3c, date), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/popmain_validate_raw.R b/R/popmain_validate_raw.R deleted file mode 100644 index c6d5d20..0000000 --- a/R/popmain_validate_raw.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate raw main pop data -#' -#' @param pop_main raw pop main data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ - - stopifnot("POP main raw data is not loaded" = !is.null(pop_main)) - - report <- data_validation_report() - - validate(pop_main, name = "POP main raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - 
validate_if(is.numeric(pop_data_level), - description = "`pop_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - pop_data_level, description = "`pop_data_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - validate_cols(not_na, country_code, year, pop_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pop_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R deleted file mode 100644 index af7fafb..0000000 --- a/R/ppp_validate_output.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate output ppp data -#' -#' @param ppp output ppp data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ - - stopifnot("PPP output data is not loaded" = !is.null(ppp)) - - report <- data_validation_report() - - validate(ppp, name = "PPP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(ppp_year), - description = "`ppp_year` should be character") |> - validate_if(is.character(release_version), - description = "`release_version` should be character") |> - validate_if(is.character(adaptation_version), - description = "`adaptation_version` should be character") |> - validate_if(is.numeric(ppp), - description = "`ppp` should be numeric") |> - validate_if(is.logical(ppp_default), - description = "`ppp_default` should be numeric") |> - 
validate_if(is.logical(ppp_default_by_year), - description = "`ppp_default_by_year` should be numeric") |> - # validate_if(is.character(ppp_domain), - # description = "`ppp_domain` should be character") |> - # validate_cols(in_set(c("1", "2")), - # ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - validate_cols(not_na, country_code, ppp_year, reporting_level, - adaptation_version, release_version, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, ppp_year, - reporting_level, adaptation_version, release_version), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/ppp_validate_raw.R b/R/ppp_validate_raw.R deleted file mode 100644 index cc5cd5d..0000000 --- a/R/ppp_validate_raw.R +++ /dev/null @@ -1,78 +0,0 @@ -#' Validate raw ppp data -#' -#' @param ppp raw ppp data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ - - stopifnot("PPP raw data is not loaded" = !is.null(ppp)) - - report <- data_validation_report() - - validate(ppp, name = "PPP raw data validation") |> - validate_if(is.character(CountryName), - description = "`CountryName` should be character") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(CoverageType), - description = 
"`CoverageType` should be character") |> - validate_cols(in_set(c("National", "Rural", "Urban")), - CoverageType, description = "`CoverageType` values within range") |> - validate_if(is.numeric(ppp_2005_v1_v1), - description = "`ppp_2005_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v1_v1), - description = "`ppp_2011_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v2_v1), - description = "`ppp_2011_v2_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v1_v2), - description = "`ppp_2011_v1_v2` should be numeric") |> - validate_if(is.numeric(ppp_2011_v2_v2), - description = "`ppp_2011_v2_v2` should be numeric") |> - validate_if(is.numeric(ppp_2017_v1_v1), - description = "`ppp_2017_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2017_v1_v2), - description = "`ppp_2017_v1_v2` should be numeric") |> - validate_if(is.numeric(source_ppp_2011), - description = "`source_ppp_2011` should be numeric") |> - validate_if(is.numeric(source_ppp_2005), - description = "`source_ppp_2005` should be numeric") |> - validate_if(is.numeric(datalevel), - description = "`datalevel` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - datalevel, description = "`datalevel` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.numeric(ppp_domain_value), - description = "`ppp_domain_value` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain_value, description = "`ppp_domain_value` values within range") |> - validate_if(is.numeric(oldicp2005), - description = "`oldicp2005` should be numeric") |> - validate_if(is.numeric(oldicp2011), - description = "`oldicp2011` should be numeric") |> - validate_if(is.character(Seriesname), - description = "`Seriesname` should be character") |> - validate_if(is.character(note_may192020), - description = 
"`note_may192020` should be character") |> - validate_if(is.character(ppp_2017_v1_v2_note), - description = "`ppp_2017_v1_v2_note` should be character") |> - validate_cols(not_na, code, CoverageType, datalevel, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, CoverageType, datalevel), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/send_report.R b/R/send_report.R deleted file mode 100644 index a14b81f..0000000 --- a/R/send_report.R +++ /dev/null @@ -1,34 +0,0 @@ -#' Send an email that contains auxiliary data validation report -#' -#' @import blastula -#' -#' @export -send_report <- function(){ - - if (rlang::env_has(.pipaux, "validation_report")){ - - print(.pipaux$validation_report) - - # fname <- file.path(tempdir(), "data_validation_report.csv") - # - # write.csv(.pipaux$validation_report, fname, row.names = FALSE) - # - # compose_email( - # body = md(glue::glue( - # - # "Hello, - # - # The attched file contains auxiliary data validation report. 
- # - # Regards"))) |> - # add_attachment(file = fname, filename = "data_validation_report") |> - # smtp_send( - # from = "tefera.degefu@outlook.com", - # to = "tdegefu@worldbank.org", - # subject = "Data validation report", - # credentials = creds_envvar(user = "tefera.degefu@outlook.com", - # pass_envvar = "SMTP_GPID_EMAIL", - # provider = "outlook") - # ) - } -} diff --git a/R/sna_fy_validate_raw.R b/R/sna_fy_validate_raw.R deleted file mode 100644 index edfff25..0000000 --- a/R/sna_fy_validate_raw.R +++ /dev/null @@ -1,41 +0,0 @@ -#' Validate raw sna_fy data -#' -#' @param sna_fy raw sna_fy data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ - - stopifnot("sna_fy raw data is not loaded" = !is.null(sna_fy)) - - report <- data_validation_report() - - validate(sna_fy, name = "sna_fy raw data validation") |> - validate_if(is.character(Code), - description = "`Code` should be character") |> - validate_if(is.character(LongName), - description = "`LongName` should be character") |> - validate_if(is.character(SpecialNotes), - description = "`SpecialNotes` should be character") |> - validate_if(is.character(Month), - description = "`Month` should be character") |> - validate_if(is.numeric(Day), - description = "`Day` should be numeric") |> - validate_cols(not_na, Code, Month, Day, - description = "no missing values in key variables") |> - # validate_if(is_uniq(Code, LongName), - # description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/sna_validate_raw.R b/R/sna_validate_raw.R deleted file mode 
100644 index 7433a4b..0000000 --- a/R/sna_validate_raw.R +++ /dev/null @@ -1,49 +0,0 @@ -#' Validate raw special national accounts (sna) data -#' -#' @param sna raw sna data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ - - stopifnot("SNA raw data is not loaded" = !is.null(sna)) - - report <- data_validation_report() - - validate(sna, name = "SNA raw data validation") |> - validate_if(is.character(countryname), - description = "`countryname` should be character") |> - validate_if(is.character(coverage), - description = "`coverage` should be character") |> - validate_cols(in_set(c("National")), - coverage, description = "`coverage` values within range") |> - validate_if(is.character(countrycode), - description = "`countrycode` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(GDP), - description = "`GDP` should be numeric") |> - validate_if(is.logical(PCE), - description = "`PCE` should be logical") |> - validate_if(is.character(sourceGDP), - description = "`sourceGDP` should be character") |> - validate_if(is.logical(sourcePCE), - description = "`sourcePCE` should be logical") |> - validate_cols(not_na, countrycode, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(countrycode, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/spop_validate_raw.R b/R/spop_validate_raw.R deleted file mode 100644 index e73af29..0000000 --- a/R/spop_validate_raw.R +++ /dev/null @@ -1,40 +0,0 
@@ -#' Validate raw special cases pop data -#' -#' @param spop raw special case pop data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ - - stopifnot("Special POP raw data is not loaded" = !is.null(spop)) - - report <- data_validation_report() - - validate(spop, name = "Special POP raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(pop_data_level), - description = "`pop_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - pop_data_level, description = "`pop_data_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - validate_cols(not_na, country_code, year, pop_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pop_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/update_aux.R b/R/update_aux.R index 4a0f999..84bb4da 100644 --- a/R/update_aux.R +++ b/R/update_aux.R @@ -1,3 +1,254 @@ +#' Update the measure along with it's dependencies automatically. 
+#' +#' @param measure character: measure to be updated, if NULL will update all of +#' them +#' @inheritParams aux_pop_update +#' @export +auto_aux_update <- function(measure = NULL, + force = FALSE, + from = c("gh", "file", "api"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + pipfun::check_pkg_active("pipaux") + + branch <- match.arg(branch) + from <- match.arg(from) + files_changed <- FALSE + + isgls <- ls(sys.frame(), pattern = "^gls$") |> + length() > 0 + + if (isFALSE(isgls)) { + cli::cli_abort( + "object {.var gls} is not available in Globel env. + Run {.code gls <- pipfun::pip_create_globals()} first", + wrap = TRUE + ) + } + + # if there is validation report in the environment - remove it + clean_validation_report() + + creds <- pipfun::get_github_creds() + gh_user <- "https://raw.githubusercontent.com" + org_data <- paste(gh_user, + owner, + "pipaux/metadata/Data/git_metadata.csv", + sep = "/") |> + readr::read_csv(show_col_types = FALSE) + + + dependencies <- read_dependencies(gh_user, owner) + # Get all repositories under PIP-Technical-Team + all_repos <- gh::gh("GET /users/{username}/repos", + username = owner) |> + vapply("[[", "", "name") |> + #Keep only those repos that start with "aux_" + grep("^aux_", x = _, value = TRUE) + + if (!is.null(measure)) { + all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")] + } + # get hashs + hash <- + purrr::map(all_repos, + .f = ~ { + gh::gh( + "GET /repos/{owner}/{repo}/commits/{branch}", + owner = owner, + repo = .x, + branch = branch + ) + }) |> + purrr::map_chr( ~ .x[["sha"]]) + + # Get the latest hash of the repo + all_data <- + dplyr::tibble( + Repo = glue::glue("{owner}/{all_repos}"), + hash = hash, + branch = branch + ) + + old_data <- org_data %>% + dplyr::filter(.data$branch == branch) %>% + dplyr::rename(hash_original = hash) + + old_data <- old_data %>% + dplyr::inner_join(all_data, by = 
c("Repo", "branch")) + + cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") + cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") + cli::cli_alert_info("Both the numbers above should be equal or else some + debugging is required.", wrap = TRUE) + + new_data <- old_data %>% + dplyr::filter(.data$hash != .data$hash_original | + is.na(.data$hash_original) | + is.na(.data$hash)) + + # all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) + + + + # Remove everything till the last underscore so + # PIP-Technical-Team/aux_ppp changes to ppp + aux_fns <- sub(".*_", "", new_data$Repo) |> + # Keep only those whose dependencies we know + intersect(names(dependencies)) + + # For each auxiliary data to be updated + cli::cli_alert_info("Updating data for {length(aux_fns)} files.") + for (aux in aux_fns) { + # Find the corresponding functions to be run + # Add pip_ suffix so that it becomes function name + list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) + + for (fn in list_of_funcs) { + + aux_file <- sub("pip_", "", fn) + cli::cli_alert_info("Running function {fn} for aux file {aux}.") + + before_hash <- read_signature_file(aux_file, maindir, branch) + # Run the pip_.* function + match.fun(fn)(maindir = maindir, branch = branch) |> + suppressMessages() + after_hash <- read_signature_file(aux_file, maindir, branch) + + if (before_hash != after_hash) { + + cli::cli_alert_info("Updating csv for {fn}") + files_changed <- TRUE + + # find rows of of org to be modified + aux_row_org <- org_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + org_data$branch == branch + + # find rows in new that will be copied to org + aux_row_new <- new_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + new_data$branch == branch + + org_data$hash[aux_row_org] <- new_data$hash[aux_row_new] + + } # end of before_hash condition + + } # end of list_of_funcs loop + } # end of aux_fns 
loop + last_updated_time <- + aux_file_last_updated(maindir, names(dependencies), branch) + if (length(aux_fns) > 0 && files_changed) { + # Write the latest auxiliary file and corresponding hash to csv + # Always save at the end. + # sha - hash object of current csv file in Data/git_metadata.csv + # content - base64 of changed data + out <- gh::gh( + "GET /repos/{owner}/{repo}/contents/{file_path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + file_path = "Data/git_metadata.csv", + .params = list(ref = "metadata") + ) + # There is no way to update only the lines which has changed using Github API + # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 + res <- gh::gh( + "PUT /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + path = "Data/git_metadata.csv", + .params = list( + branch = "metadata", + message = "updating csv file", + sha = out$sha, + content = convert_df_to_base64(org_data) + ), + .token = creds$password + ) + } + cli::cli_h2("File updated status.") + knitr::kable(last_updated_time) +} + + + +return_value <- function(aux, dependencies) { + val <- dependencies[[aux]] + if (length(val) > 0) { + for (i in val) { + val <- c(return_value(i, dependencies), val) + } + } + return(unique(c(val, aux))) +} + +#' Function to write dataframe to GitHub +#' +#' @param df A dataframe +#' +#' @return base64 encoded dataframe +#' @export +#' +#' @examples +#' \dontrun { +#' convert_df_to_base64(mtcars) +#' } +convert_df_to_base64 <- function(df) { + df |> + write.table(quote = FALSE, + row.names = FALSE, + sep = ",") |> + capture.output() |> + paste(collapse = "\n") |> + charToRaw() |> + base64enc::base64encode() +} + +aux_file_last_updated <- function(data_dir, aux_files, branch) { + filenames <- + glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") + data <- sapply(filenames, function(x) + qs::qattributes(x)$datetime) + data.frame( + filename = 
basename(names(data)), + time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), + row.names = NULL + ) |> + dplyr::arrange(desc(time_last_update)) + +} + +read_dependencies <- function(gh_user, owner) { + dependencies <- paste(gh_user, + owner, + "pipaux/metadata/Data/dependency.yml", + sep = "/") |> + yaml::read_yaml() + + sapply(dependencies, \(x) if (length(x)) + strsplit(x, ",\\s+")[[1]] + else + character()) +} + +read_signature_file <- function(aux_file, maindir, branch) { + # Construct the path to data signature aux file + data_signature_path <- + fs::path(maindir, + "_aux", + branch, + aux_file, + glue::glue("{aux_file}_datasignature.txt")) + signature_hash <- readr::read_lines(data_signature_path) + return(signature_hash) +} + + #' Update Auxiliary data. Wrapper of measure-specific functions. #' #' @inheritParams aux_labels_pip @@ -72,3 +323,50 @@ update_aux <- function(measure, return(rs) } +#' Update all auxiliary data at once +#' +#' @inheritParams aux_cpi +#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. 
+#' @export +aux_update_all <- function(force = FALSE, + popsrc = getOption("pipaux.popsrc"), + maindir = gls$PIP_DATA_DIR) { + + # List of countries in WDI + aux_country_list(force = force, maindir = maindir) + + # PIP countries and regions + aux_countries(force = force, maindir = maindir) + aux_regions(force = force, maindir = maindir) + + # PIP Indicators + aux_indicators(force = force, maindir = maindir) + + # Poverty lines + aux_pl(force = force, maindir = maindir) + + # PFW, CPI and PPP from DLW + aux_pfw(force = force, maindir = maindir) + aux_cpi(force = force, maindir = maindir) + aux_ppp(force = force, maindir = maindir) + + # POP from Emi or WDI + aux_pop(force = force, maindir = maindir, src = popsrc) + + # GDP from WEO, Maddison and WDI (+ a few special cases) + aux_weo(force = force, maindir = maindir) + aux_maddison(force = force, maindir = maindir) + aux_gdp(force = force, maindir = maindir) + + # PCE from WDI (+ a few special cases) + aux_pce(force = force, maindir = maindir) + + # Country profiles (from Poverty GP) + aux_cp(force = force, maindir = maindir) + + # Survey metadata (from Poverty GP) + aux_metadata(force = force, maindir = maindir) + + return(invisible()) +} + diff --git a/R/utils-data-table.R b/R/utils-data-table.R deleted file mode 100644 index d2f2964..0000000 --- a/R/utils-data-table.R +++ /dev/null @@ -1,12 +0,0 @@ -# data.table is generally careful to minimize the scope for namespace -# conflicts (i.e., functions with the same name as in other packages); -# a more conservative approach using @importFrom should be careful to -# import any needed data.table special symbols as well, e.g., if you -# run DT[ , .N, by='grp'] in your package, you'll need to add -# @importFrom data.table .N to prevent the NOTE from R CMD check. -# See ?data.table::`special-symbols` for the list of such symbols -# data.table defines; see the 'Importing data.table' vignette for more -# advice (vignette('datatable-importing', 'data.table')). 
-# -#' @import data.table -NULL diff --git a/R/utils-pipe.R b/R/utils-pipe.R deleted file mode 100644 index fd0b1d1..0000000 --- a/R/utils-pipe.R +++ /dev/null @@ -1,14 +0,0 @@ -#' Pipe operator -#' -#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @param lhs A value or the magrittr placeholder. -#' @param rhs A function call using the magrittr semantics. -#' @return The result of calling `rhs(lhs)`. -NULL diff --git a/R/utils.R b/R/utils.R index a5ad1b2..442f43e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -436,7 +436,35 @@ save_aux_to_gh <- function(df, ext = ext, ...) } + + +# data.table is generally careful to minimize the scope for namespace +# conflicts (i.e., functions with the same name as in other packages); +# a more conservative approach using @importFrom should be careful to +# import any needed data.table special symbols as well, e.g., if you +# run DT[ , .N, by='grp'] in your package, you'll need to add +# @importFrom data.table .N to prevent the NOTE from R CMD check. +# See ?data.table::`special-symbols` for the list of such symbols +# data.table defines; see the 'Importing data.table' vignette for more +# advice (vignette('datatable-importing', 'data.table')). # +#' @import data.table +NULL + +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. 
+NULL diff --git a/R/validation_report.R b/R/validation_report.R new file mode 100644 index 0000000..8e9ec8f --- /dev/null +++ b/R/validation_report.R @@ -0,0 +1,90 @@ +#' Get validation report data validation error report +#' +#' @param vlddata validation data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' +#' @export +get_error_validation <- function(vlddata, detail){ + + stopifnot("Validation data is not availabel" = !is.null(vlddata)) + + err_t <- NULL + + if (any(vlddata$type == "error")){ + + err_t <- vlddata[type == "error", + .(table_name, description, call, + message, type)] + } + + + if (isFALSE(detail)) { + + cli::cli_abort("Description of invalid cases for {unique(err_t$table_name)}, + {err_t$description}") + + } else { + + if (!rlang::env_has(.pipaux, "validation_report")){ + + rlang::env_poke(.pipaux, "validation_report", err_t) + + } else { + + compiled_result <- rbind(.pipaux$validation_report, err_t) + rlang::env_poke(.pipaux, "validation_report", compiled_result) + + } + + cli::cli_inform("Validation report ({.field validation_report}) has been added to the environment varaible ({.field .pipaux}).") + } + +} + +#' Remove data validation report from .pipaux environment variable +#' +#' @export +clean_validation_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + # rlang::env_bind(.pipaux, validation_report = rlang::zap()) + rlang::env_unbind(.pipaux, "validation_report") + + } +} + +#' Send an email that contains auxiliary data validation report +#' +#' @import blastula +#' +#' @export +send_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + print(.pipaux$validation_report) + + # fname <- file.path(tempdir(), "data_validation_report.csv") + # + # write.csv(.pipaux$validation_report, fname, row.names = FALSE) + # + # compose_email( + # body = md(glue::glue( + # + # "Hello, + # + # The attched file contains auxiliary data validation report. 
+ # + # Regards"))) |> + # add_attachment(file = fname, filename = "data_validation_report") |> + # smtp_send( + # from = "tefera.degefu@outlook.com", + # to = "tdegefu@worldbank.org", + # subject = "Data validation report", + # credentials = creds_envvar(user = "tefera.degefu@outlook.com", + # pass_envvar = "SMTP_GPID_EMAIL", + # provider = "outlook") + # ) + } +} diff --git a/R/wdi_validate_raw.R b/R/wdi_validate_raw.R deleted file mode 100644 index 47fb1c8..0000000 --- a/R/wdi_validate_raw.R +++ /dev/null @@ -1,38 +0,0 @@ -#' Validate raw wdi data -#' -#' @param wdi raw wdi data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ - - stopifnot("WDI raw data is not loaded" = !is.null(wdi)) - - report <- data_validation_report() - - validate(wdi, name = "WDI raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(NE.CON.PRVT.PC.KD), - description = "`NE.CON.PRVT.PC.KD` should be numeric") |> - validate_if(is.numeric(NY.GDP.PCAP.KD), - description = "`NY.GDP.PCAP.KD` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R deleted file mode 100644 index 6392418..0000000 --- a/R/weo_validate_output.R +++ /dev/null @@ -1,36 
+0,0 @@ -#' Validate clean weo data -#' -#' @param weo clean weo data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ - - stopifnot("WEO output data is not loaded" = !is.null(weo)) - - report <- data_validation_report() - - validate(weo, name = "WEO output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(weo_gdp), - description = "`weo_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/weo_validate_raw.R b/R/weo_validate_raw.R deleted file mode 100644 index 679ffb6..0000000 --- a/R/weo_validate_raw.R +++ /dev/null @@ -1,62 +0,0 @@ -#' Validate raw weo data -#' -#' @param weo raw weo data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ - - stopifnot("WEO raw data is not loaded" = !is.null(weo)) - - report <- data_validation_report() - - weo <- weo[!is.na(`WEO Subject Code`), ] - - validate(weo, name = "WEO raw data validation") |> - validate_if(is.character(`WEO Country Code`), - description = "`WEO Country Code` should be character") |> - 
validate_if(is.character(ISO), - description = "ISO should be character") |> - validate_if(is.character(`WEO Subject Code`), - description = "`WEO Subject Code` should be character") |> - validate_if(is.character(Country), - description = "`Country` should be character") |> - validate_if(is.character(`Subject Descriptor`), - description = "`Subject Descriptor` should be character") |> - validate_if(is.character(`Subject Notes`), - description = "`Subject Notes` should be character") |> - validate_if(is.character(Units), - description = "`Units` should be character") |> - validate_if(is.character(Scale), - description = "`Scale` should be character") |> - validate_if(is.character(`Country/Series-specific Notes`), - description = "`Country/Series-specific Notes` should be character") |> - validate_if(is.numeric(`Estimates Start After`), - description = "`Estimates Start After` should be numeric") |> - validate_cols(not_na, ISO, `WEO Subject Code`, - description = "no missing values in key variables") |> - validate_if(is_uniq(ISO, `WEO Subject Code`), - description = "no duplicate records in key variables") |> - add_results(report) - - num_var_list <- grep("^[[:digit:]]", colnames(weo)) - - for (i in 1:length(num_var_list)) { - validate(weo, name = "WEO validation") |> - validate_cols(is.numeric, num_var_list[i], - description = "variables (with numeric var name) should be numeric") |> - add_results(report) - } - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/man/auto_aux_update.Rd b/man/auto_aux_update.Rd index 5bdea0a..e2b3611 100644 --- a/man/auto_aux_update.Rd +++ b/man/auto_aux_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auto_aux_update.R +% Please edit documentation in R/update_aux.R \name{auto_aux_update} \alias{auto_aux_update} \title{Update the 
measure along with it's dependencies automatically.} diff --git a/man/aux_country_list_update.Rd b/man/aux_country_list_update.Rd index e5f4a01..1bc1618 100644 --- a/man/aux_country_list_update.Rd +++ b/man/aux_country_list_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_country_list_update.R +% Please edit documentation in R/aux_country_list.R \name{aux_country_list_update} \alias{aux_country_list_update} \title{Update Country LIst} diff --git a/man/aux_cp_clean.Rd b/man/aux_cp_clean.Rd index ab496a3..2f1f192 100644 --- a/man/aux_cp_clean.Rd +++ b/man/aux_cp_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cp_clean.R +% Please edit documentation in R/aux_cp.R \name{aux_cp_clean} \alias{aux_cp_clean} \title{Clean country profile data} diff --git a/man/aux_cp_update.Rd b/man/aux_cp_update.Rd index 4d292a8..4016c25 100644 --- a/man/aux_cp_update.Rd +++ b/man/aux_cp_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cp_update.R +% Please edit documentation in R/aux_cp.R \name{aux_cp_update} \alias{aux_cp_update} \title{Update Country Profiles} diff --git a/man/aux_cpi_clean.Rd b/man/aux_cpi_clean.Rd index 8256dc1..20c3e01 100644 --- a/man/aux_cpi_clean.Rd +++ b/man/aux_cpi_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cpi_clean.R +% Please edit documentation in R/aux_cpi.R \name{aux_cpi_clean} \alias{aux_cpi_clean} \title{Clean CPI data} diff --git a/man/aux_cpi_update.Rd b/man/aux_cpi_update.Rd index 0660607..3a73d83 100644 --- a/man/aux_cpi_update.Rd +++ b/man/aux_cpi_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cpi_update.R +% Please edit documentation in R/aux_cpi.R \name{aux_cpi_update} \alias{aux_cpi_update} \title{Update CPI} diff --git a/man/aux_cpi_vintage.Rd 
b/man/aux_cpi_vintage.Rd index ffec5d5..2f94e44 100644 --- a/man/aux_cpi_vintage.Rd +++ b/man/aux_cpi_vintage.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cpi_vintage.R +% Please edit documentation in R/aux_cpi.R \name{aux_cpi_vintage} \alias{aux_cpi_vintage} \title{Check CPI Vintage} diff --git a/man/aux_gdm_update.Rd b/man/aux_gdm_update.Rd index df9d148..8101a7c 100644 --- a/man/aux_gdm_update.Rd +++ b/man/aux_gdm_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_gdm_update.R +% Please edit documentation in R/aux_gdm.R \name{aux_gdm_update} \alias{aux_gdm_update} \title{Update GDM} diff --git a/man/aux_gdp_update.Rd b/man/aux_gdp_update.Rd index 04e8f7b..68fd158 100644 --- a/man/aux_gdp_update.Rd +++ b/man/aux_gdp_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_gdp_update.R +% Please edit documentation in R/aux_gdp.R \name{aux_gdp_update} \alias{aux_gdp_update} \title{Update GDP} diff --git a/man/aux_gdp_weo.Rd b/man/aux_gdp_weo.Rd index 1a573a2..94044ac 100644 --- a/man/aux_gdp_weo.Rd +++ b/man/aux_gdp_weo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_gdp_weo.R +% Please edit documentation in R/aux_gdp.R \name{aux_gdp_weo} \alias{aux_gdp_weo} \title{Fetch GDP data from WEO} diff --git a/man/aux_metadata_update.Rd b/man/aux_metadata_update.Rd index 8e255a2..0e9f05b 100644 --- a/man/aux_metadata_update.Rd +++ b/man/aux_metadata_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_metadata_update.R +% Please edit documentation in R/aux_metadata.R \name{aux_metadata_update} \alias{aux_metadata_update} \title{Update metadata file} diff --git a/man/aux_metaregion.Rd b/man/aux_metaregion.Rd index 891e4a5..ae7a680 100644 --- a/man/aux_metaregion.Rd +++ b/man/aux_metaregion.Rd @@ -1,5 
+1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_metaregion.R +% Please edit documentation in R/aux_metadata.R \name{aux_metaregion} \alias{aux_metaregion} \title{Metadata for PIP regions} diff --git a/man/aux_pce_update.Rd b/man/aux_pce_update.Rd index 6190a2b..a967a30 100644 --- a/man/aux_pce_update.Rd +++ b/man/aux_pce_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pce_update.R +% Please edit documentation in R/aux_pce.R \name{aux_pce_update} \alias{aux_pce_update} \title{Update PCE} diff --git a/man/aux_pfw_clean.Rd b/man/aux_pfw_clean.Rd index d0bd814..d55ba52 100644 --- a/man/aux_pfw_clean.Rd +++ b/man/aux_pfw_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pfw_clean.R +% Please edit documentation in R/aux_pfw.R \name{aux_pfw_clean} \alias{aux_pfw_clean} \title{Clean PFW} diff --git a/man/aux_pfw_key.Rd b/man/aux_pfw_key.Rd index d5f9d5e..a6a58cb 100644 --- a/man/aux_pfw_key.Rd +++ b/man/aux_pfw_key.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pfw_key.R +% Please edit documentation in R/aux_pfw.R \name{aux_pfw_key} \alias{aux_pfw_key} \title{Generate a dataset that contains pfw keys} diff --git a/man/aux_pfw_update.Rd b/man/aux_pfw_update.Rd index ef99acd..e876e5e 100644 --- a/man/aux_pfw_update.Rd +++ b/man/aux_pfw_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pfw_update.R +% Please edit documentation in R/aux_pfw.R \name{aux_pfw_update} \alias{aux_pfw_update} \title{Update PFW} diff --git a/man/aux_pl_clean.Rd b/man/aux_pl_clean.Rd index beb1116..9a296c3 100644 --- a/man/aux_pl_clean.Rd +++ b/man/aux_pl_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pl_clean.R +% Please edit documentation in R/aux_pl.R 
\name{aux_pl_clean} \alias{aux_pl_clean} \title{Build a data table for each list from yaml file with poverty lines info} diff --git a/man/aux_pop_update.Rd b/man/aux_pop_update.Rd index c1fca11..82a85c7 100644 --- a/man/aux_pop_update.Rd +++ b/man/aux_pop_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pop_update.R +% Please edit documentation in R/aux_pop.R \name{aux_pop_update} \alias{aux_pop_update} \title{Update POP} diff --git a/man/aux_ppp_clean.Rd b/man/aux_ppp_clean.Rd index 9768319..c092ebb 100644 --- a/man/aux_ppp_clean.Rd +++ b/man/aux_ppp_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_ppp_clean.R +% Please edit documentation in R/aux_ppp.R \name{aux_ppp_clean} \alias{aux_ppp_clean} \title{Clean PPP data from datalibweb to meet PIP protocols} diff --git a/man/aux_ppp_update.Rd b/man/aux_ppp_update.Rd index e5f21d9..f1cbe9e 100644 --- a/man/aux_ppp_update.Rd +++ b/man/aux_ppp_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_ppp_update.R +% Please edit documentation in R/aux_ppp.R \name{aux_ppp_update} \alias{aux_ppp_update} \title{Update PPP} diff --git a/man/aux_update_all.Rd b/man/aux_update_all.Rd index fff2d92..64b7147 100644 --- a/man/aux_update_all.Rd +++ b/man/aux_update_all.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_update_all.R +% Please edit documentation in R/update_aux.R \name{aux_update_all} \alias{aux_update_all} \title{Update all auxiliary data at once} diff --git a/man/aux_wdi_update.Rd b/man/aux_wdi_update.Rd index e24963a..1986df6 100644 --- a/man/aux_wdi_update.Rd +++ b/man/aux_wdi_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_wdi_update.R +% Please edit documentation in R/aux_wdi.R \name{aux_wdi_update} \alias{aux_wdi_update} 
\title{Update National accounts data from WDI} diff --git a/man/aux_weo_clean.Rd b/man/aux_weo_clean.Rd index 18c68fc..d7d1d15 100644 --- a/man/aux_weo_clean.Rd +++ b/man/aux_weo_clean.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_weo_clean.R +% Please edit documentation in R/aux_weo.R \name{aux_weo_clean} \alias{aux_weo_clean} \title{Clean WEO data} diff --git a/man/cl_validate_raw.Rd b/man/cl_validate_raw.Rd index 21e4aff..399b7d8 100644 --- a/man/cl_validate_raw.Rd +++ b/man/cl_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cl_validate_raw.R +% Please edit documentation in R/aux_country_list.R \name{cl_validate_raw} \alias{cl_validate_raw} \title{Validate raw country list data} diff --git a/man/clean_cp_names.Rd b/man/clean_cp_names.Rd index 7ef9628..14a9c5e 100644 --- a/man/clean_cp_names.Rd +++ b/man/clean_cp_names.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cp_clean.R +% Please edit documentation in R/aux_cp.R \name{clean_cp_names} \alias{clean_cp_names} \title{Clean names from original CP files} diff --git a/man/clean_from_wide.Rd b/man/clean_from_wide.Rd index 8794185..f66adf4 100644 --- a/man/clean_from_wide.Rd +++ b/man/clean_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pop_update.R +% Please edit documentation in R/aux_pop.R \name{clean_from_wide} \alias{clean_from_wide} \title{Clean from WDI format} diff --git a/man/clean_names_from_wide.Rd b/man/clean_names_from_wide.Rd index 7a73217..8ab393b 100644 --- a/man/clean_names_from_wide.Rd +++ b/man/clean_names_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_pop_update.R +% Please edit documentation in R/aux_pop.R \name{clean_names_from_wide} \alias{clean_names_from_wide} \title{Clean names from wide WDI 
format} diff --git a/man/clean_validation_report.Rd b/man/clean_validation_report.Rd index 2573e0c..52a56d7 100644 --- a/man/clean_validation_report.Rd +++ b/man/clean_validation_report.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clean_validation_report.R +% Please edit documentation in R/validation_report.R \name{clean_validation_report} \alias{clean_validation_report} \title{Remove data validation report from .pipaux environment variable} diff --git a/man/convert_df_to_base64.Rd b/man/convert_df_to_base64.Rd index 88fb3aa..46d0717 100644 --- a/man/convert_df_to_base64.Rd +++ b/man/convert_df_to_base64.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auto_aux_update.R +% Please edit documentation in R/update_aux.R \name{convert_df_to_base64} \alias{convert_df_to_base64} \title{Function to write dataframe to GitHub} diff --git a/man/countries_validate_output.Rd b/man/countries_validate_output.Rd index 14d1105..d01d9da 100644 --- a/man/countries_validate_output.Rd +++ b/man/countries_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/countries_validate_output.R +% Please edit documentation in R/aux_countries.R \name{countries_validate_output} \alias{countries_validate_output} \title{Validate output countries data} diff --git a/man/cpi_validate_output.Rd b/man/cpi_validate_output.Rd index 09767fc..5bcd165 100644 --- a/man/cpi_validate_output.Rd +++ b/man/cpi_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cpi_validate_output.R +% Please edit documentation in R/aux_cpi.R \name{cpi_validate_output} \alias{cpi_validate_output} \title{Validate clean cpi data} diff --git a/man/cpi_validate_raw.Rd b/man/cpi_validate_raw.Rd index 16e3cad..caab9e6 100644 --- a/man/cpi_validate_raw.Rd +++ b/man/cpi_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by 
roxygen2: do not edit by hand -% Please edit documentation in R/cpi_validate_raw.R +% Please edit documentation in R/aux_cpi.R \name{cpi_validate_raw} \alias{cpi_validate_raw} \title{Validate raw cpi data} diff --git a/man/fake_aux_sna.Rd b/man/fake_aux_sna.Rd index a0b593d..13c752c 100644 --- a/man/fake_aux_sna.Rd +++ b/man/fake_aux_sna.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fake_aux_sna.R +% Please edit documentation in R/aux_sna.R \name{fake_aux_sna} \alias{fake_aux_sna} \title{Fake PIP SNA function} diff --git a/man/gdm_validate_output.Rd b/man/gdm_validate_output.Rd index dce34c7..36bfa03 100644 --- a/man/gdm_validate_output.Rd +++ b/man/gdm_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdm_validate_output.R +% Please edit documentation in R/aux_gdm.R \name{gdm_validate_output} \alias{gdm_validate_output} \title{Validate clean gdm data} diff --git a/man/gdm_validate_raw.Rd b/man/gdm_validate_raw.Rd index 23cdb9a..57ba868 100644 --- a/man/gdm_validate_raw.Rd +++ b/man/gdm_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdm_validate_raw.R +% Please edit documentation in R/aux_gdm.R \name{gdm_validate_raw} \alias{gdm_validate_raw} \title{Validate raw gdm data} diff --git a/man/gdp_validate_output.Rd b/man/gdp_validate_output.Rd index b682f70..6c58583 100644 --- a/man/gdp_validate_output.Rd +++ b/man/gdp_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdp_validate_output.R +% Please edit documentation in R/aux_gdp.R \name{gdp_validate_output} \alias{gdp_validate_output} \title{Validate output gdp data} diff --git a/man/get_error_validation.Rd b/man/get_error_validation.Rd index 397ab01..87d2efd 100644 --- a/man/get_error_validation.Rd +++ b/man/get_error_validation.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do 
not edit by hand -% Please edit documentation in R/get_error_validation.R +% Please edit documentation in R/validation_report.R \name{get_error_validation} \alias{get_error_validation} \title{Get validation report data validation error report} diff --git a/man/incgroup_validate_output.Rd b/man/incgroup_validate_output.Rd index e470eb1..ca12105 100644 --- a/man/incgroup_validate_output.Rd +++ b/man/incgroup_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/incgroup_validate_output.R +% Please edit documentation in R/aux_income_groups.R \name{incgroup_validate_output} \alias{incgroup_validate_output} \title{Validate income group output data} diff --git a/man/load_cpi.Rd b/man/load_cpi.Rd index cd3a807..2c732d1 100644 --- a/man/load_cpi.Rd +++ b/man/load_cpi.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_cpi_vintage.R +% Please edit documentation in R/aux_cpi.R \name{load_cpi} \alias{load_cpi} \title{Load cpi files and create CPI ID variable} diff --git a/man/load_raw_aux.Rd b/man/load_raw_aux.Rd index 5c01c5d..bd41da0 100644 --- a/man/load_raw_aux.Rd +++ b/man/load_raw_aux.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load_raw_aux.R +% Please edit documentation in R/load_aux.R \name{load_raw_aux} \alias{load_raw_aux} \title{Load Raw Auxiliary data} diff --git a/man/metadata_validate_output.Rd b/man/metadata_validate_output.Rd index 811bb1c..b8fb37d 100644 --- a/man/metadata_validate_output.Rd +++ b/man/metadata_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metadata_validate_output.R +% Please edit documentation in R/aux_metadata.R \name{metadata_validate_output} \alias{metadata_validate_output} \title{Validate output metadata data} diff --git a/man/metadata_validate_raw.Rd b/man/metadata_validate_raw.Rd index 61e7cae..b4ea3af 100644 
--- a/man/metadata_validate_raw.Rd +++ b/man/metadata_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metadata_validate_raw.R +% Please edit documentation in R/aux_metadata.R \name{metadata_validate_raw} \alias{metadata_validate_raw} \title{Validate raw metadata data} diff --git a/man/mpd_validate_raw.Rd b/man/mpd_validate_raw.Rd index 9e9071b..6146b00 100644 --- a/man/mpd_validate_raw.Rd +++ b/man/mpd_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mpd_validate_raw.R +% Please edit documentation in R/aux_maddison.R \name{mpd_validate_raw} \alias{mpd_validate_raw} \title{Validate raw maddison data} diff --git a/man/npl_validate_output.Rd b/man/npl_validate_output.Rd index aaafeda..48faa47 100644 --- a/man/npl_validate_output.Rd +++ b/man/npl_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/npl_validate_output.R +% Please edit documentation in R/aux_npl.R \name{npl_validate_output} \alias{npl_validate_output} \title{Validate npl output data} diff --git a/man/npl_validate_raw.Rd b/man/npl_validate_raw.Rd index 89de168..6702bd3 100644 --- a/man/npl_validate_raw.Rd +++ b/man/npl_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/npl_validate_raw.R +% Please edit documentation in R/aux_npl.R \name{npl_validate_raw} \alias{npl_validate_raw} \title{Validate npl raw data} diff --git a/man/pce_validate_output.Rd b/man/pce_validate_output.Rd index 007c847..eedddf8 100644 --- a/man/pce_validate_output.Rd +++ b/man/pce_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pce_validate_output.R +% Please edit documentation in R/aux_pce.R \name{pce_validate_output} \alias{pce_validate_output} \title{Validate output pce data} diff --git a/man/pfw_validate_output.Rd 
b/man/pfw_validate_output.Rd index 2d32ac2..59173f9 100644 --- a/man/pfw_validate_output.Rd +++ b/man/pfw_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pfw_validate_output.R +% Please edit documentation in R/aux_pfw.R \name{pfw_validate_output} \alias{pfw_validate_output} \title{Validate clean pfw data} diff --git a/man/pfw_validate_raw.Rd b/man/pfw_validate_raw.Rd index 1eb915c..64101c3 100644 --- a/man/pfw_validate_raw.Rd +++ b/man/pfw_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pfw_validate_raw.R +% Please edit documentation in R/aux_pfw.R \name{pfw_validate_raw} \alias{pfw_validate_raw} \title{Validate raw pfw data} diff --git a/man/pipe.Rd b/man/pipe.Rd index a648c29..5fa90fe 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-pipe.R +% Please edit documentation in R/utils.R \name{\%>\%} \alias{\%>\%} \title{Pipe operator} diff --git a/man/pl_validate_output.Rd b/man/pl_validate_output.Rd index 6c21dbc..4eaabfd 100644 --- a/man/pl_validate_output.Rd +++ b/man/pl_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pl_validate_output.R +% Please edit documentation in R/aux_pl.R \name{pl_validate_output} \alias{pl_validate_output} \title{Validate output pl data} diff --git a/man/pop_validate_output.Rd b/man/pop_validate_output.Rd index f35f76d..118f796 100644 --- a/man/pop_validate_output.Rd +++ b/man/pop_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pop_validate_output.R +% Please edit documentation in R/aux_pop.R \name{pop_validate_output} \alias{pop_validate_output} \title{Validate output pop data} diff --git a/man/pop_validate_raw.Rd b/man/pop_validate_raw.Rd index 2f903a8..2722911 100644 --- 
a/man/pop_validate_raw.Rd +++ b/man/pop_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pop_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{pop_validate_raw} \alias{pop_validate_raw} \title{Validate pop raw data download from wdi} diff --git a/man/popmain_validate_raw.Rd b/man/popmain_validate_raw.Rd index 1ce94bd..f45b52e 100644 --- a/man/popmain_validate_raw.Rd +++ b/man/popmain_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/popmain_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{popmain_validate_raw} \alias{popmain_validate_raw} \title{Validate raw main pop data} diff --git a/man/ppp_validate_output.Rd b/man/ppp_validate_output.Rd index 1295262..37e7d8d 100644 --- a/man/ppp_validate_output.Rd +++ b/man/ppp_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ppp_validate_output.R +% Please edit documentation in R/aux_ppp.R \name{ppp_validate_output} \alias{ppp_validate_output} \title{Validate output ppp data} diff --git a/man/ppp_validate_raw.Rd b/man/ppp_validate_raw.Rd index 2afdcdb..991366a 100644 --- a/man/ppp_validate_raw.Rd +++ b/man/ppp_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ppp_validate_raw.R +% Please edit documentation in R/aux_ppp.R \name{ppp_validate_raw} \alias{ppp_validate_raw} \title{Validate raw ppp data} diff --git a/man/send_report.Rd b/man/send_report.Rd index cabd5f0..f77c2e9 100644 --- a/man/send_report.Rd +++ b/man/send_report.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/send_report.R +% Please edit documentation in R/validation_report.R \name{send_report} \alias{send_report} \title{Send an email that contains auxiliary data validation report} diff --git a/man/sna_fy_validate_raw.Rd 
b/man/sna_fy_validate_raw.Rd index f5d66d9..c1e051c 100644 --- a/man/sna_fy_validate_raw.Rd +++ b/man/sna_fy_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sna_fy_validate_raw.R +% Please edit documentation in R/aux_sna.R \name{sna_fy_validate_raw} \alias{sna_fy_validate_raw} \title{Validate raw sna_fy data} diff --git a/man/sna_validate_raw.Rd b/man/sna_validate_raw.Rd index 8fb8a5d..4c090dc 100644 --- a/man/sna_validate_raw.Rd +++ b/man/sna_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sna_validate_raw.R +% Please edit documentation in R/aux_sna.R \name{sna_validate_raw} \alias{sna_validate_raw} \title{Validate raw special national accounts (sna) data} diff --git a/man/spop_validate_raw.Rd b/man/spop_validate_raw.Rd index 0222f97..d0817f1 100644 --- a/man/spop_validate_raw.Rd +++ b/man/spop_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spop_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{spop_validate_raw} \alias{spop_validate_raw} \title{Validate raw special cases pop data} diff --git a/man/wdi_validate_raw.Rd b/man/wdi_validate_raw.Rd index b56e209..0a82df0 100644 --- a/man/wdi_validate_raw.Rd +++ b/man/wdi_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wdi_validate_raw.R +% Please edit documentation in R/aux_wdi.R \name{wdi_validate_raw} \alias{wdi_validate_raw} \title{Validate raw wdi data} diff --git a/man/weo_validate_output.Rd b/man/weo_validate_output.Rd index 3c24bdf..39d8309 100644 --- a/man/weo_validate_output.Rd +++ b/man/weo_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/weo_validate_output.R +% Please edit documentation in R/aux_weo.R \name{weo_validate_output} \alias{weo_validate_output} \title{Validate clean weo 
data} diff --git a/man/weo_validate_raw.Rd b/man/weo_validate_raw.Rd index 17462dc..ed3e53a 100644 --- a/man/weo_validate_raw.Rd +++ b/man/weo_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/weo_validate_raw.R +% Please edit documentation in R/aux_weo.R \name{weo_validate_raw} \alias{weo_validate_raw} \title{Validate raw weo data} From a2b844f71d1f4e88f27fc9057548f1ab07ec06c1 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 21 Jan 2025 17:23:50 -0500 Subject: [PATCH 19/20] fix test cl validation --- tests/testthat/test-cl-validation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index ff442f3..33eb5e8 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -8,7 +8,7 @@ temp_fld <- "Y:/tefera_pipaux_test" test_that("cl_validate_raw() works identifying duplicate error", { - cl <- pip_country_list_update(class_branch = "master") + cl <- aux_country_list_update(class_branch = "master") cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] @@ -19,7 +19,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { test_that("cl_validate_raw() works identifying invalid value", { - cl <- pip_country_list_update(class_branch = "master") + cl <- aux_country_list_update(class_branch = "master") cl[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", "SSA", africa_split_code), From faba9da7385c0a9a8ed760fd6edc1dab962edc11 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 21 Jan 2025 17:44:33 -0500 Subject: [PATCH 20/20] more fixes --- tests/testthat/test-load_raw_aux.R | 10 ---------- tests/testthat/test-merger_aux.R | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) delete mode 100644 tests/testthat/test-load_raw_aux.R diff --git a/tests/testthat/test-load_raw_aux.R b/tests/testthat/test-load_raw_aux.R deleted file mode 
100644 index cc4c18f..0000000 --- a/tests/testthat/test-load_raw_aux.R +++ /dev/null @@ -1,10 +0,0 @@ -test_that("pipfun::load_raw_aux is deprecated", { - expect_snapshot({ - - lr <- pipfun::load_from_gh(measure = "cpi") - lf <- pipfun::load_from_gh(measure = "cpi") - - expect_equal(lr, lf, ignore_attr = TRUE) - - }) -}) diff --git a/tests/testthat/test-merger_aux.R b/tests/testthat/test-merger_aux.R index 83ded34..ba0a403 100644 --- a/tests/testthat/test-merger_aux.R +++ b/tests/testthat/test-merger_aux.R @@ -1,4 +1,4 @@ -temp_fld <- "Y:/tefera_pipaux_test" +temp_fld <- "Y:\\tefera_pipaux_test" pfw <- load_aux("pfw", branch = "DEV",