diff --git a/NAMESPACE b/NAMESPACE index 1521cf4..a6eca3b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,40 @@ export("%>%") export(auto_aux_update) +export(aux_censoring) +export(aux_countries) +export(aux_country_list) +export(aux_cp) +export(aux_cpi) +export(aux_data) +export(aux_dictionary) +export(aux_gdm) +export(aux_gdp) +export(aux_gdp_weo) +export(aux_income_groups) +export(aux_indicators) +export(aux_maddison) +export(aux_metadata) +export(aux_metadata_update) +export(aux_metaregion) +export(aux_missing_data) +export(aux_nan) +export(aux_npl) +export(aux_pce) +export(aux_pfw) +export(aux_pfw_key) +export(aux_pl) +export(aux_pl_clean) +export(aux_pop) +export(aux_ppp) +export(aux_prices) +export(aux_regions) +export(aux_sna) +export(aux_update_all) +export(aux_wdi) +export(aux_wdi_update) +export(aux_weo) +export(aux_weo_clean) export(cl_validate_raw) export(clean_validation_report) export(convert_df_to_base64) @@ -9,6 +43,7 @@ export(countries_validate_output) export(cpi_validate_output) export(cpi_validate_raw) export(draw_model) +export(fake_aux_sna) export(gdm_validate_output) export(gdm_validate_raw) export(gdp_validate_output) @@ -24,39 +59,6 @@ export(npl_validate_raw) export(pce_validate_output) export(pfw_validate_output) export(pfw_validate_raw) -export(pip_censoring) -export(pip_countries) -export(pip_country_list) -export(pip_cp) -export(pip_cpi) -export(pip_dictionary) -export(pip_gdm) -export(pip_gdp) -export(pip_gdp_weo) -export(pip_income_groups) -export(pip_indicators) -export(pip_maddison) -export(pip_metadata) -export(pip_metadata_update) -export(pip_metaregion) -export(pip_missing_data) -export(pip_nan) -export(pip_npl) -export(pip_pce) -export(pip_pfw) -export(pip_pfw_key) -export(pip_pl) -export(pip_pl_clean) -export(pip_pop) -export(pip_ppp) -export(pip_prices) -export(pip_regions) -export(pip_sna) -export(pip_update_all_aux) -export(pip_wdi) -export(pip_wdi_update) -export(pip_weo) -export(pip_weo_clean) export(pl_validate_output) export(pop_validate_output) export(pop_validate_raw) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R deleted file mode 100644 index b2bfcd1..0000000 --- a/R/auto_aux_update.R +++ /dev/null @@ -1,249 +0,0 @@ -#' Update the measure along with it's dependencies automatically. -#' -#' @param measure character: measure to be updated, if NULL will update all of -#' them -#' @inheritParams pip_pop_update -#' @export -auto_aux_update <- function(measure = NULL, - force = FALSE, - from = c("gh", "file", "api"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - pipfun::check_pkg_active("pipaux") - - branch <- match.arg(branch) - from <- match.arg(from) - files_changed <- FALSE - - isgls <- ls(sys.frame(), pattern = "^gls$") |> - length() > 0 - - if (isFALSE(isgls)) { - cli::cli_abort( - "object {.var gls} is not available in Globel env. - Run {.code gls <- pipfun::pip_create_globals()} first", - wrap = TRUE - ) - } - - # if there is validation report in the environment - remove it - clean_validation_report() - - creds <- pipfun::get_github_creds() - gh_user <- "https://raw.githubusercontent.com" - org_data <- paste(gh_user, - owner, - "pipaux/metadata/Data/git_metadata.csv", - sep = "/") |> - readr::read_csv(show_col_types = FALSE) - - - dependencies <- read_dependencies(gh_user, owner) - # Get all repositories under PIP-Technical-Team - all_repos <- gh::gh("GET /users/{username}/repos", - username = owner) |> - vapply("[[", "", "name") |> - #Keep only those repos that start with "aux_" - grep("^aux_", x = _, value = TRUE) - - if (!is.null(measure)) { - all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")] - } - # get hashs - hash <- - purrr::map(all_repos, - .f = ~ { - gh::gh( - "GET /repos/{owner}/{repo}/commits/{branch}", - owner = owner, - repo = .x, - branch = branch - ) - }) |> - purrr::map_chr( ~ .x[["sha"]]) - - # Get the latest hash of the repo - all_data <- - dplyr::tibble( - Repo = glue::glue("{owner}/{all_repos}"), - hash = hash, - branch = branch - ) - - old_data <- org_data %>% - dplyr::filter(.data$branch == branch) %>% - dplyr::rename(hash_original = hash) - - old_data <- old_data %>% - dplyr::inner_join(all_data, by = c("Repo", "branch")) - - cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") - cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") - cli::cli_alert_info("Both the numbers above should be equal or else some - debugging is required.", wrap = TRUE) - - new_data <- old_data %>% - dplyr::filter(.data$hash != .data$hash_original | - is.na(.data$hash_original) | - is.na(.data$hash)) - - # all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) - - - - # Remove everything till the last underscore so - # PIP-Technical-Team/aux_ppp changes to ppp - aux_fns <- sub(".*_", "", new_data$Repo) |> - # Keep only those whose dependencies we know - intersect(names(dependencies)) - - # For each auxiliary data to be updated - cli::cli_alert_info("Updating data for {length(aux_fns)} files.") - for (aux in aux_fns) { - # Find the corresponding functions to be run - # Add pip_ suffix so that it becomes function name - list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) - - for (fn in list_of_funcs) { - - aux_file <- sub("pip_", "", fn) - cli::cli_alert_info("Running function {fn} for aux file {aux}.") - - before_hash <- read_signature_file(aux_file, maindir, branch) - # Run the pip_.* function - match.fun(fn)(maindir = maindir, branch = branch) |> - suppressMessages() - after_hash <- read_signature_file(aux_file, maindir, branch) - - if (before_hash != after_hash) { - - cli::cli_alert_info("Updating csv for {fn}") - files_changed <- TRUE - - # find rows of of org to be modified - aux_row_org <- org_data$Repo |> - fs::path_file() |> - sub('aux_', '', x = _) %in% aux_file & - org_data$branch == branch - - # find rows in new that will be copied to org - aux_row_new <- new_data$Repo |> - fs::path_file() |> - sub('aux_', '', x = _) %in% aux_file & - new_data$branch == branch - - org_data$hash[aux_row_org] <- new_data$hash[aux_row_new] - - } # end of before_hash condition - - } # end of list_of_funcs loop - } # end of aux_fns loop - last_updated_time <- - aux_file_last_updated(maindir, names(dependencies), branch) - if (length(aux_fns) > 0 && files_changed) { - # Write the latest auxiliary file and corresponding hash to csv - # Always save at the end. - # sha - hash object of current csv file in Data/git_metadata.csv - # content - base64 of changed data - out <- gh::gh( - "GET /repos/{owner}/{repo}/contents/{file_path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - file_path = "Data/git_metadata.csv", - .params = list(ref = "metadata") - ) - # There is no way to update only the lines which has changed using Github API - # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 - res <- gh::gh( - "PUT /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "pipaux", - path = "Data/git_metadata.csv", - .params = list( - branch = "metadata", - message = "updating csv file", - sha = out$sha, - content = convert_df_to_base64(org_data) - ), - .token = creds$password - ) - } - cli::cli_h2("File updated status.") - knitr::kable(last_updated_time) -} - - - -return_value <- function(aux, dependencies) { - val <- dependencies[[aux]] - if (length(val) > 0) { - for (i in val) { - val <- c(return_value(i, dependencies), val) - } - } - return(unique(c(val, aux))) -} - -#' Function to write dataframe to GitHub -#' -#' @param df A dataframe -#' -#' @return base64 encoded dataframe -#' @export -#' -#' @examples -#' \dontrun { -#' convert_df_to_base64(mtcars) -#' } -convert_df_to_base64 <- function(df) { - df |> - write.table(quote = FALSE, - row.names = FALSE, - sep = ",") |> - capture.output() |> - paste(collapse = "\n") |> - charToRaw() |> - base64enc::base64encode() -} - -aux_file_last_updated <- function(data_dir, aux_files, branch) { - filenames <- - glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") - data <- sapply(filenames, function(x) - qs::qattributes(x)$datetime) - data.frame( - filename = basename(names(data)), - time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), - row.names = NULL - ) |> - dplyr::arrange(desc(time_last_update)) - -} - -read_dependencies <- function(gh_user, owner) { - dependencies <- paste(gh_user, - owner, - "pipaux/metadata/Data/dependency.yml", - sep = "/") |> - yaml::read_yaml() - - sapply(dependencies, \(x) if (length(x)) - strsplit(x, ",\\s+")[[1]] - else - character()) -} - -read_signature_file <- function(aux_file, maindir, branch) { - # Construct the path to data signature aux file - data_signature_path <- - fs::path(maindir, - "_aux", - branch, - aux_file, - glue::glue("{aux_file}_datasignature.txt")) - signature_hash <- readr::read_lines(data_signature_path) - return(signature_hash) -} diff --git a/R/pip_censoring.R b/R/aux_censoring.R similarity index 95% rename from R/pip_censoring.R rename to R/aux_censoring.R index d5cc657..f239e03 100644 --- a/R/pip_censoring.R +++ b/R/aux_censoring.R @@ -3,10 +3,10 @@ #' Load or update censoring data #' #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_censoring <- function(action = c("update", "load"), +aux_censoring <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/countries_validate_output.R b/R/aux_countries.R similarity index 60% rename from R/countries_validate_output.R rename to R/aux_countries.R index baa016f..dd1e274 100644 --- a/R/countries_validate_output.R +++ b/R/aux_countries.R @@ -1,3 +1,72 @@ +#' PIP Countries +#' +#' Update or load a dataset with countries. +#' +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +aux_countries <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + measure <- "countries" + action <- match.arg(action) + branch <- match.arg(branch) + + if (action == "update") { + + ## Special national accounts -------- + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + + + pfw <- pfw[inpovcal == 1, + ][, + c("country_code") + ] |> + unique() + + + countries <- cl[country_code %in% pfw$country_code + ][, + c("pcn_region", "pcn_region_code") := NULL] + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## save -------- + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(countries, "aux_name", "countries") + setattr(countries, + "aux_key", + c("country_code")) + + pipfun::pip_sign_save( + x = countries, + measure = measure, + msrdir = msrdir, + force = force + ) + } else { + df <- load_aux( + maindir = maindir, + measure = measure + ) + return(df) + } +} + #' Validate output countries data #' #' @param countries output countries data diff --git a/R/aux_country_list.R b/R/aux_country_list.R new file mode 100644 index 0000000..2f811de --- /dev/null +++ b/R/aux_country_list.R @@ -0,0 +1,421 @@ +#' List of countries +#' +#' Load or update dataset with WDI countries. See details. +#' +#' This function creates a combined dataset of countries in WDI and their +#' respective regional classification by querying `wbstats::wb_countries()`, as +#' well as reading from the PovcalNet Masterfile to fetch PCN region codes. +#' +#' The dependency on the PCN Masterfile should be changed in the future. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +#' @return logical if `action = "update"` or data.table if `action = "load"` +aux_country_list <- function(action = c("update", "load"), + maindir = gls$PIP_DATA_DIR, + force = FALSE, + branch = c("DEV", "PROD", "main"), + class_branch = "master", + detail = getOption("pipaux.detail.raw") + ) { + measure <- "country_list" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + + ## Special national accounts -------- + cl <- aux_country_list_update(class_branch = class_branch) + + # validate country list raw data + cl_validate_raw(cl, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(cl, "aux_name", "country_list") + setattr(cl, + "aux_key", + c("country_code")) + + saved <- pipfun::pip_sign_save( + x = cl, + measure = measure, + msrdir = msrdir, + force = force + ) + + if (saved) { + cl_sha <- digest::sha1(cl) + out <- gh::gh( + "GET /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "aux_country_list", + path = "sha_country_list.txt", + .params = list(ref = "DEV") + ) + + res <- gh::gh( + "PUT /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "aux_country_list", + path = "sha_country_list.txt", + .params = list( + branch = branch, + message = paste0("update on ", prettyNum(Sys.time())), + sha = out$sha, + content = base64enc::base64encode(charToRaw(cl_sha)) + ), + .token = Sys.getenv("GITHUB_PAT") + ) + + } + + return(invisible(saved)) + + } else { + + df <- load_aux(maindir = maindir, + measure = measure, + branch = branch) + return(df) + } +} + +#' Update Country LIst +#' +#' @param class_branch character: names of branch of GPID-WB/class repo. Default +#' if master +aux_country_list_update <- + function(class_branch = "master") { + + # Check arguments + measure <- "country_list" + + # ____________________________________________________________________________ + # Read Data from WDI #### + + wdi <- + wbstats::wb_countries() |> + as.data.table() |> + { + \(.) { + + # clean data + + iso2 <- grep("_iso2c", names(.), value = TRUE) + x <- .[, !..iso2] + + iso3 <- grep("_iso3c", names(x), value = TRUE) + + withiso <- + gsub("_iso3c", "", iso3) |> + paste0(collapse = "|") |> + grep(names(x), value = TRUE) + + tokeep <- c("country", "iso3c", withiso) + + x[region != "Aggregates" + ][, + ..tokeep + ] + } + }() + + + # rename iso3c + owdi <- names(wdi) + nwdi <- + gsub("iso3c", "code", names(wdi)) + + setnames(wdi, owdi, nwdi) + + + # Add "(excluding high income)" to South Asia + wdi[, admin_region := fifelse(test = grepl("income", admin_region) | is.na(admin_region), + yes = admin_region , + no = paste(admin_region , "(excluding high income)"))] + + # ____________________________________________________________________________ + # Read data from CLASS.dta file #### + + ## Special national accounts -------- + byv <- + c( + "code", + "region_SSA", + "fcv_current", + "region_pip") + + dt <- pipfun::load_from_gh( + measure = measure, + owner = "GPID-WB", + repo = "Class", + branch = class_branch, + filename = "OutputData/CLASS", + ext = "dta" + ) |> + as.data.table() |> + unique(by = byv) |> + (\(.){.[, ..byv]})() # select just these variables + + + dt_o <- names(dt) + dt_n <- gsub("_current", "", dt_o) + + setnames(dt, dt_o, dt_n) + setnames(dt, + old = c("region_SSA", "region_pip"), + new = c("africa_split_code", "pip_region_code")) + + # ____________________________________________________________________________ + # Merge wdi and CLASS #### + + + rg <- + joyn::joyn(dt, wdi, + by = "code", + match_type = "1:1", + reportvar = FALSE, + verbose = FALSE) + + + # ____________________________________________________________________________ + # Clean Data #### + + # PIP region + + rg[, pip_region := fifelse(pip_region_code == "OHI", + yes = "Other High Income Countries", + no = region) + ] + + + + # East and West Africa + + rg[, + africa_split := fcase( + africa_split_code == "", "", + africa_split_code == "AFE", "Eastern and Southern Africa", + africa_split_code == "AFW", "Western and Central Africa", + default = "") + ][, + africa_split_code := fifelse(test = africa_split_code == "", + yes = "", + no = africa_split_code) + ] + + # Fragile countries + + rg[, + fcv_code := fifelse(fcv == "Yes", "FCVT", "FCVF") + ][, + fcv := fifelse(fcv == "Yes", "Fragile", "Not-fragile")] + + ## Admin regions + + rg[, + admin_region_code := fifelse( + admin_region_code == "" | is.na(admin_region_code), + NA_character_, + paste0(admin_region_code, "-AD"))] + + + # Add PCN region temporarilly + + rg[, + `:=`( + pcn_region = pip_region, + pcn_region_code = pip_region_code + )] + + # ff <- copy(rg) + # rg <- copy(ff) + + # Convert empty strings to NA + vars <- names(rg) + names(vars) <- vars + rg[, (vars) := lapply(.SD, + \(x) { + fifelse(x == "" | is.na(x), NA_character_, x) + } + ) + ] + + + # fix "Not classified" + # ff <- copy(rg) + + # rg <- copy(ff) + + # not_class <- function(x) { + # y <- deparse(substitute(x)) + # fifelse(test = grepl("classified", x), + # paste(x, "by", y), + # x) + # } + # + # rg[, (vars) := lapply(.SD,not_class), .SDcols = vars] + # + # + # rg[, (vars) := lapply(.SD, + # \(x){ + # y <- deparse(substitute(x)) + # # y <- ..x + # fifelse(test = grepl("classified", x), + # paste(x, "by", y), + # x) + # })] + # + # + # rg[lending_type_code == "LNX", unique(lending_type)] + # + + + + rg[, lending_type := fifelse(grepl("classified", lending_type), + paste(lending_type, "by", "lending type"), + lending_type)] + + + + rg[, income_level := fifelse(grepl("classified", income_level), + paste(income_level, "by", "income level"), + income_level)] + + + # Create the World + + rg[, `:=`( + world = "World", + world_code = "WLD" + )] + + + + # janitor::tabyl(rg, region_code, admin_region_code) + # janitor::tabyl(rg, region, admin_region) + # janitor::tabyl(rg, region, pip_region) + + # ____________________________________________________________________________ + # Clean and Save #### + + + rg[, + c( "region_code", "region") := NULL] + + + setnames(x = rg, + old = c("code", "country", "pip_region", "pip_region_code"), + new = c("country_code", "country_name", "region", "region_code") ) + + + + ## Order columns alphabetically ------------ + varn <- names(rg) + setcolorder(rg, sort(varn)) + setcolorder(rg, c("country_code", "country_name")) + + + ## Remove categoeries that we don't need --------- + + rm_agg <- c("fcv", "lending_type", "admin_region") + rm_agg <- c("fcv", "income_level", "lending_type", "admin_region") + + to_rm <- + rm_agg |> + paste0("_code") |> + c(rm_agg) + + rg[, (to_rm) := NULL] + + + # hardcode fixing of TWN's name + rg[country_code == "TWN", + country_name := "Taiwan, China"] + + + rg + + } + +#' Validate raw country list data +#' +#' @param cl raw country list data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("Country list raw data is not loaded" = !is.null(cl)) + + report <- data_validation_report() + + # country_list <- pipload::pip_load_aux("pfw") + country_list <- pipfun::load_from_gh(measure = "pfw", + owner = getOption("pipfun.ghowner"), + branch = "DEV", + ext = "dta") + + country_list <- unique(country_list[, code]) + + validate(cl, name = "CL raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + # validate_cols(in_set(country_list), + # country_code, description = "`country_code` values within range") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.character(africa_split), + description = "`africa_split` should be character") |> + validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), + africa_split, description = "`africa_split` values within range") |> + validate_if(is.character(africa_split_code), + description = "`africa_split_code` should be character") |> + validate_cols(in_set(c("AFE", "AFW", NA)), + africa_split_code, description = "`africa_split_code` values within range") |> + validate_if(is.character(pcn_region), + description = "`pcn_region` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(region_code), + description = "`region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + region_code, description = "`region_code` values within range") |> + validate_if(is.character(world), + description = "`world` should be character") |> + validate_cols(in_set(c("World")), + world, description = "`world` values within range") |> + validate_if(is.character(world_code), + description = "`world_code` should be character") |> + validate_cols(in_set(c("WLD")), + world_code, description = "`world_code` values within range") |> + validate_cols(not_na, country_code, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/pip_cp_clean.R b/R/aux_cp.R similarity index 73% rename from R/pip_cp_clean.R rename to R/aux_cp.R index 350ebf6..1db5cb1 100644 --- a/R/pip_cp_clean.R +++ b/R/aux_cp.R @@ -1,10 +1,44 @@ +#' Country Profiles +#' +#' Update a list with country profiles data +#' +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +aux_cp <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + measure <- "cp" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + aux_cp_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag) + } else { + + dl <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dl) + } +} + #' Clean country profile data #' #' @param x database from pip_cp_update #' @param file_names character: vector with names of files #' #' @return data.table -pip_cp_clean <- function(x, +aux_cp_clean <- function(x, file_names) { # ____________________________________________________________________ @@ -30,7 +64,7 @@ pip_cp_clean <- function(x, "reporting_year", "mpm_headcount", "ppp_year") - ], + ], all = TRUE, by = c("country_code", "reporting_year") ) @@ -90,7 +124,7 @@ pip_cp_clean <- function(x, ][, .SD[which.max(reporting_year)], by = c("country_code", "ppp_year") - ] + ] } @@ -163,8 +197,8 @@ pip_cp_clean <- function(x, ) setnames(x = ki4, - old = c("b40", "tot"), - new = c("share_below_40", "share_total")) + old = c("b40", "tot"), + new = c("share_below_40", "share_total")) key_indicators <- append(key_indicators, list(shared_prosperity = ki4)) @@ -233,7 +267,7 @@ pip_cp_clean <- function(x, "shared_prosperity", "ppp_year", "reporting_level") - ] + ] ) ## end of chart lists cp <- list(key_indicators = key_indicators, charts = charts) @@ -312,3 +346,87 @@ clean_cp_names <- function(x) { return(x) } + +#' Update Country Profiles +#' +#' Update a list with country profiles data +#' +#' @inheritParams aux_cp +#' @keywords internal +aux_cp_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + measure <- "cp" + branch <- match.arg(branch) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## chart files -------- + + file_names <- + c( + "indicator_values_country_chart4", + "indicator_values_country_KI1", + "indicator_values_country_chart1_chart2_KI2_data", + "indicator_values_country_chart1_chart2_KI2_ID", + "indicator_values_country_chart5", + "indicator_values_country_chart3", + "indicator_values_country_chart6_KI4", + "indicator_values_country_KI5_KI6_KI7" + ) + + + raw_files <- purrr::map(.x = file_names, + .f = ~{ + pipfun::load_from_gh( + measure = "cp", + owner = owner, + branch = branch, + filename = .x) + }) + + + dl <- aux_cp_clean(raw_files, + file_names) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## download files -------- + fl_files <- c("flat_cp", "flat_shp") + + raw_fl <- purrr::map(.x = fl_files, + .f = ~{ + x <- pipfun::load_from_gh( + measure = "cp", + owner = owner, + branch = branch, + filename = .x, + ext = "dta") + setnames(x, "year", "reporting_year", + skip_absent=TRUE) + }) + names(raw_fl) <- fl_files + dl <- append(dl, list(flat = raw_fl)) + + + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## save -------- + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = dl, + measure = measure, + msrdir = msrdir, + force = force + ) + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## return -------- + + return(invisible(saved)) +} + diff --git a/R/aux_cpi.R b/R/aux_cpi.R new file mode 100644 index 0000000..324816c --- /dev/null +++ b/R/aux_cpi.R @@ -0,0 +1,497 @@ +#' PIP CPI +#' +#' Load or update PIP CPI data. +#' +#' @param action character: Either "load" or "update". Default is "update". If +#' "update" data will be updated on the system. If "load" data is loaded in +#' memory. +#' @param maindir character: Main directory of project. +#' @param force logical: If TRUE data will be overwritten. +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams pipfun::load_from_gh +#' +#' @export +#' @import data.table +aux_cpi <- function(action = c("update", "load"), + maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + # ____________________________________________________________________________ + # on.exit #### + on.exit({ + + }) + + # ____________________________________________________________________________ + # Defenses #### + measure <- "cpi" + action <- match.arg(action) + branch <- match.arg(branch) + + stopifnot( exprs = { + + } + ) + + # ____________________________________________________________________________ + # Early returns #### + if (FALSE) { + return() + } + + # ____________________________________________________________________________ + # Computations #### + if (action == "update") { + aux_cpi_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + detail = detail) + } + else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } + + +} + +#' Clean CPI data +#' +#' Clean CPI data from Datalibweb to meet PIP protocols. +#' +#' @param y dataset with CPI data from `aux_cpi_update()`. +#' @param cpivar character: CPI variable to be used as default. Currently it is +#' "cpi2011". +#' @inheritParams aux_cpi_update +#' +#' @keywords internal +aux_cpi_clean <- function(y, + cpivar = getOption("pipaux.cpivar"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + x <- data.table::as.data.table(y) + + # vars to keep + keep_vars <- c( + "country_code", "cpi_year", "survey_year", + "cpi", "ccf", "survey_acronym", "change_cpi2011", + grep("^cpi", names(x), value = TRUE) + ) + + # modifications to the database + x[ + , + c("cur_adj", "ccf") + := { + cur_adj <- ifelse(is.na(cur_adj), 1, cur_adj) + ccf <- 1 / cur_adj + + list(cur_adj, ccf) + } + ][ + , + `:=`( + country_code = code, + cpi_year = as.integer(year), + survey_year = round(ref_year, 2), + cpi = get(cpivar), + survey_acronym = survname, + cpi_domain = as.character(cpi_domain), + cpi_data_level = as.character(cpi_data_level) + ) + ][ + , + # This part should not exist if the raw data + # had been created properly + cpi_data_level := fcase( + tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "0", "rural", + tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "1", "urban", + tolower(cpi_domain) %chin% c("national", "1") & cpi_data_level %chin% c("2", "", NA_character_), "national", + default = "" + ) + ] + # keep final vars + x <- x[, ..keep_vars ] + + x <- unique(x) # remove duplicates + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + x <- x[country_code %in% cl$country_code] + + + return(x) +} + +#' Update CPI +#' +#' @inheritParams aux_cpi +#' @keywords internal +aux_cpi_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + # ____________________________________________________________________________ + # Set up #### + + measure <- "cpi" + branch <- match.arg(branch) + + + # ____________________________________________________________________________ + # load raw data #### + + cpi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) + + # validate cpi raw data + cpi_validate_raw(cpi, detail = detail) + + # ____________________________________________________________________________ + # Cleaning #### + + # Clean data + cpi <- aux_cpi_clean(cpi, + maindir = maindir, + branch = branch) + + # drop cpi_domain + cpi <- cpi[, -c("cpi_domain")] + + # changae cpi_year and cpi_data_level to year and reporting_level + cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(cpi, "aux_name", "cpi") + setattr(cpi, + "aux_key", + c("country_code", "year", "reporting_level", "survey_acronym")) + + # validate cpi clean data before saving it + cpi_validate_output(cpi, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = cpi, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) +} + +#' Check CPI Vintage +#' +#' @param msrdir character: measure directory. +#' @param dlwdir character: Datalibweb directory +#' @param force logical: If TRUE force update of vintage level 1. +#' +#' @keywords internal +aux_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), + dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), + force = FALSE) { + time <- format(Sys.time(), "%Y%m%d%H%M%S") # find a way to account for time zones + measure <- "cpi" + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + #--------- Prepar3 date --------- + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # get directories + cpi_files <- fs::dir_ls(dlwdir, regexp = "GMD_CPI\\.dta$", recurse = TRUE, type = "file") + + # load data + last_file <- max(cpi_files) + vintage <- load_cpi(last_file) + + tokeep <- names(vintage) |> + {\(.) grep("^cpi[0-9]{4}", ., value = TRUE)}() |> + c("code", "year", "survname", "cpi_data_level", "cpi_ppp_id") + + vintage <- vintage[, ..tokeep] + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + #--------- check version and save --------- + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + # save file + sfile <- fs::path(msrdir, "cpi_vintage.rds") + + equal_vintage <- TRUE + if (fs::file_exists(sfile)) { + + cfile <- readr::read_rds(sfile) + attr(cfile, "time") <- NULL # remove attributes + attr(cfile, "user") <- NULL # remove attributes + cf_vt <- all.equal(cfile, vintage) + + if (inherits(cf_vt, "character")) { + equal_vintage <- FALSE + } + } else { + equal_vintage <- FALSE + } + + if (equal_vintage == FALSE || force == TRUE) { + attr(vintage, "time") <- time + attr(vintage, "user") <- Sys.info()[8] + + readr::write_rds( + x = vintage, + file = sfile + ) + } + + return(!equal_vintage) +} # end of vintage_level_2 + +#' Load cpi files and create CPI ID variable +#' @param x character: cpi file name +#' @return data frame +load_cpi <- function(x) { + cpi_ppp_id <- gsub("(.*/Support_2005_)([^/]+)(_CPI\\.dta$)", "\\2", x) + df <- haven::read_dta(x) + df$cpi_ppp_id <- cpi_ppp_id + + to_keep <- c("label") + + to_keep_regx <- paste(to_keep, collapse = "|") + + nn <- names(df) + for (x in seq_along(nn)) { + ats <- attributes(df[[x]]) + atsn <- names(ats) + to_remove <- atsn[!grepl(to_keep_regx, atsn)] + + for (i in seq_along(to_remove)) { + attr(df[[x]], to_remove[i]) <- NULL + } + } + + data.table::setDT(df) + return(df) +} + +#' Validate raw cpi data +#' +#' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("CPI raw data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.character(cpi_domain), + description = "`cpi_domain` should be character") |> + validate_cols(in_set(c("National", "Urban/Rural")), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") |> + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011), + description = "`cpi201`1 should be numeric") |> + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") |> + validate_if(is.character(version), + description = "`version` should be character") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.numeric(cur_adj), + description = "`cur_adj` should be numeric") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U", NA)), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.numeric(cpi2011_SM22), + description = "`cpi2011_SM22` should be numeric") |> + validate_if(is.numeric(comparable), + description = "`comparable` should be numeric") |> + validate_if(is.numeric(cpi2017_SM22), + description = "`cpi2017_SM22` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> + validate_if(is.numeric(cpi_data_level), + description = "`cpi_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + cpi_data_level, description = "`cpi_data_level` values within range") |> + validate_if(is.numeric(ref_year_SM24), + description = "`ref_year_SM24` should be numeric") |> + validate_if(is.numeric(cpi2011_SM24), + description = "`cpi2011_SM24` should be numeric") |> + validate_if(is.numeric(cpi2017_SM24), + description = "`cpi2011_SM24` should be numeric") |> + validate_if(is.numeric(change_cpi2017), + description = "`change_cpi2017` should be numeric") |> + validate_if(is.numeric(change_icp2017), + description = "`change_icp2017` should be numeric") |> + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") |> + validate_if(is.numeric(change_icp2011), + description = "`change_icp2011` should be numeric") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") |> + validate_cols(not_na, code, year, survname, cpi_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname, cpi_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean cpi data +#' +#' @param cpi clean cpi data, output via `aux_cpi_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ + + stopifnot("CPI clean data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.numeric(cpi), + description = "`cpi` should be numeric") |> + validate_if(is.numeric(ccf), + description = "`ccf` should be numeric") |> + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") |> + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") |> + validate_cols(in_set(c(0, 1)), change_cpi2011, + description = "`change_cpi2011` values within range") |> + # validate_if(is.character(cpi_domain), + # description = "`cpi_domain` should be character") |> + # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, + # description = "`cpi_domian` values within range") |> + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") |> + validate_cols(in_set(c(0, 1)), cpi_domain_value, + description = "`cpi_domain_value` values within range") |> + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011), + description = "`cpi2011` should be numeric") |> + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") |> + # validate_if(is.numeric(cpi2011_SM22), + # description = "`cpi2011_SM22` should be numeric") |> + # validate_if(is.numeric(cpi2017_SM22), + # description = "`cpi2017_SM22` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> + # validate_if(is.numeric(cpi2011_AM23), + # description = "`cpi2011_AM23` should be numeric") |> + # validate_if(is.numeric(cpi2017_AM23), + # description = "`cpi2017_AM23` should be numeric") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") |> + validate_cols(not_na, country_code, year, survey_acronym, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), + description = "no duplicate records in key variables") |> + validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), + description = "no duplicate cpi values") |> + add_results(report) + + num_var_list1 <- grep("cpi2011_", colnames(cpi)) + num_var_list2 <- grep("cpi2017_", colnames(cpi)) + num_var_list <- c(num_var_list1, num_var_list2) + + for (i in 1:length(num_var_list)) { + validate(cpi, name = "CPI validation") |> + validate_cols(is.numeric, num_var_list[i], + description = "variables (with numeric var name) should be numeric") |> + add_results(report) + } + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } +} + + + + diff --git a/R/aux_data_files.R b/R/aux_data_files.R new file mode 100644 index 0000000..38d8c68 --- /dev/null +++ b/R/aux_data_files.R @@ -0,0 +1,136 @@ +#' Attache key values into auxiliary file +#' +#' @param aux_file auxiliary file +#' +#' @return data.table with key values +#' @export +#' +aux_data <- function(aux_file){ + + # list of possible auxiliary keys -------------------------------------------- + keycolsg0 <- c("country_code") # countries, country_list + keycolsg1 <- c("country_code", "surveyid_year") # maddison, weo, npl, income_group + keycolsg2 <- c("country_code", "surveyid_year", "reporting_level") # gdp, pce, pop + keycolsg3 <- c("country_code", "surveyid_year", "reporting_level") # gdm + keycolsg4 <- c("country_code", "surveyid_year", "survey_acronym", "reporting_level") # cpi "survey_acronym" + keypfw <- c("country_code", "surveyid_year", "year", "survey_acronym", + "reporting_level") + + # list of all the auxiliary files + aux_file_names <- c("pfw", "cpi", "gdp", "gdm", "pce", "pop", "ppp", "maddison", + "weo", "npl", "countries", "country_list", "regions", + "income_groups", "metadata") + + if (deparse(substitute(aux_file)) %chin% aux_file_names) { + + # pfw --------------------------------------------------------------------- + if (deparse(substitute(aux_file)) == "pfw"){ + + setkeyv(pfw, + c("country_code", "survey_year", "survey_acronym", "cpi_domain")) + + # generate a dataset that can be used to add reporting_level variable to pfw data + pfw_key <- pip_pfw_key() + + aux_file <- pfw_key[aux_file] |> + setkeyv(keypfw) + } else if (deparse(substitute(aux_file)) == "ppp"){ + + # ppp -------------------------------------------------------------------- + # filter ppp based on defualt ppp value + aux_file <- ppp[ppp_default == TRUE, + .(country_code, ppp_year, ppp, ppp_data_level)] + + setnames(aux_file, "ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setkeyv(aux_file, c("country_code", "reporting_level")) + + } else if (deparse(substitute(aux_file)) == "cpi"){ + + # cpi -------------------------------------------------------------------- + # rename two variables cpi_year to surveyid_year and cpi_data_level to reporting_level + aux_file <- aux_file |> + setnames(c("cpi_year", "cpi_data_level"), + c("surveyid_year", "reporting_level"), + skip_absent=TRUE) + + setkeyv(aux_file, c("country_code", "surveyid_year", "survey_acronym", "reporting_level")) #keycolsg4) + + } else if (deparse(substitute(aux_file)) == "gdm"){ + + # gdm -------------------------------------------------------------------- + aux_file <- aux_file |> + setnames("pop_daaux_file ta_level", "reporting_level", + skip_absent=TRUE) + + setkeyv(aux_file, keycolsg3) + + } else if (deparse(substitute(aux_file)) == "npl"){ + + # npl -------------------------------------------------------------------- + aux_file <- aux_file |> + setnames("reporting_year", "surveyid_year", + skip_absent=TRUE) + + setkeyv(aux_file, keycolsg1) + + } else if (deparse(substitute(aux_file)) == "income_groups"){ + + # income_groups ------------------------------------------------------------ + # rename year_data into surveyid_year + aux_file <- aux_file |> + setnames("year_data", "surveyid_year", + skip_absent=TRUE) + + setkeyv(aux_file, keycolsg1) + + } else if (deparse(substitute(aux_file)) == "countries"){ + + # countries ---------------------------------------------------------------- + setkeyv(aux_file, keycolsg0) + + } else if (deparse(substitute(aux_file)) == "country_list"){ + + # country_list-------------------------------------------------------------- + setkeyv(aux_file, keycolsg0) + + } else if (deparse(substitute(aux_file)) == "metadata"){ + + # metadata ----------------------------------------------------------------- + setkeyv(aux_file, keycolsg5) + + } else if (deparse(substitute(aux_file)) %chin% c("maddison", "weo")){ + + # auxiliary datasets - group 1 (maddison and weo) -------------------------- + aux_file |> + setnames("year", "surveyid_year", + skip_absent=TRUE) + + setkeyv(aux_file, keycolsg1) + + } else if (deparse(substitute(aux_file)) %chin% c("gdp", "pop", "pce")){ + + # auxiliary datasets - group 2 (gdp, pop, pce) --------------------------- + aux_data_level <- paste0(deparse(substitute(aux_file)), "_data_level") + + aux_file |> + setnames(c(aux_data_level, "year"), + c("reporting_level", "surveyid_year"), + skip_absent=TRUE) + + setkeyv(aux_file, keycolsg2) + + } + + return(aux_file) + + } else { + + return(aux_file) + + } + +} + + diff --git a/R/pip_dictionary.R b/R/aux_dictionary.R similarity index 92% rename from R/pip_dictionary.R rename to R/aux_dictionary.R index 513ca0f..c5c129b 100644 --- a/R/pip_dictionary.R +++ b/R/aux_dictionary.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with the indicators master sheet. #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_dictionary <- function(action = c("update", "load"), +aux_dictionary <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/aux_gdm.R b/R/aux_gdm.R new file mode 100644 index 0000000..77e7786 --- /dev/null +++ b/R/aux_gdm.R @@ -0,0 +1,401 @@ +#' PIP GDM +#' +#' Load or update grouped data means dataset from PovcalNet Masterfile. See +#' details. +#' +#' Survey means cannot be automatically calculated for grouped data, so at some +#' stage the mean needs to be entered manually. This function reads from the PCN +#' Masterfile to ensure that PCN and PIP uses the same data means. +#' +#' The dependency on the PCN Masterfile should be changed in the future. +#' +#' @inheritParams aux_cpi +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams pipfun::load_from_gh +#' @export +aux_gdm <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + measure <- "gdm" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + + aux_gdm_update(force = force, + maindir = maindir, + owner = owner, + branch = branch, + tag = tag, + detail = detail) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} + +#' Update GDM +#' +#' Update GDM data using the PovcalNet Masterfile. +#' +#' @inheritParams aux_gdm +#' @keywords internal +aux_gdm_update <- function(force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "gdm" + branch <- match.arg(branch) + + # _________________________________________________________ + # Load raw file #### + + df <- pipfun::load_from_gh(measure = "gdm", + owner = owner, + branch = branch, + tag = tag, + ext = "csv") + + # validate gdm raw data + gdm_validate_raw(gdm = df, detail = detail) + + # ____________________________________________________________________________ + # Transform dataset #### + + # Select for grouped data surveys + df <- df[grepl("[.]T0[1,2,5]$", + df$DistributionFileName, + ignore.case = TRUE), ] + + # Select and rename columns + old_nms <- c( + "CountryCode", + "SurveyTime", + "DataType", + "Coverage", + "SurveyMean_LCU", + "DistributionFileName", + "SurveyID" + ) + + new_nms <- c( + "country_code", + "survey_year", + "welfare_type", + "pop_data_level", + "survey_mean_lcu", + "pcn_source_file", + "pcn_survey_id" + ) + + setnames(df, old_nms, new_nms) + + df <- df[, ..new_nms] + + # Recode columns + df[, + c("pop_data_level", "welfare_type", "survey_coverage") := + { + x <- tolower(pop_data_level) + + y <- tolower(welfare_type) + y <- fifelse(y == "x", "consumption", "income") + + z <- fifelse(country_code %in% c("CHN", "IDN", "IND"), + "national", pop_data_level) + + list(x, y, z) + } + ] + + + df[, + distribution_type := fifelse(pop_data_level == "national", + "group", + "aggregate") + ][, + gd_type := sub(".*[.]", "", pcn_source_file) + ] + + + ## ............................................................................ + ## Merge with PFW #### + + # pip_pfw(maindir = maindir, + # force = force, + # owner = owner, + # branch = branch, + # tag = tag) + + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + # Subset columns + pfw <- + pfw[, c( + "country_code", + "welfare_type", + "surveyid_year", + "survey_year", + "survey_acronym", + "inpovcal" + )] + + # Merge to add surveyid_year + tmp <- pfw[, c("country_code", "surveyid_year", "survey_year")] + df <- merge(df, tmp, + all.x = TRUE, + by = c("country_code", "survey_year") + ) + + # Merge to add survey_acronym and inpovcal + df <- merge(df, pfw, + all.x = TRUE, + by = c( + "country_code", "surveyid_year", + "survey_year", "welfare_type" + ) + ) + + # Filter to select surveys in PovcalNet + df <- df[inpovcal == 1] + df <- na.omit(df, "inpovcal") + + + ## ............................................................................ + ## Merge with inventory #### + + inv <- fst::read_fst(fs::path(maindir, "_inventory/inventory.fst"), + as.data.table = TRUE) + + # Create survey_id column + inv[, + survey_id := sub("[.]dta", "", filename) + ][, + surveyid_year := as.numeric(surveyid_year) + ] + + # Subset GD rows + inv <- inv[module == "PC-GROUP"] + + # Subset columns + inv <- inv[, c("country_code", + "surveyid_year", + "survey_acronym", + "survey_id")] + + # Merge to add PIP survey_id + df <- merge(df, inv, + all.x = TRUE, + by = c( + "country_code", "surveyid_year", + "survey_acronym" + ) + ) + + + # ---- Finalize table ---- + + # Select columns + df <- df[, c( + "country_code", + "surveyid_year", + "survey_year", + "welfare_type", + "survey_mean_lcu", + "distribution_type", + "gd_type", + "pop_data_level", + "pcn_source_file", + "pcn_survey_id", + "survey_id" + )] + + df[, survey_id := toupper(survey_id)] + + # Convert LCU means to daily values + # df$survey_mean_lcu <- df$survey_mean_lcu * (12/365) + + # Sort rows + setorder(df, country_code, surveyid_year, pop_data_level) + + # Sort columns + setcolorder(df, "survey_id") + + + + ## ............................................................................ + ## Remove any non-WDI countries #### + + aux_country_list(maindir = maindir, + force = force, + branch = branch) + + cl <- load_aux(measure = "country_list", + maindir = maindir, + branch = branch) + + df <- df[country_code %in% cl$country_code] + + + # ---- Save and sign ---- + df <- df |> setnames(c("surveyid_year", "pop_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(df, "aux_name", "gdm") + setattr(df, + "aux_key", + c("country_code", "year", "reporting_level", "welfare_type")) + + # validate gdm output data + gdm_validate_output(gdm = df, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = df, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) +} + +#' Validate raw gdm data +#' +#' @param gdm raw gdm data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ + + stopifnot("GDM raw data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM raw data validation") |> + validate_if(is.character(Region), + description = "`Region` should be character") |> + validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), + Region, description = "`Region` values within range") |> + validate_if(is.character(countryName), + description = "`countryName` should be character") |> + validate_if(is.character(Coverage), + description = "`Coverage` should be character") |> + validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), + Coverage, description = "`Coverage` values within range") |> + validate_if(is.character(CountryCode), + description = "`CountryCode` should be character") |> + validate_if(is.numeric(SurveyTime), + description = "`SurveyTime` should be numeric") |> + validate_if(is.numeric(CPI_Time), + description = "`CPI_Time` should be numeric") |> + validate_if(is.character(DataType), + description = "`DataType` should be character") |> + validate_cols(in_set(c("x", "X", "y", "Y")), + DataType, description = "`DataType` values within range") |> + validate_if(is.numeric(SurveyMean_LCU), + description = "`SurveyMean_LCU` should be numeric") |> + validate_if(is.numeric(currency), + description = "`currency` should be numeric") |> + validate_if(is.character(source), + description = "`source` should be character") |> + validate_if(is.character(SurveyID), + description = "`SurveyID` should be character") |> + validate_if(is.numeric(SurveyMean_PPP), + description = "`SurveyMean_PPP` should be numeric") |> + validate_if(is.character(DistributionFileName), + description = "`DistributionFileName` should be character") |> + validate_cols(is.logical, Comment, description = "Comment should be logical") |> + validate_cols(not_na, CountryCode, Coverage, SurveyTime, DataType, + description = "no missing values in key variables") |> + validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean gdm data +#' +#' @param gdm clean gdm data, output via `pipfun::pip_gdm_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ + + stopifnot("GDM output data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM output data validation") |> + validate_if(is.character(survey_id), + description = "`survey_id` should be character") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), welfare_type, + description = "`welfare_type` values within range") |> + validate_if(is.numeric(survey_mean_lcu), + description = "`survey_mean_lcu` should be numeric") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregate", "group")), distribution_type, + description = "`distribution_type` values within range") |> + validate_if(is.character(gd_type), + description = "`gd_type` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> + validate_if(is.character(pcn_source_file), + description = "`pcn_source_file` should be character") |> + validate_if(is.character(pcn_survey_id), + description = "`pcn_survey_id` should be character") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } +} + diff --git a/R/pip_gdp_update.R b/R/aux_gdp.R similarity index 51% rename from R/pip_gdp_update.R rename to R/aux_gdp.R index ef57ebb..f36f4bc 100644 --- a/R/pip_gdp_update.R +++ b/R/aux_gdp.R @@ -1,11 +1,218 @@ +#' PIP GDP +#' +#' Update or load GDP data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" +#' and "gh" are synonymous +#' @export +aux_gdp <- function(action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = "file", + detail = getOption("pipaux.detail.raw")) { + + measure <- "gdp" + branch <- match.arg(branch) + action <- match.arg(action) + + + if (action == "update") { + aux_gdp_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + from = from, + detail = detail) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} # end of aux_gdp + +#' Fetch GDP data from WEO +#' +#' Create a dataset with GDP data from World Economic Outlook. +#' +#' Note that the most recent version most be downloaded from imf.org and saved +#' as an .xls file in `/_aux/weo/`. The filename should be in the +#' following structure `WEO_.xls`. Due to potential file corruption +#' the file must be opened and re-saved before it can be updated with +#' `aux_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file +#' that's not really xls. +#' +#' @inheritParams aux_prices +#' @export +aux_gdp_weo <- function(action = "update", + force = FALSE, + maindir = gls$PIP_DATA_DIR) { + measure <- "weo" + msrdir <- fs::path(maindir, "_aux/", measure) # measure dir + + if (action == "update") { + + # ---- Load data from disk ---- + + # Get latest version of file (in case there are more) + dir <- sprintf("%s_aux/weo/", maindir) + weo_files <- list.files(dir, pattern = "WEO_.*[.]xls") + weo_latest <- weo_files %>% + gsub("WEO_|.xls", "", .) %>% + as.POSIXlt() %>% + max() %>% + as.character() %>% + sprintf("%s_aux/weo/WEO_%s.xls", maindir, .) + + # Read data + dt <- readxl::read_xls( + weo_latest, + sheet = 1, na = "n/a", + col_types = "text" + ) + dt <- setDT(dt) + + # Clean column names + dt <- janitor::clean_names(dt) + + # ---- Data transformations ---- + + # Select rows w/ data on real gdp per capita + dt <- dt[weo_subject_code %in% + c("NGDPRPC", "NGDPRPPPPC", "NGDP_R")] + + # Fix country codes + dt[ + , + iso := fifelse( + iso == "WBG", "PSE", iso # West Bank & Gaza + ) + ] + dt[ + , + iso := fifelse( + iso == "UVK", "XKX", iso # Kosovo + ) + ] + + # Replace subject codes + dt[ + , + subject_code := fcase( + weo_subject_code == "NGDPRPC", "weo_gdp_lcu", + weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017", + weo_subject_code == "NGDP_R", "weo_gdp_lcu_notpc" + ) + ] + + # Reshape to long format + dt <- dt %>% + melt( + id.vars = c("iso", "subject_code"), + measure.vars = names(dt)[grepl("\\d{4}", names(dt))], + value.name = "weo_gdp", variable.name = "year" + ) + setnames(dt, "iso", "country_code") + + # Convert year and GDP to numeric + dt$year <- sub("x", "", dt$year) %>% as.numeric() + dt$weo_gdp <- suppressWarnings(as.numeric(dt$weo_gdp)) + + # Remove rows w/ missing GDP + dt <- dt[!is.na(dt$weo_gdp)] + + # Remove current year and future years + current_year <- format(Sys.Date(), "%Y") + dt <- dt[dt$year < current_year] + + # Reshape to wide for GDP columns + dt <- dt %>% + dcast( + formula = country_code + year ~ subject_code, + value.var = "weo_gdp" + ) + + # ---- Merge with population ---- + + pop <- aux_pop("load", maindir = maindir) + setDT(pop) + pop <- pop[pop_data_level == "national", ] + dt[pop, + on = .(country_code, year), + `:=`( + pop = i.pop + ) + ] + + # Calculate per capita value for NGDP_R + dt[ + , + weo_gdp_lcu := fifelse( + is.na(weo_gdp_lcu), weo_gdp_lcu_notpc / pop, weo_gdp_lcu + ) + ] + + + # ---- Chain PPP and LCU GDP columns ---- + + # Chain LCU on PPP column + dt <- chain_values( + dt, + base_var = "weo_gdp_ppp2017", + replacement_var = "weo_gdp_lcu", + new_name = "weo_gdp", + by = "country_code" + ) + + + # --- Sign and save ---- + + # Select final columns + dt <- dt[, c("country_code", "year", "weo_gdp")] + + # Save dataset + aux_sign_save( + x = dt, + measure = measure, + msrdir = msrdir, + force = force + ) + } else if (action == "load") { + dt <- load_aux( + maindir = maindir, + measure = measure + ) + return(dt) + } else { + rlang::abort(c("`action` must be `update` or `load`", + x = paste0("you provided `", action, "`") + )) + } +} + + + + + + #' Update GDP #' #' Update GDP data using WDI, Maddison and Special cases. #' -#' @inheritParams pip_gdp +#' @inheritParams aux_gdp #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, +aux_gdp_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -17,8 +224,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, measure <- "gdp" -# _________________________________________ -# Update data #### + # _________________________________________ + # Update data #### # # Update Maddison Project Data # pip_maddison(force = force, @@ -38,8 +245,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # branch = branch) # -# ____________________________________________________________________________ -# Load Data #### + # ____________________________________________________________________________ + # Load Data #### madd <- load_aux(measure = "maddison", maindir = maindir, @@ -90,10 +297,10 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch) -# ____________________________________________________________________________ -# Clean data #### + # ____________________________________________________________________________ + # Clean data #### -##--------- Clean GDP from WDI --------- + ##--------- Clean GDP from WDI --------- # Keep relevant variables wgdp <- wgdp[, .(country_code, year, wdi_gdp)] @@ -120,10 +327,10 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # Calculate adjusted GDP for calendar year wgdp[, wdi_gdp_cy := fifelse(!is.na(alpha), - fifelse(alpha < 0.5 , - alpha * wdi_gdp_lag + (1 - alpha) * wdi_gdp, - alpha * wdi_gdp + ( 1 - alpha) * wdi_gdp_lead), - NA_real_) + fifelse(alpha < 0.5 , + alpha * wdi_gdp_lag + (1 - alpha) * wdi_gdp, + alpha * wdi_gdp + ( 1 - alpha) * wdi_gdp_lead), + NA_real_) ] wgdp[, wdi_gdp_tmp := fifelse(!is.na(alpha), wdi_gdp_cy, wdi_gdp) @@ -163,8 +370,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # Chain WEO on WDI gdp[, new_gdp := chain_val(ori_var = wdi_gdp, - rep_var = weo_gdp), - by = country_code] + rep_var = weo_gdp), + by = country_code] # gdp <- chain_values( @@ -177,7 +384,7 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # Chain Maddison on new GDP column gdp[, gdp := chain_val(ori_var = new_gdp, - rep_var = mpd_gdp), + rep_var = mpd_gdp), by = country_code] # gdp <- chain_values( @@ -263,8 +470,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # Recode domain and data_level variables cols <- c("gdp_domain", "gdp_data_level") gdp[, - (cols) := lapply(.SD, as.character), - .SDcols = cols + (cols) := lapply(.SD, as.character), + .SDcols = cols ][ , # recode domain gdp_domain := fcase( @@ -355,3 +562,47 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, return(invisible(saved)) } + +#' Validate output gdp data +#' +#' @param gdp output gdp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ + + stopifnot("GDP output data is not loaded" = !is.null(gdp)) + + report <- data_validation_report() + + validate(gdp, name = "GDP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + validate_if(is.numeric(gdp), + description = "`gdp` should be numeric") |> + # validate_if(is.character(gdp_domain), + # description = "`gdp_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # gdp_domain, description = "`gdp_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + }} + diff --git a/R/pip_income_groups.R b/R/aux_income_groups.R similarity index 51% rename from R/pip_income_groups.R rename to R/aux_income_groups.R index 5b43c71..9b2b29c 100644 --- a/R/pip_income_groups.R +++ b/R/aux_income_groups.R @@ -4,10 +4,10 @@ #' available in the PIP-Technical-Team group but in the Povcalnet-team group. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_income_groups <- function(action = c("update", "load"), +aux_income_groups <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, @@ -86,3 +86,55 @@ pip_income_groups <- function(action = c("update", "load"), } } + +#' Validate income group output data +#' +#' @param incgroup income group output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ + + stopifnot("Income group output data is not loaded" = !is.null(incgroup)) + + report <- data_validation_report() + + validate(incgroup, name = "Income group output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year_data), + description = "`year_data` should be numeric") |> + validate_if(is.character(income_group), + description = "`income_group` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + income_group, description = "`income_group` values within range") |> + validate_if(is.character(income_group_code), + description = "`income_group_code` should be character") |> + validate_cols(in_set(c("HIC", "LIC", "LMIC", "UMIC")), + income_group_code, description = "`income_group_code` values within range") |> + validate_if(is.character(incgroup_historical), + description = "`incgroup_historical` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + incgroup_historical, description = "`incgroup_historical` values within range") |> + validate_if(is.character(fcv_historical), + description = "`fcv_historical` should be character") |> + validate_if(is.character(ssa_subregion_code), + description = "`ssa_subregion_code` should be character") |> + validate_cols(not_na, country_code, year_data, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year_data), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/pip_indicators.R b/R/aux_indicators.R similarity index 94% rename from R/pip_indicators.R rename to R/aux_indicators.R index a6d127a..5f2d3c1 100644 --- a/R/pip_indicators.R +++ b/R/aux_indicators.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with the indicators master sheet. #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export -pip_indicators <- function(action = c("update", "load"), +aux_indicators <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_aux_labels.R b/R/aux_labels_pip.R similarity index 98% rename from R/pip_aux_labels.R rename to R/aux_labels_pip.R index c4a4303..5464207 100644 --- a/R/pip_aux_labels.R +++ b/R/aux_labels_pip.R @@ -1,10 +1,10 @@ -#' pip_aux_labels +#' PIP Auxiliary Labels #' #' @param x Data frame to be labeled. #' @param measure type of data frame, e.g., "cpi" or "PPP". #' #' @keywords internal -pip_aux_labels <- function(x, measure) { +aux_labels_pip <- function(x, measure) { if (measure == "cpi") { # Label variables diff --git a/R/pip_maddison.R b/R/aux_maddison.R similarity index 52% rename from R/pip_maddison.R rename to R/aux_maddison.R index dc744c9..d70e6c7 100644 --- a/R/pip_maddison.R +++ b/R/aux_maddison.R @@ -3,11 +3,11 @@ #' Load or update data from the Maddison project. #' #' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @export #' @import data.table -pip_maddison <- function(action = c("update", "load"), +aux_maddison <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), force = FALSE, maindir = gls$PIP_DATA_DIR, @@ -60,3 +60,41 @@ pip_maddison <- function(action = c("update", "load"), return(df) } } + +#' Validate raw maddison data +#' +#' @param mpd raw mpd data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ + + stopifnot("mpd/ maddison raw data is not loaded" = !is.null(mpd)) + + report <- data_validation_report() + + validate(mpd, name = "mdp raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(mpd_gdp), + description = "`mpd_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_metadata.R b/R/aux_metadata.R new file mode 100644 index 0000000..d985254 --- /dev/null +++ b/R/aux_metadata.R @@ -0,0 +1,368 @@ +#' PIP Survey Metadata +#' +#' Update or load a dataset with survey metadata. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams load_raw_indicators +#' @export +aux_metadata <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "metadata" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + + aux_metadata_update( + maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + detail = detail + ) + + } else { + + load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + + } +} + +#' Update metadata file +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams pipfun::load_from_gh +#' @inheritParams aux_metadata +#' @return logical. TRUE if saved correctly. FALSE if error happened +#' @export +aux_metadata_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + measure <- "metadata" + branch <- match.arg(branch) + # ____________________________________________________________________________ + # Computations #### + + df <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv") + + # validate raw metdata data + metadata_validate_raw(metadata = df, detail = detail) + + # Load pfw + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + + + + # Create distribution type column (data type) + + pfw[, + domain_check := (gdp_domain == 2 | pce_domain == 2 | + pop_domain == 2 | cpi_domain == 2 | + ppp_domain == 2)] + + # order matters here + pfw[, + distribution_type := fcase( + use_imputed == 1, "micro, imputed", + use_microdata == 1, "micro", + use_groupdata == 1 & domain_check, "aggregated", + use_groupdata == 1, "group", + default = NA_character_ + ) + ] + + # Merge datasets (inner join) + df <- + merge(df, + pfw[, c("country_code", "ctryname", "surveyid_year", "survey_acronym", + "welfare_type", "reporting_year", "distribution_type", + "surv_producer","survey_coverage", "surv_title", + "link", "survey_year")], + by = "link", all.y = TRUE + ) + + # Recode colnames + setnames(x = df, + old = c("title", "surv_producer", "ctryname"), + new = c("survey_title", "survey_conductor", "country_name")) + df[, + survey_title := fifelse(is.na(survey_title), surv_title, survey_title) + ] + + # Select columns + df <- df[, + c( + "country_code", "country_name", "reporting_year", + "surveyid_year", "survey_year", "survey_acronym", + "survey_conductor", "survey_coverage", + "welfare_type", "distribution_type", + "survey_title", "year_start", "year_end", + "authoring_entity_name", "abstract", + "collection_dates_cycle", "collection_dates_start", + "collection_dates_end", + "sampling_procedure", "collection_mode", + "coll_situation", "weight", "cleaning_operations" + ) + ] + + # Create nested table + + df <- df[, .(.(.SD)), + keyby = .( + country_code, + country_name, + reporting_year, + survey_year, + surveyid_year, + survey_title, + survey_conductor, + survey_coverage, + welfare_type, + distribution_type + ) + ] + + setnames(df, old = "V1", new = "metadata") + + ## ............................................................................ + ## Save #### + df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) + + setattr(df, "aux_name", "metadata") + setattr(df, + "aux_key", + c("country_code", "year", "welfare_type")) + + # validate raw metdata data + metadata_validate_output(metadata = df, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = df, + measure = measure, + msrdir = msrdir, + force = force + ) + + # ____________________________________________________________________________ + # Return #### + return(invisible(saved)) + +} + +#' Metadata for PIP regions +#' +#' Update or load a dataset with regions. +#' +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +aux_metaregion <- function(action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch) +) { + measure <- "metaregion" + action <- match.arg(action) + branch <- match.arg(branch) + + if (action == "update") { + mr <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch) + + + ## ............................................................................ + ## Save data #### + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + saved <- pipfun::pip_sign_save( + x = mr, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) + + + } else { + df <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(df) + } + +} # end of function + +#' Validate raw metadata data +#' +#' @param metadata raw metadata data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +metadata_validate_raw <- function(metadata, detail = getOption("pipaux.detail.raw")){ + + stopifnot("metadata raw data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "metadata raw data validation") |> + validate_if(is.character(status), + description = "`status` should be character") |> + validate_if(is.character(reg), + description = "`reg` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAR", "SSA")), + reg, description = "`reg` values within range") |> + validate_if(is.numeric(id), + description = "`id` should be numeric") |> + validate_if(is.character(svy_id), + description = "`svy_id` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(title), + description = "`title` should be character") |> + validate_if(is.character(data_access), + description = "`data_access` should be character") |> + validate_if(is.numeric(year_start), + description = "`year_start` should be numeric") |> + validate_if(is.numeric(year_end), + description = "`year_end` should be numeric") |> + validate_if(is.character(authoring_entity_name), + description = "`authoring_entity_name` should be character") |> + validate_if(is.character(authoring_entity_affiliation), + description = "`authoring_entity_affiliation` should be character") |> + validate_if(is.character(contact_email), + description = "`contact_email` should be character") |> + validate_if(is.character(contact_uri), + description = "`contact_uri` should be character") |> + validate_if(is.character(abstract), + description = "`abstract` should be character") |> + validate_if(is.character(collection_dates_cycle), + description = "`collection_dates_cycle` should be character") |> + validate_if(is.character(collection_dates_start), + description = "`collection_dates_start` should be character") |> + validate_if(is.character(collection_dates_end), + description = "`collection_dates_end` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_if(is.character(sampling_procedure), + description = "`sampling_procedure` should be character") |> + validate_if(is.character(collection_mode), + description = "`collection_mode` should be character") |> + validate_if(is.character(coll_situation), + description = "coll_situation` should be character") |> + validate_if(is.character(weight), + description = "`weight` should be character") |> + validate_if(is.character(cleaning_operations), + description = "`cleaning_operations` should be character") |> + validate_if(is.character(coverage_notes), + description = "`coverage_notes` should be character") |> + validate_cols(not_na, svy_id, + description = "no missing values in key variables") |> + validate_if(is_uniq(svy_id), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate output metadata data +#' +#' @param metadata metadata data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ + + stopifnot("Metadata data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "Metadata output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(survey_title), + description = "`survey_title` should be character") |> + validate_if(is.character(survey_conductor), + description = "`survey_conductor` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), + distribution_type, description = "`distribution_type` values within range") |> + validate_cols(not_na, country_code, year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + + + + diff --git a/R/pip_missing_data.R b/R/aux_missing_data.R similarity index 98% rename from R/pip_missing_data.R rename to R/aux_missing_data.R index ce514eb..e8607da 100644 --- a/R/pip_missing_data.R +++ b/R/aux_missing_data.R @@ -1,12 +1,12 @@ #' Create table with missing countries #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' #' @return if `action = "update"` returns logical. If `action = "load"` returns #' a data.table #' @export -pip_missing_data <- function(action = c("update", "load"), +aux_missing_data <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/R/pip_nan.R b/R/aux_nan.R similarity index 93% rename from R/pip_nan.R rename to R/aux_nan.R index 9b51351..7f9afa6 100644 --- a/R/pip_nan.R +++ b/R/aux_nan.R @@ -2,12 +2,12 @@ #' #' Update nowcast data #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". "file" #' and "gh" are synonymous #' @export -pip_nan <- function(action = c("update", "load"), +aux_nan <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/aux_npl.R b/R/aux_npl.R new file mode 100644 index 0000000..d37ae48 --- /dev/null +++ b/R/aux_npl.R @@ -0,0 +1,171 @@ +#' National Poverty headcount +#' +#' Update series of national poverty lines +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +aux_npl <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## setup -------- + + measure <- "npl" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## update -------- + + npl <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "dta") |> + setDT() + + # validate npl raw data + npl_validate_raw(npl = npl, detail = detail) + + setnames(x = npl, + old = c("countrycode", "year", "vsi_pov_nahc_nc"), + new = c("country_code", "reporting_year", "nat_headcount"), + skip_absent = TRUE) + + npl[, c("region", "vsi_pov_nahc") := NULL] + npl[, nat_headcount := nat_headcount / 100] + + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## save -------- + npl <- npl |> setnames("reporting_year", "year", + skip_absent=TRUE) + + setattr(npl, "aux_name", "npl") + setattr(npl, + "aux_key", + c("country_code", "year")) + + # validate npl output data + npl_validate_output(npl = npl, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = npl, + measure = measure, + msrdir = msrdir, + force = force + ) + + + } else { + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## load -------- + + load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + + } +} + +#' Validate npl raw data +#' +#' @param npl raw npl data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("NPL raw data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + # validate_cols(in_set(c("AFE", "AFW", "EAP", "ECA", "LAC", "MNA", "SAR")), + # region, description = "`region` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc_nc), + description = "`vsi_pov_nahc_nc` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc), + description = "`vsi_pov_nahc` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate npl output data +#' +#' @param npl output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ + + stopifnot("NPL output data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(nat_headcount), + description = "`nat_headcount` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/pip_pce_update.R b/R/aux_pce.R similarity index 60% rename from R/pip_pce_update.R rename to R/aux_pce.R index 2d1bf5e..2cb80ec 100644 --- a/R/pip_pce_update.R +++ b/R/aux_pce.R @@ -1,11 +1,51 @@ +#' PIP PCE +#' +#' Load or update PCE data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_gdp +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +aux_pce <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + measure <- "pce" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + aux_pce_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + from = from, + detail = detail) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} + #' Update PCE #' #' Update PCE data using WDI and Special cases. #' -#' @inheritParams pip_gdp +#' @inheritParams aux_gdp #' @inheritParams pipfun::load_from_gh #' @keywords internal -pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, +aux_pce_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), @@ -16,8 +56,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, branch <- match.arg(branch) from <- match.arg(from) -# ________________________________________________________________ -# Load data #### + # ________________________________________________________________ + # Load data #### # Update WDI # pip_wdi_update(maindir = maindir, @@ -53,8 +93,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ) # validate sna_fy data sna_fy_validate_raw(sna_fy = sna_fy, detail = detail) -# ____________________________________________________________________________ -# Clean PCE from WDI #### + # ____________________________________________________________________________ + # Clean PCE from WDI #### # Keep relevant variables wpce <- wpce[, .(country_code, year, wdi_pce)] @@ -97,8 +137,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, pce <- wpce[, .(country_code, year, wdi_pce)] -# ____________________________________________________________________________ -# Special cases #### + # ____________________________________________________________________________ + # Special cases #### ## ---- Expand for special cases with U/R levels ---- @@ -124,7 +164,7 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, # Add domain column pce[, pce_domain := fifelse(pce_data_level == 2, 1, 2) - ] + ] # Sort setorder(pce, country_code, year, pce_data_level) @@ -134,8 +174,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, # Recode domain and data_level variables cols <- c("pce_domain", "pce_data_level") pce[, - (cols) := lapply(.SD, as.character), - .SDcols = cols + (cols) := lapply(.SD, as.character), + .SDcols = cols ][ , # recode domain pce_domain := fcase( @@ -156,9 +196,9 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ## ---- Hard-coded custom modifications ---- # get survey years where only PCE is present sna <- sna[!is.na(PCE) - ][, # lower case coverage - coverage := tolower(coverage) - ] + ][, # lower case coverage + coverage := tolower(coverage) + ] # If there are special countries if (nrow(sna) > 0) { @@ -166,18 +206,18 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, setnames(x = sna, old = c("countrycode", "coverage"), new = c("country_code", "pce_data_level") - ) + ) pce[sna, - on = .(country_code, year, pce_data_level), - `:=`( - sna_pce = i.PCE - ) + on = .(country_code, year, pce_data_level), + `:=`( + sna_pce = i.PCE + ) ] pce[, pce := fifelse(is.na(sna_pce),wdi_pce, sna_pce) - ] + ] # remvoe extra variables pce[, c("sna_pce", "wdi_pce") := NULL] @@ -188,8 +228,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, } -# _______________________________________________________________________ -# Hard-coded countries #### + # _______________________________________________________________________ + # Hard-coded countries #### # Remove observations for Venezuela after 2014 pce[ @@ -212,8 +252,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ] -# __________________________________________________________________ -# Finalize table #### + # __________________________________________________________________ + # Finalize table #### # Remove rows with missing GDP\ @@ -259,3 +299,49 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, return(invisible(saved)) } + +#' Validate output pce data +#' +#' @param pce output pce data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ + + stopifnot("PCE clean data is not loaded" = !is.null(pce)) + + report <- data_validation_report() + + validate(pce, name = "PCE output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pce), + description = "`pce` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + # validate_if(is.character(pce_domain), + # description = "`pce_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pce_domain, description = "`pce_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/aux_pfw.R b/R/aux_pfw.R new file mode 100644 index 0000000..4dd4aca --- /dev/null +++ b/R/aux_pfw.R @@ -0,0 +1,587 @@ +#' PIP PFW +#' +#' Load or update PIP Price Framework data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @param action character: Either "load" or "update". Default is "update". If +#' "update" data will be updated on the system. If "load" data is loaded in memory. +#' @param maindir character: Main directory of project. +#' @param force logical: If TRUE data will be overwritten. +#' @inheritParams pipfun::load_from_gh +#' @export +#' @import data.table +aux_pfw <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "pfw" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + aux_pfw_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + detail = detail) + + } else { + + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} +#' Clean PFW +#' +#' Clean PFW data from Datalibweb to meet PIP protocols. +#' +#' @param y dataset with PPP data from `aux_pfw_update()`. +#' @inheritParams load_aux +#' +#' @keywords internal +aux_pfw_clean <- function(y, + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + branch <- match.arg(branch) + + if (!inherits(y, "data.table")) { + x <- as.data.table(y) + } else { + x <- copy(y) + } + + # get just inpovcal data + + + # change variable names + old_var <- + c( + "region", + "reg_pcn", + "code", + "ref_year", + "survname", + "comparability", + "datatype", + "rep_year" + ) + + new_var <- + c( + "wb_region_code", + "pcn_region_code", + "country_code", + "survey_year", + "survey_acronym", + "survey_comparability", + "welfare_type", + "reporting_year" + ) + + setnames(x, + old = old_var, + new = new_var + ) + + # Recode some variables + + x[ + , + `:=`( + # Recode survey coverage + survey_coverage = fcase( + survey_coverage == "N", "national", + survey_coverage == "R", "rural", + survey_coverage == "U", "urban", + default = "" + ), + # Recode welfare type + welfare_type = fcase( + grepl("[Ii]", welfare_type), "income", + grepl("[Cc]", welfare_type), "consumption", + default = "" + ), + surveyid_year = as.integer(surveyid_year), + survey_year = round(survey_year, 2) + ) + ] + + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + x <- x[country_code %in% cl$country_code] + + x <- unique(x) # remove duplicates + return(x) +} + +#' Update PFW +#' +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_pfw_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + measure <- "pfw" + branch <- match.arg(branch) + + # Read data + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + # validate pfw raw data + pfw_validate_raw(pfw = pfw, detail = detail) + + # Clean data + pfw <- aux_pfw_clean(pfw, + maindir = maindir, + branch = branch) + + # validate pfw raw data + pfw_validate_output(pfw = pfw, detail = detail) + + # Save dataset + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(pfw, "aux_name", "pfw") + + saved <- pipfun::pip_sign_save( + x = pfw, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) +} + +#' Generate a dataset that contains pfw keys +#' +#' @return data.table +#' @export +#' +aux_pfw_key <- function(){ + + pfw_temp <- load_aux("pfw", maindir = temp_fld) + + pfw_key_options <- pfw_temp[, .(country_code, + survey_year, + survey_acronym, + cpi_domain_var)] + + + cpi_temp <- load_aux("cpi", maindir = temp_fld) + + cpi_temp <- cpi_temp[, cpi_domain_var := + fifelse(reporting_level == "urban" & + cpi_domain_value == 1, "urban", "")] + + cpi_temp <- cpi_temp[, .(country_code, survey_year, survey_acronym, + cpi_domain_var, reporting_level)] + + pfw_key <- cpi_temp[pfw_key_options, on = .(country_code, survey_year, + survey_acronym, cpi_domain_var)] + + any(duplicated(pfw_key, by = c("country_code", "survey_year", "survey_acronym", "cpi_domain_var"))) + + return(pfw_key) +} + +#' Validate raw pfw data +#' +#' @param pfw raw pfw data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PFW raw data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(reg_pcn), + description = "`reg_pcn` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + reg_pcn, description = "`reg_pcn` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(datatype), + description = "`datatype` should be character") |> + validate_cols(in_set(c("C", "I", "c", "i")), + datatype, description = "`datatype` values within range") |> + validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(rep_year), + description = "`rep_year` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = "`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = "`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, code, year, survname, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate clean pfw data +#' +#' @param pfw clean pfw data, output via `aux_pfw_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ + + stopifnot("PFW clean data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW output data validation") |> + validate_if(is.character(wb_region_code), + description = "`wb_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + wb_region_code, description = "`wb_region_code` values within range") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(reporting_year), + description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(survey_comparability), + description = "`survey_comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = "`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = "`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, country_code, year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/aux_pl.R b/R/aux_pl.R new file mode 100644 index 0000000..8eb845c --- /dev/null +++ b/R/aux_pl.R @@ -0,0 +1,151 @@ +#' Poverty lines +#' +#' Update or load a dataset with poverty lines. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +aux_pl <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") + ) { + + measure <- "pl" + branch <- match.arg(branch) + action <- match.arg(action) + + + if (action == "update") { + # Read yaml file + + dl <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "yaml" + ) + + dt <- purrr::map_df(dl,pip_pl_clean) + + # Save + + # validate pl clean data + pl_validate_output(pl = dt, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + saved <- pipfun::pip_sign_save( + x = dt, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) + + } else { + df <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + + return(df) + } +} + +#' Build a data table for each list from yaml file with poverty lines info +#' +#' @param l list from yaml file +#' +#' @return data.table +#' @export +aux_pl_clean <- function(l) { + + + # ____________________________________________________________________________ + # Computations #### + + pls <- + purrr::map(.x = l$ranges, + .f = ~{ + seq(.x$min, .x$max, .x$increment) + }) |> + unlist() + + # Create data frame + df <- data.table::data.table( + name = as.character(pls), + poverty_line = pls + ) + + + df[, + c("is_default", "is_visible", "name", "ppp_year") + := { + id <- fifelse(name == l$default, TRUE, FALSE) + + iv <- fifelse(name %in% l$visible, TRUE, FALSE) + + n <- fifelse(n_decimals(poverty_line) == 1, paste0(name, "0"), name) + n <- fifelse(n_decimals(poverty_line) == 0, paste0(n, ".00"), n) + + list(id, iv, n, l$ppp_year) + }] + + # ____________________________________________________________________________ + # Return #### + return(df) + +} + +#' Validate output pl data +#' +#' @param pl output pl data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ + + stopifnot("PL clean data is not loaded" = !is.null(pl)) + + report <- data_validation_report() + + validate(pl, name = "PL output data validation") |> + validate_if(is.character(name), + description = "`name` should be character") |> + validate_if(is.numeric(poverty_line), + description = "`poverty_line` should be numeric") |> + validate_if(is.logical(is_default), + description = "`is_default` should be logical") |> + validate_if(is.logical(is_visible), + description = "`is_visible` should be logical") |> + validate_if(is.integer(ppp_year), + description = "`ppp_year` should be numeric") |> + validate_cols(not_na, name, ppp_year, + description = "no missing values in key variables") |> + validate_if(is_uniq(name, ppp_year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_pop.R b/R/aux_pop.R new file mode 100644 index 0000000..3c37898 --- /dev/null +++ b/R/aux_pop.R @@ -0,0 +1,513 @@ +#' PIP POP +#' +#' Load or update population data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_cpi +#' @inheritParams pipfun::load_from_gh +#' @param from character: Source for population data. +#' @export +aux_pop <- function(action = c("update", "load"), + force = FALSE, + from = c("gh", "file", "api"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "pop" + from <- tolower(from) + action <- match.arg(action) + + if (action == "update") { + aux_pop_update( + force = force, + from = from, + maindir = maindir, + owner = owner, + branch = branch, + tag = tag, + detail = detail) + + } else { + + df <- load_aux(maindir = maindir, + measure = measure, + branch = branch) + + return(df) + } +} + +#' Update POP +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @param from character: Source for population data. +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pop +aux_pop_update <- function(force = FALSE, + from = c("gh", "file", "api"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + # Check arguments + from <- match.arg(from) + branch <- match.arg(branch) + measure <- "pop" + + # Get the most recent year in PFW to filter population projection + + pfw <- pipload::pip_load_aux("pfw", + branch = branch, + maindir = maindir) + # year_max <- pfw[, max(year)] + # get current year as max year + year_max <- Sys.Date() |> + format("%Y") |> + as.numeric() + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # From WDI --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if (from == "api") { + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## from API -------- + + pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") + pop <- wbstats::wb_data(indicator = pop_indicators, + country = "all", # this is new + lang = "en", + return_wide = FALSE) |> + setDT() + + # validate wb pop data + pop_validate_raw(pop = pop, detail = detail) + + # rename vars + pop <- pop[, c("iso3c", "date", "indicator_id", "value")] + + setnames(pop, + new = c("country_code", "year", "coverage", "pop")) + + + + pop[, + year := as.numeric(year) + ][, + pop_data_level := + fcase( + grepl("POP", coverage), 2, + grepl("RUR", coverage), 0, + grepl("URB", coverage), 1 + ) + ][, + coverage := NULL] + + ### Ger special cases --------- + + spop <- pipfun::load_from_gh( + measure = measure, + filename = "spop", + owner = owner, + branch = branch, + tag = tag, + ext = "csv") |> + clean_names_from_wide() |> + clean_from_wide() + + + pop <- rbindlist(list(pop, spop), + use.names = TRUE, + fill = TRUE) + + + } else { + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## from Emi's file -------- + + # Now Emi's file is uploaded directly to GH. So we get it from there. + # Load data + + pop_main <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "xlsx" + ) |> + clean_names_from_wide() |> + clean_from_wide() + + # validate pop main raw data + popmain_validate_raw(pop_main = pop_main, detail = detail) + + ### Ger special cases --------- + spop <- pipfun::load_from_gh( + measure = measure, + filename = "spop", + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) |> + clean_names_from_wide() |> + clean_from_wide() + + # validate special cases pop raw data + spop_validate_raw(spop = spop, detail = detail) + + pop <- joyn::joyn(pop_main, spop, + by = c("country_code", "year", "pop_data_level"), + update_values = TRUE, + reportvar = FALSE, + verbose = FALSE) + + # pop <- rbindlist(list(pop_main, spop), + # use.names = TRUE, + # fill = TRUE) + + } + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Clean data --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # Remove years prior to 1960 + pop <- pop[!is.na(pop) & year >= 1960] + pop <- pop[year <= year_max] + + # sorting + setorder(pop, country_code, year, pop_data_level) + setcolorder(pop, c("country_code", "year", "pop_data_level", "pop")) + + pop[, + pop_domain := fifelse(pop_data_level == 2, 1, 2)] + + # recode domain and data_level variables + cols <- c("pop_domain", "pop_data_level") + pop[, + (cols) := lapply(.SD, as.character), + .SDcols = cols + ][ + , # recode domain + pop_domain := fcase( + pop_domain == "1", "national", + pop_domain == "2", "urban/rural", + pop_domain == "3", "subnational region" + ) + ][ # Recode data_level only for those that are national or urban/rural + pop_domain %in% c("national", "urban/rural"), + pop_data_level := fcase( + pop_data_level == "0", "rural", + pop_data_level == "1", "urban", + pop_data_level == "2", "national" + ) + ] + + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + setDT(cl) + pop <- pop[country_code %in% cl$country_code] |> + unique() # make sure we don't havce any duplicates + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Save data --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # drop pce_domain + pop <- pop[, -c("pop_domain")] + + pop <- pop |> setnames("pop_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pop, "aux_name", "pop") + setattr(pop, + "aux_key", + c("country_code", "year", "reporting_level")) + + # validate output pop data + pop_validate_output(pop = pop, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = pop, + measure = measure, + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) + +} + + + +#' Clean names from wide WDI format +#' +#' @param x data frame +#' +#' @return dataframe with names cleaned +#' @keywords internal +clean_names_from_wide <- function(x) { + if (!is.data.table(x)) { + setDT(x) + } + nnames <- as.character(x[2, 1:4]) + setnames(x, 1:4, nnames) + x <- x[-c(1:2)] + x +} + + +#' Clean from WDI format +#' +#' @param x data frame +#' +#' @return dataframe with names cleaned +#' @keywords internal +clean_from_wide <- function(x) { + if (!is.data.table(x)) { + setDT(x) + } + + + year_vars <- names(x)[6:ncol(x)] + x$Series_Name <- NULL + x$Time_Name <- NULL + + # Reshape to long format + pop_long <- x |> + data.table::setDT() |> + data.table::melt( + id.vars = c("Country", "Series"), + measure.vars = year_vars, + variable.name = "Year", + value.name = "Population" + ) + pop_long[, + Year := as.numeric(as.character(Year)) + ][, + Population := { + Population[Population == "."] <- NA_character_ + as.numeric(Population) + }] + + + + pop <- pop_long + # Create data_level column + pop[, + pop_data_level := + fcase( + grepl("POP", Series), 2, + grepl("RUR", Series), 0, + grepl("URB", Series), 1 + ) + ][, + Series := NULL] + + # Set colnames + setnames( + pop, + old = c("Country", "Year", "Population"), + new = c("country_code", "year", "pop") + ) + + return(pop) +} + +#' Validate raw main pop data +#' +#' @param pop_main raw pop main data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ + + stopifnot("POP main raw data is not loaded" = !is.null(pop_main)) + + report <- data_validation_report() + + validate(pop_main, name = "POP main raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate pop raw data download from wdi +#' +#' @param pop raw pop data, as loaded via `wbstats::wb_data` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ + + stopifnot("WB POP raw data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "WB POP raw data validation") |> + validate_if(is.character(indicator_id), + description = "`indicator_id` should be character") |> + validate_cols(in_set(c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL")), + indicator_id, description = "`indicator_id` values within range") |> + validate_if(is.character(indicator), + description = "`indicator` should be character") |> + validate_if(is.character(iso2c), + description = "`iso2c` should be character") |> + validate_if(is.character(iso3c), + description = "`iso3c` should be character") |> + validate_if(is.character(country), + description = "`country` should be character") |> + validate_if(is.numeric(date), + description = "`date` should be numeric") |> + validate_if(is.numeric(value), + description = "`value` should be numeric") |> + validate_if(is.character(unit), + description = "`unit` should be character") |> + validate_if(is.character(obs_status), + description = "`obs_status` should be character") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_if(is_date(last_updated), + description = "`last_updated` should be date") |> + validate_cols(not_na, indicator_id, iso3c, date, + description = "no missing values in key variables") |> + validate_if(is_uniq(indicator_id, iso3c, date), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate output pop data +#' +#' @param pop output pop data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ + + stopifnot("POP clean data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "POP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + # validate_if(is.character(pop_domain), + # description = "`pop_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pop_domain, description = "`pop_domain` values within range") |> + validate_cols(not_na, country_code, year, reporting_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, reporting_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw special cases pop data +#' +#' @param spop raw special case pop data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ + + stopifnot("Special POP raw data is not loaded" = !is.null(spop)) + + report <- data_validation_report() + + validate(spop, name = "Special POP raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_ppp.R b/R/aux_ppp.R new file mode 100644 index 0000000..a1eeaff --- /dev/null +++ b/R/aux_ppp.R @@ -0,0 +1,412 @@ +#' PIP PPP +#' +#' Load or update PPP data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +#' @import data.table +aux_ppp <- function(action = c("update", "load"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + force = FALSE, + tag = branch, + detail = getOption("pipaux.detail.raw"), + ppp_defaults = TRUE) { + + # ____________________________________________________________________________ + # on.exit #### + on.exit({ + + }) + + # ____________________________________________________________________________ + # Defenses #### + measure <- "ppp" + action <- match.arg(action) + branch <- match.arg(branch) + + stopifnot( exprs = { + + } + ) + + # ____________________________________________________________________________ + # Early returns #### + if (FALSE) { + return() + } + + # ____________________________________________________________________________ + # Computations #### + if (action == "update") { + aux_ppp_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + detail = detail) + } + else { + load_aux( + maindir = maindir, + measure = measure, + branch = branch, + ppp_defaults = ppp_defaults + ) + } + + +} + +#' Clean PPP data from datalibweb to meet PIP protocols +#' +#' @param y dataset with PPP data from `aux_ppp_update()`. +#' @param default_year numeric: ICP round year. Default is 2011 +#' +#' @keywords internal +aux_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { + x <- data.table::as.data.table(y) + + y <- melt(x, + id.vars = c("code", "ppp_domain", "datalevel"), + measure.vars = patterns("^ppp_[0-9]{4}_[Vv][0-9]_[Vv][0-9]$"), + variable.name = "ver", + value.name = "ppp" + ) + + y[ + , + c("p", "ppp_year", "release_version", "adaptation_version") := tstrsplit(ver, "_") + ][ + , + `:=`( + ppp_year = as.numeric(ppp_year), + ppp_domain = as.character(ppp_domain), + datalevel = as.character(datalevel) + ) + ][ + , + # This part should not exist if the raw data + # has been properly created + ppp_data_level := fcase( + ppp_domain %chin% c("urban/rural", "2") & datalevel == "0", "rural", + ppp_domain %chin% c("urban/rural", "2") & datalevel == "1", "urban", + ppp_domain %chin% c("national", "1") & datalevel %chin% c("2", "", NA_character_), "national", + default = "" + ) + ][ + , + c("p", "ver", "datalevel") := NULL + ] + + setorder(y, code, ppp_year, release_version, adaptation_version) + + #--------- Get default version --------- + + y[ # Find Max release version + , + d1 := release_version == max(release_version), + by = .(code, ppp_year) + ][ + # Find max adaptation version of the max release + d1 == TRUE, + d2 := adaptation_version == max(adaptation_version), + by = .(code, ppp_year) + ][ + , + # get intersection + `:=`( + ppp_default = (d1 == TRUE & d2 == TRUE & ppp_year == (default_year)), + ppp_default_by_year = (d1 == TRUE & d2 == TRUE), + country_code = code + ) + ][ + , + # Remove unnecessary variables + c("d1", "d2", "code") := NULL + ] + + setcolorder( + y, + c( + "country_code", + "ppp_year", + "release_version", + "adaptation_version", + "ppp", + "ppp_default", + "ppp_default_by_year", + "ppp_domain", + "ppp_data_level" + ) + ) + + y <- unique(y) # remove duplicates + + # Remove non WDI countries + non_wdi <- c("BES", "EGZ", "RUT", "SDO") + if (any(y$country_code %in% non_wdi)) { + y <- y[!(country_code %in% non_wdi)] + } + + return(y) +} + +#' Update PPP +#' +#' @inheritParams pipfun::load_from_gh +#' @keywords internal +aux_ppp_update <- function(maindir = gls$PIP_DATA_DIR, + force = FALSE, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + + + # ____________________________________________________________________________ + # set up #### + + measure <- "ppp" + branch <- match.arg(branch) + + + # ____________________________________________________________________________ + # Load raw data #### + + ppp <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) + + # validate ppp raw data + ppp_validate_raw(ppp = ppp, detail = detail) + + # ____________________________________________________________________________ + # cleaning #### + + + # Clean data + ppp <- aux_ppp_clean(ppp) + + # Remove any non-WDI countries + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + ppp <- ppp[country_code %in% cl$country_code] + + + ## ............................................................................ + ## Special cases #### + + # Hardcode domain / data_level fix for NRU + ppp$ppp_domain <- + ifelse(ppp$country_code == "NRU" & is.na(ppp$ppp_domain), + 1, ppp$ppp_domain + ) + ppp$ppp_data_level <- + ifelse(ppp$country_code == "NRU" & ppp$ppp_data_level == "", + "national", ppp$ppp_data_level + ) + + + # ____________________________________________________________________________ + # Saving #### + + # drop ppp_domain + ppp <- ppp[, -c("ppp_domain")] + + ppp <- ppp |> setnames("ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(ppp, "aux_name", "ppp") + setattr(ppp, + "aux_key", + c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. + + # validate ppp output data + ppp_validate_output(ppp = ppp, detail = detail) + + if (branch == "main") { + branch <- "" + } + + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = ppp, + measure = measure, + msrdir = msrdir, + force = force + ) + + + # ____________________________________________________________________________ + # PPP vintages data #### + + vars <- c("ppp_year", "release_version", "adaptation_version") + ppp_vintage <- unique(ppp[, ..vars], by = vars) + + data.table::setnames(x = ppp_vintage, + old = c("release_version", "adaptation_version"), + new = c("ppp_rv", "ppp_av")) + + # ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", + # skip_absent=TRUE) + # + # setattr(ppp_vintage, "aux_name", "ppp") + # setattr(ppp_vintage, + # "aux_key", + # c("country_code", "reporting_level")) + + # Save + pipfun::pip_sign_save( + x = ppp_vintage, + measure = "ppp_vintage", + msrdir = msrdir, + force = force + ) + + return(invisible(saved)) +} + +#' Validate output ppp data +#' +#' @param ppp output ppp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ + + stopifnot("PPP output data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(ppp_year), + description = "`ppp_year` should be character") |> + validate_if(is.character(release_version), + description = "`release_version` should be character") |> + validate_if(is.character(adaptation_version), + description = "`adaptation_version` should be character") |> + validate_if(is.numeric(ppp), + description = "`ppp` should be numeric") |> + validate_if(is.logical(ppp_default), + description = "`ppp_default` should be numeric") |> + validate_if(is.logical(ppp_default_by_year), + description = "`ppp_default_by_year` should be numeric") |> + # validate_if(is.character(ppp_domain), + # description = "`ppp_domain` should be character") |> + # validate_cols(in_set(c("1", "2")), + # ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + reporting_level, description = "`reporting_level` values within range") |> + validate_cols(not_na, country_code, ppp_year, reporting_level, + adaptation_version, release_version, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, ppp_year, + reporting_level, adaptation_version, release_version), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw ppp data +#' +#' @param ppp raw ppp data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PPP raw data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP raw data validation") |> + validate_if(is.character(CountryName), + description = "`CountryName` should be character") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(CoverageType), + description = "`CoverageType` should be character") |> + validate_cols(in_set(c("National", "Rural", "Urban")), + CoverageType, description = "`CoverageType` values within range") |> + validate_if(is.numeric(ppp_2005_v1_v1), + description = "`ppp_2005_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v1), + description = "`ppp_2011_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v1), + description = "`ppp_2011_v2_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v2), + description = "`ppp_2011_v1_v2` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v2), + description = "`ppp_2011_v2_v2` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v1), + description = "`ppp_2017_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v2), + description = "`ppp_2017_v1_v2` should be numeric") |> + validate_if(is.numeric(source_ppp_2011), + description = "`source_ppp_2011` should be numeric") |> + validate_if(is.numeric(source_ppp_2005), + description = "`source_ppp_2005` should be numeric") |> + validate_if(is.numeric(datalevel), + description = "`datalevel` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + datalevel, description = "`datalevel` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.numeric(ppp_domain_value), + description = "`ppp_domain_value` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain_value, description = "`ppp_domain_value` values within range") |> + validate_if(is.numeric(oldicp2005), + description = "`oldicp2005` should be numeric") |> + validate_if(is.numeric(oldicp2011), + description = "`oldicp2011` should be numeric") |> + validate_if(is.character(Seriesname), + description = "`Seriesname` should be character") |> + validate_if(is.character(note_may192020), + description = "`note_may192020` should be character") |> + validate_if(is.character(ppp_2017_v1_v2_note), + description = "`ppp_2017_v1_v2_note` should be character") |> + validate_cols(not_na, code, CoverageType, datalevel, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, CoverageType, datalevel), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/pip_prices.R b/R/aux_prices.R similarity index 96% rename from R/pip_prices.R rename to R/aux_prices.R index 3754e27..33d8942 100644 --- a/R/pip_prices.R +++ b/R/aux_prices.R @@ -11,7 +11,7 @@ #' #' @export #' @import data.table -pip_prices <- function(measure = NULL, +aux_prices <- function(measure = NULL, action = "update", maindir = gls$PIP_DATA_DIR, dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), @@ -73,19 +73,19 @@ pip_prices <- function(measure = NULL, #--------- update --------- if (action == "update") { if (measure == "cpi") { - pip_cpi_update( + aux_cpi_update( maindir = maindir, dlwdir = dlwdir, force = force ) } else if (measure == "ppp") { - pip_ppp_update( + aux_ppp_update( maindir = maindir, dlwdir = dlwdir, force = force ) } else if (measure == "pfw") { - pip_pfw_update( + aux_pfw_update( maindir = maindir, dlwdir = dlwdir, force = force diff --git a/R/pip_regions.R b/R/aux_regions.R similarity index 97% rename from R/pip_regions.R rename to R/aux_regions.R index c8d5244..b50df1c 100644 --- a/R/pip_regions.R +++ b/R/aux_regions.R @@ -2,10 +2,10 @@ #' #' Update or load a dataset with regions. #' -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @export -pip_regions <- function(action = c("update", "load"), +aux_regions <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/R/pip_sign_save.R b/R/aux_sign_save.R similarity index 98% rename from R/pip_sign_save.R rename to R/aux_sign_save.R index f8d97ac..a967e44 100644 --- a/R/pip_sign_save.R +++ b/R/aux_sign_save.R @@ -5,14 +5,14 @@ #' This function is deprecated because of the new, more flexible and general #' function `pipfun::pip_sign_save()` #' @param x data.frame Data frame to be signed and saved. -#' @inheritParams pip_cpi +#' @inheritParams aux_cpi #' @param msrdir character: Directory where the data and data signature will be #' saved. #' @param save_dta logical: If TRUE a Stata (.dta) version of the dataset is #' also saved. #' @keywords internal #' @return logical -pip_sign_save <- function(x, +aux_sign_save <- function(x, measure, msrdir, force = FALSE, diff --git a/R/aux_sna.R b/R/aux_sna.R new file mode 100644 index 0000000..f137499 --- /dev/null +++ b/R/aux_sna.R @@ -0,0 +1,159 @@ +#' PIP Special National accounts +#' +#' Update special national accounts data +#' +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" +#' and "gh" are synonymous +#' @export +aux_sna <- function(action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + measure <- "sna" + branch <- match.arg(branch) + action <- match.arg(action) + + + if (action == "update") { + # load nowcast growth rates + sna <- pipfun::load_from_gh( + measure = "sna", + owner = owner, + branch = branch + ) + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + saved <- pipfun::pip_sign_save( + x = sna, + measure = measure, + msrdir = msrdir, + force = force + ) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} # end + +#' Validate raw special national accounts (sna) data +#' +#' @param sna raw sna data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ + + stopifnot("SNA raw data is not loaded" = !is.null(sna)) + + report <- data_validation_report() + + validate(sna, name = "SNA raw data validation") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_cols(in_set(c("National")), + coverage, description = "`coverage` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(GDP), + description = "`GDP` should be numeric") |> + validate_if(is.logical(PCE), + description = "`PCE` should be logical") |> + validate_if(is.character(sourceGDP), + description = "`sourceGDP` should be character") |> + validate_if(is.logical(sourcePCE), + description = "`sourcePCE` should be logical") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw sna_fy data +#' +#' @param sna_fy raw sna_fy data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ + + stopifnot("sna_fy raw data is not loaded" = !is.null(sna_fy)) + + report <- data_validation_report() + + validate(sna_fy, name = "sna_fy raw data validation") |> + validate_if(is.character(Code), + description = "`Code` should be character") |> + validate_if(is.character(LongName), + description = "`LongName` should be character") |> + validate_if(is.character(SpecialNotes), + description = "`SpecialNotes` should be character") |> + validate_if(is.character(Month), + description = "`Month` should be character") |> + validate_if(is.numeric(Day), + description = "`Day` should be numeric") |> + validate_cols(not_na, Code, Month, Day, + description = "no missing values in key variables") |> + # validate_if(is_uniq(Code, LongName), + # description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Fake PIP SNA function +#' +#' @inheritParams aux_gdp +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +fake_aux_sna <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api")) { + + return(invisible(TRUE)) +} + + + diff --git a/R/aux_wdi.R b/R/aux_wdi.R new file mode 100644 index 0000000..7230a09 --- /dev/null +++ b/R/aux_wdi.R @@ -0,0 +1,164 @@ +#' PIP wdi +#' +#' Update or load wdi data. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" +#' and "gh" are synonymous +#' @export +aux_wdi <- function(action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + + measure <- "wdi" + branch <- match.arg(branch) + action <- match.arg(action) + + + if (action == "update") { + aux_wdi_update(maindir = maindir, + force = force, + owner = owner, + branch = branch, + tag = tag, + from = from, + detail = detail) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} # end of pip_wdi + +#' Update National accounts data from WDI +#' +#' GDP and HFCE data from WDI. It could be either from API or from file +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_gdp +#' @return data.table with gdp and pce variables +#' @export +#' +#' @examples +#' aux_wdi_update() +aux_wdi_update <- function(force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { + + + from <- match.arg(from) + branch <- match.arg(branch) + + # ______________________________________________________ + # Computations #### + measure <- "wdi" + + ## ............................................................... + ## From file #### + + if (from %in% c("file", "gh")) { + wdi <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "csv") + + } else { + ## ........................................................................ + ## From API #### + wdi_indicators <- c("NY.GDP.PCAP.KD", "NE.CON.PRVT.PC.KD") + wdi <- wbstats::wb_data(indicator = wdi_indicators, + lang = "en") |> + setDT() + + wdi[, + c("country", "iso2c") := NULL] + + # Rename columns + setnames(wdi, + old = c("iso3c", "date"), + new = c("country_code", "year") + ) + } + # validate wdi raw data + wdi_validate_raw(wdi = wdi, detail = detail) + + # _________________________________________________________________________ + # Save and Return #### + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(wdi, "aux_name", "wdi") + setattr(wdi, + "aux_key", + c("country_code", "year")) + + saved <- pipfun::pip_sign_save( + x = wdi, + measure = measure, + msrdir = msrdir, + force = force, + save_dta = FALSE + ) + + return(invisible(saved)) + +} + +#' Validate raw wdi data +#' +#' @param wdi raw wdi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WDI raw data is not loaded" = !is.null(wdi)) + + report <- data_validation_report() + + validate(wdi, name = "WDI raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(NE.CON.PRVT.PC.KD), + description = "`NE.CON.PRVT.PC.KD` should be numeric") |> + validate_if(is.numeric(NY.GDP.PCAP.KD), + description = "`NY.GDP.PCAP.KD` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/aux_weo.R b/R/aux_weo.R new file mode 100644 index 0000000..9c98bc6 --- /dev/null +++ b/R/aux_weo.R @@ -0,0 +1,314 @@ +#' Fetch GDP data from WEO +#' +#' Create a dataset with GDP data from World Economic Outlook. +#' +#' Note that the most recent version most be downloaded from imf.org and saved +#' as an .xls file in `/_aux/weo/`. The filename should be in the +#' following structure `WEO_.xls`. Due to potential file corruption +#' the file must be opened and re-saved before it can be updated with +#' `pip_weo()`. Hopefully in the future IMF will stop using an `.xls` file +#' that's not really xls. +#' +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @inheritParams aux_pfw +#' @inheritParams pipfun::load_from_gh +#' @export +aux_weo <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { + measure <- "weo" + branch <- match.arg(branch) + action <- match.arg(action) + + if (action == "update") { + + # ---- Load data from disk ---- + + # Read data + dt <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + tag = tag, + ext = "csv" + ) + + # validate weo raw data + weo_validate_raw(weo = dt, detail = detail) + + dt <- aux_weo_clean(dt, + maindir = maindir, + branch = branch) + + # Save dataset + setattr(dt, "aux_name", "weo") + setattr(dt, + "aux_key", + c("country_code", "year")) + # validate weo clean data + weo_validate_output(weo = dt, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + cat('\nDir : ', msrdir) + saved <- pipfun::pip_sign_save( + x = dt, + measure = measure, + msrdir = msrdir, + force = force + ) + return(invisible(saved)) + + } else { + dt <- load_aux( + maindir = maindir, + measure = measure, + branch = branch + ) + return(dt) + } +} + +#' Clean WEO data +#' +#' @param dt database with weo raw data +#' @param maindir directory where auxiliary data is stored (to load pop) +#' @param branch character: branch to be loaded +#' +#' @return data.table +#' @export +aux_weo_clean <- function(dt, + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main")) { + + + branch <- match.arg(branch) + + # _________________________________________ + # Computations #### + if (!inherits(dt, "data.table")) { + setDT(dt) + } + + # Clean column names + nn <- + names(dt) |> + tolower() |> + {\(.) gsub("[-/ ]", "_", .)}() |> + {\(.) gsub("([0-9]{4})", "x\\1", .)}() + + names(dt) <- nn + + # ---- Data transformations ---- + + # Select rows w/ data on real gdp per capita + dt <- dt[weo_subject_code %in% c("NGDPRPC", "NGDPRPPPPC")] + + # Fix country codes + dt[ + , + iso := fifelse( + iso == "WBG", "PSE", iso # West Bank & Gaza + ) + ][ + , + iso := fifelse( + iso == "UVK", "XKX", iso # Kosovo + ) + ][, + # Replace subject codes + subject_code := fcase( + weo_subject_code == "NGDPRPC", "weo_gdp_lcu", + weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017" + ) + ] + + # Reshape to long format + + years_vars <- names(dt)[grepl("\\d{4}", names(dt))] + dt <- + melt(data = dt, + id.vars = c("iso", "subject_code"), + measure.vars = years_vars, + value.name = "weo_gdp", + variable.name = "year" + ) + setnames(dt, "iso", "country_code") + + # Convert year and GDP to numeric + dt[, + c("weo_gdp", "year") := { + y <- sub("x", "", year) |> + as.numeric() + + x <- as.numeric(weo_gdp) |> + suppressWarnings() + list(x, y) + }] + + # Remove rows w/ missing GDP` + dt <- na.omit(dt, cols = "weo_gdp") + + # Remove current year and future years + current_year <- format(Sys.Date(), "%Y") + dt <- dt[year < current_year] + + # Reshape to wide for GDP columns + dt <- dcast(dt, + formula = country_code + year ~ subject_code, + value.var = "weo_gdp" + ) + + # ---- Merge with population ---- + + + pop <- load_aux(measure = "pop", + maindir = maindir, + branch = branch) + + setDT(pop) + pop <- pop[reporting_level == "national", ] #pop_data_level = reporting_level + dt[pop, + on = .(country_code, year), + `:=`( + pop = i.pop + ) + ] + + # ---- Chain PPP and LCU GDP columns ---- + + # Chain LCU on PPP column + + dt[, weo_gdp := chain_val(ori_var = weo_gdp_ppp2017, + rep_var = weo_gdp_lcu), + by = country_code] + # + # dt <- chain_values( + # dt, + # base_var = "weo_gdp_ppp2017", + # replacement_var = "weo_gdp_lcu", + # new_name = "weo_gdp", + # by = "country_code" + # ) + + + # --- Sign and save ---- + + # Select final columns + dt <- dt[, c("country_code", "year", "weo_gdp")] + + + + # ____________________________________________________________________________ + # Return #### + return(dt) + +} + +#' Validate clean weo data +#' +#' @param weo clean weo data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ + + stopifnot("WEO output data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + validate(weo, name = "WEO output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(weo_gdp), + description = "`weo_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + +#' Validate raw weo data +#' +#' @param weo raw weo data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' @keywords internal +#' +#' @export +weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WEO raw data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + weo <- weo[!is.na(`WEO Subject Code`), ] + + validate(weo, name = "WEO raw data validation") |> + validate_if(is.character(`WEO Country Code`), + description = "`WEO Country Code` should be character") |> + validate_if(is.character(ISO), + description = "ISO should be character") |> + validate_if(is.character(`WEO Subject Code`), + description = "`WEO Subject Code` should be character") |> + validate_if(is.character(Country), + description = "`Country` should be character") |> + validate_if(is.character(`Subject Descriptor`), + description = "`Subject Descriptor` should be character") |> + validate_if(is.character(`Subject Notes`), + description = "`Subject Notes` should be character") |> + validate_if(is.character(Units), + description = "`Units` should be character") |> + validate_if(is.character(Scale), + description = "`Scale` should be character") |> + validate_if(is.character(`Country/Series-specific Notes`), + description = "`Country/Series-specific Notes` should be character") |> + validate_if(is.numeric(`Estimates Start After`), + description = "`Estimates Start After` should be numeric") |> + validate_cols(not_na, ISO, `WEO Subject Code`, + description = "no missing values in key variables") |> + validate_if(is_uniq(ISO, `WEO Subject Code`), + description = "no duplicate records in key variables") |> + add_results(report) + + num_var_list <- grep("^[[:digit:]]", colnames(weo)) + + for (i in 1:length(num_var_list)) { + validate(weo, name = "WEO validation") |> + validate_cols(is.numeric, num_var_list[i], + description = "variables (with numeric var name) should be numeric") |> + add_results(report) + } + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + + diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R deleted file mode 100644 index 3dda7ae..0000000 --- a/R/cl_validate_raw.R +++ /dev/null @@ -1,73 +0,0 @@ -#' Validate raw country list data -#' -#' @param cl raw country list data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ - - stopifnot("Country list raw data is not loaded" = !is.null(cl)) - - report <- data_validation_report() - - # country_list <- pipload::pip_load_aux("pfw") - country_list <- pipfun::load_from_gh(measure = "pfw", - owner = getOption("pipfun.ghowner"), - branch = "DEV", - ext = "dta") - - country_list <- unique(country_list[, code]) - - validate(cl, name = "CL raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - # validate_cols(in_set(country_list), - # country_code, description = "`country_code` values within range") |> - validate_if(is.character(country_name), - description = "`country_name` should be character") |> - validate_if(is.character(africa_split), - description = "`africa_split` should be character") |> - validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), - africa_split, description = "`africa_split` values within range") |> - validate_if(is.character(africa_split_code), - description = "`africa_split_code` should be character") |> - validate_cols(in_set(c("AFE", "AFW", NA)), - africa_split_code, description = "`africa_split_code` values within range") |> - validate_if(is.character(pcn_region), - description = "`pcn_region` should be character") |> - validate_if(is.character(pcn_region_code), - description = "`pcn_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - pcn_region_code, description = "`pcn_region_code` values within range") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_if(is.character(region_code), - description = "`region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - region_code, description = "`region_code` values within range") |> - validate_if(is.character(world), - description = "`world` should be character") |> - validate_cols(in_set(c("World")), - world, description = "`world` values within range") |> - validate_if(is.character(world_code), - description = "`world_code` should be character") |> - validate_cols(in_set(c("WLD")), - world_code, description = "`world_code` values within range") |> - validate_cols(not_na, country_code, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R deleted file mode 100644 index e15ee53..0000000 --- a/R/clean_validation_report.R +++ /dev/null @@ -1,12 +0,0 @@ -#' Remove data validation report from .pipaux environment variable -#' -#' @export -clean_validation_report <- function(){ - - if (rlang::env_has(.pipaux, "validation_report")){ - - # rlang::env_bind(.pipaux, validation_report = rlang::zap()) - rlang::env_unbind(.pipaux, "validation_report") - - } -} diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R deleted file mode 100644 index 3b80919..0000000 --- a/R/cpi_validate_output.R +++ /dev/null @@ -1,92 +0,0 @@ -#' Validate clean cpi data -#' -#' @param cpi clean cpi data, output via `pip_cpi_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ - - stopifnot("CPI clean data is not loaded" = !is.null(cpi)) - - report <- data_validation_report() - - validate(cpi, name = "CPI output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.integer(year), - description = "`year` should be integer") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.numeric(cpi), - description = "`cpi` should be numeric") |> - validate_if(is.numeric(ccf), - description = "`ccf` should be numeric") |> - validate_if(is.character(survey_acronym), - description = "`survey_acronym` should be character") |> - validate_if(is.numeric(change_cpi2011), - description = "`change_cpi2011` should be numeric") |> - validate_cols(in_set(c(0, 1)), change_cpi2011, - description = "`change_cpi2011` values within range") |> - # validate_if(is.character(cpi_domain), - # description = "`cpi_domain` should be character") |> - # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, - # description = "`cpi_domian` values within range") |> - validate_if(is.numeric(cpi_domain_value), - description = "`cpi_domain_value` should be numeric") |> - validate_cols(in_set(c(0, 1)), cpi_domain_value, - description = "`cpi_domain_value` values within range") |> - validate_if(is.numeric(cpi2017_unadj), - description = "`cpi2017_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011_unadj), - description = "`cpi2011_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011), - description = "`cpi2011` should be numeric") |> - validate_if(is.numeric(cpi2017), - description = "`cpi2017` should be numeric") |> - # validate_if(is.numeric(cpi2011_SM22), - # description = "`cpi2011_SM22` should be numeric") |> - # validate_if(is.numeric(cpi2017_SM22), - # description = "`cpi2017_SM22` should be numeric") |> - validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), reporting_level, - description = "`reporting_level` values within range") |> - # validate_if(is.numeric(cpi2011_AM23), - # description = "`cpi2011_AM23` should be numeric") |> - # validate_if(is.numeric(cpi2017_AM23), - # description = "`cpi2017_AM23` should be numeric") |> - validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") |> - validate_cols(not_na, country_code, year, survey_acronym, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, survey_acronym, - reporting_level), - description = "no duplicate records in key variables") |> - validate_if(is_uniq(country_code, year, survey_acronym, - reporting_level), - description = "no duplicate cpi values") |> - add_results(report) - - num_var_list1 <- grep("cpi2011_", colnames(cpi)) - num_var_list2 <- grep("cpi2017_", colnames(cpi)) - num_var_list <- c(num_var_list1, num_var_list2) - - for (i in 1:length(num_var_list)) { - validate(cpi, name = "CPI validation") |> - validate_cols(is.numeric, num_var_list[i], - description = "variables (with numeric var name) should be numeric") |> - add_results(report) - } - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } -} diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R deleted file mode 100644 index 2441e6a..0000000 --- a/R/cpi_validate_raw.R +++ /dev/null @@ -1,96 +0,0 @@ -#' Validate raw cpi data -#' -#' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ - - stopifnot("CPI raw data is not loaded" = !is.null(cpi)) - - report <- data_validation_report() - - validate(cpi, name = "CPI raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - region, description = "`region` values within range") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(countryname), - description = "`countryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(survname), - description = "`survname` should be character") |> - validate_if(is.numeric(ref_year), - description = "`ref_year` should be numeric") |> - validate_if(is.character(cpi_domain), - description = "`cpi_domain` should be character") |> - validate_cols(in_set(c("National", "Urban/Rural")), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.numeric(cpi_domain_value), - description = "`cpi_domain_value` should be numeric") |> - validate_if(is.numeric(cpi2017_unadj), - description = "`cpi2017_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011_unadj), - description = "`cpi2011_unadj` should be numeric") |> - validate_if(is.numeric(cpi2011), - description = "`cpi201`1 should be numeric") |> - validate_if(is.numeric(cpi2017), - description = "`cpi2017` should be numeric") |> - validate_if(is.character(version), - description = "`version` should be character") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.numeric(cur_adj), - description = "`cur_adj` should be numeric") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("N", "R", "U", NA)), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.numeric(cpi2011_SM22), - description = "`cpi2011_SM22` should be numeric") |> - validate_if(is.numeric(comparable), - description = "`comparable` should be numeric") |> - validate_if(is.numeric(cpi2017_SM22), - description = "`cpi2017_SM22` should be numeric") |> - validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") |> - validate_if(is.numeric(cpi_data_level), - description = "`cpi_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - cpi_data_level, description = "`cpi_data_level` values within range") |> - validate_if(is.numeric(ref_year_SM24), - description = "`ref_year_SM24` should be numeric") |> - validate_if(is.numeric(cpi2011_SM24), - description = "`cpi2011_SM24` should be numeric") |> - validate_if(is.numeric(cpi2017_SM24), - description = "`cpi2011_SM24` should be numeric") |> - validate_if(is.numeric(change_cpi2017), - description = "`change_cpi2017` should be numeric") |> - validate_if(is.numeric(change_icp2017), - description = "`change_icp2017` should be numeric") |> - validate_if(is.numeric(change_cpi2011), - description = "`change_cpi2011` should be numeric") |> - validate_if(is.numeric(change_icp2011), - description = "`change_icp2011` should be numeric") |> - validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") |> - validate_cols(not_na, code, year, survname, cpi_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, year, survname, cpi_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/gdm_validate_output.R b/R/gdm_validate_output.R deleted file mode 100644 index 92f61dc..0000000 --- a/R/gdm_validate_output.R +++ /dev/null @@ -1,57 +0,0 @@ -#' Validate clean gdm data -#' -#' @param gdm clean gdm data, output via `pipfun::pip_gdm_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ - - stopifnot("GDM output data is not loaded" = !is.null(gdm)) - - report <- data_validation_report() - - validate(gdm, name = "GDM output data validation") |> - validate_if(is.character(survey_id), - description = "`survey_id` should be character") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.integer(year), - description = "`year` should be integer") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), welfare_type, - description = "`welfare_type` values within range") |> - validate_if(is.numeric(survey_mean_lcu), - description = "`survey_mean_lcu` should be numeric") |> - validate_if(is.character(distribution_type), - description = "`distribution_type` should be character") |> - validate_cols(in_set(c("aggregate", "group")), distribution_type, - description = "`distribution_type` values within range") |> - validate_if(is.character(gd_type), - description = "`gd_type` should be character") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), reporting_level, - description = "`reporting_level` values within range") |> - validate_if(is.character(pcn_source_file), - description = "`pcn_source_file` should be character") |> - validate_if(is.character(pcn_survey_id), - description = "`pcn_survey_id` should be character") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } -} diff --git a/R/gdm_validate_raw.R b/R/gdm_validate_raw.R deleted file mode 100644 index c841013..0000000 --- a/R/gdm_validate_raw.R +++ /dev/null @@ -1,64 +0,0 @@ -#' Validate raw gdm data -#' -#' @param gdm raw gdm data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ - - stopifnot("GDM raw data is not loaded" = !is.null(gdm)) - - report <- data_validation_report() - - validate(gdm, name = "GDM raw data validation") |> - validate_if(is.character(Region), - description = "`Region` should be character") |> - validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), - Region, description = "`Region` values within range") |> - validate_if(is.character(countryName), - description = "`countryName` should be character") |> - validate_if(is.character(Coverage), - description = "`Coverage` should be character") |> - validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), - Coverage, description = "`Coverage` values within range") |> - validate_if(is.character(CountryCode), - description = "`CountryCode` should be character") |> - validate_if(is.numeric(SurveyTime), - description = "`SurveyTime` should be numeric") |> - validate_if(is.numeric(CPI_Time), - description = "`CPI_Time` should be numeric") |> - validate_if(is.character(DataType), - description = "`DataType` should be character") |> - validate_cols(in_set(c("x", "X", "y", "Y")), - DataType, description = "`DataType` values within range") |> - validate_if(is.numeric(SurveyMean_LCU), - description = "`SurveyMean_LCU` should be numeric") |> - validate_if(is.numeric(currency), - description = "`currency` should be numeric") |> - validate_if(is.character(source), - description = "`source` should be character") |> - validate_if(is.character(SurveyID), - description = "`SurveyID` should be character") |> - validate_if(is.numeric(SurveyMean_PPP), - description = "`SurveyMean_PPP` should be numeric") |> - validate_if(is.character(DistributionFileName), - description = "`DistributionFileName` should be character") |> - validate_cols(is.logical, Comment, description = "Comment should be logical") |> - validate_cols(not_na, CountryCode, Coverage, SurveyTime, DataType, - description = "no missing values in key variables") |> - validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R deleted file mode 100644 index 14052a8..0000000 --- a/R/gdp_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output gdp data -#' -#' @param gdp output gdp data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ - - stopifnot("GDP output data is not loaded" = !is.null(gdp)) - - report <- data_validation_report() - - validate(gdp, name = "GDP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - validate_if(is.numeric(gdp), - description = "`gdp` should be numeric") |> - # validate_if(is.character(gdp_domain), - # description = "`gdp_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # gdp_domain, description = "`gdp_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/get_error_validation.R b/R/get_error_validation.R deleted file mode 100644 index a6de5e6..0000000 --- a/R/get_error_validation.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Get validation report data validation error report -#' -#' @param vlddata validation data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' -#' @export -get_error_validation <- function(vlddata, detail){ - - stopifnot("Validation data is not availabel" = !is.null(vlddata)) - - err_t <- NULL - - if (any(vlddata$type == "error")){ - - err_t <- vlddata[type == "error", - .(table_name, description, call, - message, type)] - } - - - if (isFALSE(detail)) { - - cli::cli_abort("Description of invalid cases for {unique(err_t$table_name)}, - {err_t$description}") - - } else { - - if (!rlang::env_has(.pipaux, "validation_report")){ - - rlang::env_poke(.pipaux, "validation_report", err_t) - - } else { - - compiled_result <- rbind(.pipaux$validation_report, err_t) - rlang::env_poke(.pipaux, "validation_report", compiled_result) - - } - - cli::cli_inform("Validation report ({.field validation_report}) has been added to the environment varaible ({.field .pipaux}).") - } - -} diff --git a/R/incgroup_validate_output.R b/R/incgroup_validate_output.R deleted file mode 100644 index defeebf..0000000 --- a/R/incgroup_validate_output.R +++ /dev/null @@ -1,50 +0,0 @@ -#' Validate income group output data -#' -#' @param incgroup income group output data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ - - stopifnot("Income group output data is not loaded" = !is.null(incgroup)) - - report <- data_validation_report() - - validate(incgroup, name = "Income group output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year_data), - description = "`year_data` should be numeric") |> - validate_if(is.character(income_group), - description = "`income_group` should be character") |> - validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), - income_group, description = "`income_group` values within range") |> - validate_if(is.character(income_group_code), - description = "`income_group_code` should be character") |> - validate_cols(in_set(c("HIC", "LIC", "LMIC", "UMIC")), - income_group_code, description = "`income_group_code` values within range") |> - validate_if(is.character(incgroup_historical), - description = "`incgroup_historical` should be character") |> - validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), - incgroup_historical, description = "`incgroup_historical` values within range") |> - validate_if(is.character(fcv_historical), - description = "`fcv_historical` should be character") |> - validate_if(is.character(ssa_subregion_code), - description = "`ssa_subregion_code` should be character") |> - validate_cols(not_na, country_code, year_data, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year_data), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/load_aux.R b/R/load_aux.R index 8f858b9..eb87d9d 100644 --- a/R/load_aux.R +++ b/R/load_aux.R @@ -1,6 +1,6 @@ #' Load any auxiliary data #' -#' @inheritParams pip_pfw +#' @inheritParams aux_pfw #' @inheritParams pipfun::load_from_gh #' @param apply_label logical: If TRUE, predefined labels will applied. #' @param ppp_defaults logical: If TRUE, wider format ppp data will be returned @@ -38,7 +38,7 @@ load_aux <- function(measure, if (apply_label) { - df <- pip_aux_labels(df, measure = measure) + df <- aux_labels_pip(df, measure = measure) } if (inherits(df, "data.frame")) { @@ -106,3 +106,175 @@ find_path <- function(file_paths) { } +#' Load Raw Auxiliary data +#' +#' @description `r lifecycle::badge("superseded")` +#' +#' This function is deprecated because of the new, more flexible and general +#' function `pipfun::load_from_gh()` +#' @param measure character: measure to be loaded +#' @param owner character: Github repo owner. Default is +#' `getOption("pipfun.ghowner")` +#' @param repo character: name of the repo +#' @param branch character: either "DEV" or "PROD". Refers to the branch that +#' will be used to update either the development server or production. +#' @param tag character: specific release to be used in the update. +#' @param filename character: Name of file name without the ".csv" extension. +#' Default is `measure` +#' @param ext character: Extension of `filename`. Default "csv" +#' @param ... parameters to be passed to the loading functions depending of the +#' extension used +#' +#' @return dataset +#' @keywords internal +load_raw_aux <- function(measure, + owner = getOption("pipfun.ghowner"), + repo = paste0("aux_", measure), + branch = c("DEV","PROD","main"), + tag = match.arg(branch), + filename = measure, + ext = "csv", + ...) { + + lifecycle::deprecate_warn("0.1.0.9003", + "load_raw_aux()", + "pipfun::load_from_gh()") + + + # ____________________________________________________________________________ + # on.exit #### + on.exit({ + + if (exists("temp_file")) { + if (fs::file_exists(temp_file)) { + unlink(temp_file) + } + } + # close(path) + + }) + + # ____________________________________________________________________________ + # Defenses #### + branch <- match.arg(branch) + stopifnot(exprs = { + + }) + + # ____________________________________________________________________________ + # Early returns #### + if (FALSE) { + return() + } + + # ____________________________________________________________________________ + # Computations #### + + path <- + glue("https://github.com/{owner}/{repo}/raw/{tag}/{filename}.{ext}") + # path <- file(path) + + tryCatch( + expr = { + # load depending of the extension + df <- suppressMessages( # suppress any loading message + + if (ext == "csv") { + + # readr::read_csv(path, ...) + readr::read_csv(path, ...) + + } else if (ext %in% c("xls", "xlsx")) { + + temp_file <- tempfile(fileext = ext) + req <- httr::GET(path, + # write result to disk + httr::write_disk(path = temp_file)) + + + readxl::read_excel(path = temp_file, ...) + + } else if (ext == "dta") { + + haven::read_dta(path, ...) + + } else if (ext == "qs") { + + qs::qread(path, ...) + + } else if (ext == "fst") { + + fst::read_fst(path, ...) + + } else if (ext == "yaml") { + + yaml::read_yaml(path, ...) + + } + + ) + + if (is.data.frame(df)) { + setDT(df) + } + }, + # end of expr section + + error = function(e) { + if (tag == branch) { + + ## ............................................................................ + ## Error in branches #### + + branches <- get_gh(owner, repo, what = "branches") + + if (!(branch %in% branches)) { + msg <- c( + "{.field branch} specified ({branch}) does not exist in repo + {.file {owner}/{repo}}", + "i" = "Select one among {.field {branches}}" + ) + cli::cli_abort(msg, class = "pipaux_error") + + } else { + msg <- c("Problem loading {.file {filename}.{ext}} Correctly: + {e$message}") + cli::cli_abort(msg, class = "pipaux_error", + wrap = TRUE) + + } + + } else { + + ## ............................................................................ + ## Error in tags #### + + tags <- get_gh(owner, repo, what = "tags") + + if (!(tag %in% tags)) { + msg <- c( + "{.field tag} specified ({tag}) does not exist in repo + {.file {owner}/{repo}}", + "i" = "Select one among {.field {tags}}" + ) + cli::cli_abort(msg, class = "pipaux_error") + + } else { + msg <- c("Could not load {.file {filename}.{ext}} from Github repo: + {e$message}") + cli::cli_abort(msg, class = "pipaux_error") + + } + } + + } # end of finally section + + ) # End of trycatch + + # ____________________________________________________________________________ + # Return #### + return(df) + +} + + diff --git a/R/load_raw_aux.R b/R/load_raw_aux.R deleted file mode 100644 index 0928e5c..0000000 --- a/R/load_raw_aux.R +++ /dev/null @@ -1,170 +0,0 @@ -#' Load Raw Auxiliary data -#' -#' @description `r lifecycle::badge("superseded")` -#' -#' This function is deprecated because of the new, more flexible and general -#' function `pipfun::load_from_gh()` -#' @param measure character: measure to be loaded -#' @param owner character: Github repo owner. Default is -#' `getOption("pipfun.ghowner")` -#' @param repo character: name of the repo -#' @param branch character: either "DEV" or "PROD". Refers to the branch that -#' will be used to update either the development server or production. -#' @param tag character: specific release to be used in the update. -#' @param filename character: Name of file name without the ".csv" extension. -#' Default is `measure` -#' @param ext character: Extension of `filename`. Default "csv" -#' @param ... parameters to be passed to the loading functions depending of the -#' extension used -#' -#' @return dataset -#' @keywords internal -load_raw_aux <- function(measure, - owner = getOption("pipfun.ghowner"), - repo = paste0("aux_", measure), - branch = c("DEV","PROD","main"), - tag = match.arg(branch), - filename = measure, - ext = "csv", - ...) { - - lifecycle::deprecate_warn("0.1.0.9003", - "load_raw_aux()", - "pipfun::load_from_gh()") - - - # ____________________________________________________________________________ - # on.exit #### - on.exit({ - - if (exists("temp_file")) { - if (fs::file_exists(temp_file)) { - unlink(temp_file) - } - } - # close(path) - - }) - - # ____________________________________________________________________________ - # Defenses #### - branch <- match.arg(branch) - stopifnot(exprs = { - - }) - - # ____________________________________________________________________________ - # Early returns #### - if (FALSE) { - return() - } - - # ____________________________________________________________________________ - # Computations #### - - path <- - glue("https://github.com/{owner}/{repo}/raw/{tag}/{filename}.{ext}") - # path <- file(path) - - tryCatch( - expr = { - # load depending of the extension - df <- suppressMessages( # suppress any loading message - - if (ext == "csv") { - - # readr::read_csv(path, ...) - readr::read_csv(path, ...) - - } else if (ext %in% c("xls", "xlsx")) { - - temp_file <- tempfile(fileext = ext) - req <- httr::GET(path, - # write result to disk - httr::write_disk(path = temp_file)) - - - readxl::read_excel(path = temp_file, ...) - - } else if (ext == "dta") { - - haven::read_dta(path, ...) - - } else if (ext == "qs") { - - qs::qread(path, ...) - - } else if (ext == "fst") { - - fst::read_fst(path, ...) - - } else if (ext == "yaml") { - - yaml::read_yaml(path, ...) - - } - - ) - - if (is.data.frame(df)) { - setDT(df) - } - }, - # end of expr section - - error = function(e) { - if (tag == branch) { - - ## ............................................................................ - ## Error in branches #### - - branches <- get_gh(owner, repo, what = "branches") - - if (!(branch %in% branches)) { - msg <- c( - "{.field branch} specified ({branch}) does not exist in repo - {.file {owner}/{repo}}", - "i" = "Select one among {.field {branches}}" - ) - cli::cli_abort(msg, class = "pipaux_error") - - } else { - msg <- c("Problem loading {.file {filename}.{ext}} Correctly: - {e$message}") - cli::cli_abort(msg, class = "pipaux_error", - wrap = TRUE) - - } - - } else { - - ## ............................................................................ - ## Error in tags #### - - tags <- get_gh(owner, repo, what = "tags") - - if (!(tag %in% tags)) { - msg <- c( - "{.field tag} specified ({tag}) does not exist in repo - {.file {owner}/{repo}}", - "i" = "Select one among {.field {tags}}" - ) - cli::cli_abort(msg, class = "pipaux_error") - - } else { - msg <- c("Could not load {.file {filename}.{ext}} from Github repo: - {e$message}") - cli::cli_abort(msg, class = "pipaux_error") - - } - } - - } # end of finally section - - ) # End of trycatch - - # ____________________________________________________________________________ - # Return #### - return(df) - -} diff --git a/R/merger_aux.R b/R/merger_aux.R index 9b7e16b..0ca6d95 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -42,7 +42,7 @@ merger_aux <- function(aux_data1, if (measure1 == "pfw" || measure2 == "pfw"){ # generate a dataset that can be used to add reporting_level variable to pfw data - pfw_key <- pip_pfw_key() + pfw_key <- aux_pfw_key() pfw <- pfw_key[pfw, on = .(country_code, survey_year, survey_acronym, cpi_domain_var)] diff --git a/R/metadata_validate_output.R b/R/metadata_validate_output.R deleted file mode 100644 index 9976802..0000000 --- a/R/metadata_validate_output.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate output metadata data -#' -#' @param metadata metadata data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ - - stopifnot("Metadata data is not loaded" = !is.null(metadata)) - - report <- data_validation_report() - - validate(metadata, name = "Metadata output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.character(country_name), - description = "`country_name` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.character(survey_title), - description = "`survey_title` should be character") |> - validate_if(is.character(survey_conductor), - description = "`survey_conductor` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), - welfare_type, description = "`welfare_type` values within range") |> - validate_if(is.character(distribution_type), - description = "`distribution_type` should be character") |> - validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), - distribution_type, description = "`distribution_type` values within range") |> - validate_cols(not_na, country_code, year, welfare_type, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, welfare_type), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/metadata_validate_raw.R b/R/metadata_validate_raw.R deleted file mode 100644 index 32331fd..0000000 --- a/R/metadata_validate_raw.R +++ /dev/null @@ -1,81 +0,0 @@ -#' Validate raw metadata data -#' -#' @param metadata raw metadata data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -metadata_validate_raw <- function(metadata, detail = getOption("pipaux.detail.raw")){ - - stopifnot("metadata raw data is not loaded" = !is.null(metadata)) - - report <- data_validation_report() - - validate(metadata, name = "metadata raw data validation") |> - validate_if(is.character(status), - description = "`status` should be character") |> - validate_if(is.character(reg), - description = "`reg` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAR", "SSA")), - reg, description = "`reg` values within range") |> - validate_if(is.numeric(id), - description = "`id` should be numeric") |> - validate_if(is.character(svy_id), - description = "`svy_id` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(title), - description = "`title` should be character") |> - validate_if(is.character(data_access), - description = "`data_access` should be character") |> - validate_if(is.numeric(year_start), - description = "`year_start` should be numeric") |> - validate_if(is.numeric(year_end), - description = "`year_end` should be numeric") |> - validate_if(is.character(authoring_entity_name), - description = "`authoring_entity_name` should be character") |> - validate_if(is.character(authoring_entity_affiliation), - description = "`authoring_entity_affiliation` should be character") |> - validate_if(is.character(contact_email), - description = "`contact_email` should be character") |> - validate_if(is.character(contact_uri), - description = "`contact_uri` should be character") |> - validate_if(is.character(abstract), - description = "`abstract` should be character") |> - validate_if(is.character(collection_dates_cycle), - description = "`collection_dates_cycle` should be character") |> - validate_if(is.character(collection_dates_start), - description = "`collection_dates_start` should be character") |> - validate_if(is.character(collection_dates_end), - description = "`collection_dates_end` should be character") |> - validate_if(is.character(coverage), - description = "`coverage` should be character") |> - validate_if(is.character(sampling_procedure), - description = "`sampling_procedure` should be character") |> - validate_if(is.character(collection_mode), - description = "`collection_mode` should be character") |> - validate_if(is.character(coll_situation), - description = "coll_situation` should be character") |> - validate_if(is.character(weight), - description = "`weight` should be character") |> - validate_if(is.character(cleaning_operations), - description = "`cleaning_operations` should be character") |> - validate_if(is.character(coverage_notes), - description = "`coverage_notes` should be character") |> - validate_cols(not_na, svy_id, - description = "no missing values in key variables") |> - validate_if(is_uniq(svy_id), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/mpd_validate_raw.R b/R/mpd_validate_raw.R deleted file mode 100644 index faa3411..0000000 --- a/R/mpd_validate_raw.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Validate raw maddison data -#' -#' @param mpd raw mpd data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ - - stopifnot("mpd/ maddison raw data is not loaded" = !is.null(mpd)) - - report <- data_validation_report() - - validate(mpd, name = "mdp raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(mpd_gdp), - description = "`mpd_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/npl_validate_output.R b/R/npl_validate_output.R deleted file mode 100644 index f4a3939..0000000 --- a/R/npl_validate_output.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate npl output data -#' -#' @param npl output data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ - - stopifnot("NPL output data is not loaded" = !is.null(npl)) - - report <- data_validation_report() - - validate(npl, name = "NPL output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(nat_headcount), - description = "`nat_headcount` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/npl_validate_raw.R b/R/npl_validate_raw.R deleted file mode 100644 index 0e16fcd..0000000 --- a/R/npl_validate_raw.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Validate npl raw data -#' -#' @param npl raw npl data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ - - stopifnot("NPL raw data is not loaded" = !is.null(npl)) - - report <- data_validation_report() - - validate(npl, name = "NPL raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - # validate_cols(in_set(c("AFE", "AFW", "EAP", "ECA", "LAC", "MNA", "SAR")), - # region, description = "`region` values within range") |> - validate_if(is.character(countrycode), - description = "`countrycode` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(vsi_pov_nahc_nc), - description = "`vsi_pov_nahc_nc` should be numeric") |> - validate_if(is.numeric(vsi_pov_nahc), - description = "`vsi_pov_nahc` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_cols(not_na, countrycode, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(countrycode, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R deleted file mode 100644 index dd91bad..0000000 --- a/R/pce_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output pce data -#' -#' @param pce output pce data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ - - stopifnot("PCE clean data is not loaded" = !is.null(pce)) - - report <- data_validation_report() - - validate(pce, name = "PCE output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(pce), - description = "`pce` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - # validate_if(is.character(pce_domain), - # description = "`pce_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # pce_domain, description = "`pce_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R deleted file mode 100644 index b7af2f6..0000000 --- a/R/pfw_validate_output.R +++ /dev/null @@ -1,190 +0,0 @@ -#' Validate clean pfw data -#' -#' @param pfw clean pfw data, output via `pip_pfw_clean` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ - - stopifnot("PFW clean data is not loaded" = !is.null(pfw)) - - report <- data_validation_report() - - validate(pfw, name = "PFW output data validation") |> - validate_if(is.character(wb_region_code), - description = "`wb_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - wb_region_code, description = "`wb_region_code` values within range") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.character(pcn_region_code), - description = "`pcn_region_code` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - pcn_region_code, description = "`pcn_region_code` values within range") |> - validate_if(is.character(ctryname), - description = "`ctryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(surveyid_year), - description = "`surveyid_year` should be numeric") |> - validate_if(is.numeric(timewp), - description = "`timewp` should be numeric") |> - validate_if(is.numeric(fieldwork), - description = "`fieldwork` should be numeric") |> - validate_if(is.character(survey_acronym), - description = "`survey_acronym` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(altname), - description = "`altname` should be character") |> - validate_if(is.character(survey_time), - description = "`survey_time` should be character") |> - validate_if(is.numeric(wbint_link), - description = "`wbint_link` should be numeric") |> - validate_if(is.numeric(wbext_link), - description = "`wbext_link` should be numeric") |> - validate_if(is.numeric(alt_link), - description = "`alt_link` should be numeric") |> - validate_if(is.numeric(pip_meta), - description = "`pip_meta` should be numeric") |> - validate_if(is.character(surv_title), - description = "`surv_title` should be character") |> - validate_if(is.character(surv_producer), - description = "`surv_producer` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(welfare_type), - description = "`welfare_type` should be character") |> - validate_cols(in_set(c("consumption", "income")), - welfare_type, description = "`welfare_type` values within range") |> - validate_if(is.numeric(use_imputed), - description = "`use_imputed` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_imputed, description = "`use_imputed` values within range") |> - validate_if(is.numeric(use_microdata), - description = "`use_microdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_microdata, description = "`use_microdata` values within range") |> - validate_if(is.numeric(use_bin), - description = "`use_bin` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_bin, description = "`use_bin` values within range") |> - validate_if(is.numeric(use_groupdata), - description = "`use_groupdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_groupdata, description = "`use_groupdata` values within range") |> - validate_if(is.numeric(reporting_year), - description = "`reporting_year` should be numeric") |> - validate_if(is.numeric(survey_comparability), - description = "`survey_comparability` should be numeric") |> - validate_if(is.character(comp_note), - description = "`comp_note` should be character") |> - validate_if(is.character(preferable), - description = "`preferable` should be character") |> - validate_if(is.numeric(display_cp), - description = "`display_cp` should be numeric") |> - validate_cols(in_set(c(0, 1)), - display_cp, description = "`display_cp` values within range") |> - validate_if(is.character(fieldwork_range), - description = "`fieldwork_range` should be character") |> - validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") |> - validate_if(is.character(newref), - description = "`newref` should be character") |> - validate_if(is.numeric(ref_year_des), - description = "`ref_year_des` should be numeric") |> - validate_if(is.character(wf_baseprice), - description = "`wf_baseprice` should be character") |> - validate_if(is.character(wf_baseprice_note), - description = "`wf_baseprice_note` should be character") |> - validate_if(is.numeric(wf_baseprice_des), - description = "`wf_baseprice_des` should be numeric") |> - validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, - description = "`wf_baseprice_des` values within range") |> - validate_if(is.numeric(wf_spatial_des), - description = "`wf_spatial_des` should be numeric") |> - validate_if(is.character(wf_spatial_var), - description = "`wf_spatial_var` should be character") |> - validate_if(is.numeric(cpi_replication), - description = "`cpi_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - cpi_replication, description = "`cpi_replication` values within range") |> - validate_if(is.numeric(cpi_domain), - description = "`cpi_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.character(cpi_domain_var), - description = "`cpi_domain_var` should be character") |> - validate_if(is.numeric(wf_currency_des), - description = "`wf_currency_des` should be numeric") |> - validate_cols(in_set(c(0, 2)), - wf_currency_des, description = "`wf_currency_des` values within range") |> - validate_if(is.numeric(ppp_replication), - description = "`ppp_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - ppp_replication, description = "`ppp_replication` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(ppp_domain_var), - description = "`ppp_domain_var` should be character") |> - validate_if(is.numeric(wf_add_temp_des), - description = "`wf_add_temp_des` should be numeric") |> - validate_cols(in_set(c(-9, 0)), - wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> - validate_if(is.numeric(wf_add_temp_var), - description = "`wf_add_temp_var` should be numeric") |> - validate_if(is.numeric(wf_add_spatial_des), - description = "`wf_add_spatial_des` should be numeric") |> - validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, - description = "`wf_add_spatial_des` values within range") |> - validate_if(is.numeric(wf_add_spatial_var), - description = "`wf_add_spatial_var` should be numeric") |> - validate_if(is.numeric(tosplit), - description = "`tosplit` should be numeric") |> - validate_cols(in_set(c(NA, 1)), tosplit, - description = "`tosplit` values within range") |> - validate_if(is.character(tosplit_var), - description = "`tosplit_var` should be character") |> - validate_if(is.numeric(inpovcal), - description = "`inpovcal` should be numeric") |> - validate_cols(in_set(c(1)), inpovcal, - description = "`inpovcal` values within range") |> - validate_if(is.character(oth_welfare1_type), - description = "`oth_welfare1_type` should be character") |> - validate_if(is.character(oth_welfare1_var), - description = "`oth_welfare1_var` should be character") |> - validate_if(is.numeric(gdp_domain), - description = "`gdp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), gdp_domain, - description = "`gdp_domain` values within range") |> - validate_if(is.numeric(pce_domain), - description = "`pce_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pce_domain, - description = "`pce_domain` values within range") |> - validate_if(is.numeric(pop_domain), - description = "`pop_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pop_domain, - description = "`pop_domain` values within range") |> - validate_if(is.character(pfw_id), - description = "`pfw_id` should be character") |> - validate_cols(not_na, country_code, year, welfare_type, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, welfare_type), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pfw_validate_raw.R b/R/pfw_validate_raw.R deleted file mode 100644 index 69b7f66..0000000 --- a/R/pfw_validate_raw.R +++ /dev/null @@ -1,191 +0,0 @@ -#' Validate raw pfw data -#' -#' @param pfw raw pfw data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ - - stopifnot("PFW raw data is not loaded" = !is.null(pfw)) - - report <- data_validation_report() - - validate(pfw, name = "PFW raw data validation") |> - validate_if(is.character(region), - description = "`region` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), - region, description = "`region` values within range") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(reg_pcn), - description = "`reg_pcn` should be character") |> - validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), - reg_pcn, description = "`reg_pcn` values within range") |> - validate_if(is.character(ctryname), - description = "`ctryname` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(surveyid_year), - description = "`surveyid_year` should be numeric") |> - validate_if(is.numeric(timewp), - description = "`timewp` should be numeric") |> - validate_if(is.numeric(fieldwork), - description = "`fieldwork` should be numeric") |> - validate_if(is.character(survname), - description = "`survname` should be character") |> - validate_if(is.character(link), - description = "`link` should be character") |> - validate_if(is.character(altname), - description = "`altname` should be character") |> - validate_if(is.character(survey_time), - description = "`survey_time` should be character") |> - validate_if(is.numeric(wbint_link), - description = "`wbint_link` should be numeric") |> - validate_if(is.numeric(wbext_link), - description = "`wbext_link` should be numeric") |> - validate_if(is.numeric(alt_link), - description = "`alt_link` should be numeric") |> - validate_if(is.numeric(pip_meta), - description = "`pip_meta` should be numeric") |> - validate_if(is.character(surv_title), - description = "`surv_title` should be character") |> - validate_if(is.character(surv_producer), - description = "`surv_producer` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_cols(in_set(c("N", "R", "U")), - survey_coverage, description = "`survey_coverage` values within range") |> - validate_if(is.character(datatype), - description = "`datatype` should be character") |> - validate_cols(in_set(c("C", "I", "c", "i")), - datatype, description = "`datatype` values within range") |> - validate_if(is.numeric(use_imputed), - description = "`use_imputed` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_imputed, description = "`use_imputed` values within range") |> - validate_if(is.numeric(use_microdata), - description = "`use_microdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_microdata, description = "`use_microdata` values within range") |> - validate_if(is.numeric(use_bin), - description = "`use_bin` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_bin, description = "`use_bin` values within range") |> - validate_if(is.numeric(use_groupdata), - description = "`use_groupdata` should be numeric") |> - validate_cols(in_set(c(0, 1)), - use_groupdata, description = "`use_groupdata` values within range") |> - validate_if(is.numeric(rep_year), - description = "`rep_year` should be numeric") |> - validate_if(is.numeric(comparability), - description = "`comparability` should be numeric") |> - validate_if(is.character(comp_note), - description = "`comp_note` should be character") |> - validate_if(is.character(preferable), - description = "`preferable` should be character") |> - validate_if(is.numeric(display_cp), - description = "`display_cp` should be numeric") |> - validate_cols(in_set(c(0, 1)), - display_cp, description = "`display_cp` values within range") |> - validate_if(is.character(fieldwork_range), - description = "`fieldwork_range` should be character") |> - validate_if(is.numeric(ref_year), - description = "`ref_year` should be numeric") |> - validate_if(is.character(newref), - description = "`newref` should be character") |> - validate_if(is.numeric(ref_year_des), - description = "`ref_year_des` should be numeric") |> - validate_if(is.character(wf_baseprice), - description = "`wf_baseprice` should be character") |> - validate_if(is.character(wf_baseprice_note), - description = "`wf_baseprice_note` should be character") |> - validate_if(is.numeric(wf_baseprice_des), - description = "`wf_baseprice_des` should be numeric") |> - validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, - description = "`wf_baseprice_des` values within range") |> - validate_if(is.numeric(wf_spatial_des), - description = "`wf_spatial_des` should be numeric") |> - validate_if(is.character(wf_spatial_var), - description = "`wf_spatial_var` should be character") |> - validate_if(is.numeric(cpi_replication), - description = "`cpi_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - cpi_replication, description = "`cpi_replication` values within range") |> - validate_if(is.numeric(cpi_domain), - description = "`cpi_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - cpi_domain, description = "`cpi_domain` values within range") |> - validate_if(is.character(cpi_domain_var), - description = "`cpi_domain_var` should be character") |> - validate_if(is.numeric(wf_currency_des), - description = "`wf_currency_des` should be numeric") |> - validate_cols(in_set(c(0, 2)), - wf_currency_des, description = "`wf_currency_des` values within range") |> - validate_if(is.numeric(ppp_replication), - description = "`ppp_replication` should be numeric") |> - validate_cols(in_set(c(-9, 1)), - ppp_replication, description = "`ppp_replication` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(ppp_domain_var), - description = "`ppp_domain_var` should be character") |> - validate_if(is.numeric(wf_add_temp_des), - description = "`wf_add_temp_des` should be numeric") |> - validate_cols(in_set(c(-9, 0)), - wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> - validate_if(is.numeric(wf_add_temp_var), - description = "`wf_add_temp_var` should be numeric") |> - validate_if(is.numeric(wf_add_spatial_des), - description = "`wf_add_spatial_des` should be numeric") |> - validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, - description = "`wf_add_spatial_des` values within range") |> - validate_if(is.numeric(wf_add_spatial_var), - description = "`wf_add_spatial_var` should be numeric") |> - validate_if(is.numeric(tosplit), - description = "`tosplit` should be numeric") |> - validate_cols(in_set(c(NA, 1)), tosplit, - description = "`tosplit` values within range") |> - validate_if(is.character(tosplit_var), - description = "`tosplit_var` should be character") |> - validate_if(is.numeric(inpovcal), - description = "`inpovcal` should be numeric") |> - validate_cols(in_set(c(1)), inpovcal, - description = "`inpovcal` values within range") |> - validate_if(is.character(oth_welfare1_type), - description = "`oth_welfare1_type` should be character") |> - validate_if(is.character(oth_welfare1_var), - description = "`oth_welfare1_var` should be character") |> - validate_if(is.numeric(gdp_domain), - description = "`gdp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), gdp_domain, - description = "`gdp_domain` values within range") |> - validate_if(is.numeric(pce_domain), - description = "`pce_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pce_domain, - description = "`pce_domain` values within range") |> - validate_if(is.numeric(pop_domain), - description = "`pop_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), pop_domain, - description = "`pop_domain` values within range") |> - validate_if(is.character(pfw_id), - description = "`pfw_id` should be character") |> - validate_cols(not_na, code, year, survname, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, year, survname), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/pip_countries.R b/R/pip_countries.R deleted file mode 100644 index 009f7a9..0000000 --- a/R/pip_countries.R +++ /dev/null @@ -1,68 +0,0 @@ -#' PIP Countries -#' -#' Update or load a dataset with countries. -#' -#' @inheritParams pip_cpi -#' @inheritParams pipfun::load_from_gh -#' @export -pip_countries <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - measure <- "countries" - action <- match.arg(action) - branch <- match.arg(branch) - - if (action == "update") { - - ## Special national accounts -------- - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - - - pfw <- pfw[inpovcal == 1, - ][, - c("country_code") - ] |> - unique() - - - countries <- cl[country_code %in% pfw$country_code - ][, - c("pcn_region", "pcn_region_code") := NULL] - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## save -------- - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(countries, "aux_name", "countries") - setattr(countries, - "aux_key", - c("country_code")) - - pipfun::pip_sign_save( - x = countries, - measure = measure, - msrdir = msrdir, - force = force - ) - } else { - df <- load_aux( - maindir = maindir, - measure = measure - ) - return(df) - } -} diff --git a/R/pip_country_list.R b/R/pip_country_list.R deleted file mode 100644 index bd9266a..0000000 --- a/R/pip_country_list.R +++ /dev/null @@ -1,88 +0,0 @@ -#' List of countries -#' -#' Load or update dataset with WDI countries. See details. -#' -#' This function creates a combined dataset of countries in WDI and their -#' respective regional classification by querying `wbstats::wb_countries()`, as -#' well as reading from the PovcalNet Masterfile to fetch PCN region codes. -#' -#' The dependency on the PCN Masterfile should be changed in the future. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -#' @return logical if `action = "update"` or data.table if `action = "load"` -pip_country_list <- function(action = c("update", "load"), - maindir = gls$PIP_DATA_DIR, - force = FALSE, - branch = c("DEV", "PROD", "main"), - class_branch = "master", - detail = getOption("pipaux.detail.raw") - ) { - measure <- "country_list" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - - ## Special national accounts -------- - cl <- pip_country_list_update(class_branch = class_branch) - - # validate country list raw data - cl_validate_raw(cl, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(cl, "aux_name", "country_list") - setattr(cl, - "aux_key", - c("country_code")) - - saved <- pipfun::pip_sign_save( - x = cl, - measure = measure, - msrdir = msrdir, - force = force - ) - - if (saved) { - cl_sha <- digest::sha1(cl) - out <- gh::gh( - "GET /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "aux_country_list", - path = "sha_country_list.txt", - .params = list(ref = "DEV") - ) - - res <- gh::gh( - "PUT /repos/{owner}/{repo}/contents/{path}", - owner = "PIP-Technical-Team", - repo = "aux_country_list", - path = "sha_country_list.txt", - .params = list( - branch = branch, - message = paste0("update on ", prettyNum(Sys.time())), - sha = out$sha, - content = base64enc::base64encode(charToRaw(cl_sha)) - ), - .token = Sys.getenv("GITHUB_PAT") - ) - - } - - return(invisible(saved)) - - } else { - - df <- load_aux(maindir = maindir, - measure = measure, - branch = branch) - return(df) - } -} diff --git a/R/pip_country_list_update.R b/R/pip_country_list_update.R deleted file mode 100644 index b696bc2..0000000 --- a/R/pip_country_list_update.R +++ /dev/null @@ -1,257 +0,0 @@ -#' Update Country LIst -#' -#' @param class_branch character: names of branch of GPID-WB/class repo. Default -#' if master -pip_country_list_update <- - function(class_branch = "master") { - - # Check arguments - measure <- "country_list" - - # ____________________________________________________________________________ - # Read Data from WDI #### - - wdi <- - wbstats::wb_countries() |> - as.data.table() |> - { - \(.) { - - # clean data - - iso2 <- grep("_iso2c", names(.), value = TRUE) - x <- .[, !..iso2] - - iso3 <- grep("_iso3c", names(x), value = TRUE) - - withiso <- - gsub("_iso3c", "", iso3) |> - paste0(collapse = "|") |> - grep(names(x), value = TRUE) - - tokeep <- c("country", "iso3c", withiso) - - x[region != "Aggregates" - ][, - ..tokeep - ] - } - }() - - - # rename iso3c - owdi <- names(wdi) - nwdi <- - gsub("iso3c", "code", names(wdi)) - - setnames(wdi, owdi, nwdi) - - - # Add "(excluding high income)" to South Asia - wdi[, admin_region := fifelse(test = grepl("income", admin_region) | is.na(admin_region), - yes = admin_region , - no = paste(admin_region , "(excluding high income)"))] - - # ____________________________________________________________________________ - # Read data from CLASS.dta file #### - - ## Special national accounts -------- - byv <- - c( - "code", - "region_SSA", - "fcv_current", - "region_pip") - - dt <- pipfun::load_from_gh( - measure = measure, - owner = "GPID-WB", - repo = "Class", - branch = class_branch, - filename = "OutputData/CLASS", - ext = "dta" - ) |> - as.data.table() |> - unique(by = byv) |> - (\(.){.[, ..byv]})() # select just these variables - - - dt_o <- names(dt) - dt_n <- gsub("_current", "", dt_o) - - setnames(dt, dt_o, dt_n) - setnames(dt, - old = c("region_SSA", "region_pip"), - new = c("africa_split_code", "pip_region_code")) - - # ____________________________________________________________________________ - # Merge wdi and CLASS #### - - - rg <- - joyn::joyn(dt, wdi, - by = "code", - match_type = "1:1", - reportvar = FALSE, - verbose = FALSE) - - - # ____________________________________________________________________________ - # Clean Data #### - - # PIP region - - rg[, pip_region := fifelse(pip_region_code == "OHI", - yes = "Other High Income Countries", - no = region) - ] - - - - # East and West Africa - - rg[, - africa_split := fcase( - africa_split_code == "", "", - africa_split_code == "AFE", "Eastern and Southern Africa", - africa_split_code == "AFW", "Western and Central Africa", - default = "") - ][, - africa_split_code := fifelse(test = africa_split_code == "", - yes = "", - no = africa_split_code) - ] - - # Fragile countries - - rg[, - fcv_code := fifelse(fcv == "Yes", "FCVT", "FCVF") - ][, - fcv := fifelse(fcv == "Yes", "Fragile", "Not-fragile")] - - ## Admin regions - - rg[, - admin_region_code := fifelse( - admin_region_code == "" | is.na(admin_region_code), - NA_character_, - paste0(admin_region_code, "-AD"))] - - - # Add PCN region temporarilly - - rg[, - `:=`( - pcn_region = pip_region, - pcn_region_code = pip_region_code - )] - - # ff <- copy(rg) - # rg <- copy(ff) - - # Convert empty strings to NA - vars <- names(rg) - names(vars) <- vars - rg[, (vars) := lapply(.SD, - \(x) { - fifelse(x == "" | is.na(x), NA_character_, x) - } - ) - ] - - - # fix "Not classified" - # ff <- copy(rg) - - # rg <- copy(ff) - - # not_class <- function(x) { - # y <- deparse(substitute(x)) - # fifelse(test = grepl("classified", x), - # paste(x, "by", y), - # x) - # } - # - # rg[, (vars) := lapply(.SD,not_class), .SDcols = vars] - # - # - # rg[, (vars) := lapply(.SD, - # \(x){ - # y <- deparse(substitute(x)) - # # y <- ..x - # fifelse(test = grepl("classified", x), - # paste(x, "by", y), - # x) - # })] - # - # - # rg[lending_type_code == "LNX", unique(lending_type)] - # - - - - rg[, lending_type := fifelse(grepl("classified", lending_type), - paste(lending_type, "by", "lending type"), - lending_type)] - - - - rg[, income_level := fifelse(grepl("classified", income_level), - paste(income_level, "by", "income level"), - income_level)] - - - # Create the World - - rg[, `:=`( - world = "World", - world_code = "WLD" - )] - - - - # janitor::tabyl(rg, region_code, admin_region_code) - # janitor::tabyl(rg, region, admin_region) - # janitor::tabyl(rg, region, pip_region) - - # ____________________________________________________________________________ - # Clean and Save #### - - - rg[, - c( "region_code", "region") := NULL] - - - setnames(x = rg, - old = c("code", "country", "pip_region", "pip_region_code"), - new = c("country_code", "country_name", "region", "region_code") ) - - - - ## Order columns alphabetically ------------ - varn <- names(rg) - setcolorder(rg, sort(varn)) - setcolorder(rg, c("country_code", "country_name")) - - - ## Remove categoeries that we don't need --------- - - rm_agg <- c("fcv", "lending_type", "admin_region") - rm_agg <- c("fcv", "income_level", "lending_type", "admin_region") - - to_rm <- - rm_agg |> - paste0("_code") |> - c(rm_agg) - - rg[, (to_rm) := NULL] - - - # hardcode fixing of TWN's name - rg[country_code == "TWN", - country_name := "Taiwan, China"] - - - rg - -} diff --git a/R/pip_cp.R b/R/pip_cp.R deleted file mode 100644 index 2a65be8..0000000 --- a/R/pip_cp.R +++ /dev/null @@ -1,33 +0,0 @@ -#' Country Profiles -#' -#' Update a list with country profiles data -#' -#' @inheritParams pip_cpi -#' @inheritParams pipfun::load_from_gh -#' @export -pip_cp <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - measure <- "cp" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - pip_cp_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag) - } else { - - dl <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dl) - } -} diff --git a/R/pip_cp_update.R b/R/pip_cp_update.R deleted file mode 100644 index 13d62c3..0000000 --- a/R/pip_cp_update.R +++ /dev/null @@ -1,82 +0,0 @@ -#' Update Country Profiles -#' -#' Update a list with country profiles data -#' -#' @inheritParams pip_cp -#' @keywords internal -pip_cp_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - measure <- "cp" - branch <- match.arg(branch) -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## chart files -------- - - file_names <- - c( - "indicator_values_country_chart4", - "indicator_values_country_KI1", - "indicator_values_country_chart1_chart2_KI2_data", - "indicator_values_country_chart1_chart2_KI2_ID", - "indicator_values_country_chart5", - "indicator_values_country_chart3", - "indicator_values_country_chart6_KI4", - "indicator_values_country_KI5_KI6_KI7" - ) - - - raw_files <- purrr::map(.x = file_names, - .f = ~{ - pipfun::load_from_gh( - measure = "cp", - owner = owner, - branch = branch, - filename = .x) - }) - - - dl <- pip_cp_clean(raw_files, - file_names) -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## download files -------- - fl_files <- c("flat_cp", "flat_shp") - - raw_fl <- purrr::map(.x = fl_files, - .f = ~{ - x <- pipfun::load_from_gh( - measure = "cp", - owner = owner, - branch = branch, - filename = .x, - ext = "dta") - setnames(x, "year", "reporting_year", - skip_absent=TRUE) - }) - names(raw_fl) <- fl_files - dl <- append(dl, list(flat = raw_fl)) - - - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## save -------- - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = dl, - measure = measure, - msrdir = msrdir, - force = force - ) - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -## return -------- - - return(invisible(saved)) -} diff --git a/R/pip_cpi.R b/R/pip_cpi.R deleted file mode 100644 index 6303ed2..0000000 --- a/R/pip_cpi.R +++ /dev/null @@ -1,68 +0,0 @@ -#' PIP CPI -#' -#' Load or update PIP CPI data. -#' -#' @param action character: Either "load" or "update". Default is "update". If -#' "update" data will be updated on the system. If "load" data is loaded in -#' memory. -#' @param maindir character: Main directory of project. -#' @param force logical: If TRUE data will be overwritten. -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pipfun::load_from_gh -#' -#' @export -#' @import data.table -pip_cpi <- function(action = c("update", "load"), - maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - # ____________________________________________________________________________ - # on.exit #### - on.exit({ - - }) - - # ____________________________________________________________________________ - # Defenses #### - measure <- "cpi" - action <- match.arg(action) - branch <- match.arg(branch) - - stopifnot( exprs = { - - } - ) - - # ____________________________________________________________________________ - # Early returns #### - if (FALSE) { - return() - } - - # ____________________________________________________________________________ - # Computations #### - if (action == "update") { - pip_cpi_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - detail = detail) - } - else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } - - -} - - diff --git a/R/pip_cpi_clean.R b/R/pip_cpi_clean.R deleted file mode 100644 index 17d6524..0000000 --- a/R/pip_cpi_clean.R +++ /dev/null @@ -1,71 +0,0 @@ -#' Clean CPI data -#' -#' Clean CPI data from Datalibweb to meet PIP protocols. -#' -#' @param y dataset with CPI data from `pip_cpi_update()`. -#' @param cpivar character: CPI variable to be used as default. Currently it is -#' "cpi2011". -#' @inheritParams pip_cpi_update -#' -#' @keywords internal -pip_cpi_clean <- function(y, - cpivar = getOption("pipaux.cpivar"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - x <- data.table::as.data.table(y) - - # vars to keep - keep_vars <- c( - "country_code", "cpi_year", "survey_year", - "cpi", "ccf", "survey_acronym", "change_cpi2011", - grep("^cpi", names(x), value = TRUE) - ) - - # modifications to the database - x[ - , - c("cur_adj", "ccf") - := { - cur_adj <- ifelse(is.na(cur_adj), 1, cur_adj) - ccf <- 1 / cur_adj - - list(cur_adj, ccf) - } - ][ - , - `:=`( - country_code = code, - cpi_year = as.integer(year), - survey_year = round(ref_year, 2), - cpi = get(cpivar), - survey_acronym = survname, - cpi_domain = as.character(cpi_domain), - cpi_data_level = as.character(cpi_data_level) - ) - ][ - , - # This part should not exist if the raw data - # had been created properly - cpi_data_level := fcase( - tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "0", "rural", - tolower(cpi_domain) %chin% c("urban/rural", "2") & cpi_data_level == "1", "urban", - tolower(cpi_domain) %chin% c("national", "1") & cpi_data_level %chin% c("2", "", NA_character_), "national", - default = "" - ) - ] - # keep final vars - x <- x[, ..keep_vars ] - - x <- unique(x) # remove duplicates - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - x <- x[country_code %in% cl$country_code] - - - return(x) -} diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R deleted file mode 100644 index 7e55397..0000000 --- a/R/pip_cpi_update.R +++ /dev/null @@ -1,72 +0,0 @@ -#' Update CPI -#' -#' @inheritParams pip_cpi -#' @keywords internal -pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - -# ____________________________________________________________________________ -# Set up #### - - measure <- "cpi" - branch <- match.arg(branch) - - -# ____________________________________________________________________________ -# load raw data #### - - cpi <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) - - # validate cpi raw data - cpi_validate_raw(cpi, detail = detail) - -# ____________________________________________________________________________ -# Cleaning #### - - # Clean data - cpi <- pip_cpi_clean(cpi, - maindir = maindir, - branch = branch) - - # drop cpi_domain - cpi <- cpi[, -c("cpi_domain")] - - # changae cpi_year and cpi_data_level to year and reporting_level - cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), - c("year", "reporting_level"), - skip_absent=TRUE) - - setattr(cpi, "aux_name", "cpi") - setattr(cpi, - "aux_key", - c("country_code", "year", "reporting_level", "survey_acronym")) - - # validate cpi clean data before saving it - cpi_validate_output(cpi, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = cpi, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) -} - diff --git a/R/pip_cpi_vintage.R b/R/pip_cpi_vintage.R deleted file mode 100644 index d761193..0000000 --- a/R/pip_cpi_vintage.R +++ /dev/null @@ -1,92 +0,0 @@ -#' Check CPI Vintage -#' -#' @param msrdir character: measure directory. -#' @param dlwdir character: Datalibweb directory -#' @param force logical: If TRUE force update of veintage level 1. -#' -#' @keywords internal -pip_cpi_vintage <- function(msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), - dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), - force = FALSE) { - time <- format(Sys.time(), "%Y%m%d%H%M%S") # find a way to account for time zones - measure <- "cpi" - - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #--------- Prepar3 date --------- - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # get directories - cpi_files <- fs::dir_ls(dlwdir, regexp = "GMD_CPI\\.dta$", recurse = TRUE, type = "file") - - # load data - last_file <- max(cpi_files) - vintage <- load_cpi(last_file) - - tokeep <- names(vintage) |> - {\(.) grep("^cpi[0-9]{4}", ., value = TRUE)}() |> - c("code", "year", "survname", "cpi_data_level", "cpi_ppp_id") - - vintage <- vintage[, ..tokeep] - - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - #--------- check version and save --------- - # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - # save file - sfile <- fs::path(msrdir, "cpi_vintage.rds") - - equal_vintage <- TRUE - if (fs::file_exists(sfile)) { - - cfile <- readr::read_rds(sfile) - attr(cfile, "time") <- NULL # remove attributes - attr(cfile, "user") <- NULL # remove attributes - cf_vt <- all.equal(cfile, vintage) - - if (inherits(cf_vt, "character")) { - equal_vintage <- FALSE - } - } else { - equal_vintage <- FALSE - } - - if (equal_vintage == FALSE || force == TRUE) { - attr(vintage, "time") <- time - attr(vintage, "user") <- Sys.info()[8] - - readr::write_rds( - x = vintage, - file = sfile - ) - } - - return(!equal_vintage) -} # end of vintage_level_2 - -#' Load cpi files and create CPI ID variable -#' @param x character: cpi file name -#' @return data frame -load_cpi <- function(x) { - cpi_ppp_id <- gsub("(.*/Support_2005_)([^/]+)(_CPI\\.dta$)", "\\2", x) - df <- haven::read_dta(x) - df$cpi_ppp_id <- cpi_ppp_id - - to_keep <- c("label") - - to_keep_regx <- paste(to_keep, collapse = "|") - - nn <- names(df) - for (x in seq_along(nn)) { - ats <- attributes(df[[x]]) - atsn <- names(ats) - to_remove <- atsn[!grepl(to_keep_regx, atsn)] - - for (i in seq_along(to_remove)) { - attr(df[[x]], to_remove[i]) <- NULL - } - } - - data.table::setDT(df) - return(df) -} diff --git a/R/pip_gdm.R b/R/pip_gdm.R deleted file mode 100644 index 3fc7dfc..0000000 --- a/R/pip_gdm.R +++ /dev/null @@ -1,45 +0,0 @@ -#' PIP GDM -#' -#' Load or update grouped data means dataset from PovcalNet Masterfile. See -#' details. -#' -#' Survey means cannot be automatically calculated for grouped data, so at some -#' stage the mean needs to be entered manually. This function reads from the PCN -#' Masterfile to ensure that PCN and PIP uses the same data means. -#' -#' The dependency on the PCN Masterfile should be changed in the future. -#' -#' @inheritParams pip_cpi -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pipfun::load_from_gh -#' @export -pip_gdm <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "gdm" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - - pip_gdm_update(force = force, - maindir = maindir, - owner = owner, - branch = branch, - tag = tag, - detail = detail) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} diff --git a/R/pip_gdm_update.R b/R/pip_gdm_update.R deleted file mode 100644 index 72f1e49..0000000 --- a/R/pip_gdm_update.R +++ /dev/null @@ -1,232 +0,0 @@ -#' Update GDM -#' -#' Update GDM data using the PovcalNet Masterfile. -#' -#' @inheritParams pip_gdm -#' @keywords internal -pip_gdm_update <- function(force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "gdm" - branch <- match.arg(branch) - -# _________________________________________________________ -# Load raw file #### - - df <- pipfun::load_from_gh(measure = "gdm", - owner = owner, - branch = branch, - tag = tag, - ext = "csv") - - # validate gdm raw data - gdm_validate_raw(gdm = df, detail = detail) - -# ____________________________________________________________________________ -# Transform dataset #### - - # Select for grouped data surveys - df <- df[grepl("[.]T0[1,2,5]$", - df$DistributionFileName, - ignore.case = TRUE), ] - - # Select and rename columns - old_nms <- c( - "CountryCode", - "SurveyTime", - "DataType", - "Coverage", - "SurveyMean_LCU", - "DistributionFileName", - "SurveyID" - ) - - new_nms <- c( - "country_code", - "survey_year", - "welfare_type", - "pop_data_level", - "survey_mean_lcu", - "pcn_source_file", - "pcn_survey_id" - ) - - setnames(df, old_nms, new_nms) - - df <- df[, ..new_nms] - - # Recode columns - df[, - c("pop_data_level", "welfare_type", "survey_coverage") := - { - x <- tolower(pop_data_level) - - y <- tolower(welfare_type) - y <- fifelse(y == "x", "consumption", "income") - - z <- fifelse(country_code %in% c("CHN", "IDN", "IND"), - "national", pop_data_level) - - list(x, y, z) - } - ] - - - df[, - distribution_type := fifelse(pop_data_level == "national", - "group", - "aggregate") - ][, - gd_type := sub(".*[.]", "", pcn_source_file) - ] - - -## ............................................................................ -## Merge with PFW #### - - # pip_pfw(maindir = maindir, - # force = force, - # owner = owner, - # branch = branch, - # tag = tag) - - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - # Subset columns - pfw <- - pfw[, c( - "country_code", - "welfare_type", - "surveyid_year", - "survey_year", - "survey_acronym", - "inpovcal" - )] - - # Merge to add surveyid_year - tmp <- pfw[, c("country_code", "surveyid_year", "survey_year")] - df <- merge(df, tmp, - all.x = TRUE, - by = c("country_code", "survey_year") - ) - - # Merge to add survey_acronym and inpovcal - df <- merge(df, pfw, - all.x = TRUE, - by = c( - "country_code", "surveyid_year", - "survey_year", "welfare_type" - ) - ) - - # Filter to select surveys in PovcalNet - df <- df[inpovcal == 1] - df <- na.omit(df, "inpovcal") - - -## ............................................................................ -## Merge with inventory #### - - inv <- fst::read_fst(fs::path(maindir, "_inventory/inventory.fst"), - as.data.table = TRUE) - - # Create survey_id column - inv[, - survey_id := sub("[.]dta", "", filename) - ][, - surveyid_year := as.numeric(surveyid_year) - ] - - # Subset GD rows - inv <- inv[module == "PC-GROUP"] - - # Subset columns - inv <- inv[, c("country_code", - "surveyid_year", - "survey_acronym", - "survey_id")] - - # Merge to add PIP survey_id - df <- merge(df, inv, - all.x = TRUE, - by = c( - "country_code", "surveyid_year", - "survey_acronym" - ) - ) - - - # ---- Finalize table ---- - - # Select columns - df <- df[, c( - "country_code", - "surveyid_year", - "survey_year", - "welfare_type", - "survey_mean_lcu", - "distribution_type", - "gd_type", - "pop_data_level", - "pcn_source_file", - "pcn_survey_id", - "survey_id" - )] - - df[, survey_id := toupper(survey_id)] - - # Convert LCU means to daily values - # df$survey_mean_lcu <- df$survey_mean_lcu * (12/365) - - # Sort rows - setorder(df, country_code, surveyid_year, pop_data_level) - - # Sort columns - setcolorder(df, "survey_id") - - - -## ............................................................................ -## Remove any non-WDI countries #### - - pip_country_list(maindir = maindir, - force = force, - branch = branch) - - cl <- load_aux(measure = "country_list", - maindir = maindir, - branch = branch) - - df <- df[country_code %in% cl$country_code] - - - # ---- Save and sign ---- - df <- df |> setnames(c("surveyid_year", "pop_data_level"), - c("year", "reporting_level"), - skip_absent=TRUE) - - setattr(df, "aux_name", "gdm") - setattr(df, - "aux_key", - c("country_code", "year", "reporting_level", "welfare_type")) - - # validate gdm output data - gdm_validate_output(gdm = df, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = df, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) -} diff --git a/R/pip_gdp.R b/R/pip_gdp.R deleted file mode 100644 index b012729..0000000 --- a/R/pip_gdp.R +++ /dev/null @@ -1,42 +0,0 @@ -#' PIP GDP -#' -#' Update or load GDP data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" -#' and "gh" are synonymous -#' @export -pip_gdp <- function(action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = "file", - detail = getOption("pipaux.detail.raw")) { - - measure <- "gdp" - branch <- match.arg(branch) - action <- match.arg(action) - - - if (action == "update") { - pip_gdp_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - from = from, - detail = detail) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} # end of pip_gdp diff --git a/R/pip_gdp_weo.R b/R/pip_gdp_weo.R deleted file mode 100644 index 5b616ad..0000000 --- a/R/pip_gdp_weo.R +++ /dev/null @@ -1,158 +0,0 @@ -#' Fetch GDP data from WEO -#' -#' Create a dataset with GDP data from World Economic Outlook. -#' -#' Note that the most recent version most be downloaded from imf.org and saved -#' as an .xls file in `/_aux/weo/`. The filename should be in the -#' following structure `WEO_.xls`. Due to potential file corruption -#' the file must be opened and re-saved before it can be updated with -#' `pip_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file -#' that's not really xls. -#' -#' @inheritParams pip_prices -#' @export -pip_gdp_weo <- function(action = "update", - force = FALSE, - maindir = gls$PIP_DATA_DIR) { - measure <- "weo" - msrdir <- fs::path(maindir, "_aux/", measure) # measure dir - - if (action == "update") { - - # ---- Load data from disk ---- - - # Get latest version of file (in case there are more) - dir <- sprintf("%s_aux/weo/", maindir) - weo_files <- list.files(dir, pattern = "WEO_.*[.]xls") - weo_latest <- weo_files %>% - gsub("WEO_|.xls", "", .) %>% - as.POSIXlt() %>% - max() %>% - as.character() %>% - sprintf("%s_aux/weo/WEO_%s.xls", maindir, .) - - # Read data - dt <- readxl::read_xls( - weo_latest, - sheet = 1, na = "n/a", - col_types = "text" - ) - dt <- setDT(dt) - - # Clean column names - dt <- janitor::clean_names(dt) - - # ---- Data transformations ---- - - # Select rows w/ data on real gdp per capita - dt <- dt[weo_subject_code %in% - c("NGDPRPC", "NGDPRPPPPC", "NGDP_R")] - - # Fix country codes - dt[ - , - iso := fifelse( - iso == "WBG", "PSE", iso # West Bank & Gaza - ) - ] - dt[ - , - iso := fifelse( - iso == "UVK", "XKX", iso # Kosovo - ) - ] - - # Replace subject codes - dt[ - , - subject_code := fcase( - weo_subject_code == "NGDPRPC", "weo_gdp_lcu", - weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017", - weo_subject_code == "NGDP_R", "weo_gdp_lcu_notpc" - ) - ] - - # Reshape to long format - dt <- dt %>% - melt( - id.vars = c("iso", "subject_code"), - measure.vars = names(dt)[grepl("\\d{4}", names(dt))], - value.name = "weo_gdp", variable.name = "year" - ) - setnames(dt, "iso", "country_code") - - # Convert year and GDP to numeric - dt$year <- sub("x", "", dt$year) %>% as.numeric() - dt$weo_gdp <- suppressWarnings(as.numeric(dt$weo_gdp)) - - # Remove rows w/ missing GDP - dt <- dt[!is.na(dt$weo_gdp)] - - # Remove current year and future years - current_year <- format(Sys.Date(), "%Y") - dt <- dt[dt$year < current_year] - - # Reshape to wide for GDP columns - dt <- dt %>% - dcast( - formula = country_code + year ~ subject_code, - value.var = "weo_gdp" - ) - - # ---- Merge with population ---- - - pop <- pip_pop("load", maindir = maindir) - setDT(pop) - pop <- pop[pop_data_level == "national", ] - dt[pop, - on = .(country_code, year), - `:=`( - pop = i.pop - ) - ] - - # Calculate per capita value for NGDP_R - dt[ - , - weo_gdp_lcu := fifelse( - is.na(weo_gdp_lcu), weo_gdp_lcu_notpc / pop, weo_gdp_lcu - ) - ] - - - # ---- Chain PPP and LCU GDP columns ---- - - # Chain LCU on PPP column - dt <- chain_values( - dt, - base_var = "weo_gdp_ppp2017", - replacement_var = "weo_gdp_lcu", - new_name = "weo_gdp", - by = "country_code" - ) - - - # --- Sign and save ---- - - # Select final columns - dt <- dt[, c("country_code", "year", "weo_gdp")] - - # Save dataset - pip_sign_save( - x = dt, - measure = measure, - msrdir = msrdir, - force = force - ) - } else if (action == "load") { - dt <- load_aux( - maindir = maindir, - measure = measure - ) - return(dt) - } else { - rlang::abort(c("`action` must be `update` or `load`", - x = paste0("you provided `", action, "`") - )) - } -} diff --git a/R/pip_metadata.R b/R/pip_metadata.R deleted file mode 100644 index b82b52f..0000000 --- a/R/pip_metadata.R +++ /dev/null @@ -1,41 +0,0 @@ -#' PIP Survey Metadata -#' -#' Update or load a dataset with survey metadata. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams load_raw_indicators -#' @export -pip_metadata <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "metadata" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - - pip_metadata_update( - maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - detail = detail - ) - - } else { - - load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - - } -} - diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R deleted file mode 100644 index ed634e2..0000000 --- a/R/pip_metadata_update.R +++ /dev/null @@ -1,135 +0,0 @@ -#' Update metadata file -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pipfun::load_from_gh -#' @inheritParams pip_metadata -#' @return logical. TRUE if saved correctly. FALSE if error happened -#' @export -pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "metadata" - branch <- match.arg(branch) - # ____________________________________________________________________________ - # Computations #### - - df <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv") - - # validate raw metdata data - metadata_validate_raw(metadata = df, detail = detail) - - # Load pfw - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - - - - # Create distribution type column (data type) - - pfw[, - domain_check := (gdp_domain == 2 | pce_domain == 2 | - pop_domain == 2 | cpi_domain == 2 | - ppp_domain == 2)] - - # order matters here - pfw[, - distribution_type := fcase( - use_imputed == 1, "micro, imputed", - use_microdata == 1, "micro", - use_groupdata == 1 & domain_check, "aggregated", - use_groupdata == 1, "group", - default = NA_character_ - ) - ] - - # Merge datasets (inner join) - df <- - merge(df, - pfw[, c("country_code", "ctryname", "surveyid_year", "survey_acronym", - "welfare_type", "reporting_year", "distribution_type", - "surv_producer","survey_coverage", "surv_title", - "link", "survey_year")], - by = "link", all.y = TRUE - ) - - # Recode colnames - setnames(x = df, - old = c("title", "surv_producer", "ctryname"), - new = c("survey_title", "survey_conductor", "country_name")) - df[, - survey_title := fifelse(is.na(survey_title), surv_title, survey_title) - ] - - # Select columns - df <- df[, - c( - "country_code", "country_name", "reporting_year", - "surveyid_year", "survey_year", "survey_acronym", - "survey_conductor", "survey_coverage", - "welfare_type", "distribution_type", - "survey_title", "year_start", "year_end", - "authoring_entity_name", "abstract", - "collection_dates_cycle", "collection_dates_start", - "collection_dates_end", - "sampling_procedure", "collection_mode", - "coll_situation", "weight", "cleaning_operations" - ) - ] - - # Create nested table - - df <- df[, .(.(.SD)), - keyby = .( - country_code, - country_name, - reporting_year, - survey_year, - surveyid_year, - survey_title, - survey_conductor, - survey_coverage, - welfare_type, - distribution_type - ) - ] - - setnames(df, old = "V1", new = "metadata") - -## ............................................................................ -## Save #### - df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) - - setattr(df, "aux_name", "metadata") - setattr(df, - "aux_key", - c("country_code", "year", "welfare_type")) - - # validate raw metdata data - metadata_validate_output(metadata = df, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = df, - measure = measure, - msrdir = msrdir, - force = force - ) - - # ____________________________________________________________________________ - # Return #### - return(invisible(saved)) - -} diff --git a/R/pip_metaregion.R b/R/pip_metaregion.R deleted file mode 100644 index 57a6d89..0000000 --- a/R/pip_metaregion.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Metadata for PIP regions -#' -#' Update or load a dataset with regions. -#' -#' @inheritParams pip_cpi -#' @inheritParams pipfun::load_from_gh -#' @export -pip_metaregion <- function(action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) { - measure <- "metaregion" - action <- match.arg(action) - branch <- match.arg(branch) - - if (action == "update") { - mr <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch) - - - ## ............................................................................ - ## Save data #### - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - saved <- pipfun::pip_sign_save( - x = mr, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) - - - } else { - df <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(df) - } - -} # end of function - - - - diff --git a/R/pip_npl.R b/R/pip_npl.R deleted file mode 100644 index 53fdf2f..0000000 --- a/R/pip_npl.R +++ /dev/null @@ -1,83 +0,0 @@ -#' National Poverty headcount -#' -#' Update series of national poverty lines -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi -#' @inheritParams pipfun::load_from_gh -#' @export -pip_npl <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## setup -------- - - measure <- "npl" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## update -------- - - npl <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "dta") |> - setDT() - - # validate npl raw data - npl_validate_raw(npl = npl, detail = detail) - - setnames(x = npl, - old = c("countrycode", "year", "vsi_pov_nahc_nc"), - new = c("country_code", "reporting_year", "nat_headcount"), - skip_absent = TRUE) - - npl[, c("region", "vsi_pov_nahc") := NULL] - npl[, nat_headcount := nat_headcount / 100] - - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## save -------- - npl <- npl |> setnames("reporting_year", "year", - skip_absent=TRUE) - - setattr(npl, "aux_name", "npl") - setattr(npl, - "aux_key", - c("country_code", "year")) - - # validate npl output data - npl_validate_output(npl = npl, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = npl, - measure = measure, - msrdir = msrdir, - force = force - ) - - - } else { - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## load -------- - - load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - - } -} diff --git a/R/pip_pce.R b/R/pip_pce.R deleted file mode 100644 index c19d26d..0000000 --- a/R/pip_pce.R +++ /dev/null @@ -1,39 +0,0 @@ -#' PIP PCE -#' -#' Load or update PCE data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_gdp -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -pip_pce <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - measure <- "pce" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - pip_pce_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - from = from, - detail = detail) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} diff --git a/R/pip_pfw.R b/R/pip_pfw.R deleted file mode 100644 index 3c20bff..0000000 --- a/R/pip_pfw.R +++ /dev/null @@ -1,41 +0,0 @@ -#' PIP PFW -#' -#' Load or update PIP Price Framework data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @param action character: Either "load" or "update". Default is "update". If -#' "update" data will be updated on the system. If "load" data is loaded in memory. -#' @param maindir character: Main directory of project. -#' @param force logical: If TRUE data will be overwritten. -#' @inheritParams pipfun::load_from_gh -#' @export -#' @import data.table -pip_pfw <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "pfw" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - pip_pfw_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - detail = detail) - - } else { - - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} diff --git a/R/pip_pfw_clean.R b/R/pip_pfw_clean.R deleted file mode 100644 index 25019f8..0000000 --- a/R/pip_pfw_clean.R +++ /dev/null @@ -1,85 +0,0 @@ -#' Clean PFW -#' -#' Clean PFW data from Datalibweb to meet PIP protocols. -#' -#' @param y dataset with PPP data from `pip_pfw_update()`. -#' @inheritParams load_aux -#' -#' @keywords internal -pip_pfw_clean <- function(y, - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - branch <- match.arg(branch) - - if (!inherits(y, "data.table")) { - x <- as.data.table(y) - } else { - x <- copy(y) - } - - # get just inpovcal data - - - # change variable names - old_var <- - c( - "region", - "reg_pcn", - "code", - "ref_year", - "survname", - "comparability", - "datatype", - "rep_year" - ) - - new_var <- - c( - "wb_region_code", - "pcn_region_code", - "country_code", - "survey_year", - "survey_acronym", - "survey_comparability", - "welfare_type", - "reporting_year" - ) - - setnames(x, - old = old_var, - new = new_var - ) - - # Recode some variables - - x[ - , - `:=`( - # Recode survey coverage - survey_coverage = fcase( - survey_coverage == "N", "national", - survey_coverage == "R", "rural", - survey_coverage == "U", "urban", - default = "" - ), - # Recode welfare type - welfare_type = fcase( - grepl("[Ii]", welfare_type), "income", - grepl("[Cc]", welfare_type), "consumption", - default = "" - ), - surveyid_year = as.integer(surveyid_year), - survey_year = round(survey_year, 2) - ) - ] - - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - x <- x[country_code %in% cl$country_code] - - x <- unique(x) # remove duplicates - return(x) -} - diff --git a/R/pip_pfw_key.R b/R/pip_pfw_key.R deleted file mode 100644 index 9d752b9..0000000 --- a/R/pip_pfw_key.R +++ /dev/null @@ -1,31 +0,0 @@ -#' Generate a dataset that contains pfw keys -#' -#' @return data.table -#' @export -#' -pip_pfw_key <- function(){ - - pfw_temp <- load_aux("pfw", maindir = temp_fld) - - pfw_key_options <- pfw_temp[, .(country_code, - survey_year, - survey_acronym, - cpi_domain_var)] - - - cpi_temp <- load_aux("cpi", maindir = temp_fld) - - cpi_temp <- cpi_temp[, cpi_domain_var := - fifelse(reporting_level == "urban" & - cpi_domain_value == 1, "urban", "")] - - cpi_temp <- cpi_temp[, .(country_code, survey_year, survey_acronym, - cpi_domain_var, reporting_level)] - - pfw_key <- cpi_temp[pfw_key_options, on = .(country_code, survey_year, - survey_acronym, cpi_domain_var)] - - any(duplicated(pfw_key, by = c("country_code", "survey_year", "survey_acronym", "cpi_domain_var"))) - - return(pfw_key) -} diff --git a/R/pip_pfw_update.R b/R/pip_pfw_update.R deleted file mode 100644 index f499269..0000000 --- a/R/pip_pfw_update.R +++ /dev/null @@ -1,47 +0,0 @@ -#' Update PFW -#' -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "pfw" - branch <- match.arg(branch) - - # Read data - pfw <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - ext = "dta") - # validate pfw raw data - pfw_validate_raw(pfw = pfw, detail = detail) - - # Clean data - pfw <- pip_pfw_clean(pfw, - maindir = maindir, - branch = branch) - - # validate pfw raw data - pfw_validate_output(pfw = pfw, detail = detail) - - # Save dataset - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(pfw, "aux_name", "pfw") - - saved <- pipfun::pip_sign_save( - x = pfw, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) -} diff --git a/R/pip_pl.R b/R/pip_pl.R deleted file mode 100644 index 1b467e5..0000000 --- a/R/pip_pl.R +++ /dev/null @@ -1,63 +0,0 @@ -#' Poverty lines -#' -#' Update or load a dataset with poverty lines. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -pip_pl <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") - ) { - - measure <- "pl" - branch <- match.arg(branch) - action <- match.arg(action) - - - if (action == "update") { - # Read yaml file - - dl <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "yaml" - ) - - dt <- purrr::map_df(dl,pip_pl_clean) - - # Save - - # validate pl clean data - pl_validate_output(pl = dt, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - saved <- pipfun::pip_sign_save( - x = dt, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) - - } else { - df <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - - return(df) - } -} diff --git a/R/pip_pl_clean.R b/R/pip_pl_clean.R deleted file mode 100644 index ba4cc8e..0000000 --- a/R/pip_pl_clean.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Build a data table for each list from yaml file with poverty lines info -#' -#' @param l list from yaml file -#' -#' @return data.table -#' @export -pip_pl_clean <- function(l) { - - - # ____________________________________________________________________________ - # Computations #### - - pls <- - purrr::map(.x = l$ranges, - .f = ~{ - seq(.x$min, .x$max, .x$increment) - }) |> - unlist() - - # Create data frame - df <- data.table::data.table( - name = as.character(pls), - poverty_line = pls - ) - - - df[, - c("is_default", "is_visible", "name", "ppp_year") - := { - id <- fifelse(name == l$default, TRUE, FALSE) - - iv <- fifelse(name %in% l$visible, TRUE, FALSE) - - n <- fifelse(n_decimals(poverty_line) == 1, paste0(name, "0"), name) - n <- fifelse(n_decimals(poverty_line) == 0, paste0(n, ".00"), n) - - list(id, iv, n, l$ppp_year) - }] - - # ____________________________________________________________________________ - # Return #### - return(df) - -} diff --git a/R/pip_pop.R b/R/pip_pop.R deleted file mode 100644 index 4b0ec7e..0000000 --- a/R/pip_pop.R +++ /dev/null @@ -1,40 +0,0 @@ -#' PIP POP -#' -#' Load or update population data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_cpi -#' @inheritParams pipfun::load_from_gh -#' @param from character: Source for population data. -#' @export -pip_pop <- function(action = c("update", "load"), - force = FALSE, - from = c("gh", "file", "api"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "pop" - from <- tolower(from) - action <- match.arg(action) - - if (action == "update") { - pip_pop_update( - force = force, - from = from, - maindir = maindir, - owner = owner, - branch = branch, - tag = tag, - detail = detail) - - } else { - - df <- load_aux(maindir = maindir, - measure = measure, - branch = branch) - - return(df) - } -} diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R deleted file mode 100644 index f55fc0a..0000000 --- a/R/pip_pop_update.R +++ /dev/null @@ -1,288 +0,0 @@ -#' Update POP -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @param from character: Source for population data. -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pop -pip_pop_update <- function(force = FALSE, - from = c("gh", "file", "api"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - # Check arguments - from <- match.arg(from) - branch <- match.arg(branch) - measure <- "pop" - - # Get the most recent year in PFW to filter population projection - - pfw <- pipload::pip_load_aux("pfw", - branch = branch, - maindir = maindir) - # year_max <- pfw[, max(year)] - # get current year as max year - year_max <- Sys.Date() |> - format("%Y") |> - as.numeric() - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # From WDI --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - if (from == "api") { - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## from API -------- - - pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") - pop <- wbstats::wb_data(indicator = pop_indicators, - country = "all", # this is new - lang = "en", - return_wide = FALSE) |> - setDT() - - # validate wb pop data - pop_validate_raw(pop = pop, detail = detail) - - # rename vars - pop <- pop[, c("iso3c", "date", "indicator_id", "value")] - - setnames(pop, - new = c("country_code", "year", "coverage", "pop")) - - - - pop[, - year := as.numeric(year) - ][, - pop_data_level := - fcase( - grepl("POP", coverage), 2, - grepl("RUR", coverage), 0, - grepl("URB", coverage), 1 - ) - ][, - coverage := NULL] - - ### Ger special cases --------- - - spop <- pipfun::load_from_gh( - measure = measure, - filename = "spop", - owner = owner, - branch = branch, - tag = tag, - ext = "csv") |> - clean_names_from_wide() |> - clean_from_wide() - - - pop <- rbindlist(list(pop, spop), - use.names = TRUE, - fill = TRUE) - - - } else { - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## from Emi's file -------- - - # Now Emi's file is uploaded directly to GH. So we get it from there. - # Load data - - pop_main <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "xlsx" - ) |> - clean_names_from_wide() |> - clean_from_wide() - - # validate pop main raw data - popmain_validate_raw(pop_main = pop_main, detail = detail) - - ### Ger special cases --------- - spop <- pipfun::load_from_gh( - measure = measure, - filename = "spop", - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) |> - clean_names_from_wide() |> - clean_from_wide() - - # validate special cases pop raw data - spop_validate_raw(spop = spop, detail = detail) - - pop <- joyn::joyn(pop_main, spop, - by = c("country_code", "year", "pop_data_level"), - update_values = TRUE, - reportvar = FALSE, - verbose = FALSE) - - # pop <- rbindlist(list(pop_main, spop), - # use.names = TRUE, - # fill = TRUE) - - } - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Clean data --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # Remove years prior to 1960 - pop <- pop[!is.na(pop) & year >= 1960] - pop <- pop[year <= year_max] - - # sorting - setorder(pop, country_code, year, pop_data_level) - setcolorder(pop, c("country_code", "year", "pop_data_level", "pop")) - - pop[, - pop_domain := fifelse(pop_data_level == 2, 1, 2)] - - # recode domain and data_level variables - cols <- c("pop_domain", "pop_data_level") - pop[, - (cols) := lapply(.SD, as.character), - .SDcols = cols - ][ - , # recode domain - pop_domain := fcase( - pop_domain == "1", "national", - pop_domain == "2", "urban/rural", - pop_domain == "3", "subnational region" - ) - ][ # Recode data_level only for those that are national or urban/rural - pop_domain %in% c("national", "urban/rural"), - pop_data_level := fcase( - pop_data_level == "0", "rural", - pop_data_level == "1", "urban", - pop_data_level == "2", "national" - ) - ] - - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - setDT(cl) - pop <- pop[country_code %in% cl$country_code] |> - unique() # make sure we don't havce any duplicates - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Save data --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # drop pce_domain - pop <- pop[, -c("pop_domain")] - - pop <- pop |> setnames("pop_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(pop, "aux_name", "pop") - setattr(pop, - "aux_key", - c("country_code", "year", "reporting_level")) - - # validate output pop data - pop_validate_output(pop = pop, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = pop, - measure = measure, - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) - -} - - - -#' Clean names from wide WDI format -#' -#' @param x data frame -#' -#' @return dataframe with names cleaned -#' @keywords internal -clean_names_from_wide <- function(x) { - if (!is.data.table(x)) { - setDT(x) - } - nnames <- as.character(x[2, 1:4]) - setnames(x, 1:4, nnames) - x <- x[-c(1:2)] - x -} - - -#' Clean from WDI format -#' -#' @param x data frame -#' -#' @return dataframe with names cleaned -#' @keywords internal -clean_from_wide <- function(x) { - if (!is.data.table(x)) { - setDT(x) - } - - - year_vars <- names(x)[6:ncol(x)] - x$Series_Name <- NULL - x$Time_Name <- NULL - - # Reshape to long format - pop_long <- x |> - data.table::setDT() |> - data.table::melt( - id.vars = c("Country", "Series"), - measure.vars = year_vars, - variable.name = "Year", - value.name = "Population" - ) - pop_long[, - Year := as.numeric(as.character(Year)) - ][, - Population := { - Population[Population == "."] <- NA_character_ - as.numeric(Population) - }] - - - - pop <- pop_long - # Create data_level column - pop[, - pop_data_level := - fcase( - grepl("POP", Series), 2, - grepl("RUR", Series), 0, - grepl("URB", Series), 1 - ) - ][, - Series := NULL] - - # Set colnames - setnames( - pop, - old = c("Country", "Year", "Population"), - new = c("country_code", "year", "pop") - ) - - return(pop) -} diff --git a/R/pip_ppp.R b/R/pip_ppp.R deleted file mode 100644 index e8fe3a7..0000000 --- a/R/pip_ppp.R +++ /dev/null @@ -1,62 +0,0 @@ -#' PIP PPP -#' -#' Load or update PPP data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -#' @import data.table -pip_ppp <- function(action = c("update", "load"), - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - force = FALSE, - tag = branch, - detail = getOption("pipaux.detail.raw"), - ppp_defaults = TRUE) { - - # ____________________________________________________________________________ - # on.exit #### - on.exit({ - - }) - - # ____________________________________________________________________________ - # Defenses #### - measure <- "ppp" - action <- match.arg(action) - branch <- match.arg(branch) - - stopifnot( exprs = { - - } - ) - - # ____________________________________________________________________________ - # Early returns #### - if (FALSE) { - return() - } - - # ____________________________________________________________________________ - # Computations #### - if (action == "update") { - pip_ppp_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - detail = detail) - } - else { - load_aux( - maindir = maindir, - measure = measure, - branch = branch, - ppp_defaults = ppp_defaults - ) - } - - -} diff --git a/R/pip_ppp_clean.R b/R/pip_ppp_clean.R deleted file mode 100644 index b9efa69..0000000 --- a/R/pip_ppp_clean.R +++ /dev/null @@ -1,93 +0,0 @@ -#' Clean PPP data from datalibweb to meet PIP protocols -#' -#' @param y dataset with PPP data from `pip_ppp_update()`. -#' @param default_year numeric: ICP round year. Default is 2011 -#' -#' @keywords internal -pip_ppp_clean <- function(y, default_year = getOption("pipaux.pppyear")) { - x <- data.table::as.data.table(y) - - y <- melt(x, - id.vars = c("code", "ppp_domain", "datalevel"), - measure.vars = patterns("^ppp_[0-9]{4}_[Vv][0-9]_[Vv][0-9]$"), - variable.name = "ver", - value.name = "ppp" - ) - - y[ - , - c("p", "ppp_year", "release_version", "adaptation_version") := tstrsplit(ver, "_") - ][ - , - `:=`( - ppp_year = as.numeric(ppp_year), - ppp_domain = as.character(ppp_domain), - datalevel = as.character(datalevel) - ) - ][ - , - # This part should not exist if the raw data - # has been properly created - ppp_data_level := fcase( - ppp_domain %chin% c("urban/rural", "2") & datalevel == "0", "rural", - ppp_domain %chin% c("urban/rural", "2") & datalevel == "1", "urban", - ppp_domain %chin% c("national", "1") & datalevel %chin% c("2", "", NA_character_), "national", - default = "" - ) - ][ - , - c("p", "ver", "datalevel") := NULL - ] - - setorder(y, code, ppp_year, release_version, adaptation_version) - - #--------- Get default version --------- - - y[ # Find Max release version - , - d1 := release_version == max(release_version), - by = .(code, ppp_year) - ][ - # Find max adaptation version of the max release - d1 == TRUE, - d2 := adaptation_version == max(adaptation_version), - by = .(code, ppp_year) - ][ - , - # get intersection - `:=`( - ppp_default = (d1 == TRUE & d2 == TRUE & ppp_year == (default_year)), - ppp_default_by_year = (d1 == TRUE & d2 == TRUE), - country_code = code - ) - ][ - , - # Remove unnecessary variables - c("d1", "d2", "code") := NULL - ] - - setcolorder( - y, - c( - "country_code", - "ppp_year", - "release_version", - "adaptation_version", - "ppp", - "ppp_default", - "ppp_default_by_year", - "ppp_domain", - "ppp_data_level" - ) - ) - - y <- unique(y) # remove duplicates - - # Remove non WDI countries - non_wdi <- c("BES", "EGZ", "RUT", "SDO") - if (any(y$country_code %in% non_wdi)) { - y <- y[!(country_code %in% non_wdi)] - } - - return(y) -} diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R deleted file mode 100644 index 6320ad3..0000000 --- a/R/pip_ppp_update.R +++ /dev/null @@ -1,121 +0,0 @@ -#' Update PPP -#' -#' @inheritParams pipfun::load_from_gh -#' @keywords internal -pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, - force = FALSE, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - -# ____________________________________________________________________________ -# set up #### - - measure <- "ppp" - branch <- match.arg(branch) - - -# ____________________________________________________________________________ -# Load raw data #### - - ppp <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) - - # validate ppp raw data - ppp_validate_raw(ppp = ppp, detail = detail) - -# ____________________________________________________________________________ -# cleaning #### - - - # Clean data - ppp <- pip_ppp_clean(ppp) - - # Remove any non-WDI countries - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - ppp <- ppp[country_code %in% cl$country_code] - - -## ............................................................................ -## Special cases #### - - # Hardcode domain / data_level fix for NRU - ppp$ppp_domain <- - ifelse(ppp$country_code == "NRU" & is.na(ppp$ppp_domain), - 1, ppp$ppp_domain - ) - ppp$ppp_data_level <- - ifelse(ppp$country_code == "NRU" & ppp$ppp_data_level == "", - "national", ppp$ppp_data_level - ) - - -# ____________________________________________________________________________ -# Saving #### - - # drop ppp_domain - ppp <- ppp[, -c("ppp_domain")] - - ppp <- ppp |> setnames("ppp_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(ppp, "aux_name", "ppp") - setattr(ppp, - "aux_key", - c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. - - # validate ppp output data - ppp_validate_output(ppp = ppp, detail = detail) - - if (branch == "main") { - branch <- "" - } - - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = ppp, - measure = measure, - msrdir = msrdir, - force = force - ) - - -# ____________________________________________________________________________ -# PPP vintages data #### - - vars <- c("ppp_year", "release_version", "adaptation_version") - ppp_vintage <- unique(ppp[, ..vars], by = vars) - - data.table::setnames(x = ppp_vintage, - old = c("release_version", "adaptation_version"), - new = c("ppp_rv", "ppp_av")) - - # ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", - # skip_absent=TRUE) - # - # setattr(ppp_vintage, "aux_name", "ppp") - # setattr(ppp_vintage, - # "aux_key", - # c("country_code", "reporting_level")) - - # Save - pipfun::pip_sign_save( - x = ppp_vintage, - measure = "ppp_vintage", - msrdir = msrdir, - force = force - ) - - return(invisible(saved)) -} diff --git a/R/pip_sna.R b/R/pip_sna.R deleted file mode 100644 index 37ee45a..0000000 --- a/R/pip_sna.R +++ /dev/null @@ -1,16 +0,0 @@ -#' Fake PIP SNA function -#' -#' @inheritParams pip_gdp -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -pip_sna <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api")) { - - return(invisible(TRUE)) -} diff --git a/R/pip_update_all_aux.R b/R/pip_update_all_aux.R deleted file mode 100644 index 4f42e85..0000000 --- a/R/pip_update_all_aux.R +++ /dev/null @@ -1,46 +0,0 @@ -#' Update all auxiliary data at once -#' -#' @inheritParams pip_cpi -#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. -#' @export -pip_update_all_aux <- function(force = FALSE, - popsrc = getOption("pipaux.popsrc"), - maindir = gls$PIP_DATA_DIR) { - - # List of countries in WDI - pip_country_list(force = force, maindir = maindir) - - # PIP countries and regions - pip_countries(force = force, maindir = maindir) - pip_regions(force = force, maindir = maindir) - - # PIP Indicators - pip_indicators(force = force, maindir = maindir) - - # Poverty lines - pip_pl(force = force, maindir = maindir) - - # PFW, CPI and PPP from DLW - pip_pfw(force = force, maindir = maindir) - pip_cpi(force = force, maindir = maindir) - pip_ppp(force = force, maindir = maindir) - - # POP from Emi or WDI - pip_pop(force = force, maindir = maindir, src = popsrc) - - # GDP from WEO, Maddison and WDI (+ a few special cases) - pip_weo(force = force, maindir = maindir) - pip_maddison(force = force, maindir = maindir) - pip_gdp(force = force, maindir = maindir) - - # PCE from WDI (+ a few special cases) - pip_pce(force = force, maindir = maindir) - - # Country profiles (from Poverty GP) - pip_cp(force = force, maindir = maindir) - - # Survey metadata (from Poverty GP) - pip_metadata(force = force, maindir = maindir) - - return(invisible()) -} diff --git a/R/pip_wdi.R b/R/pip_wdi.R deleted file mode 100644 index af5073f..0000000 --- a/R/pip_wdi.R +++ /dev/null @@ -1,42 +0,0 @@ -#' PIP wdi -#' -#' Update or load wdi data. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" -#' and "gh" are synonymous -#' @export -pip_wdi <- function(action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - - measure <- "wdi" - branch <- match.arg(branch) - action <- match.arg(action) - - - if (action == "update") { - pip_wdi_update(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag, - from = from, - detail = detail) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} # end of pip_wdi diff --git a/R/pip_wdi_update.R b/R/pip_wdi_update.R deleted file mode 100644 index 325cdef..0000000 --- a/R/pip_wdi_update.R +++ /dev/null @@ -1,81 +0,0 @@ -#' Update National accounts data from WDI -#' -#' GDP and HFCE data from WDI. It could be either from API or from file -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_gdp -#' @return data.table with gdp and pce variables -#' @export -#' -#' @examples -#' pip_wdi_update() -pip_wdi_update <- function(force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = c("gh", "file", "api"), - detail = getOption("pipaux.detail.raw")) { - - - from <- match.arg(from) - branch <- match.arg(branch) - - # ______________________________________________________ - # Computations #### - measure <- "wdi" - - ## ............................................................... - ## From file #### - - if (from %in% c("file", "gh")) { - wdi <- pipfun::load_from_gh(measure = measure, - owner = owner, - branch = branch, - ext = "csv") - - } else { - ## ........................................................................ - ## From API #### - wdi_indicators <- c("NY.GDP.PCAP.KD", "NE.CON.PRVT.PC.KD") - wdi <- wbstats::wb_data(indicator = wdi_indicators, - lang = "en") |> - setDT() - - wdi[, - c("country", "iso2c") := NULL] - - # Rename columns - setnames(wdi, - old = c("iso3c", "date"), - new = c("country_code", "year") - ) - } - # validate wdi raw data - wdi_validate_raw(wdi = wdi, detail = detail) - - # _________________________________________________________________________ - # Save and Return #### - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - setattr(wdi, "aux_name", "wdi") - setattr(wdi, - "aux_key", - c("country_code", "year")) - - saved <- pipfun::pip_sign_save( - x = wdi, - measure = measure, - msrdir = msrdir, - force = force, - save_dta = FALSE - ) - - return(invisible(saved)) - -} - diff --git a/R/pip_weo.R b/R/pip_weo.R deleted file mode 100644 index 060ccd3..0000000 --- a/R/pip_weo.R +++ /dev/null @@ -1,77 +0,0 @@ -#' Fetch GDP data from WEO -#' -#' Create a dataset with GDP data from World Economic Outlook. -#' -#' Note that the most recent version most be downloaded from imf.org and saved -#' as an .xls file in `/_aux/weo/`. The filename should be in the -#' following structure `WEO_.xls`. Due to potential file corruption -#' the file must be opened and re-saved before it can be updated with -#' `pip_weo()`. Hopefully in the future IMF will stop using an `.xls` file -#' that's not really xls. -#' -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @export -pip_weo <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - measure <- "weo" - branch <- match.arg(branch) - action <- match.arg(action) - - if (action == "update") { - - # ---- Load data from disk ---- - - # Read data - dt <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch, - tag = tag, - ext = "csv" - ) - - # validate weo raw data - weo_validate_raw(weo = dt, detail = detail) - - dt <- pip_weo_clean(dt, - maindir = maindir, - branch = branch) - - # Save dataset - setattr(dt, "aux_name", "weo") - setattr(dt, - "aux_key", - c("country_code", "year")) - # validate weo clean data - weo_validate_output(weo = dt, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - cat('\nDir : ', msrdir) - saved <- pipfun::pip_sign_save( - x = dt, - measure = measure, - msrdir = msrdir, - force = force - ) - return(invisible(saved)) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} diff --git a/R/pip_weo_clean.R b/R/pip_weo_clean.R deleted file mode 100644 index 120fa27..0000000 --- a/R/pip_weo_clean.R +++ /dev/null @@ -1,135 +0,0 @@ -#' Clean WEO data -#' -#' @param dt database with weo raw data -#' @param maindir directory where auxiliary data is stored (to load pop) -#' @param branch character: branch to be loaded -#' -#' @return data.table -#' @export -pip_weo_clean <- function(dt, - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main")) { - - - branch <- match.arg(branch) - -# _________________________________________ -# Computations #### - if (!inherits(dt, "data.table")) { - setDT(dt) - } - - # Clean column names - nn <- - names(dt) |> - tolower() |> - {\(.) gsub("[-/ ]", "_", .)}() |> - {\(.) gsub("([0-9]{4})", "x\\1", .)}() - - names(dt) <- nn - - # ---- Data transformations ---- - - # Select rows w/ data on real gdp per capita - dt <- dt[weo_subject_code %in% c("NGDPRPC", "NGDPRPPPPC")] - - # Fix country codes - dt[ - , - iso := fifelse( - iso == "WBG", "PSE", iso # West Bank & Gaza - ) - ][ - , - iso := fifelse( - iso == "UVK", "XKX", iso # Kosovo - ) - ][, - # Replace subject codes - subject_code := fcase( - weo_subject_code == "NGDPRPC", "weo_gdp_lcu", - weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017" - ) - ] - - # Reshape to long format - - years_vars <- names(dt)[grepl("\\d{4}", names(dt))] - dt <- - melt(data = dt, - id.vars = c("iso", "subject_code"), - measure.vars = years_vars, - value.name = "weo_gdp", - variable.name = "year" - ) - setnames(dt, "iso", "country_code") - - # Convert year and GDP to numeric - dt[, - c("weo_gdp", "year") := { - y <- sub("x", "", year) |> - as.numeric() - - x <- as.numeric(weo_gdp) |> - suppressWarnings() - list(x, y) - }] - - # Remove rows w/ missing GDP` - dt <- na.omit(dt, cols = "weo_gdp") - - # Remove current year and future years - current_year <- format(Sys.Date(), "%Y") - dt <- dt[year < current_year] - - # Reshape to wide for GDP columns - dt <- dcast(dt, - formula = country_code + year ~ subject_code, - value.var = "weo_gdp" - ) - - # ---- Merge with population ---- - - - pop <- load_aux(measure = "pop", - maindir = maindir, - branch = branch) - - setDT(pop) - pop <- pop[reporting_level == "national", ] #pop_data_level = reporting_level - dt[pop, - on = .(country_code, year), - `:=`( - pop = i.pop - ) - ] - - # ---- Chain PPP and LCU GDP columns ---- - - # Chain LCU on PPP column - - dt[, weo_gdp := chain_val(ori_var = weo_gdp_ppp2017, - rep_var = weo_gdp_lcu), - by = country_code] - # - # dt <- chain_values( - # dt, - # base_var = "weo_gdp_ppp2017", - # replacement_var = "weo_gdp_lcu", - # new_name = "weo_gdp", - # by = "country_code" - # ) - - - # --- Sign and save ---- - - # Select final columns - dt <- dt[, c("country_code", "year", "weo_gdp")] - - - -# ____________________________________________________________________________ -# Return #### - return(dt) - -} diff --git a/R/pl_validate_output.R b/R/pl_validate_output.R deleted file mode 100644 index 013483b..0000000 --- a/R/pl_validate_output.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate output pl data -#' -#' @param pl output pl data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ - - stopifnot("PL clean data is not loaded" = !is.null(pl)) - - report <- data_validation_report() - - validate(pl, name = "PL output data validation") |> - validate_if(is.character(name), - description = "`name` should be character") |> - validate_if(is.numeric(poverty_line), - description = "`poverty_line` should be numeric") |> - validate_if(is.logical(is_default), - description = "`is_default` should be logical") |> - validate_if(is.logical(is_visible), - description = "`is_visible` should be logical") |> - validate_if(is.integer(ppp_year), - description = "`ppp_year` should be numeric") |> - validate_cols(not_na, name, ppp_year, - description = "no missing values in key variables") |> - validate_if(is_uniq(name, ppp_year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pop_validate_output.R b/R/pop_validate_output.R deleted file mode 100644 index db0deaa..0000000 --- a/R/pop_validate_output.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Validate output pop data -#' -#' @param pop output pop data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ - - stopifnot("POP clean data is not loaded" = !is.null(pop)) - - report <- data_validation_report() - - validate(pop, name = "POP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - # validate_if(is.character(pop_domain), - # description = "`pop_domain` should be character") |> - # validate_cols(in_set(c("national", "urban/rural")), - # pop_domain, description = "`pop_domain` values within range") |> - validate_cols(not_na, country_code, year, reporting_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, reporting_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/pop_validate_raw.R b/R/pop_validate_raw.R deleted file mode 100644 index 377fc67..0000000 --- a/R/pop_validate_raw.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate pop raw data download from wdi -#' -#' @param pop raw pop data, as loaded via `wbstats::wb_data` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ - - stopifnot("WB POP raw data is not loaded" = !is.null(pop)) - - report <- data_validation_report() - - validate(pop, name = "WB POP raw data validation") |> - validate_if(is.character(indicator_id), - description = "`indicator_id` should be character") |> - validate_cols(in_set(c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL")), - indicator_id, description = "`indicator_id` values within range") |> - validate_if(is.character(indicator), - description = "`indicator` should be character") |> - validate_if(is.character(iso2c), - description = "`iso2c` should be character") |> - validate_if(is.character(iso3c), - description = "`iso3c` should be character") |> - validate_if(is.character(country), - description = "`country` should be character") |> - validate_if(is.numeric(date), - description = "`date` should be numeric") |> - validate_if(is.numeric(value), - description = "`value` should be numeric") |> - validate_if(is.character(unit), - description = "`unit` should be character") |> - validate_if(is.character(obs_status), - description = "`obs_status` should be character") |> - validate_if(is.character(footnote), - description = "`footnote` should be character") |> - validate_if(is_date(last_updated), - description = "`last_updated` should be date") |> - validate_cols(not_na, indicator_id, iso3c, date, - description = "no missing values in key variables") |> - validate_if(is_uniq(indicator_id, iso3c, date), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/popmain_validate_raw.R b/R/popmain_validate_raw.R deleted file mode 100644 index c6d5d20..0000000 --- a/R/popmain_validate_raw.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate raw main pop data -#' -#' @param pop_main raw pop main data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ - - stopifnot("POP main raw data is not loaded" = !is.null(pop_main)) - - report <- data_validation_report() - - validate(pop_main, name = "POP main raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(pop_data_level), - description = "`pop_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - pop_data_level, description = "`pop_data_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - validate_cols(not_na, country_code, year, pop_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pop_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R deleted file mode 100644 index af7fafb..0000000 --- a/R/ppp_validate_output.R +++ /dev/null @@ -1,54 +0,0 @@ -#' Validate output ppp data -#' -#' @param ppp output ppp data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ - - stopifnot("PPP output data is not loaded" = !is.null(ppp)) - - report <- data_validation_report() - - validate(ppp, name = "PPP output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(ppp_year), - description = "`ppp_year` should be character") |> - validate_if(is.character(release_version), - description = "`release_version` should be character") |> - validate_if(is.character(adaptation_version), - description = "`adaptation_version` should be character") |> - validate_if(is.numeric(ppp), - description = "`ppp` should be numeric") |> - validate_if(is.logical(ppp_default), - description = "`ppp_default` should be numeric") |> - validate_if(is.logical(ppp_default_by_year), - description = "`ppp_default_by_year` should be numeric") |> - # validate_if(is.character(ppp_domain), - # description = "`ppp_domain` should be character") |> - # validate_cols(in_set(c("1", "2")), - # ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(reporting_level), - description = "`reporting_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), - reporting_level, description = "`reporting_level` values within range") |> - validate_cols(not_na, country_code, ppp_year, reporting_level, - adaptation_version, release_version, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, ppp_year, - reporting_level, adaptation_version, release_version), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/ppp_validate_raw.R b/R/ppp_validate_raw.R deleted file mode 100644 index cc5cd5d..0000000 --- a/R/ppp_validate_raw.R +++ /dev/null @@ -1,78 +0,0 @@ -#' Validate raw ppp data -#' -#' @param ppp raw ppp data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ - - stopifnot("PPP raw data is not loaded" = !is.null(ppp)) - - report <- data_validation_report() - - validate(ppp, name = "PPP raw data validation") |> - validate_if(is.character(CountryName), - description = "`CountryName` should be character") |> - validate_if(is.character(code), - description = "`code` should be character") |> - validate_if(is.character(CoverageType), - description = "`CoverageType` should be character") |> - validate_cols(in_set(c("National", "Rural", "Urban")), - CoverageType, description = "`CoverageType` values within range") |> - validate_if(is.numeric(ppp_2005_v1_v1), - description = "`ppp_2005_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v1_v1), - description = "`ppp_2011_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v2_v1), - description = "`ppp_2011_v2_v1` should be numeric") |> - validate_if(is.numeric(ppp_2011_v1_v2), - description = "`ppp_2011_v1_v2` should be numeric") |> - validate_if(is.numeric(ppp_2011_v2_v2), - description = "`ppp_2011_v2_v2` should be numeric") |> - validate_if(is.numeric(ppp_2017_v1_v1), - description = "`ppp_2017_v1_v1` should be numeric") |> - validate_if(is.numeric(ppp_2017_v1_v2), - description = "`ppp_2017_v1_v2` should be numeric") |> - validate_if(is.numeric(source_ppp_2011), - description = "`source_ppp_2011` should be numeric") |> - validate_if(is.numeric(source_ppp_2005), - description = "`source_ppp_2005` should be numeric") |> - validate_if(is.numeric(datalevel), - description = "`datalevel` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - datalevel, description = "`datalevel` values within range") |> - validate_if(is.numeric(ppp_domain), - description = "`ppp_domain` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.numeric(ppp_domain_value), - description = "`ppp_domain_value` should be numeric") |> - validate_cols(in_set(c(1, 2)), - ppp_domain_value, description = "`ppp_domain_value` values within range") |> - validate_if(is.numeric(oldicp2005), - description = "`oldicp2005` should be numeric") |> - validate_if(is.numeric(oldicp2011), - description = "`oldicp2011` should be numeric") |> - validate_if(is.character(Seriesname), - description = "`Seriesname` should be character") |> - validate_if(is.character(note_may192020), - description = "`note_may192020` should be character") |> - validate_if(is.character(ppp_2017_v1_v2_note), - description = "`ppp_2017_v1_v2_note` should be character") |> - validate_cols(not_na, code, CoverageType, datalevel, - description = "no missing values in key variables") |> - validate_if(is_uniq(code, CoverageType, datalevel), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/send_report.R b/R/send_report.R deleted file mode 100644 index a14b81f..0000000 --- a/R/send_report.R +++ /dev/null @@ -1,34 +0,0 @@ -#' Send an email that contains auxiliary data validation report -#' -#' @import blastula -#' -#' @export -send_report <- function(){ - - if (rlang::env_has(.pipaux, "validation_report")){ - - print(.pipaux$validation_report) - - # fname <- file.path(tempdir(), "data_validation_report.csv") - # - # write.csv(.pipaux$validation_report, fname, row.names = FALSE) - # - # compose_email( - # body = md(glue::glue( - # - # "Hello, - # - # The attched file contains auxiliary data validation report. - # - # Regards"))) |> - # add_attachment(file = fname, filename = "data_validation_report") |> - # smtp_send( - # from = "tefera.degefu@outlook.com", - # to = "tdegefu@worldbank.org", - # subject = "Data validation report", - # credentials = creds_envvar(user = "tefera.degefu@outlook.com", - # pass_envvar = "SMTP_GPID_EMAIL", - # provider = "outlook") - # ) - } -} diff --git a/R/sna.R b/R/sna.R deleted file mode 100644 index 263de85..0000000 --- a/R/sna.R +++ /dev/null @@ -1,49 +0,0 @@ -#' PIP Special National accounts -#' -#' Update special national accounts data -#' -#' @inheritParams pip_pfw -#' @inheritParams pipfun::load_from_gh -#' @param from character: Either "gh", "file" or "api". Default is "gh". "file" -#' and "gh" are synonymous -#' @export -pip_sna <- function(action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { - - measure <- "sna" - branch <- match.arg(branch) - action <- match.arg(action) - - - if (action == "update") { - # load nowcast growth rates - sna <- pipfun::load_from_gh( - measure = "sna", - owner = owner, - branch = branch - ) - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - saved <- pipfun::pip_sign_save( - x = sna, - measure = measure, - msrdir = msrdir, - force = force - ) - - } else { - dt <- load_aux( - maindir = maindir, - measure = measure, - branch = branch - ) - return(dt) - } -} # end of pip_gdp diff --git a/R/sna_fy_validate_raw.R b/R/sna_fy_validate_raw.R deleted file mode 100644 index edfff25..0000000 --- a/R/sna_fy_validate_raw.R +++ /dev/null @@ -1,41 +0,0 @@ -#' Validate raw sna_fy data -#' -#' @param sna_fy raw sna_fy data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ - - stopifnot("sna_fy raw data is not loaded" = !is.null(sna_fy)) - - report <- data_validation_report() - - validate(sna_fy, name = "sna_fy raw data validation") |> - validate_if(is.character(Code), - description = "`Code` should be character") |> - validate_if(is.character(LongName), - description = "`LongName` should be character") |> - validate_if(is.character(SpecialNotes), - description = "`SpecialNotes` should be character") |> - validate_if(is.character(Month), - description = "`Month` should be character") |> - validate_if(is.numeric(Day), - description = "`Day` should be numeric") |> - validate_cols(not_na, Code, Month, Day, - description = "no missing values in key variables") |> - # validate_if(is_uniq(Code, LongName), - # description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/sna_validate_raw.R b/R/sna_validate_raw.R deleted file mode 100644 index 7433a4b..0000000 --- a/R/sna_validate_raw.R +++ /dev/null @@ -1,49 +0,0 @@ -#' Validate raw special national accounts (sna) data -#' -#' @param sna raw sna data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ - - stopifnot("SNA raw data is not loaded" = !is.null(sna)) - - report <- data_validation_report() - - validate(sna, name = "SNA raw data validation") |> - validate_if(is.character(countryname), - description = "`countryname` should be character") |> - validate_if(is.character(coverage), - description = "`coverage` should be character") |> - validate_cols(in_set(c("National")), - coverage, description = "`coverage` values within range") |> - validate_if(is.character(countrycode), - description = "`countrycode` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(GDP), - description = "`GDP` should be numeric") |> - validate_if(is.logical(PCE), - description = "`PCE` should be logical") |> - validate_if(is.character(sourceGDP), - description = "`sourceGDP` should be character") |> - validate_if(is.logical(sourcePCE), - description = "`sourcePCE` should be logical") |> - validate_cols(not_na, countrycode, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(countrycode, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/R/spop_validate_raw.R b/R/spop_validate_raw.R deleted file mode 100644 index e73af29..0000000 --- a/R/spop_validate_raw.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Validate raw special cases pop data -#' -#' @param spop raw special case pop data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ - - stopifnot("Special POP raw data is not loaded" = !is.null(spop)) - - report <- data_validation_report() - - validate(spop, name = "Special POP raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(pop_data_level), - description = "`pop_data_level` should be numeric") |> - validate_cols(in_set(c(0, 1, 2)), - pop_data_level, description = "`pop_data_level` values within range") |> - validate_if(is.numeric(pop), - description = "`pop` should be numeric") |> - validate_cols(not_na, country_code, year, pop_data_level, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pop_data_level), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/update_aux.R b/R/update_aux.R index c888d48..84bb4da 100644 --- a/R/update_aux.R +++ b/R/update_aux.R @@ -1,7 +1,258 @@ +#' Update the measure along with it's dependencies automatically. +#' +#' @param measure character: measure to be updated, if NULL will update all of +#' them +#' @inheritParams aux_pop_update +#' @export +auto_aux_update <- function(measure = NULL, + force = FALSE, + from = c("gh", "file", "api"), + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + pipfun::check_pkg_active("pipaux") + + branch <- match.arg(branch) + from <- match.arg(from) + files_changed <- FALSE + + isgls <- ls(sys.frame(), pattern = "^gls$") |> + length() > 0 + + if (isFALSE(isgls)) { + cli::cli_abort( + "object {.var gls} is not available in Globel env. + Run {.code gls <- pipfun::pip_create_globals()} first", + wrap = TRUE + ) + } + + # if there is validation report in the environment - remove it + clean_validation_report() + + creds <- pipfun::get_github_creds() + gh_user <- "https://raw.githubusercontent.com" + org_data <- paste(gh_user, + owner, + "pipaux/metadata/Data/git_metadata.csv", + sep = "/") |> + readr::read_csv(show_col_types = FALSE) + + + dependencies <- read_dependencies(gh_user, owner) + # Get all repositories under PIP-Technical-Team + all_repos <- gh::gh("GET /users/{username}/repos", + username = owner) |> + vapply("[[", "", "name") |> + #Keep only those repos that start with "aux_" + grep("^aux_", x = _, value = TRUE) + + if (!is.null(measure)) { + all_repos <- all_repos[all_repos %in% glue::glue("aux_{measure}")] + } + # get hashs + hash <- + purrr::map(all_repos, + .f = ~ { + gh::gh( + "GET /repos/{owner}/{repo}/commits/{branch}", + owner = owner, + repo = .x, + branch = branch + ) + }) |> + purrr::map_chr( ~ .x[["sha"]]) + + # Get the latest hash of the repo + all_data <- + dplyr::tibble( + Repo = glue::glue("{owner}/{all_repos}"), + hash = hash, + branch = branch + ) + + old_data <- org_data %>% + dplyr::filter(.data$branch == branch) %>% + dplyr::rename(hash_original = hash) + + old_data <- old_data %>% + dplyr::inner_join(all_data, by = c("Repo", "branch")) + + cli::cli_alert_info("Number of rows from csv file : {nrow(old_data)}") + cli::cli_alert_info("Number of rows from Github : {nrow(all_data)}") + cli::cli_alert_info("Both the numbers above should be equal or else some + debugging is required.", wrap = TRUE) + + new_data <- old_data %>% + dplyr::filter(.data$hash != .data$hash_original | + is.na(.data$hash_original) | + is.na(.data$hash)) + + # all_data <- dplyr::rows_update(org_data, all_data, by = c("Repo", "branch")) + + + + # Remove everything till the last underscore so + # PIP-Technical-Team/aux_ppp changes to ppp + aux_fns <- sub(".*_", "", new_data$Repo) |> + # Keep only those whose dependencies we know + intersect(names(dependencies)) + + # For each auxiliary data to be updated + cli::cli_alert_info("Updating data for {length(aux_fns)} files.") + for (aux in aux_fns) { + # Find the corresponding functions to be run + # Add pip_ suffix so that it becomes function name + list_of_funcs <- paste0("pip_", return_value(aux, dependencies)) + + for (fn in list_of_funcs) { + + aux_file <- sub("pip_", "", fn) + cli::cli_alert_info("Running function {fn} for aux file {aux}.") + + before_hash <- read_signature_file(aux_file, maindir, branch) + # Run the pip_.* function + match.fun(fn)(maindir = maindir, branch = branch) |> + suppressMessages() + after_hash <- read_signature_file(aux_file, maindir, branch) + + if (before_hash != after_hash) { + + cli::cli_alert_info("Updating csv for {fn}") + files_changed <- TRUE + + # find rows of of org to be modified + aux_row_org <- org_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + org_data$branch == branch + + # find rows in new that will be copied to org + aux_row_new <- new_data$Repo |> + fs::path_file() |> + sub('aux_', '', x = _) %in% aux_file & + new_data$branch == branch + + org_data$hash[aux_row_org] <- new_data$hash[aux_row_new] + + } # end of before_hash condition + + } # end of list_of_funcs loop + } # end of aux_fns loop + last_updated_time <- + aux_file_last_updated(maindir, names(dependencies), branch) + if (length(aux_fns) > 0 && files_changed) { + # Write the latest auxiliary file and corresponding hash to csv + # Always save at the end. + # sha - hash object of current csv file in Data/git_metadata.csv + # content - base64 of changed data + out <- gh::gh( + "GET /repos/{owner}/{repo}/contents/{file_path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + file_path = "Data/git_metadata.csv", + .params = list(ref = "metadata") + ) + # There is no way to update only the lines which has changed using Github API + # We need to update the entire file every time. Refer - https://stackoverflow.com/a/21315234/3962914 + res <- gh::gh( + "PUT /repos/{owner}/{repo}/contents/{path}", + owner = "PIP-Technical-Team", + repo = "pipaux", + path = "Data/git_metadata.csv", + .params = list( + branch = "metadata", + message = "updating csv file", + sha = out$sha, + content = convert_df_to_base64(org_data) + ), + .token = creds$password + ) + } + cli::cli_h2("File updated status.") + knitr::kable(last_updated_time) +} + + + +return_value <- function(aux, dependencies) { + val <- dependencies[[aux]] + if (length(val) > 0) { + for (i in val) { + val <- c(return_value(i, dependencies), val) + } + } + return(unique(c(val, aux))) +} + +#' Function to write dataframe to GitHub +#' +#' @param df A dataframe +#' +#' @return base64 encoded dataframe +#' @export +#' +#' @examples +#' \dontrun { +#' convert_df_to_base64(mtcars) +#' } +convert_df_to_base64 <- function(df) { + df |> + write.table(quote = FALSE, + row.names = FALSE, + sep = ",") |> + capture.output() |> + paste(collapse = "\n") |> + charToRaw() |> + base64enc::base64encode() +} + +aux_file_last_updated <- function(data_dir, aux_files, branch) { + filenames <- + glue::glue("{data_dir}/_aux/{branch}/{aux_files}/{aux_files}.qs") + data <- sapply(filenames, function(x) + qs::qattributes(x)$datetime) + data.frame( + filename = basename(names(data)), + time_last_update = as.POSIXct(data, format = "%Y%m%d%H%M%S"), + row.names = NULL + ) |> + dplyr::arrange(desc(time_last_update)) + +} + +read_dependencies <- function(gh_user, owner) { + dependencies <- paste(gh_user, + owner, + "pipaux/metadata/Data/dependency.yml", + sep = "/") |> + yaml::read_yaml() + + sapply(dependencies, \(x) if (length(x)) + strsplit(x, ",\\s+")[[1]] + else + character()) +} + +read_signature_file <- function(aux_file, maindir, branch) { + # Construct the path to data signature aux file + data_signature_path <- + fs::path(maindir, + "_aux", + branch, + aux_file, + glue::glue("{aux_file}_datasignature.txt")) + signature_hash <- readr::read_lines(data_signature_path) + return(signature_hash) +} + + #' Update Auxiliary data. Wrapper of measure-specific functions. #' -#' @inheritParams pip_aux_labels -#' @inheritParams pip_cpi +#' @inheritParams aux_labels_pip +#' @inheritParams aux_cpi #' @inheritParams pipfun::load_from_gh #' @param verbose logical : Do you want verbose output? #' @export @@ -32,9 +283,9 @@ update_aux <- function(measure, if ("all" %in% tolower(measure)) { measure <- lsf.str("package:pipaux", - pattern = "^pip_[a-z]+$") |> + pattern = "^aux_[a-z]+$") |> as.character() |> - {\(.) gsub("^pip_", "", .)}() |> + {\(.) gsub("^aux_", "", .)}() |> sort() } @@ -43,7 +294,7 @@ update_aux <- function(measure, al$verbose <- NULL # build function name - fun_name <- glue("pip_{measure}") + fun_name <- glue("aux_{measure}") rs <- lapply(fun_name, \(.x) { @@ -72,3 +323,50 @@ update_aux <- function(measure, return(rs) } +#' Update all auxiliary data at once +#' +#' @inheritParams aux_cpi +#' @param popsrc character: Source for population data. Defaults to `getOption("pipaux.popsrc")`. +#' @export +aux_update_all <- function(force = FALSE, + popsrc = getOption("pipaux.popsrc"), + maindir = gls$PIP_DATA_DIR) { + + # List of countries in WDI + aux_country_list(force = force, maindir = maindir) + + # PIP countries and regions + aux_countries(force = force, maindir = maindir) + aux_regions(force = force, maindir = maindir) + + # PIP Indicators + aux_indicators(force = force, maindir = maindir) + + # Poverty lines + aux_pl(force = force, maindir = maindir) + + # PFW, CPI and PPP from DLW + aux_pfw(force = force, maindir = maindir) + aux_cpi(force = force, maindir = maindir) + aux_ppp(force = force, maindir = maindir) + + # POP from Emi or WDI + aux_pop(force = force, maindir = maindir, src = popsrc) + + # GDP from WEO, Maddison and WDI (+ a few special cases) + aux_weo(force = force, maindir = maindir) + aux_maddison(force = force, maindir = maindir) + aux_gdp(force = force, maindir = maindir) + + # PCE from WDI (+ a few special cases) + aux_pce(force = force, maindir = maindir) + + # Country profiles (from Poverty GP) + aux_cp(force = force, maindir = maindir) + + # Survey metadata (from Poverty GP) + aux_metadata(force = force, maindir = maindir) + + return(invisible()) +} + diff --git a/R/utils-data-table.R b/R/utils-data-table.R deleted file mode 100644 index d2f2964..0000000 --- a/R/utils-data-table.R +++ /dev/null @@ -1,12 +0,0 @@ -# data.table is generally careful to minimize the scope for namespace -# conflicts (i.e., functions with the same name as in other packages); -# a more conservative approach using @importFrom should be careful to -# import any needed data.table special symbols as well, e.g., if you -# run DT[ , .N, by='grp'] in your package, you'll need to add -# @importFrom data.table .N to prevent the NOTE from R CMD check. -# See ?data.table::`special-symbols` for the list of such symbols -# data.table defines; see the 'Importing data.table' vignette for more -# advice (vignette('datatable-importing', 'data.table')). -# -#' @import data.table -NULL diff --git a/R/utils-pipe.R b/R/utils-pipe.R deleted file mode 100644 index fd0b1d1..0000000 --- a/R/utils-pipe.R +++ /dev/null @@ -1,14 +0,0 @@ -#' Pipe operator -#' -#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @param lhs A value or the magrittr placeholder. -#' @param rhs A function call using the magrittr semantics. -#' @return The result of calling `rhs(lhs)`. -NULL diff --git a/R/utils.R b/R/utils.R index 5375b09..442f43e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -409,12 +409,12 @@ get_gh <- function(owner, } #' SAve auxiliary file to Github Repo -#' -#' Sometimes we need to save auxiliary files to Github repo. +#' +#' Sometimes we need to save auxiliary files to Github repo. #' This function allows for this. #' #' @inheritParams pipfun::save_to_gh -#' @export +#' @export #' @return NULL save_aux_to_gh <- function(df, measure, @@ -426,13 +426,48 @@ save_aux_to_gh <- function(df, ext = "csv", ... ) { - + pipfun::save_to_gh(df = df, - repo = repo, + repo = repo, owner = owner, branch = branch, tag = tag, filename = filename, ext = ext, ...) -} \ No newline at end of file +} + + +# data.table is generally careful to minimize the scope for namespace +# conflicts (i.e., functions with the same name as in other packages); +# a more conservative approach using @importFrom should be careful to +# import any needed data.table special symbols as well, e.g., if you +# run DT[ , .N, by='grp'] in your package, you'll need to add +# @importFrom data.table .N to prevent the NOTE from R CMD check. +# See ?data.table::`special-symbols` for the list of such symbols +# data.table defines; see the 'Importing data.table' vignette for more +# advice (vignette('datatable-importing', 'data.table')). +# +#' @import data.table +NULL + +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. +NULL + + + + + + diff --git a/R/validation_report.R b/R/validation_report.R new file mode 100644 index 0000000..8e9ec8f --- /dev/null +++ b/R/validation_report.R @@ -0,0 +1,90 @@ +#' Get validation report data validation error report +#' +#' @param vlddata validation data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' +#' @export +get_error_validation <- function(vlddata, detail){ + + stopifnot("Validation data is not availabel" = !is.null(vlddata)) + + err_t <- NULL + + if (any(vlddata$type == "error")){ + + err_t <- vlddata[type == "error", + .(table_name, description, call, + message, type)] + } + + + if (isFALSE(detail)) { + + cli::cli_abort("Description of invalid cases for {unique(err_t$table_name)}, + {err_t$description}") + + } else { + + if (!rlang::env_has(.pipaux, "validation_report")){ + + rlang::env_poke(.pipaux, "validation_report", err_t) + + } else { + + compiled_result <- rbind(.pipaux$validation_report, err_t) + rlang::env_poke(.pipaux, "validation_report", compiled_result) + + } + + cli::cli_inform("Validation report ({.field validation_report}) has been added to the environment varaible ({.field .pipaux}).") + } + +} + +#' Remove data validation report from .pipaux environment variable +#' +#' @export +clean_validation_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + # rlang::env_bind(.pipaux, validation_report = rlang::zap()) + rlang::env_unbind(.pipaux, "validation_report") + + } +} + +#' Send an email that contains auxiliary data validation report +#' +#' @import blastula +#' +#' @export +send_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + print(.pipaux$validation_report) + + # fname <- file.path(tempdir(), "data_validation_report.csv") + # + # write.csv(.pipaux$validation_report, fname, row.names = FALSE) + # + # compose_email( + # body = md(glue::glue( + # + # "Hello, + # + # The attched file contains auxiliary data validation report. + # + # Regards"))) |> + # add_attachment(file = fname, filename = "data_validation_report") |> + # smtp_send( + # from = "tefera.degefu@outlook.com", + # to = "tdegefu@worldbank.org", + # subject = "Data validation report", + # credentials = creds_envvar(user = "tefera.degefu@outlook.com", + # pass_envvar = "SMTP_GPID_EMAIL", + # provider = "outlook") + # ) + } +} diff --git a/R/wdi_validate_raw.R b/R/wdi_validate_raw.R deleted file mode 100644 index 47fb1c8..0000000 --- a/R/wdi_validate_raw.R +++ /dev/null @@ -1,38 +0,0 @@ -#' Validate raw wdi data -#' -#' @param wdi raw wdi data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ - - stopifnot("WDI raw data is not loaded" = !is.null(wdi)) - - report <- data_validation_report() - - validate(wdi, name = "WDI raw data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(NE.CON.PRVT.PC.KD), - description = "`NE.CON.PRVT.PC.KD` should be numeric") |> - validate_if(is.numeric(NY.GDP.PCAP.KD), - description = "`NY.GDP.PCAP.KD` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R deleted file mode 100644 index 6392418..0000000 --- a/R/weo_validate_output.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Validate clean weo data -#' -#' @param weo clean weo data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ - - stopifnot("WEO output data is not loaded" = !is.null(weo)) - - report <- data_validation_report() - - validate(weo, name = "WEO output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(weo_gdp), - description = "`weo_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/weo_validate_raw.R b/R/weo_validate_raw.R deleted file mode 100644 index 679ffb6..0000000 --- a/R/weo_validate_raw.R +++ /dev/null @@ -1,62 +0,0 @@ -#' Validate raw weo data -#' -#' @param weo raw weo data, as loaded via `pipfun::load_from_gh` -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' @keywords internal -#' -#' @export -weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ - - stopifnot("WEO raw data is not loaded" = !is.null(weo)) - - report <- data_validation_report() - - weo <- weo[!is.na(`WEO Subject Code`), ] - - validate(weo, name = "WEO raw data validation") |> - validate_if(is.character(`WEO Country Code`), - description = "`WEO Country Code` should be character") |> - validate_if(is.character(ISO), - description = "ISO should be character") |> - validate_if(is.character(`WEO Subject Code`), - description = "`WEO Subject Code` should be character") |> - validate_if(is.character(Country), - description = "`Country` should be character") |> - validate_if(is.character(`Subject Descriptor`), - description = "`Subject Descriptor` should be character") |> - validate_if(is.character(`Subject Notes`), - description = "`Subject Notes` should be character") |> - validate_if(is.character(Units), - description = "`Units` should be character") |> - validate_if(is.character(Scale), - description = "`Scale` should be character") |> - validate_if(is.character(`Country/Series-specific Notes`), - description = "`Country/Series-specific Notes` should be character") |> - validate_if(is.numeric(`Estimates Start After`), - description = "`Estimates Start After` should be numeric") |> - validate_cols(not_na, ISO, `WEO Subject Code`, - description = "no missing values in key variables") |> - validate_if(is_uniq(ISO, `WEO Subject Code`), - description = "no duplicate records in key variables") |> - add_results(report) - - num_var_list <- grep("^[[:digit:]]", colnames(weo)) - - for (i in 1:length(num_var_list)) { - validate(weo, name = "WEO validation") |> - validate_cols(is.numeric, num_var_list[i], - description = "variables (with numeric var name) should be numeric") |> - add_results(report) - } - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} - diff --git a/man/auto_aux_update.Rd b/man/auto_aux_update.Rd index 5bdea0a..e2b3611 100644 --- a/man/auto_aux_update.Rd +++ b/man/auto_aux_update.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auto_aux_update.R +% Please edit documentation in R/update_aux.R \name{auto_aux_update} \alias{auto_aux_update} \title{Update the measure along with it's dependencies automatically.} diff --git a/man/pip_censoring.Rd b/man/aux_censoring.Rd similarity index 89% rename from man/pip_censoring.Rd rename to man/aux_censoring.Rd index 02e7923..260c872 100644 --- a/man/pip_censoring.Rd +++ b/man/aux_censoring.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_censoring.R -\name{pip_censoring} -\alias{pip_censoring} +% Please edit documentation in R/aux_censoring.R +\name{aux_censoring} +\alias{aux_censoring} \title{Censoring data} \usage{ -pip_censoring( +aux_censoring( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_countries.Rd b/man/aux_countries.Rd similarity index 89% rename from man/pip_countries.Rd rename to man/aux_countries.Rd index df445f8..101329b 100644 --- a/man/pip_countries.Rd +++ b/man/aux_countries.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_countries.R -\name{pip_countries} -\alias{pip_countries} +% Please edit documentation in R/aux_countries.R +\name{aux_countries} +\alias{aux_countries} \title{PIP Countries} \usage{ -pip_countries( +aux_countries( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_country_list.Rd b/man/aux_country_list.Rd similarity index 91% rename from man/pip_country_list.Rd rename to man/aux_country_list.Rd index 70eb8d4..f5f4900 100644 --- a/man/pip_country_list.Rd +++ b/man/aux_country_list.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_country_list.R -\name{pip_country_list} -\alias{pip_country_list} +% Please edit documentation in R/aux_country_list.R +\name{aux_country_list} +\alias{aux_country_list} \title{List of countries} \usage{ -pip_country_list( +aux_country_list( action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, diff --git a/man/pip_country_list_update.Rd b/man/aux_country_list_update.Rd similarity index 56% rename from man/pip_country_list_update.Rd rename to man/aux_country_list_update.Rd index 8a1c56d..1bc1618 100644 --- a/man/pip_country_list_update.Rd +++ b/man/aux_country_list_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_country_list_update.R -\name{pip_country_list_update} -\alias{pip_country_list_update} +% Please edit documentation in R/aux_country_list.R +\name{aux_country_list_update} +\alias{aux_country_list_update} \title{Update Country LIst} \usage{ -pip_country_list_update(class_branch = "master") +aux_country_list_update(class_branch = "master") } \arguments{ \item{class_branch}{character: names of branch of GPID-WB/class repo. Default diff --git a/man/pip_cp.Rd b/man/aux_cp.Rd similarity index 91% rename from man/pip_cp.Rd rename to man/aux_cp.Rd index 7bde8e2..f78c6a6 100644 --- a/man/pip_cp.Rd +++ b/man/aux_cp.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp.R -\name{pip_cp} -\alias{pip_cp} +% Please edit documentation in R/aux_cp.R +\name{aux_cp} +\alias{aux_cp} \title{Country Profiles} \usage{ -pip_cp( +aux_cp( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_cp_clean.Rd b/man/aux_cp_clean.Rd similarity index 69% rename from man/pip_cp_clean.Rd rename to man/aux_cp_clean.Rd index 80b0fce..2f1f192 100644 --- a/man/pip_cp_clean.Rd +++ b/man/aux_cp_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_clean.R -\name{pip_cp_clean} -\alias{pip_cp_clean} +% Please edit documentation in R/aux_cp.R +\name{aux_cp_clean} +\alias{aux_cp_clean} \title{Clean country profile data} \usage{ -pip_cp_clean(x, file_names) +aux_cp_clean(x, file_names) } \arguments{ \item{x}{database from pip_cp_update} diff --git a/man/pip_cp_update.Rd b/man/aux_cp_update.Rd similarity index 87% rename from man/pip_cp_update.Rd rename to man/aux_cp_update.Rd index 6ea06c5..4016c25 100644 --- a/man/pip_cp_update.Rd +++ b/man/aux_cp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_update.R -\name{pip_cp_update} -\alias{pip_cp_update} +% Please edit documentation in R/aux_cp.R +\name{aux_cp_update} +\alias{aux_cp_update} \title{Update Country Profiles} \usage{ -pip_cp_update( +aux_cp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_cpi.Rd b/man/aux_cpi.Rd similarity index 92% rename from man/pip_cpi.Rd rename to man/aux_cpi.Rd index 806c297..742e849 100644 --- a/man/pip_cpi.Rd +++ b/man/aux_cpi.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi.R -\name{pip_cpi} -\alias{pip_cpi} +% Please edit documentation in R/aux_cpi.R +\name{aux_cpi} +\alias{aux_cpi} \title{PIP CPI} \usage{ -pip_cpi( +aux_cpi( action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, diff --git a/man/pip_cpi_clean.Rd b/man/aux_cpi_clean.Rd similarity index 77% rename from man/pip_cpi_clean.Rd rename to man/aux_cpi_clean.Rd index 063dc31..20c3e01 100644 --- a/man/pip_cpi_clean.Rd +++ b/man/aux_cpi_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_clean.R -\name{pip_cpi_clean} -\alias{pip_cpi_clean} +% Please edit documentation in R/aux_cpi.R +\name{aux_cpi_clean} +\alias{aux_cpi_clean} \title{Clean CPI data} \usage{ -pip_cpi_clean( +aux_cpi_clean( y, cpivar = getOption("pipaux.cpivar"), maindir = gls$PIP_DATA_DIR, @@ -12,7 +12,7 @@ pip_cpi_clean( ) } \arguments{ -\item{y}{dataset with CPI data from \code{pip_cpi_update()}.} +\item{y}{dataset with CPI data from \code{aux_cpi_update()}.} \item{cpivar}{character: CPI variable to be used as default. Currently it is "cpi2011".} diff --git a/man/pip_cpi_update.Rd b/man/aux_cpi_update.Rd similarity index 87% rename from man/pip_cpi_update.Rd rename to man/aux_cpi_update.Rd index 0e9254d..3a73d83 100644 --- a/man/pip_cpi_update.Rd +++ b/man/aux_cpi_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_update.R -\name{pip_cpi_update} -\alias{pip_cpi_update} +% Please edit documentation in R/aux_cpi.R +\name{aux_cpi_update} +\alias{aux_cpi_update} \title{Update CPI} \usage{ -pip_cpi_update( +aux_cpi_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_cpi_vintage.Rd b/man/aux_cpi_vintage.Rd similarity index 66% rename from man/pip_cpi_vintage.Rd rename to man/aux_cpi_vintage.Rd index f3bb49a..2f94e44 100644 --- a/man/pip_cpi_vintage.Rd +++ b/man/aux_cpi_vintage.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_vintage.R -\name{pip_cpi_vintage} -\alias{pip_cpi_vintage} +% Please edit documentation in R/aux_cpi.R +\name{aux_cpi_vintage} +\alias{aux_cpi_vintage} \title{Check CPI Vintage} \usage{ -pip_cpi_vintage( +aux_cpi_vintage( msrdir = fs::path(gls$PIP_DATA_DIR, "_aux/", measure), dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), force = FALSE @@ -15,7 +15,7 @@ pip_cpi_vintage( \item{dlwdir}{character: Datalibweb directory} -\item{force}{logical: If TRUE force update of veintage level 1.} +\item{force}{logical: If TRUE force update of vintage level 1.} } \description{ Check CPI Vintage diff --git a/man/aux_data.Rd b/man/aux_data.Rd new file mode 100644 index 0000000..2919fb9 --- /dev/null +++ b/man/aux_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_data_files.R +\name{aux_data} +\alias{aux_data} +\title{Attache key values into auxiliary file} +\usage{ +aux_data(aux_file) +} +\arguments{ +\item{aux_file}{auxiliary file} +} +\value{ +data.table with key values +} +\description{ +Attache key values into auxiliary file +} diff --git a/man/pip_dictionary.Rd b/man/aux_dictionary.Rd similarity index 89% rename from man/pip_dictionary.Rd rename to man/aux_dictionary.Rd index 1998468..f42fe32 100644 --- a/man/pip_dictionary.Rd +++ b/man/aux_dictionary.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_dictionary.R -\name{pip_dictionary} -\alias{pip_dictionary} +% Please edit documentation in R/aux_dictionary.R +\name{aux_dictionary} +\alias{aux_dictionary} \title{PIP Dictionary} \usage{ -pip_dictionary( +aux_dictionary( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_gdm.Rd b/man/aux_gdm.Rd similarity index 94% rename from man/pip_gdm.Rd rename to man/aux_gdm.Rd index 232b43b..fa8b072 100644 --- a/man/pip_gdm.Rd +++ b/man/aux_gdm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdm.R -\name{pip_gdm} -\alias{pip_gdm} +% Please edit documentation in R/aux_gdm.R +\name{aux_gdm} +\alias{aux_gdm} \title{PIP GDM} \usage{ -pip_gdm( +aux_gdm( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_gdm_update.Rd b/man/aux_gdm_update.Rd similarity index 88% rename from man/pip_gdm_update.Rd rename to man/aux_gdm_update.Rd index 7fda4b1..8101a7c 100644 --- a/man/pip_gdm_update.Rd +++ b/man/aux_gdm_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdm_update.R -\name{pip_gdm_update} -\alias{pip_gdm_update} +% Please edit documentation in R/aux_gdm.R +\name{aux_gdm_update} +\alias{aux_gdm_update} \title{Update GDM} \usage{ -pip_gdm_update( +aux_gdm_update( force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_gdp.Rd b/man/aux_gdp.Rd similarity index 89% rename from man/pip_gdp.Rd rename to man/aux_gdp.Rd index 9c49cd1..3e237ed 100644 --- a/man/pip_gdp.Rd +++ b/man/aux_gdp.Rd @@ -1,41 +1,41 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp.R -\name{pip_gdp} -\alias{pip_gdp} -\title{PIP GDP} -\usage{ -pip_gdp( - action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - from = "file", - detail = getOption("pipaux.detail.raw") -) -} -\arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" -and "gh" are synonymous} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Update or load GDP data. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_gdp.R +\name{aux_gdp} +\alias{aux_gdp} +\title{PIP GDP} +\usage{ +aux_gdp( + action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = "file", + detail = getOption("pipaux.detail.raw") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" +and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Update or load GDP data. +} diff --git a/man/pip_gdp_update.Rd b/man/aux_gdp_update.Rd similarity index 89% rename from man/pip_gdp_update.Rd rename to man/aux_gdp_update.Rd index 16eb090..68fd158 100644 --- a/man/pip_gdp_update.Rd +++ b/man/aux_gdp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp_update.R -\name{pip_gdp_update} -\alias{pip_gdp_update} +% Please edit documentation in R/aux_gdp.R +\name{aux_gdp_update} +\alias{aux_gdp_update} \title{Update GDP} \usage{ -pip_gdp_update( +aux_gdp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_gdp_weo.Rd b/man/aux_gdp_weo.Rd similarity index 79% rename from man/pip_gdp_weo.Rd rename to man/aux_gdp_weo.Rd index 0f1bd98..94044ac 100644 --- a/man/pip_gdp_weo.Rd +++ b/man/aux_gdp_weo.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_gdp_weo.R -\name{pip_gdp_weo} -\alias{pip_gdp_weo} +% Please edit documentation in R/aux_gdp.R +\name{aux_gdp_weo} +\alias{aux_gdp_weo} \title{Fetch GDP data from WEO} \usage{ -pip_gdp_weo(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) +aux_gdp_weo(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) } \arguments{ \item{action}{character: Either "load" or "update". Default is "update". If @@ -22,6 +22,6 @@ Note that the most recent version most be downloaded from imf.org and saved as an .xls file in \verb{/_aux/weo/}. The filename should be in the following structure \verb{WEO_.xls}. Due to potential file corruption the file must be opened and re-saved before it can be updated with -\code{pip_gdp_weo()}. Hopefully in the future IMF will stop using an \code{.xls} file +\code{aux_gdp_weo()}. Hopefully in the future IMF will stop using an \code{.xls} file that's not really xls. } diff --git a/man/pip_income_groups.Rd b/man/aux_income_groups.Rd similarity index 89% rename from man/pip_income_groups.Rd rename to man/aux_income_groups.Rd index 100ad5e..75829fd 100644 --- a/man/pip_income_groups.Rd +++ b/man/aux_income_groups.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_income_groups.R -\name{pip_income_groups} -\alias{pip_income_groups} +% Please edit documentation in R/aux_income_groups.R +\name{aux_income_groups} +\alias{aux_income_groups} \title{PIP series of income group} \usage{ -pip_income_groups( +aux_income_groups( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_indicators.Rd b/man/aux_indicators.Rd similarity index 89% rename from man/pip_indicators.Rd rename to man/aux_indicators.Rd index 083f7fc..fd6c9bc 100644 --- a/man/pip_indicators.Rd +++ b/man/aux_indicators.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_indicators.R -\name{pip_indicators} -\alias{pip_indicators} +% Please edit documentation in R/aux_indicators.R +\name{aux_indicators} +\alias{aux_indicators} \title{PIP Indicators} \usage{ -pip_indicators( +aux_indicators( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_aux_labels.Rd b/man/aux_labels_pip.Rd similarity index 53% rename from man/pip_aux_labels.Rd rename to man/aux_labels_pip.Rd index 4f7525e..1d0190b 100644 --- a/man/pip_aux_labels.Rd +++ b/man/aux_labels_pip.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_aux_labels.R -\name{pip_aux_labels} -\alias{pip_aux_labels} -\title{pip_aux_labels} +% Please edit documentation in R/aux_labels_pip.R +\name{aux_labels_pip} +\alias{aux_labels_pip} +\title{PIP Auxiliary Labels} \usage{ -pip_aux_labels(x, measure) +aux_labels_pip(x, measure) } \arguments{ \item{x}{Data frame to be labeled.} @@ -12,6 +12,6 @@ pip_aux_labels(x, measure) \item{measure}{type of data frame, e.g., "cpi" or "PPP".} } \description{ -pip_aux_labels +PIP Auxiliary Labels } \keyword{internal} diff --git a/man/pip_maddison.Rd b/man/aux_maddison.Rd similarity index 90% rename from man/pip_maddison.Rd rename to man/aux_maddison.Rd index 75b905a..e63e31f 100644 --- a/man/pip_maddison.Rd +++ b/man/aux_maddison.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_maddison.R -\name{pip_maddison} -\alias{pip_maddison} +% Please edit documentation in R/aux_maddison.R +\name{aux_maddison} +\alias{aux_maddison} \title{Maddison data} \usage{ -pip_maddison( +aux_maddison( action = c("update", "load"), owner = getOption("pipfun.ghowner"), force = FALSE, diff --git a/man/pip_metadata.Rd b/man/aux_metadata.Rd similarity index 90% rename from man/pip_metadata.Rd rename to man/aux_metadata.Rd index 8d94cdb..748a91e 100644 --- a/man/pip_metadata.Rd +++ b/man/aux_metadata.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metadata.R -\name{pip_metadata} -\alias{pip_metadata} +% Please edit documentation in R/aux_metadata.R +\name{aux_metadata} +\alias{aux_metadata} \title{PIP Survey Metadata} \usage{ -pip_metadata( +aux_metadata( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_metadata_update.Rd b/man/aux_metadata_update.Rd similarity index 87% rename from man/pip_metadata_update.Rd rename to man/aux_metadata_update.Rd index 53b34bc..0e9f05b 100644 --- a/man/pip_metadata_update.Rd +++ b/man/aux_metadata_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metadata_update.R -\name{pip_metadata_update} -\alias{pip_metadata_update} +% Please edit documentation in R/aux_metadata.R +\name{aux_metadata_update} +\alias{aux_metadata_update} \title{Update metadata file} \usage{ -pip_metadata_update( +aux_metadata_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_metaregion.Rd b/man/aux_metaregion.Rd similarity index 89% rename from man/pip_metaregion.Rd rename to man/aux_metaregion.Rd index df279e8..ae7a680 100644 --- a/man/pip_metaregion.Rd +++ b/man/aux_metaregion.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_metaregion.R -\name{pip_metaregion} -\alias{pip_metaregion} +% Please edit documentation in R/aux_metadata.R +\name{aux_metaregion} +\alias{aux_metaregion} \title{Metadata for PIP regions} \usage{ -pip_metaregion( +aux_metaregion( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_missing_data.Rd b/man/aux_missing_data.Rd similarity index 89% rename from man/pip_missing_data.Rd rename to man/aux_missing_data.Rd index c8611b5..06d054a 100644 --- a/man/pip_missing_data.Rd +++ b/man/aux_missing_data.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_missing_data.R -\name{pip_missing_data} -\alias{pip_missing_data} +% Please edit documentation in R/aux_missing_data.R +\name{aux_missing_data} +\alias{aux_missing_data} \title{Create table with missing countries} \usage{ -pip_missing_data( +aux_missing_data( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_nan.Rd b/man/aux_nan.Rd similarity index 92% rename from man/pip_nan.Rd rename to man/aux_nan.Rd index b63772d..4c713ca 100644 --- a/man/pip_nan.Rd +++ b/man/aux_nan.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_nan.R -\name{pip_nan} -\alias{pip_nan} +% Please edit documentation in R/aux_nan.R +\name{aux_nan} +\alias{aux_nan} \title{PIP nowcast data} \usage{ -pip_nan( +aux_nan( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_npl.Rd b/man/aux_npl.Rd similarity index 92% rename from man/pip_npl.Rd rename to man/aux_npl.Rd index d73dd03..d796196 100644 --- a/man/pip_npl.Rd +++ b/man/aux_npl.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_npl.R -\name{pip_npl} -\alias{pip_npl} +% Please edit documentation in R/aux_npl.R +\name{aux_npl} +\alias{aux_npl} \title{National Poverty headcount} \usage{ -pip_npl( +aux_npl( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pce.Rd b/man/aux_pce.Rd similarity index 93% rename from man/pip_pce.Rd rename to man/aux_pce.Rd index 92aff3a..59d9bdd 100644 --- a/man/pip_pce.Rd +++ b/man/aux_pce.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pce.R -\name{pip_pce} -\alias{pip_pce} +% Please edit documentation in R/aux_pce.R +\name{aux_pce} +\alias{aux_pce} \title{PIP PCE} \usage{ -pip_pce( +aux_pce( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pce_update.Rd b/man/aux_pce_update.Rd similarity index 89% rename from man/pip_pce_update.Rd rename to man/aux_pce_update.Rd index 660a846..a967a30 100644 --- a/man/pip_pce_update.Rd +++ b/man/aux_pce_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pce_update.R -\name{pip_pce_update} -\alias{pip_pce_update} +% Please edit documentation in R/aux_pce.R +\name{aux_pce_update} +\alias{aux_pce_update} \title{Update PCE} \usage{ -pip_pce_update( +aux_pce_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pfw.Rd b/man/aux_pfw.Rd similarity index 92% rename from man/pip_pfw.Rd rename to man/aux_pfw.Rd index d2f8a6e..5606c17 100644 --- a/man/pip_pfw.Rd +++ b/man/aux_pfw.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw.R -\name{pip_pfw} -\alias{pip_pfw} +% Please edit documentation in R/aux_pfw.R +\name{aux_pfw} +\alias{aux_pfw} \title{PIP PFW} \usage{ -pip_pfw( +aux_pfw( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pfw_clean.Rd b/man/aux_pfw_clean.Rd similarity index 64% rename from man/pip_pfw_clean.Rd rename to man/aux_pfw_clean.Rd index 05a0dee..d55ba52 100644 --- a/man/pip_pfw_clean.Rd +++ b/man/aux_pfw_clean.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_clean.R -\name{pip_pfw_clean} -\alias{pip_pfw_clean} +% Please edit documentation in R/aux_pfw.R +\name{aux_pfw_clean} +\alias{aux_pfw_clean} \title{Clean PFW} \usage{ -pip_pfw_clean(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) +aux_pfw_clean(y, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main")) } \arguments{ -\item{y}{dataset with PPP data from \code{pip_pfw_update()}.} +\item{y}{dataset with PPP data from \code{aux_pfw_update()}.} \item{maindir}{character: Main directory of project.} diff --git a/man/pip_pfw_key.Rd b/man/aux_pfw_key.Rd similarity index 64% rename from man/pip_pfw_key.Rd rename to man/aux_pfw_key.Rd index 5afcf90..a6a58cb 100644 --- a/man/pip_pfw_key.Rd +++ b/man/aux_pfw_key.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_key.R -\name{pip_pfw_key} -\alias{pip_pfw_key} +% Please edit documentation in R/aux_pfw.R +\name{aux_pfw_key} +\alias{aux_pfw_key} \title{Generate a dataset that contains pfw keys} \usage{ -pip_pfw_key() +aux_pfw_key() } \value{ data.table diff --git a/man/pip_pfw_update.Rd b/man/aux_pfw_update.Rd similarity index 87% rename from man/pip_pfw_update.Rd rename to man/aux_pfw_update.Rd index 966833a..e876e5e 100644 --- a/man/pip_pfw_update.Rd +++ b/man/aux_pfw_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pfw_update.R -\name{pip_pfw_update} -\alias{pip_pfw_update} +% Please edit documentation in R/aux_pfw.R +\name{aux_pfw_update} +\alias{aux_pfw_update} \title{Update PFW} \usage{ -pip_pfw_update( +aux_pfw_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pl.Rd b/man/aux_pl.Rd similarity index 92% rename from man/pip_pl.Rd rename to man/aux_pl.Rd index 7c556e5..d1c300e 100644 --- a/man/pip_pl.Rd +++ b/man/aux_pl.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pl.R -\name{pip_pl} -\alias{pip_pl} +% Please edit documentation in R/aux_pl.R +\name{aux_pl} +\alias{aux_pl} \title{Poverty lines} \usage{ -pip_pl( +aux_pl( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_pl_clean.Rd b/man/aux_pl_clean.Rd similarity index 73% rename from man/pip_pl_clean.Rd rename to man/aux_pl_clean.Rd index a210dae..9a296c3 100644 --- a/man/pip_pl_clean.Rd +++ b/man/aux_pl_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pl_clean.R -\name{pip_pl_clean} -\alias{pip_pl_clean} +% Please edit documentation in R/aux_pl.R +\name{aux_pl_clean} +\alias{aux_pl_clean} \title{Build a data table for each list from yaml file with poverty lines info} \usage{ -pip_pl_clean(l) +aux_pl_clean(l) } \arguments{ \item{l}{list from yaml file} diff --git a/man/pip_pop.Rd b/man/aux_pop.Rd similarity index 92% rename from man/pip_pop.Rd rename to man/aux_pop.Rd index a20a57a..f541001 100644 --- a/man/pip_pop.Rd +++ b/man/aux_pop.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop.R -\name{pip_pop} -\alias{pip_pop} +% Please edit documentation in R/aux_pop.R +\name{aux_pop} +\alias{aux_pop} \title{PIP POP} \usage{ -pip_pop( +aux_pop( action = c("update", "load"), force = FALSE, from = c("gh", "file", "api"), diff --git a/man/pip_pop_update.Rd b/man/aux_pop_update.Rd similarity index 88% rename from man/pip_pop_update.Rd rename to man/aux_pop_update.Rd index 054bd38..82a85c7 100644 --- a/man/pip_pop_update.Rd +++ b/man/aux_pop_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R -\name{pip_pop_update} -\alias{pip_pop_update} +% Please edit documentation in R/aux_pop.R +\name{aux_pop_update} +\alias{aux_pop_update} \title{Update POP} \usage{ -pip_pop_update( +aux_pop_update( force = FALSE, from = c("gh", "file", "api"), maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_ppp.Rd b/man/aux_ppp.Rd similarity index 92% rename from man/pip_ppp.Rd rename to man/aux_ppp.Rd index 12840dc..e585f4f 100644 --- a/man/pip_ppp.Rd +++ b/man/aux_ppp.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp.R -\name{pip_ppp} -\alias{pip_ppp} +% Please edit documentation in R/aux_ppp.R +\name{aux_ppp} +\alias{aux_ppp} \title{PIP PPP} \usage{ -pip_ppp( +aux_ppp( action = c("update", "load"), maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_ppp_clean.Rd b/man/aux_ppp_clean.Rd similarity index 56% rename from man/pip_ppp_clean.Rd rename to man/aux_ppp_clean.Rd index 8418563..c092ebb 100644 --- a/man/pip_ppp_clean.Rd +++ b/man/aux_ppp_clean.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp_clean.R -\name{pip_ppp_clean} -\alias{pip_ppp_clean} +% Please edit documentation in R/aux_ppp.R +\name{aux_ppp_clean} +\alias{aux_ppp_clean} \title{Clean PPP data from datalibweb to meet PIP protocols} \usage{ -pip_ppp_clean(y, default_year = getOption("pipaux.pppyear")) +aux_ppp_clean(y, default_year = getOption("pipaux.pppyear")) } \arguments{ -\item{y}{dataset with PPP data from \code{pip_ppp_update()}.} +\item{y}{dataset with PPP data from \code{aux_ppp_update()}.} \item{default_year}{numeric: ICP round year. Default is 2011} } diff --git a/man/pip_ppp_update.Rd b/man/aux_ppp_update.Rd similarity index 85% rename from man/pip_ppp_update.Rd rename to man/aux_ppp_update.Rd index 2faedb9..f1cbe9e 100644 --- a/man/pip_ppp_update.Rd +++ b/man/aux_ppp_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_ppp_update.R -\name{pip_ppp_update} -\alias{pip_ppp_update} +% Please edit documentation in R/aux_ppp.R +\name{aux_ppp_update} +\alias{aux_ppp_update} \title{Update PPP} \usage{ -pip_ppp_update( +aux_ppp_update( maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_prices.Rd b/man/aux_prices.Rd similarity index 87% rename from man/pip_prices.Rd rename to man/aux_prices.Rd index b103845..fbea4cb 100644 --- a/man/pip_prices.Rd +++ b/man/aux_prices.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_prices.R -\name{pip_prices} -\alias{pip_prices} +% Please edit documentation in R/aux_prices.R +\name{aux_prices} +\alias{aux_prices} \title{PIP Prices} \usage{ -pip_prices( +aux_prices( measure = NULL, action = "update", maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_regions.Rd b/man/aux_regions.Rd similarity index 90% rename from man/pip_regions.Rd rename to man/aux_regions.Rd index 8b91baf..5c70ac7 100644 --- a/man/pip_regions.Rd +++ b/man/aux_regions.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_regions.R -\name{pip_regions} -\alias{pip_regions} +% Please edit documentation in R/aux_regions.R +\name{aux_regions} +\alias{aux_regions} \title{PIP Regions} \usage{ -pip_regions( +aux_regions( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_sign_save.Rd b/man/aux_sign_save.Rd similarity index 82% rename from man/pip_sign_save.Rd rename to man/aux_sign_save.Rd index 8124995..d5d3f50 100644 --- a/man/pip_sign_save.Rd +++ b/man/aux_sign_save.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_sign_save.R -\name{pip_sign_save} -\alias{pip_sign_save} +% Please edit documentation in R/aux_sign_save.R +\name{aux_sign_save} +\alias{aux_sign_save} \title{Save PIP auxiliary data} \usage{ -pip_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) +aux_sign_save(x, measure, msrdir, force = FALSE, save_dta = TRUE) } \arguments{ \item{x}{data.frame Data frame to be signed and saved.} diff --git a/man/aux_sna.Rd b/man/aux_sna.Rd new file mode 100644 index 0000000..c215f0a --- /dev/null +++ b/man/aux_sna.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_sna.R +\name{aux_sna} +\alias{aux_sna} +\title{PIP Special National accounts} +\usage{ +aux_sna( + action = c("update", "load"), + force = FALSE, + maindir = gls$PIP_DATA_DIR, + owner = getOption("pipfun.ghowner"), + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch) +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" +and "gh" are synonymous} +} +\description{ +Update special national accounts data +} diff --git a/man/pip_update_all_aux.Rd b/man/aux_update_all.Rd similarity index 78% rename from man/pip_update_all_aux.Rd rename to man/aux_update_all.Rd index c0ccf15..64b7147 100644 --- a/man/pip_update_all_aux.Rd +++ b/man/aux_update_all.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_update_all_aux.R -\name{pip_update_all_aux} -\alias{pip_update_all_aux} +% Please edit documentation in R/update_aux.R +\name{aux_update_all} +\alias{aux_update_all} \title{Update all auxiliary data at once} \usage{ -pip_update_all_aux( +aux_update_all( force = FALSE, popsrc = getOption("pipaux.popsrc"), maindir = gls$PIP_DATA_DIR diff --git a/man/pip_wdi.Rd b/man/aux_wdi.Rd similarity index 93% rename from man/pip_wdi.Rd rename to man/aux_wdi.Rd index ea9a06d..530f088 100644 --- a/man/pip_wdi.Rd +++ b/man/aux_wdi.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_wdi.R -\name{pip_wdi} -\alias{pip_wdi} +% Please edit documentation in R/aux_wdi.R +\name{aux_wdi} +\alias{aux_wdi} \title{PIP wdi} \usage{ -pip_wdi( +aux_wdi( action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, diff --git a/man/pip_wdi_update.Rd b/man/aux_wdi_update.Rd similarity index 89% rename from man/pip_wdi_update.Rd rename to man/aux_wdi_update.Rd index 9f0bba3..1986df6 100644 --- a/man/pip_wdi_update.Rd +++ b/man/aux_wdi_update.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_wdi_update.R -\name{pip_wdi_update} -\alias{pip_wdi_update} +% Please edit documentation in R/aux_wdi.R +\name{aux_wdi_update} +\alias{aux_wdi_update} \title{Update National accounts data from WDI} \usage{ -pip_wdi_update( +aux_wdi_update( force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), @@ -39,5 +39,5 @@ data.table with gdp and pce variables GDP and HFCE data from WDI. It could be either from API or from file } \examples{ -pip_wdi_update() +aux_wdi_update() } diff --git a/man/pip_weo.Rd b/man/aux_weo.Rd similarity index 94% rename from man/pip_weo.Rd rename to man/aux_weo.Rd index 2d55d0b..47f4e68 100644 --- a/man/pip_weo.Rd +++ b/man/aux_weo.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_weo.R -\name{pip_weo} -\alias{pip_weo} +% Please edit documentation in R/aux_weo.R +\name{aux_weo} +\alias{aux_weo} \title{Fetch GDP data from WEO} \usage{ -pip_weo( +aux_weo( action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), diff --git a/man/pip_weo_clean.Rd b/man/aux_weo_clean.Rd similarity index 77% rename from man/pip_weo_clean.Rd rename to man/aux_weo_clean.Rd index 4fa165a..d7d1d15 100644 --- a/man/pip_weo_clean.Rd +++ b/man/aux_weo_clean.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_weo_clean.R -\name{pip_weo_clean} -\alias{pip_weo_clean} +% Please edit documentation in R/aux_weo.R +\name{aux_weo_clean} +\alias{aux_weo_clean} \title{Clean WEO data} \usage{ -pip_weo_clean( +aux_weo_clean( dt, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main") diff --git a/man/cl_validate_raw.Rd b/man/cl_validate_raw.Rd index 21e4aff..399b7d8 100644 --- a/man/cl_validate_raw.Rd +++ b/man/cl_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cl_validate_raw.R +% Please edit documentation in R/aux_country_list.R \name{cl_validate_raw} \alias{cl_validate_raw} \title{Validate raw country list data} diff --git a/man/clean_cp_names.Rd b/man/clean_cp_names.Rd index bd71560..14a9c5e 100644 --- a/man/clean_cp_names.Rd +++ b/man/clean_cp_names.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cp_clean.R +% Please edit documentation in R/aux_cp.R \name{clean_cp_names} \alias{clean_cp_names} \title{Clean names from original CP files} diff --git a/man/clean_from_wide.Rd b/man/clean_from_wide.Rd index 22819be..f66adf4 100644 --- a/man/clean_from_wide.Rd +++ b/man/clean_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R +% Please edit documentation in R/aux_pop.R \name{clean_from_wide} \alias{clean_from_wide} \title{Clean from WDI format} diff --git a/man/clean_names_from_wide.Rd b/man/clean_names_from_wide.Rd index fb138f9..8ab393b 100644 --- a/man/clean_names_from_wide.Rd +++ b/man/clean_names_from_wide.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_pop_update.R +% Please edit documentation in R/aux_pop.R \name{clean_names_from_wide} \alias{clean_names_from_wide} \title{Clean names from wide WDI format} diff --git a/man/clean_validation_report.Rd b/man/clean_validation_report.Rd index 2573e0c..52a56d7 100644 --- a/man/clean_validation_report.Rd +++ b/man/clean_validation_report.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/clean_validation_report.R +% Please edit documentation in R/validation_report.R \name{clean_validation_report} \alias{clean_validation_report} \title{Remove data validation report from .pipaux environment variable} diff --git a/man/convert_df_to_base64.Rd b/man/convert_df_to_base64.Rd index 88fb3aa..46d0717 100644 --- a/man/convert_df_to_base64.Rd +++ b/man/convert_df_to_base64.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/auto_aux_update.R +% Please edit documentation in R/update_aux.R \name{convert_df_to_base64} \alias{convert_df_to_base64} \title{Function to write dataframe to GitHub} diff --git a/man/countries_validate_output.Rd b/man/countries_validate_output.Rd index 14d1105..d01d9da 100644 --- a/man/countries_validate_output.Rd +++ b/man/countries_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/countries_validate_output.R +% Please edit documentation in R/aux_countries.R \name{countries_validate_output} \alias{countries_validate_output} \title{Validate output countries data} diff --git a/man/cpi_validate_output.Rd b/man/cpi_validate_output.Rd index ca705b9..5bcd165 100644 --- a/man/cpi_validate_output.Rd +++ b/man/cpi_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cpi_validate_output.R +% Please edit documentation in R/aux_cpi.R \name{cpi_validate_output} \alias{cpi_validate_output} \title{Validate clean cpi data} @@ -7,7 +7,7 @@ cpi_validate_output(cpi, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{cpi}{clean cpi data, output via \code{pip_cpi_clean}} +\item{cpi}{clean cpi data, output via \code{aux_cpi_clean}} \item{detail}{has an option TRUE/FALSE, default value is FALSE} } diff --git a/man/cpi_validate_raw.Rd b/man/cpi_validate_raw.Rd index 16e3cad..caab9e6 100644 --- a/man/cpi_validate_raw.Rd +++ b/man/cpi_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cpi_validate_raw.R +% Please edit documentation in R/aux_cpi.R \name{cpi_validate_raw} \alias{cpi_validate_raw} \title{Validate raw cpi data} diff --git a/man/fake_aux_sna.Rd b/man/fake_aux_sna.Rd new file mode 100644 index 0000000..13c752c --- /dev/null +++ b/man/fake_aux_sna.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_sna.R +\name{fake_aux_sna} +\alias{fake_aux_sna} +\title{Fake PIP SNA function} +\usage{ +fake_aux_sna( + action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch), + from = c("gh", "file", "api") +) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{maindir}{character: Main directory of project.} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{tag}{character: specific release to be used in the update.} + +\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" +and "gh" are synonymous} +} +\description{ +Fake PIP SNA function +} diff --git a/man/gdm_validate_output.Rd b/man/gdm_validate_output.Rd index dce34c7..36bfa03 100644 --- a/man/gdm_validate_output.Rd +++ b/man/gdm_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdm_validate_output.R +% Please edit documentation in R/aux_gdm.R \name{gdm_validate_output} \alias{gdm_validate_output} \title{Validate clean gdm data} diff --git a/man/gdm_validate_raw.Rd b/man/gdm_validate_raw.Rd index 23cdb9a..57ba868 100644 --- a/man/gdm_validate_raw.Rd +++ b/man/gdm_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdm_validate_raw.R +% Please edit documentation in R/aux_gdm.R \name{gdm_validate_raw} \alias{gdm_validate_raw} \title{Validate raw gdm data} diff --git a/man/gdp_validate_output.Rd b/man/gdp_validate_output.Rd index b682f70..6c58583 100644 --- a/man/gdp_validate_output.Rd +++ b/man/gdp_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gdp_validate_output.R +% Please edit documentation in R/aux_gdp.R \name{gdp_validate_output} \alias{gdp_validate_output} \title{Validate output gdp data} diff --git a/man/get_error_validation.Rd b/man/get_error_validation.Rd index 397ab01..87d2efd 100644 --- a/man/get_error_validation.Rd +++ b/man/get_error_validation.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_error_validation.R +% Please edit documentation in R/validation_report.R \name{get_error_validation} \alias{get_error_validation} \title{Get validation report data validation error report} diff --git a/man/incgroup_validate_output.Rd b/man/incgroup_validate_output.Rd index e470eb1..ca12105 100644 --- a/man/incgroup_validate_output.Rd +++ b/man/incgroup_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/incgroup_validate_output.R +% Please edit documentation in R/aux_income_groups.R \name{incgroup_validate_output} \alias{incgroup_validate_output} \title{Validate income group output data} diff --git a/man/load_cpi.Rd b/man/load_cpi.Rd index fe4c0bc..2c732d1 100644 --- a/man/load_cpi.Rd +++ b/man/load_cpi.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_cpi_vintage.R +% Please edit documentation in R/aux_cpi.R \name{load_cpi} \alias{load_cpi} \title{Load cpi files and create CPI ID variable} diff --git a/man/load_raw_aux.Rd b/man/load_raw_aux.Rd index 5c01c5d..bd41da0 100644 --- a/man/load_raw_aux.Rd +++ b/man/load_raw_aux.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/load_raw_aux.R +% Please edit documentation in R/load_aux.R \name{load_raw_aux} \alias{load_raw_aux} \title{Load Raw Auxiliary data} diff --git a/man/metadata_validate_output.Rd b/man/metadata_validate_output.Rd index 811bb1c..b8fb37d 100644 --- a/man/metadata_validate_output.Rd +++ b/man/metadata_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metadata_validate_output.R +% Please edit documentation in R/aux_metadata.R \name{metadata_validate_output} \alias{metadata_validate_output} \title{Validate output metadata data} diff --git a/man/metadata_validate_raw.Rd b/man/metadata_validate_raw.Rd index 61e7cae..b4ea3af 100644 --- a/man/metadata_validate_raw.Rd +++ b/man/metadata_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/metadata_validate_raw.R +% Please edit documentation in R/aux_metadata.R \name{metadata_validate_raw} \alias{metadata_validate_raw} \title{Validate raw metadata data} diff --git a/man/mpd_validate_raw.Rd b/man/mpd_validate_raw.Rd index 9e9071b..6146b00 100644 --- a/man/mpd_validate_raw.Rd +++ b/man/mpd_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mpd_validate_raw.R +% Please edit documentation in R/aux_maddison.R \name{mpd_validate_raw} \alias{mpd_validate_raw} \title{Validate raw maddison data} diff --git a/man/npl_validate_output.Rd b/man/npl_validate_output.Rd index aaafeda..48faa47 100644 --- a/man/npl_validate_output.Rd +++ b/man/npl_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/npl_validate_output.R +% Please edit documentation in R/aux_npl.R \name{npl_validate_output} \alias{npl_validate_output} \title{Validate npl output data} diff --git a/man/npl_validate_raw.Rd b/man/npl_validate_raw.Rd index 89de168..6702bd3 100644 --- a/man/npl_validate_raw.Rd +++ b/man/npl_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/npl_validate_raw.R +% Please edit documentation in R/aux_npl.R \name{npl_validate_raw} \alias{npl_validate_raw} \title{Validate npl raw data} diff --git a/man/pce_validate_output.Rd b/man/pce_validate_output.Rd index 007c847..eedddf8 100644 --- a/man/pce_validate_output.Rd +++ b/man/pce_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pce_validate_output.R +% Please edit documentation in R/aux_pce.R \name{pce_validate_output} \alias{pce_validate_output} \title{Validate output pce data} diff --git a/man/pfw_validate_output.Rd b/man/pfw_validate_output.Rd index c2c29b1..59173f9 100644 --- a/man/pfw_validate_output.Rd +++ b/man/pfw_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pfw_validate_output.R +% Please edit documentation in R/aux_pfw.R \name{pfw_validate_output} \alias{pfw_validate_output} \title{Validate clean pfw data} @@ -7,7 +7,7 @@ pfw_validate_output(pfw, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{pfw}{clean pfw data, output via \code{pip_pfw_clean}} +\item{pfw}{clean pfw data, output via \code{aux_pfw_clean}} \item{detail}{has an option TRUE/FALSE, default value is FALSE} } diff --git a/man/pfw_validate_raw.Rd b/man/pfw_validate_raw.Rd index 1eb915c..64101c3 100644 --- a/man/pfw_validate_raw.Rd +++ b/man/pfw_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pfw_validate_raw.R +% Please edit documentation in R/aux_pfw.R \name{pfw_validate_raw} \alias{pfw_validate_raw} \title{Validate raw pfw data} diff --git a/man/pip_sna.Rd b/man/pip_sna.Rd deleted file mode 100644 index 479197b..0000000 --- a/man/pip_sna.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pip_sna.R, R/sna.R -\name{pip_sna} -\alias{pip_sna} -\title{Fake PIP SNA function} -\usage{ -pip_sna( - action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) - -pip_sna( - action = c("update", "load"), - force = FALSE, - maindir = gls$PIP_DATA_DIR, - owner = getOption("pipfun.ghowner"), - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) -) -} -\arguments{ -\item{action}{character: Either "load" or "update". Default is "update". If -"update" data will be updated on the system. If "load" data is loaded in memory.} - -\item{force}{logical: If TRUE data will be overwritten.} - -\item{maindir}{character: Main directory of project.} - -\item{owner}{character: Github repo owner. Default is -\code{getOption("pipfun.ghowner")}} - -\item{branch}{character: either "DEV" or "PROD". Refers to the branch that -will be used to update either the development server or production.} - -\item{tag}{character: specific release to be used in the update.} - -\item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" -and "gh" are synonymous} -} -\description{ -Update special national accounts data -} diff --git a/man/pipe.Rd b/man/pipe.Rd index a648c29..5fa90fe 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-pipe.R +% Please edit documentation in R/utils.R \name{\%>\%} \alias{\%>\%} \title{Pipe operator} diff --git a/man/pl_validate_output.Rd b/man/pl_validate_output.Rd index 6c21dbc..4eaabfd 100644 --- a/man/pl_validate_output.Rd +++ b/man/pl_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pl_validate_output.R +% Please edit documentation in R/aux_pl.R \name{pl_validate_output} \alias{pl_validate_output} \title{Validate output pl data} diff --git a/man/pop_validate_output.Rd b/man/pop_validate_output.Rd index f35f76d..118f796 100644 --- a/man/pop_validate_output.Rd +++ b/man/pop_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pop_validate_output.R +% Please edit documentation in R/aux_pop.R \name{pop_validate_output} \alias{pop_validate_output} \title{Validate output pop data} diff --git a/man/pop_validate_raw.Rd b/man/pop_validate_raw.Rd index 2f903a8..2722911 100644 --- a/man/pop_validate_raw.Rd +++ b/man/pop_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pop_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{pop_validate_raw} \alias{pop_validate_raw} \title{Validate pop raw data download from wdi} diff --git a/man/popmain_validate_raw.Rd b/man/popmain_validate_raw.Rd index 1ce94bd..f45b52e 100644 --- a/man/popmain_validate_raw.Rd +++ b/man/popmain_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/popmain_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{popmain_validate_raw} \alias{popmain_validate_raw} \title{Validate raw main pop data} diff --git a/man/ppp_validate_output.Rd b/man/ppp_validate_output.Rd index 1295262..37e7d8d 100644 --- a/man/ppp_validate_output.Rd +++ b/man/ppp_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ppp_validate_output.R +% Please edit documentation in R/aux_ppp.R \name{ppp_validate_output} \alias{ppp_validate_output} \title{Validate output ppp data} diff --git a/man/ppp_validate_raw.Rd b/man/ppp_validate_raw.Rd index 2afdcdb..991366a 100644 --- a/man/ppp_validate_raw.Rd +++ b/man/ppp_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ppp_validate_raw.R +% Please edit documentation in R/aux_ppp.R \name{ppp_validate_raw} \alias{ppp_validate_raw} \title{Validate raw ppp data} diff --git a/man/send_report.Rd b/man/send_report.Rd index cabd5f0..f77c2e9 100644 --- a/man/send_report.Rd +++ b/man/send_report.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/send_report.R +% Please edit documentation in R/validation_report.R \name{send_report} \alias{send_report} \title{Send an email that contains auxiliary data validation report} diff --git a/man/sna_fy_validate_raw.Rd b/man/sna_fy_validate_raw.Rd index f5d66d9..c1e051c 100644 --- a/man/sna_fy_validate_raw.Rd +++ b/man/sna_fy_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sna_fy_validate_raw.R +% Please edit documentation in R/aux_sna.R \name{sna_fy_validate_raw} \alias{sna_fy_validate_raw} \title{Validate raw sna_fy data} diff --git a/man/sna_validate_raw.Rd b/man/sna_validate_raw.Rd index 8fb8a5d..4c090dc 100644 --- a/man/sna_validate_raw.Rd +++ b/man/sna_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sna_validate_raw.R +% Please edit documentation in R/aux_sna.R \name{sna_validate_raw} \alias{sna_validate_raw} \title{Validate raw special national accounts (sna) data} diff --git a/man/spop_validate_raw.Rd b/man/spop_validate_raw.Rd index 0222f97..d0817f1 100644 --- a/man/spop_validate_raw.Rd +++ b/man/spop_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spop_validate_raw.R +% Please edit documentation in R/aux_pop.R \name{spop_validate_raw} \alias{spop_validate_raw} \title{Validate raw special cases pop data} diff --git a/man/wdi_validate_raw.Rd b/man/wdi_validate_raw.Rd index b56e209..0a82df0 100644 --- a/man/wdi_validate_raw.Rd +++ b/man/wdi_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wdi_validate_raw.R +% Please edit documentation in R/aux_wdi.R \name{wdi_validate_raw} \alias{wdi_validate_raw} \title{Validate raw wdi data} diff --git a/man/weo_validate_output.Rd b/man/weo_validate_output.Rd index 3c24bdf..39d8309 100644 --- a/man/weo_validate_output.Rd +++ b/man/weo_validate_output.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/weo_validate_output.R +% Please edit documentation in R/aux_weo.R \name{weo_validate_output} \alias{weo_validate_output} \title{Validate clean weo data} diff --git a/man/weo_validate_raw.Rd b/man/weo_validate_raw.Rd index 17462dc..ed3e53a 100644 --- a/man/weo_validate_raw.Rd +++ b/man/weo_validate_raw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/weo_validate_raw.R +% Please edit documentation in R/aux_weo.R \name{weo_validate_raw} \alias{weo_validate_raw} \title{Validate raw weo data} diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index ff442f3..33eb5e8 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -8,7 +8,7 @@ temp_fld <- "Y:/tefera_pipaux_test" test_that("cl_validate_raw() works identifying duplicate error", { - cl <- pip_country_list_update(class_branch = "master") + cl <- aux_country_list_update(class_branch = "master") cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] @@ -19,7 +19,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { test_that("cl_validate_raw() works identifying invalid value", { - cl <- pip_country_list_update(class_branch = "master") + cl <- aux_country_list_update(class_branch = "master") cl[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", "SSA", africa_split_code), diff --git a/tests/testthat/test-load_raw_aux.R b/tests/testthat/test-load_raw_aux.R deleted file mode 100644 index cc4c18f..0000000 --- a/tests/testthat/test-load_raw_aux.R +++ /dev/null @@ -1,10 +0,0 @@ -test_that("pipfun::load_raw_aux is deprecated", { - expect_snapshot({ - - lr <- pipfun::load_from_gh(measure = "cpi") - lf <- pipfun::load_from_gh(measure = "cpi") - - expect_equal(lr, lf, ignore_attr = TRUE) - - }) -}) diff --git a/tests/testthat/test-merger_aux.R b/tests/testthat/test-merger_aux.R index 83ded34..ba0a403 100644 --- a/tests/testthat/test-merger_aux.R +++ b/tests/testthat/test-merger_aux.R @@ -1,4 +1,4 @@ -temp_fld <- "Y:/tefera_pipaux_test" +temp_fld <- "Y:\\tefera_pipaux_test" pfw <- load_aux("pfw", branch = "DEV", diff --git a/vignettes/managing_release_branches.Rmd b/vignettes/managing_release_branches.Rmd new file mode 100644 index 0000000..8589217 --- /dev/null +++ b/vignettes/managing_release_branches.Rmd @@ -0,0 +1,100 @@ +--- +title: "Auxiliary Data - Version Control" +subtitle: "Release branch management" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Auxiliary Data - Version Control} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +#library(pipaux) +devtools::load_all(".") +``` + +The {pipaux} package manages auxiliary data used across PIP. It includes functions like `aux_ppp()` for PPP data and `aux_gdp()` for GDP data (formerly pip_*()), which allow users to update or load specific auxiliary data. For example, calling `aux_gdp()` will update or load GDP data depending on user specification. When updating data, these functions push the changes to the DEV branch of the respective auxiliary data repository. The DEV branch of each auxiliary data repository is therefore updated with changes. + +To ensure that the version of auxiliary data used for each release is properly tracked, 2 steps are necessary: + +- Each auxiliary data repository should have a dedicated **release branch**. +- The release branch must be periodically **updated** with changes from the **DEV branch**. +To facilitate the management of release branches and synchronization with the DEV branch, the {pipfun} package provides functions designed for this purpose. + +Specifically, + +### 1. Check if release branch is there, and if not, create it + +```{r initial-setup} +# install.packages("pak") +#pak::pak("PIP-Technical-Team/pipfun@DEV_v2") + +library("pipfun") + +``` + + +```{r} + +# Get branches in repo + +# Example 1: TEST repo +repo_branches <- pipfun::get_repo_branches(repo = "aux_test", + owner = getOption("pipfun.ghowner")) +repo_branches + +# Example 2: PPP repo + +pipfun::get_repo_branches(repo = "aux_ppp", + owner = getOption("pipfun.ghowner")) + + +# As no release branch is found, create it from DEV +# create_new_branch(new_branch = "20241005", +# ref_branch = "DEV", +# repo = "aux_ppp") + +``` + + +### 2. Check that release branch is updated with DEV +```{r} + +pipfun::compare_branch_content(repo = "aux_test", + branch1 = "DEV", + branch2 = "20241005") + +``` + +### 3. Update release branch if needed + +In this case, given that the release branch has just been created, it is up to date with DEV. However, had "same_content" been `FALSE`, we need to update release branch accordingly as follows: + + +```{r} + +# Option 1: Make release branch point to the latest commit as in DEV +pipfun::update_branches(repo = "aux_test", + branch1 = "DEV", + branch2 = "20241005") + +# Option 2: Merge DEV into release branch + +pipfun::merge_branch_into(repo = "aux_test", + source_branch = "DEV", + target_branch = "20241005") +``` + + + + + + +