From 31c6afc6c564b3cecab3ea7d1c55971fd6184984 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 29 Jan 2024 16:06:02 -0500 Subject: [PATCH 01/23] CPI raw and output sample data validation scripts --- R/cpi_validate_output.R | 80 +++++++++++++++++++++++++++++++++++++++++ R/cpi_validate_raw.R | 79 ++++++++++++++++++++++++++++++++++++++++ R/zzz.R | 4 ++- 3 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 R/cpi_validate_output.R create mode 100644 R/cpi_validate_raw.R diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R new file mode 100644 index 0000000..8336abc --- /dev/null +++ b/R/cpi_validate_output.R @@ -0,0 +1,80 @@ +#' Validate clean cpi data +#' +#' @param cpi clean cpi data, output via `pipfun::pip_cpi_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @import blastula +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ + + stopifnot("CPI clean data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI output data validation") %>% + validate_if(is.character(country_code), description = "country_code should be character") %>% + validate_if(is.integer(cpi_year), description = "cpi_year should be integer") %>% + validate_if(is.numeric(survey_year), description = "survey_year should be numeric") %>% + validate_if(is.numeric(cpi), description = "cpi should be numeric") %>% + validate_if(is.numeric(ccf), description = "ccf should be numeric") %>% + validate_if(is.character(survey_acronym), description = "survey_acronym should be character") %>% + validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 should be numeric") %>% + validate_cols(in_set(c(0, 1)), change_cpi2011, description = "change_cpi2011 values within range") %>% + validate_if(is.character(cpi_domain), description = "cpi_domain should be character") %>% + 
validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") %>% + validate_if(is.numeric(cpi_domain_value), description = "cpi_domain_value should be numeric") %>% + validate_cols(in_set(c(0, 1)), cpi_domain_value, description = "cpi_domain_value values within range") %>% + validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") %>% + validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") %>% + validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") %>% + validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") %>% + validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") %>% + validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be numeric") %>% + validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") %>% + validate_if(is.character(cpi_data_level), description = "cpi_data_level should be character") %>% + validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level, + description = "cpi_data_level values within range") %>% + validate_if(is.character(cpi_id), description = "cpi_id should be character") %>% + validate_if(is.numeric(cpi2011_SM23), description = "cpi2011_SM23 should be numeric") %>% + validate_if(is.numeric(cpi2017_SM23), description = "cpi2017_SM23 should be numeric") %>% + validate_cols(not_na, country_code, cpi_year, survey_acronym, + cpi_data_level, description = "no missing values in key variables") %>% + validate_if(is_uniq(country_code, cpi_year, survey_acronym, + cpi_data_level), description = "no duplicate records in key variables") %>% + add_results(report) + + if (any(report$get_validations(unnest = TRUE)$type == "error")){ + + detail <- TRUE + save_summary(report, "cpi_output_validation_log.txt", success = FALSE, warning = FALSE) + + } + + if (detail) { + + compose_email( + body = md(glue::glue( + "Hello, 
+ + The attched file contains data validation report for output *cpi* data. + + Regards"))) |> + add_attachment(file = "cpi_output_validation_log.txt", filename = "cpi_output_validation_log") |> + smtp_send( + from = "tefera.degefu@outlook.com", + #to = "acastanedaa@worldbank.org", + to = "tdegefu@worldbank.org", + subject = "Raw cpi data validation report - data validator pkg", + credentials = creds_envvar(user = "tefera.degefu@outlook.com", + pass_envvar = "SMTP_GPID_EMAIL", + provider = "outlook") + ) + } + + + + + +} diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R new file mode 100644 index 0000000..a34e9b6 --- /dev/null +++ b/R/cpi_validate_raw.R @@ -0,0 +1,79 @@ +#' Validate raw cpi data +#' +#' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @import blastula +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("CPI raw data is not loaded" = !is.null(cpi)) + + report <- data_validation_report() + + validate(cpi, name = "CPI raw data validation") %>% + validate_if(is.character(region), description = "region should be character") %>% + validate_if(is.character(code), description = "code should be character") %>% + validate_if(is.character(countryname), description = "countryname should be character") %>% + validate_if(is.character(survname), description = "survname should be character") %>% + validate_if(is.character(cpi_domain), description = "cpi_domain should be character") %>% + validate_if(is.character(version), description = "version should be character") %>% + validate_if(is.character(survey_coverage), description = "survey_coverage should be character") %>% + validate_if(is.character(cpi_id), description = "cpi_id should be character") %>% + validate_if(is.character(year), description = "year should be character") %>% + 
validate_if(is.numeric(ref_year), description = "ref_year should be numeric") %>% + validate_if(is.numeric(cpi_domain_value), description = "cpi_domain_value should be numeric") %>% + validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") %>% + validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") %>% + validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") %>% + validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") %>% + validate_if(is.numeric(comparability), description = "comparability should be numeric") %>% + validate_if(is.numeric(cur_adj), description = "cur_adj should be numeric") %>% + validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") %>% + validate_if(is.numeric(comparable), description = "comparable should be numeric") %>% + validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be numeric") %>% + validate_if(is.numeric(cpi_data_level), description = "cpi_data_level should be numeric") %>% + validate_if(is.numeric(change_cpi2017), description = "change_cpi2017 should be numeric") %>% + validate_if(is.numeric(change_icp2017), description = "change_icp2017 should be numeric") %>% + validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 should be numeric") %>% + validate_if(is.numeric(change_icp2011), description = "change_icp2011 should be numeric") %>% + validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") %>% + validate_cols(not_na, code, year, survname, cpi_data_level, description = "no missing values in key variables") %>% + validate_if(is_uniq(code, year, cpi_data_level), description = "no duplicate records in key variables") %>% + validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") %>% + validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, description = "survey_coverage values within 
range") %>% + validate_cols(in_set(c(0, 1, 2)), cpi_data_level, description = "cpi_data_level values within range") %>% + add_results(report) + + if (any(report$get_validations(unnest = TRUE)$type == "error")){ + + detail <- TRUE + save_summary(report, "cpi_raw_validation_log.txt", success = FALSE, warning = FALSE) + + } + + if (detail) { + + compose_email( + body = md(glue::glue( + "Hello, + + The attched file contains data validation report for raw *cpi* data. + + Regards"))) |> + add_attachment(file = "cpi_raw_validation_log.txt", filename = "cpi_raw_validation_log") |> + smtp_send( + #from = "pipdata.wb@outlook.com", + from = "tefera.degefu@outlook.com", + #to = "acastanedaa@worldbank.org", + to = "tdegefu@worldbank.org", + subject = "Raw cpi data validation report - data validator pkg", + credentials = creds_envvar(user = "tefera.degefu@outlook.com", + pass_envvar = "SMTP_GPID_EMAIL", + provider = "outlook") + ) + } + +} diff --git a/R/zzz.R b/R/zzz.R index fb98f4f..2753343 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -9,7 +9,9 @@ pipuax_default_options <- list( pipaux.maindir = gls$PIP_DATA_DIR, pipfun.ghowner = "PIP-Technical-Team", joyn.verbose = FALSE, - pipfun.verbose = TRUE + pipfun.verbose = TRUE, + pipaux.detail.raw = FALSE, + pipaux.detail.output = FALSE ) From 9ec8dea683da00e67cb3b6222fa6857de51deea6 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 29 Jan 2024 21:16:16 -0500 Subject: [PATCH 02/23] CPI raw and output datasets validation scripts --- R/cpi_validate_output.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 8336abc..5347e39 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -72,9 +72,4 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) provider = "outlook") ) } - - - - - } From 82ec25ca2a11050f0db5374b5dce9152f63c8b1d Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Tue, 6 Feb 2024 09:02:37 -0500 Subject: [PATCH 03/23] Update cpi raw 
and output data validation scrips --- R/aaa.R | 1 + R/cpi_validate_output.R | 30 ++----------- R/cpi_validate_raw.R | 95 +++++++++++++++------------------------- R/get_error_validation.R | 40 +++++++++++++++++ R/send_report.R | 34 ++++++++++++++ 5 files changed, 113 insertions(+), 87 deletions(-) create mode 100644 R/aaa.R create mode 100644 R/get_error_validation.R create mode 100644 R/send_report.R diff --git a/R/aaa.R b/R/aaa.R new file mode 100644 index 0000000..9eee63b --- /dev/null +++ b/R/aaa.R @@ -0,0 +1 @@ +.pipaux <- new.env(parent = emptyenv()) diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 5347e39..23d46c4 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -3,7 +3,6 @@ #' @param cpi clean cpi data, output via `pipfun::pip_cpi_clean` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator -#' @import blastula #' @importFrom assertr in_set not_na is_uniq #' #' @export @@ -45,31 +44,8 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) cpi_data_level), description = "no duplicate records in key variables") %>% add_results(report) - if (any(report$get_validations(unnest = TRUE)$type == "error")){ + validation_record <- get_results(report, unnest = FALSE) |> + setDT() - detail <- TRUE - save_summary(report, "cpi_output_validation_log.txt", success = FALSE, warning = FALSE) - - } - - if (detail) { - - compose_email( - body = md(glue::glue( - "Hello, - - The attched file contains data validation report for output *cpi* data. 
- - Regards"))) |> - add_attachment(file = "cpi_output_validation_log.txt", filename = "cpi_output_validation_log") |> - smtp_send( - from = "tefera.degefu@outlook.com", - #to = "acastanedaa@worldbank.org", - to = "tdegefu@worldbank.org", - subject = "Raw cpi data validation report - data validator pkg", - credentials = creds_envvar(user = "tefera.degefu@outlook.com", - pass_envvar = "SMTP_GPID_EMAIL", - provider = "outlook") - ) - } + get_error_validation(validation_record, detail) } diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R index a34e9b6..d0a88f6 100644 --- a/R/cpi_validate_raw.R +++ b/R/cpi_validate_raw.R @@ -3,7 +3,6 @@ #' @param cpi raw cpi data, as loaded via `pipfun::load_from_gh` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator -#' @import blastula #' @importFrom assertr in_set not_na is_uniq #' #' @export @@ -13,67 +12,43 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ report <- data_validation_report() - validate(cpi, name = "CPI raw data validation") %>% - validate_if(is.character(region), description = "region should be character") %>% - validate_if(is.character(code), description = "code should be character") %>% - validate_if(is.character(countryname), description = "countryname should be character") %>% - validate_if(is.character(survname), description = "survname should be character") %>% - validate_if(is.character(cpi_domain), description = "cpi_domain should be character") %>% - validate_if(is.character(version), description = "version should be character") %>% - validate_if(is.character(survey_coverage), description = "survey_coverage should be character") %>% - validate_if(is.character(cpi_id), description = "cpi_id should be character") %>% - validate_if(is.character(year), description = "year should be character") %>% - validate_if(is.numeric(ref_year), description = "ref_year should be numeric") %>% - validate_if(is.numeric(cpi_domain_value), description 
= "cpi_domain_value should be numeric") %>% - validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") %>% - validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") %>% - validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") %>% - validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") %>% - validate_if(is.numeric(comparability), description = "comparability should be numeric") %>% - validate_if(is.numeric(cur_adj), description = "cur_adj should be numeric") %>% - validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") %>% - validate_if(is.numeric(comparable), description = "comparable should be numeric") %>% - validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be numeric") %>% - validate_if(is.numeric(cpi_data_level), description = "cpi_data_level should be numeric") %>% - validate_if(is.numeric(change_cpi2017), description = "change_cpi2017 should be numeric") %>% - validate_if(is.numeric(change_icp2017), description = "change_icp2017 should be numeric") %>% - validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 should be numeric") %>% - validate_if(is.numeric(change_icp2011), description = "change_icp2011 should be numeric") %>% - validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") %>% - validate_cols(not_na, code, year, survname, cpi_data_level, description = "no missing values in key variables") %>% - validate_if(is_uniq(code, year, cpi_data_level), description = "no duplicate records in key variables") %>% - validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") %>% - validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, description = "survey_coverage values within range") %>% - validate_cols(in_set(c(0, 1, 2)), cpi_data_level, description = "cpi_data_level values within range") %>% + validate(cpi, 
name = "CPI raw data validation") |> + validate_if(is.character(region), description = "region should be character") |> + validate_if(is.character(code), description = "code should be character") |> + validate_if(is.character(countryname), description = "countryname should be character") |> + validate_if(is.character(survname), description = "survname should be character") |> + validate_if(is.character(cpi_domain), description = "cpi_domain should be character") |> + validate_if(is.character(version), description = "version should be character") |> + validate_if(is.character(survey_coverage), description = "survey_coverage should be character") |> + validate_if(is.character(cpi_id), description = "cpi_id should be character") |> + validate_if(is.character(year), description = "year should be character") |> + validate_if(is.numeric(ref_year), description = "ref_year should be numeric") |> + validate_if(is.numeric(cpi_domain_value), description = "cpi_domain_value should be numeric") |> + validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") |> + validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") |> + validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") |> + validate_if(is.numeric(comparability), description = "comparability should be numeric") |> + validate_if(is.numeric(cur_adj), description = "cur_adj should be numeric") |> + validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") |> + validate_if(is.numeric(comparable), description = "comparable should be numeric") |> + validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be numeric") |> + validate_if(is.numeric(cpi_data_level), description = "cpi_data_level should be numeric") |> + validate_if(is.numeric(change_cpi2017), description = "change_cpi2017 should be numeric") |> + 
validate_if(is.numeric(change_icp2017), description = "change_icp2017 should be numeric") |> + validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 should be numeric") |> + validate_if(is.numeric(change_icp2011), description = "change_icp2011 should be numeric") |> + validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") |> + validate_cols(not_na, code, year, survname, cpi_data_level, description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, cpi_data_level), description = "no duplicate records in key variables") |> + validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") |> + validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, description = "survey_coverage values within range") |> + validate_cols(in_set(c(0, 1, 2)), cpi_data_level, description = "cpi_data_level values within range") |> add_results(report) - if (any(report$get_validations(unnest = TRUE)$type == "error")){ + validation_record <- get_results(report, unnest = FALSE) |> + setDT() - detail <- TRUE - save_summary(report, "cpi_raw_validation_log.txt", success = FALSE, warning = FALSE) - - } - - if (detail) { - - compose_email( - body = md(glue::glue( - "Hello, - - The attched file contains data validation report for raw *cpi* data. 
- - Regards"))) |> - add_attachment(file = "cpi_raw_validation_log.txt", filename = "cpi_raw_validation_log") |> - smtp_send( - #from = "pipdata.wb@outlook.com", - from = "tefera.degefu@outlook.com", - #to = "acastanedaa@worldbank.org", - to = "tdegefu@worldbank.org", - subject = "Raw cpi data validation report - data validator pkg", - credentials = creds_envvar(user = "tefera.degefu@outlook.com", - pass_envvar = "SMTP_GPID_EMAIL", - provider = "outlook") - ) - } + get_error_validation(validation_record, detail) } diff --git a/R/get_error_validation.R b/R/get_error_validation.R new file mode 100644 index 0000000..c5f0c3c --- /dev/null +++ b/R/get_error_validation.R @@ -0,0 +1,40 @@ +#' Get validation report data validation error report +#' +#' @param vlddata validation data +#' @import rlang +#' +#' @export +get_error_validation <- function(vlddata, detail){ + + stopifnot("Validation data is not availabel" = !is.null(vlddata)) + + err_t <- NULL + + if (any(vlddata$type == "error")){ + + err_t <- vlddata[type == "error", + .(table_name, description, call, + message, type)] + } + + + if (isFALSE(detail)) { + + cli::cli_abort("Description of invalid cases for {unique(err_t$table_name)}, + {err_t$description}") + + } else { + + if (!rlang::env_has(.pipaux, "validation_report")){ + + rlang::env_poke(.pipaux, "validation_report", err_t) + + } else { + + compiled_result <- rbind(.pipaux$validation_report, err_t) + rlang::env_poke(.pipaux, "validation_report", compiled_result) + + } + } + +} diff --git a/R/send_report.R b/R/send_report.R new file mode 100644 index 0000000..6e704f7 --- /dev/null +++ b/R/send_report.R @@ -0,0 +1,34 @@ +#' Send an email that contains auxiliary data validation report +#' +#' @import blastula +#' @import rlang +#' +#' @export +send_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + fname <- file.path(tempdir(), "data_validation_report.csv") + + write.csv(.pipaux$validation_report, fname, row.names = FALSE) + + 
compose_email( + body = md(glue::glue( + + "Hello, + + The attched file contains auxiliary data validation report. + + Regards"))) |> + add_attachment(file = fname, filename = "data_validation_report") |> + smtp_send( + from = "tefera.degefu@outlook.com", + to = "tdegefu@worldbank.org", + subject = "Data validation report", + credentials = creds_envvar(user = "tefera.degefu@outlook.com", + pass_envvar = "SMTP_GPID_EMAIL", + provider = "outlook") + ) + + } +} From 3295449b803b311e2bca5a37169cca182ea75a1c Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 4 Mar 2024 08:36:14 -0500 Subject: [PATCH 04/23] Add validation scripts --- R/cpi_validate_output.R | 81 +++++++++++------ R/cpi_validate_raw.R | 97 +++++++++++++------- R/gdm_validate_output.R | 56 ++++++++++++ R/gdm_validate_raw.R | 61 +++++++++++++ R/mpd_validate_output.R | 35 ++++++++ R/mpd_validate_raw.R | 35 ++++++++ R/pce_validate_output.R | 43 +++++++++ R/pfw_validate_output.R | 189 +++++++++++++++++++++++++++++++++++++++ R/pfw_validate_raw.R | 190 ++++++++++++++++++++++++++++++++++++++++ R/pop_validate_output.R | 43 +++++++++ R/ppp_validate_output.R | 53 +++++++++++ R/ppp_validate_raw.R | 77 ++++++++++++++++ R/wdi_validate_output.R | 35 ++++++++ R/wdi_validate_raw.R | 37 ++++++++ R/weo_validate_output.R | 37 ++++++++ R/weo_validate_raw.R | 150 +++++++++++++++++++++++++++++++ 16 files changed, 1160 insertions(+), 59 deletions(-) create mode 100644 R/gdm_validate_output.R create mode 100644 R/gdm_validate_raw.R create mode 100644 R/mpd_validate_output.R create mode 100644 R/mpd_validate_raw.R create mode 100644 R/pce_validate_output.R create mode 100644 R/pfw_validate_output.R create mode 100644 R/pfw_validate_raw.R create mode 100644 R/pop_validate_output.R create mode 100644 R/ppp_validate_output.R create mode 100644 R/ppp_validate_raw.R create mode 100644 R/wdi_validate_output.R create mode 100644 R/wdi_validate_raw.R create mode 100644 R/weo_validate_output.R create mode 100644 R/weo_validate_raw.R diff 
--git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 23d46c4..e4e2c12 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -13,39 +13,66 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) report <- data_validation_report() validate(cpi, name = "CPI output data validation") %>% - validate_if(is.character(country_code), description = "country_code should be character") %>% - validate_if(is.integer(cpi_year), description = "cpi_year should be integer") %>% - validate_if(is.numeric(survey_year), description = "survey_year should be numeric") %>% - validate_if(is.numeric(cpi), description = "cpi should be numeric") %>% - validate_if(is.numeric(ccf), description = "ccf should be numeric") %>% - validate_if(is.character(survey_acronym), description = "survey_acronym should be character") %>% - validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 should be numeric") %>% - validate_cols(in_set(c(0, 1)), change_cpi2011, description = "change_cpi2011 values within range") %>% - validate_if(is.character(cpi_domain), description = "cpi_domain should be character") %>% - validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") %>% - validate_if(is.numeric(cpi_domain_value), description = "cpi_domain_value should be numeric") %>% - validate_cols(in_set(c(0, 1)), cpi_domain_value, description = "cpi_domain_value values within range") %>% - validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") %>% - validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") %>% - validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") %>% - validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") %>% - validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") %>% - validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be 
numeric") %>% - validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") %>% - validate_if(is.character(cpi_data_level), description = "cpi_data_level should be character") %>% + validate_if(is.character(country_code), + description = "`country_code` should be character") %>% + validate_if(is.integer(cpi_year), + description = "`cpi_year` should be integer") %>% + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") %>% + validate_if(is.numeric(cpi), + description = "`cpi` should be numeric") %>% + validate_if(is.numeric(ccf), + description = "`ccf` should be numeric") %>% + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") %>% + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") %>% + validate_cols(in_set(c(0, 1)), change_cpi2011, + description = "`change_cpi2011` values within range") %>% + validate_if(is.character(cpi_domain), + description = "`cpi_domain` should be character") %>% + validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, + description = "`cpi_domian` values within range") %>% + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") %>% + validate_cols(in_set(c(0, 1)), cpi_domain_value, + description = "`cpi_domain_value` values within range") %>% + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") %>% + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") %>% + validate_if(is.numeric(cpi2011), + description = "`cpi2011` should be numeric") %>% + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") %>% + validate_if(is.numeric(cpi2011_SM22), + description = "`cpi2011_SM22` should be numeric") %>% + validate_if(is.numeric(cpi2017_SM22), + description = "`cpi2017_SM22` should be numeric") %>% + validate_cols(is.logical, cpi2005, + description = 
"`cpi2005` should be logical") %>% + validate_if(is.character(cpi_data_level), + description = "c`pi_data_level` should be character") %>% validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level, - description = "cpi_data_level values within range") %>% - validate_if(is.character(cpi_id), description = "cpi_id should be character") %>% - validate_if(is.numeric(cpi2011_SM23), description = "cpi2011_SM23 should be numeric") %>% - validate_if(is.numeric(cpi2017_SM23), description = "cpi2017_SM23 should be numeric") %>% + description = "`cpi_data_level` values within range") %>% + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") %>% + validate_if(is.numeric(cpi2011_SM23), + description = "`cpi2011_SM23` should be numeric") %>% + validate_if(is.numeric(cpi2017_SM23), + description = "`cpi2017_SM23` should be numeric") %>% validate_cols(not_na, country_code, cpi_year, survey_acronym, - cpi_data_level, description = "no missing values in key variables") %>% + cpi_data_level, + description = "no missing values in key variables") %>% validate_if(is_uniq(country_code, cpi_year, survey_acronym, - cpi_data_level), description = "no duplicate records in key variables") %>% + cpi_data_level), + description = "no duplicate records in key variables") %>% add_results(report) validation_record <- get_results(report, unnest = FALSE) |> setDT() - get_error_validation(validation_record, detail) + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } } diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R index d0a88f6..98f45e1 100644 --- a/R/cpi_validate_raw.R +++ b/R/cpi_validate_raw.R @@ -13,42 +13,75 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ report <- data_validation_report() validate(cpi, name = "CPI raw data validation") |> - validate_if(is.character(region), description = "region should be character") |> - validate_if(is.character(code), 
description = "code should be character") |> - validate_if(is.character(countryname), description = "countryname should be character") |> - validate_if(is.character(survname), description = "survname should be character") |> - validate_if(is.character(cpi_domain), description = "cpi_domain should be character") |> - validate_if(is.character(version), description = "version should be character") |> - validate_if(is.character(survey_coverage), description = "survey_coverage should be character") |> - validate_if(is.character(cpi_id), description = "cpi_id should be character") |> - validate_if(is.character(year), description = "year should be character") |> - validate_if(is.numeric(ref_year), description = "ref_year should be numeric") |> - validate_if(is.numeric(cpi_domain_value), description = "cpi_domain_value should be numeric") |> - validate_if(is.numeric(cpi2017_unadj), description = "cpi2017_unadj should be numeric") |> - validate_if(is.numeric(cpi2011_unadj), description = "cpi2011_unadj should be numeric") |> - validate_if(is.numeric(cpi2011), description = "cpi2011 should be numeric") |> - validate_if(is.numeric(cpi2017), description = "cpi2017 should be numeric") |> - validate_if(is.numeric(comparability), description = "comparability should be numeric") |> - validate_if(is.numeric(cur_adj), description = "cur_adj should be numeric") |> - validate_if(is.numeric(cpi2011_SM22), description = "cpi2011_SM22 should be numeric") |> - validate_if(is.numeric(comparable), description = "comparable should be numeric") |> - validate_if(is.numeric(cpi2017_SM22), description = "cpi2017_SM22 should be numeric") |> - validate_if(is.numeric(cpi_data_level), description = "cpi_data_level should be numeric") |> - validate_if(is.numeric(change_cpi2017), description = "change_cpi2017 should be numeric") |> - validate_if(is.numeric(change_icp2017), description = "change_icp2017 should be numeric") |> - validate_if(is.numeric(change_cpi2011), description = "change_cpi2011 
should be numeric") |> - validate_if(is.numeric(change_icp2011), description = "change_icp2011 should be numeric") |> - validate_cols(is.logical, cpi2005, description = "cpi2005 should be logical") |> - validate_cols(not_na, code, year, survname, cpi_data_level, description = "no missing values in key variables") |> - validate_if(is_uniq(code, year, cpi_data_level), description = "no duplicate records in key variables") |> - validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, description = "cpi_domian values within range") |> - validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, description = "survey_coverage values within range") |> - validate_cols(in_set(c(0, 1, 2)), cpi_data_level, description = "cpi_data_level values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.character(cpi_domain), + description = "`cpi_domain` should be character") |> + validate_if(is.character(version), + description = "`version` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.numeric(cpi_domain_value), + description = "`cpi_domain_value` should be numeric") |> + validate_if(is.numeric(cpi2017_unadj), + description = "`cpi2017_unadj` should be numeric") |> + validate_if(is.numeric(cpi2011_unadj), + description = "`cpi2011_unadj` should be numeric") |> + 
validate_if(is.numeric(cpi2011), + description = "`cpi201`1 should be numeric") |> + validate_if(is.numeric(cpi2017), + description = "`cpi2017` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.numeric(cur_adj), + description = "`cur_adj` should be numeric") |> + validate_if(is.numeric(cpi2011_SM22), + description = "`cpi2011_SM22` should be numeric") |> + validate_if(is.numeric(comparable), + description = "`comparable` should be numeric") |> + validate_if(is.numeric(cpi2017_SM22), + description = "`cpi2017_SM22` should be numeric") |> + validate_if(is.numeric(cpi_data_level), + description = "`cpi_data_level` should be numeric") |> + validate_if(is.numeric(change_cpi2017), + description = "`change_cpi2017` should be numeric") |> + validate_if(is.numeric(change_icp2017), + description = "`change_icp2017` should be numeric") |> + validate_if(is.numeric(change_cpi2011), + description = "`change_cpi2011` should be numeric") |> + validate_if(is.numeric(change_icp2011), + description = "`change_icp2011` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> + validate_cols(not_na, code, year, survname, cpi_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname, cpi_data_level), + description = "no duplicate records in key variables") |> + validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, + description = "`cpi_domian` values within range") |> + validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, + description = "`survey_coverage` values within range") |> + validate_cols(in_set(c(0, 1, 2)), cpi_data_level, + description = "`cpi_data_level` values within range") |> add_results(report) validation_record <- get_results(report, unnest = FALSE) |> setDT() - get_error_validation(validation_record, detail) + if (any(validation_record[["type"]] == "error")){ + 
get_error_validation(validation_record, detail) + } } diff --git a/R/gdm_validate_output.R b/R/gdm_validate_output.R new file mode 100644 index 0000000..4ae41aa --- /dev/null +++ b/R/gdm_validate_output.R @@ -0,0 +1,56 @@ +#' Validate clean gdm data +#' +#' @param gdm clean gdm data, output via `pipfun::pip_gdm_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ + + stopifnot("GDM output data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM output data validation") |> + validate_if(is.character(survey_id), + description = "`survey_id` should be character") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.integer(surveyid_year), + description = "`surveyid_year` should be integer") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), welfare_type, + description = "`welfare_type` values within range") |> + validate_if(is.numeric(survey_mean_lcu), + description = "`survey_mean_lcu` should be numeric") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregate", "group")), distribution_type, + description = "`distribution_type` values within range") |> + validate_if(is.character(gd_type), + description = "`gd_type` should be character") |> + validate_if(is.character(pop_data_level), + description = "`pop_data_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), pop_data_level, + description = "`pop_data_level` values within range") |> + 
validate_if(is.character(pcn_source_file), + description = "`pcn_source_file` should be character") |> + validate_if(is.character(pcn_survey_id), + description = "`pcn_survey_id` should be character") |> + validate_cols(not_na, country_code, surveyid_year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, surveyid_year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } +} diff --git a/R/gdm_validate_raw.R b/R/gdm_validate_raw.R new file mode 100644 index 0000000..7c7a512 --- /dev/null +++ b/R/gdm_validate_raw.R @@ -0,0 +1,61 @@ +#' Validate raw gdm data +#' +#' @param gdm raw gdm data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ + + stopifnot("GDM raw data is not loaded" = !is.null(gdm)) + + report <- data_validation_report() + + validate(gdm, name = "GDM raw data validation") |> + validate_if(is.character(Region), + description = "`Region` should be character") |> + validate_if(is.character(countryName), + description = "`countryName` should be character") |> + validate_if(is.character(Coverage), + description = "`Coverage` should be character") |> + validate_if(is.character(CountryCode), + description = "`CountryCode` should be character") |> + validate_if(is.numeric(SurveyTime), + description = "`SurveyTime` should be numeric") |> + validate_if(is.numeric(CPI_Time), + description = "`CPI_Time` should be numeric") |> + validate_if(is.character(DataType), + description = "`DataType` should be character") |> + validate_if(is.numeric(SurveyMean_LCU), + 
description = "`SurveyMean_LCU` should be numeric") |> + validate_if(is.numeric(currency), + description = "`currency` should be numeric") |> + validate_if(is.character(source), + description = "`source` should be character") |> + validate_if(is.character(SurveyID), + description = "`SurveyID` should be character") |> + validate_if(is.numeric(SurveyMean_PPP), + description = "`SurveyMean_PPP` should be numeric") |> + validate_if(is.character(DistributionFileName), + description = "`DistributionFileName` should be character") |> + validate_cols(is.logical, Comment, description = "Comment should be logical") |> + validate_cols(not_na, CountryCode, Coverage, SurveyTime, DataType, + description = "no missing values in key variables") |> + validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), + description = "no duplicate records in key variables") |> + validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), + Region, description = "`Region` values within range") |> + validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), + Coverage, description = "`Coverage` values within range") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/mpd_validate_output.R b/R/mpd_validate_output.R new file mode 100644 index 0000000..6021b91 --- /dev/null +++ b/R/mpd_validate_output.R @@ -0,0 +1,35 @@ +#' Validate raw maddison data +#' +#' @param mpd output mpd data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ + + stopifnot("mpd/ maddison output data is not loaded" = !is.null(mpd)) + + report <- data_validation_report() + + validate(mpd, name = "mdp output data 
validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(mpd_gdp), + description = "`mpd_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/mpd_validate_raw.R b/R/mpd_validate_raw.R new file mode 100644 index 0000000..402e900 --- /dev/null +++ b/R/mpd_validate_raw.R @@ -0,0 +1,35 @@ +#' Validate raw maddison data +#' +#' @param mpd raw mpd data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ + + stopifnot("mpd/ maddison raw data is not loaded" = !is.null(mpd)) + + report <- data_validation_report() + + validate(mpd, name = "mdp raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(mpd_gdp), + description = "`mpd_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + 
get_error_validation(validation_record, detail) + } + +} diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R new file mode 100644 index 0000000..63171bb --- /dev/null +++ b/R/pce_validate_output.R @@ -0,0 +1,43 @@ +#' Validate output pce data +#' +#' @param pce output pce data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ + + stopifnot("PCE clean data is not loaded" = !is.null(pce)) + + report <- data_validation_report() + + validate(pce, name = "PCE output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pce), + description = "`pce` should be numeric") |> + validate_if(is.character(pce_data_level), + description = "`pce_data_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + pce_data_level, description = "`pce_data_level` values within range") |> + validate_if(is.character(pce_domain), + description = "`pce_domain` should be character") |> + validate_cols(in_set(c("national", "urban/rural")), + pce_domain, description = "`pce_domain` values within range") |> + validate_cols(not_na, country_code, year, pce_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pce_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R new file mode 100644 index 0000000..e5daeaa --- /dev/null +++ b/R/pfw_validate_output.R @@ -0,0 
+1,189 @@ +#' Validate clean pfw data +#' +#' @param pfw clean pfw data, output via `pipfun::pip_pfw_clean` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ + + stopifnot("PFW clean data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW output data validation") |> + validate_if(is.character(wb_region_code), + description = "`wb_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + wb_region_code, description = "`wb_region_code` values within range") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survey_acronym), + description = "`survey_acronym` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` 
should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(reporting_year), + description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(survey_comparability), + description = "`survey_comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = 
"`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = "`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, 
description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + 
validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, country_code, year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/pfw_validate_raw.R b/R/pfw_validate_raw.R new file mode 100644 index 0000000..e45bfd2 --- /dev/null +++ b/R/pfw_validate_raw.R @@ -0,0 +1,190 @@ +#' Validate raw pfw data +#' +#' @param pfw raw pfw data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PFW raw data is not loaded" = !is.null(pfw)) + + report <- data_validation_report() + + validate(pfw, name = "PFW raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(reg_pcn), + 
description = "`reg_pcn` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + reg_pcn, description = "`reg_pcn` values within range") |> + validate_if(is.character(ctryname), + description = "`ctryname` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(surveyid_year), + description = "`surveyid_year` should be numeric") |> + validate_if(is.numeric(timewp), + description = "`timewp` should be numeric") |> + validate_if(is.numeric(fieldwork), + description = "`fieldwork` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(altname), + description = "`altname` should be character") |> + validate_if(is.character(survey_time), + description = "`survey_time` should be character") |> + validate_if(is.numeric(wbint_link), + description = "`wbint_link` should be numeric") |> + validate_if(is.numeric(wbext_link), + description = "`wbext_link` should be numeric") |> + validate_if(is.numeric(alt_link), + description = "`alt_link` should be numeric") |> + validate_if(is.numeric(pip_meta), + description = "`pip_meta` should be numeric") |> + validate_if(is.character(surv_title), + description = "`surv_title` should be character") |> + validate_if(is.character(surv_producer), + description = "`surv_producer` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(datatype), + description = "`datatype` should be character") |> + validate_cols(in_set(c("C", "I", "c", "i")), + datatype, description = "`datatype` values within range") |> + 
validate_if(is.numeric(use_imputed), + description = "`use_imputed` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_imputed, description = "`use_imputed` values within range") |> + validate_if(is.numeric(use_microdata), + description = "`use_microdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_microdata, description = "`use_microdata` values within range") |> + validate_if(is.numeric(use_bin), + description = "`use_bin` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_bin, description = "`use_bin` values within range") |> + validate_if(is.numeric(use_groupdata), + description = "`use_groupdata` should be numeric") |> + validate_cols(in_set(c(0, 1)), + use_groupdata, description = "`use_groupdata` values within range") |> + validate_if(is.numeric(rep_year), + description = "`rep_year` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(comp_note), + description = "`comp_note` should be character") |> + validate_if(is.character(preferable), + description = "`preferable` should be character") |> + validate_if(is.numeric(display_cp), + description = "`display_cp` should be numeric") |> + validate_cols(in_set(c(0, 1)), + display_cp, description = "`display_cp` values within range") |> + validate_if(is.character(fieldwork_range), + description = "`fieldwork_range` should be character") |> + validate_if(is.numeric(ref_year), + description = "`ref_year` should be numeric") |> + validate_if(is.character(newref), + description = "`newref` should be character") |> + validate_if(is.numeric(ref_year_des), + description = "`ref_year_des` should be numeric") |> + validate_if(is.character(wf_baseprice), + description = "`wf_baseprice` should be character") |> + validate_if(is.character(wf_baseprice_note), + description = "`wf_baseprice_note` should be character") |> + validate_if(is.numeric(wf_baseprice_des), + description = 
"`wf_baseprice_des` should be numeric") |> + validate_cols(in_set(c(-9, -8, -7)), wf_baseprice_des, + description = "`wf_baseprice_des` values within range") |> + validate_if(is.numeric(wf_spatial_des), + description = "`wf_spatial_des` should be numeric") |> + validate_if(is.character(wf_spatial_var), + description = "`wf_spatial_var` should be character") |> + validate_if(is.numeric(cpi_replication), + description = "`cpi_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + cpi_replication, description = "`cpi_replication` values within range") |> + validate_if(is.numeric(cpi_domain), + description = "`cpi_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + cpi_domain, description = "`cpi_domain` values within range") |> + validate_if(is.character(cpi_domain_var), + description = "`cpi_domain_var` should be character") |> + validate_if(is.numeric(wf_currency_des), + description = "`wf_currency_des` should be numeric") |> + validate_cols(in_set(c(0, 2)), + wf_currency_des, description = "`wf_currency_des` values within range") |> + validate_if(is.numeric(ppp_replication), + description = "`ppp_replication` should be numeric") |> + validate_cols(in_set(c(-9, 1)), + ppp_replication, description = "`ppp_replication` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_domain_var), + description = "`ppp_domain_var` should be character") |> + validate_if(is.numeric(wf_add_temp_des), + description = "`wf_add_temp_des` should be numeric") |> + validate_cols(in_set(c(-9, 0)), + wf_add_temp_des, description = "`wf_add_temp_des` values within range") |> + validate_if(is.numeric(wf_add_temp_var), + description = "`wf_add_temp_var` should be numeric") |> + validate_if(is.numeric(wf_add_spatial_des), + description = "`wf_add_spatial_des` should be 
numeric") |> + validate_cols(in_set(c(-9, 0, 1)), wf_add_spatial_des, + description = "`wf_add_spatial_des` values within range") |> + validate_if(is.numeric(wf_add_spatial_var), + description = "`wf_add_spatial_var` should be numeric") |> + validate_if(is.numeric(tosplit), + description = "`tosplit` should be numeric") |> + validate_cols(in_set(c(NA, 1)), tosplit, + description = "`tosplit` values within range") |> + validate_if(is.character(tosplit_var), + description = "`tosplit_var` should be character") |> + validate_if(is.numeric(inpovcal), + description = "`inpovcal` should be numeric") |> + validate_cols(in_set(c(1)), inpovcal, + description = "`inpovcal` values within range") |> + validate_if(is.character(oth_welfare1_type), + description = "`oth_welfare1_type` should be character") |> + validate_if(is.character(oth_welfare1_var), + description = "`oth_welfare1_var` should be character") |> + validate_if(is.numeric(gdp_domain), + description = "`gdp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), gdp_domain, + description = "`gdp_domain` values within range") |> + validate_if(is.numeric(pce_domain), + description = "`pce_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pce_domain, + description = "`pce_domain` values within range") |> + validate_if(is.numeric(pop_domain), + description = "`pop_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), pop_domain, + description = "`pop_domain` values within range") |> + validate_if(is.character(pfw_id), + description = "`pfw_id` should be character") |> + validate_cols(not_na, code, year, survname, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, year, survname), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff 
--git a/R/pop_validate_output.R b/R/pop_validate_output.R new file mode 100644 index 0000000..b5bfae5 --- /dev/null +++ b/R/pop_validate_output.R @@ -0,0 +1,43 @@ +#' Validate output pop data +#' +#' @param pop output pop data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ + + stopifnot("POP clean data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "POP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(pop_data_level), + description = "`pop_data_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_if(is.character(pop_domain), + description = "`pop_domain` should be character") |> + validate_cols(in_set(c("national", "urban/rural")), + pop_domain, description = "`pop_domain` values within range") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R new file mode 100644 index 0000000..d336be5 --- /dev/null +++ b/R/ppp_validate_output.R @@ -0,0 +1,53 @@ +#' Validate output ppp data +#' +#' @param ppp output 
ppp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PPP output data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(ppp_year), + description = "`ppp_year` should be character") |> + validate_if(is.character(release_version), + description = "`release_version` should be character") |> + validate_if(is.character(adaptation_version), + description = "`adaptation_version` should be character") |> + validate_if(is.numeric(ppp), + description = "`ppp` should be numeric") |> + validate_if(is.logical(ppp_default), + description = "`ppp_default` should be numeric") |> + validate_if(is.logical(ppp_default_by_year), + description = "`ppp_default_by_year` should be numeric") |> + validate_if(is.character(ppp_domain), + description = "`ppp_domain` should be character") |> + validate_cols(in_set(c("1", "2")), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.character(ppp_data_level), + description = "`ppp_data_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + ppp_data_level, description = "`ppp_data_level` values within range") |> + validate_cols(not_na, country_code, ppp_year, ppp_data_level, + adaptation_version, release_version, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, ppp_year, + ppp_data_level, adaptation_version, release_version), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + 
get_error_validation(validation_record, detail) + } + +} diff --git a/R/ppp_validate_raw.R b/R/ppp_validate_raw.R new file mode 100644 index 0000000..046806f --- /dev/null +++ b/R/ppp_validate_raw.R @@ -0,0 +1,77 @@ +#' Validate raw ppp data +#' +#' @param ppp raw ppp data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ + + stopifnot("PPP raw data is not loaded" = !is.null(ppp)) + + report <- data_validation_report() + + validate(ppp, name = "PPP raw data validation") |> + validate_if(is.character(CountryName), + description = "`CountryName` should be character") |> + validate_if(is.character(code), + description = "`code` should be character") |> + validate_if(is.character(CoverageType), + description = "`CoverageType` should be character") |> + validate_cols(in_set(c("National", "Rural", "Urban")), + CoverageType, description = "`CoverageType` values within range") |> + validate_if(is.numeric(ppp_2005_v1_v1), + description = "`ppp_2005_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v1), + description = "`ppp_2011_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v1), + description = "`ppp_2011_v2_v1` should be numeric") |> + validate_if(is.numeric(ppp_2011_v1_v2), + description = "`ppp_2011_v1_v2` should be numeric") |> + validate_if(is.numeric(ppp_2011_v2_v2), + description = "`ppp_2011_v2_v2` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v1), + description = "`ppp_2017_v1_v1` should be numeric") |> + validate_if(is.numeric(ppp_2017_v1_v2), + description = "`ppp_2017_v1_v2` should be numeric") |> + validate_if(is.numeric(source_ppp_2011), + description = "`source_ppp_2011` should be numeric") |> + validate_if(is.numeric(source_ppp_2005), + description = "`source_ppp_2005` should be numeric") 
|> + validate_if(is.numeric(datalevel), + description = "`datalevel` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + datalevel, description = "`datalevel` values within range") |> + validate_if(is.numeric(ppp_domain), + description = "`ppp_domain` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain, description = "`ppp_domain` values within range") |> + validate_if(is.numeric(ppp_domain_value), + description = "`ppp_domain_value` should be numeric") |> + validate_cols(in_set(c(1, 2)), + ppp_domain_value, description = "`ppp_domain_value` values within range") |> + validate_if(is.numeric(oldicp2005), + description = "`oldicp2005` should be numeric") |> + validate_if(is.numeric(oldicp2011), + description = "`oldicp2011` should be numeric") |> + validate_if(is.character(Seriesname), + description = "`Seriesname` should be character") |> + validate_if(is.character(note_may192020), + description = "`note_may192020` should be character") |> + validate_if(is.character(ppp_2017_v1_v2_note), + description = "`ppp_2017_v1_v2_note` should be character") |> + validate_cols(not_na, code, CoverageType, + description = "no missing values in key variables") |> + validate_if(is_uniq(code, CoverageType), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/wdi_validate_output.R b/R/wdi_validate_output.R new file mode 100644 index 0000000..6868efa --- /dev/null +++ b/R/wdi_validate_output.R @@ -0,0 +1,35 @@ +#' Validate output weo data +#' +#' @param weo output weo data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +weo_validate_output <- function(weo, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WEO output 
data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + validate(weo, name = "WEO output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(weo_gdp), + description = "`weo_gdp` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/wdi_validate_raw.R b/R/wdi_validate_raw.R new file mode 100644 index 0000000..a70a237 --- /dev/null +++ b/R/wdi_validate_raw.R @@ -0,0 +1,37 @@ +#' Validate raw wdi data +#' +#' @param wdi raw wdi data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WDI raw data is not loaded" = !is.null(wdi)) + + report <- data_validation_report() + + validate(wdi, name = "WDI raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(NE.CON.PRVT.PC.KD), + description = "`NE.CON.PRVT.PC.KD` should be numeric") |> + validate_if(is.numeric(NY.GDP.PCAP.KD), + description = "`NY.GDP.PCAP.KD` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no 
duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R new file mode 100644 index 0000000..1e6d487 --- /dev/null +++ b/R/weo_validate_output.R @@ -0,0 +1,37 @@ +#' Validate output wdi data +#' +#' @param wdi output wdi data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +wdi_validate_output <- function(wdi, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WDI output data is not loaded" = !is.null(wdi)) + + report <- data_validation_report() + + validate(wdi, name = "WDI output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(NE.CON.PRVT.PC.KD), + description = "`NE.CON.PRVT.PC.KD` should be numeric") |> + validate_if(is.numeric(NY.GDP.PCAP.KD), + description = "`NY.GDP.PCAP.KD` should be numeric") |> + validate_cols(not_na, country_code, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/weo_validate_raw.R b/R/weo_validate_raw.R new file mode 100644 index 0000000..78a47ff --- /dev/null +++ b/R/weo_validate_raw.R @@ -0,0 +1,150 @@ +#' Validate raw weo data +#' +#' @param weo raw weo data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' 
@import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ + + stopifnot("WEO raw data is not loaded" = !is.null(weo)) + + report <- data_validation_report() + + weo <- weo[!is.na(`WEO Subject Code`), ] + + validate(weo, name = "WEO raw data validation") |> + validate_if(is.character(`WEO Country Code`), + description = "`WEO Country Code` should be character") |> + validate_if(is.character(ISO), + description = "ISO should be character") |> + validate_if(is.character(`WEO Subject Code`), + description = "`WEO Subject Code` should be character") |> + validate_if(is.character(Country), + description = "`Country` should be character") |> + validate_if(is.character(`Subject Descriptor`), + description = "`Subject Descriptor` should be character") |> + validate_if(is.character(`Subject Notes`), + description = "`Subject Notes` should be character") |> + validate_if(is.character(Units), + description = "`Units` should be character") |> + validate_if(is.character(Scale), + description = "`Scale` should be character") |> + validate_if(is.character(`Country/Series-specific Notes`), + description = "`Country/Series-specific Notes` should be character") |> + validate_if(is.character(`1980`), + description = "`1980` should be character") |> + validate_if(is.character(`1981`), + description = "`1981` should be character") |> + validate_if(is.character(`1982`), + description = "`1982` should be character") |> + validate_if(is.character(`1983`), + description = "`1983` should be character") |> + validate_if(is.character(`1984`), + description = "`1984` should be character") |> + validate_if(is.character(`1985`), + description = "`1985` should be character") |> + validate_if(is.character(`1986`), + description = "`1986` should be character") |> + validate_if(is.character(`1987`), + description = "`1987` should be character") |> + validate_if(is.character(`1988`), + description = 
"`1988` should be character") |> + validate_if(is.character(`1989`), + description = "`1989` should be character") |> + validate_if(is.character(`1990`), + description = "`1990` should be character") |> + validate_if(is.character(`1991`), + description = "`1991` should be character") |> + validate_if(is.character(`1992`), + description = "`1992` should be character") |> + validate_if(is.character(`1993`), + description = "`1993` should be character") |> + validate_if(is.character(`1994`), + description = "`1994` should be character") |> + validate_if(is.character(`1995`), + description = "`1995` should be character") |> + validate_if(is.character(`1996`), + description = "`1996` should be character") |> + validate_if(is.character(`1997`), + description = "`1997` should be character") |> + validate_if(is.character(`1998`), + description = "`1998` should be character") |> + validate_if(is.character(`1999`), + description = "`1999` should be character") |> + validate_if(is.character(`2000`), + description = "`2000` should be character") |> + validate_if(is.character(`2001`), + description = "`2001` should be character") |> + validate_if(is.character(`2002`), + description = "`2002` should be character") |> + validate_if(is.character(`2003`), + description = "`2003` should be character") |> + validate_if(is.character(`2004`), + description = "`2004` should be character") |> + validate_if(is.character(`2005`), + description = "`2005` should be character") |> + validate_if(is.character(`2006`), + description = "`2006` should be character") |> + validate_if(is.character(`2007`), + description = "`2007` should be character") |> + validate_if(is.character(`2008`), + description = "`2008` should be character") |> + validate_if(is.character(`2009`), + description = "`2009` should be character") |> + validate_if(is.character(`2010`), + description = "`2010` should be character") |> + validate_if(is.character(`2011`), + description = "`2011` should be character") |> + 
validate_if(is.character(`2012`), + description = "`2012` should be character") |> + validate_if(is.character(`2013`), + description = "`2013` should be character") |> + validate_if(is.character(`2014`), + description = "`2014` should be character") |> + validate_if(is.character(`2015`), + description = "`2015` should be character") |> + validate_if(is.character(`2016`), + description = "`2016` should be character") |> + validate_if(is.character(`2017`), + description = "`2017` should be character") |> + validate_if(is.character(`2018`), + description = "`2018` should be character") |> + validate_if(is.character(`2019`), + description = "`2019` should be character") |> + validate_if(is.character(`2020`), + description = "`2020` should be character") |> + validate_if(is.character(`2021`), + description = "`2021` should be character") |> + validate_if(is.character(`2022`), + description = "`2022` should be character") |> + validate_if(is.character(`2023`), + description = "`2023` should be character") |> + validate_if(is.character(`2024`), + description = "`2024` should be character") |> + validate_if(is.character(`2025`), + description = "`2025` should be character") |> + validate_if(is.character(`2026`), + description = "`2026` should be character") |> + validate_if(is.character(`2027`), + description = "`2027` should be character") |> + validate_if(is.character(`2028`), + description = "`2028` should be character") |> + validate_if(is.numeric(`Estimates Start After`), + description = "`Estimates Start After` should be numeric") |> + validate_cols(not_na, ISO, `WEO Subject Code`, + description = "no missing values in key variables") |> + validate_if(is_uniq(ISO, `WEO Subject Code`), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + From 
a439e4df567ed122ce0ee0e1a25ff01135cb536c Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Fri, 8 Mar 2024 21:45:42 -0500 Subject: [PATCH 05/23] Updated validation scripts --- DESCRIPTION | 4 ++- R/cl_validate_raw.R | 62 ++++++++++++++++++++++++++++++++++ R/countries_validate_output.R | 56 +++++++++++++++++++++++++++++++ R/cpi_validate_output.R | 63 +++++++++++++++++------------------ R/cpi_validate_raw.R | 44 ++++++++++++++---------- R/gdm_validate_raw.R | 10 +++--- R/gdp_validate_output.R | 43 ++++++++++++++++++++++++ R/mpd_validate_output.R | 4 +-- R/pfw_validate_output.R | 2 +- R/pip_countries.R | 3 ++ R/pip_country_list.R | 3 ++ R/pip_cpi_update.R | 5 +++ R/pip_gdm_update.R | 4 +++ R/pip_gdp_update.R | 5 ++- R/pip_maddison.R | 5 +++ R/pip_pce_update.R | 9 ++++- R/pip_pfw_update.R | 6 ++++ R/pip_pop_update.R | 7 ++++ R/pip_ppp_update.R | 6 ++++ R/pip_wdi_update.R | 5 +++ R/pip_weo.R | 7 ++++ R/popmain_validate_raw.R | 39 ++++++++++++++++++++++ R/ppp_validate_output.R | 2 +- R/sna_fy_validate_raw.R | 38 +++++++++++++++++++++ R/sna_validate_raw.R | 48 ++++++++++++++++++++++++++ R/spop_validate_raw.R | 39 ++++++++++++++++++++++ R/wdi_validate_output.R | 2 +- R/weo_validate_output.R | 2 +- 28 files changed, 460 insertions(+), 63 deletions(-) create mode 100644 R/cl_validate_raw.R create mode 100644 R/countries_validate_output.R create mode 100644 R/gdp_validate_output.R create mode 100644 R/popmain_validate_raw.R create mode 100644 R/sna_fy_validate_raw.R create mode 100644 R/sna_validate_raw.R create mode 100644 R/spop_validate_raw.R diff --git a/DESCRIPTION b/DESCRIPTION index c032bbd..0e1abef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -53,7 +53,9 @@ Imports: joyn, dm, config, - collapse + collapse, + data.validator, + assertr VignetteBuilder: knitr Remotes: github::PIP-Technical-Team/pipload@ongoing, diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R new file mode 100644 index 0000000..4f7b3a0 --- /dev/null +++ b/R/cl_validate_raw.R @@ -0,0 +1,62 @@ +#' 
Validate raw country list data +#' +#' @param cl raw country list data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("Country list raw data is not loaded" = !is.null(cl)) + + report <- data_validation_report() + + validate(cl, name = "CL raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.character(africa_split), + description = "`africa_split` should be character") |> + validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), + africa_split, description = "`africa_split` values within range") |> + validate_if(is.character(africa_split_code), + description = "`africa_split_code` should be character") |> + validate_cols(in_set(c("AFE", "AFW", NA)), + africa_split_code, description = "`africa_split_code` values within range") |> + validate_if(is.character(pcn_region), + description = "`pcn_region` should be character") |> + validate_if(is.character(pcn_region_code), + description = "`pcn_region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + pcn_region_code, description = "`pcn_region_code` values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(region_code), + description = "`region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + region_code, description = "`region_code` values within range") |> + validate_if(is.character(world), + description = "`world` should be character") |> + validate_cols(in_set(c("World")), + 
world, description = "`world` values within range") |> + validate_if(is.character(world_code), + description = "`world_code` should be character") |> + validate_cols(in_set(c("WLD")), + world_code, description = "`world_code` values within range") |> + validate_cols(not_na, country_code, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/countries_validate_output.R b/R/countries_validate_output.R new file mode 100644 index 0000000..b29b109 --- /dev/null +++ b/R/countries_validate_output.R @@ -0,0 +1,56 @@ +#' Validate output countries data +#' +#' @param countries output countries data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){ + + stopifnot("Countries output data is not loaded" = !is.null(countries)) + + report <- data_validation_report() + + validate(countries, name = "countries output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.character(africa_split), + description = "`africa_split` should be character") |> + validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)), + africa_split, description = "`africa_split` values within range") |> + validate_if(is.character(africa_split_code), + description = "`africa_split_code` should be character") |> + validate_cols(in_set(c("AFE", "AFW", NA)), + africa_split_code, 
description = "`africa_split_code` values within range") |> + validate_if(is.character(region), + description = "`region` should be character") |> + validate_if(is.character(region_code), + description = "`region_code` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")), + region_code, description = "`region_code` values within range") |> + validate_if(is.character(world), + description = "`world` should be character") |> + validate_cols(in_set(c("World")), + world, description = "`world` values within range") |> + validate_if(is.character(world_code), + description = "`world_code` should be character") |> + validate_cols(in_set(c("WLD")), + world_code, description = "`world_code` values within range") |> + validate_cols(not_na, country_code, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index e4e2c12..36b7363 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -1,6 +1,6 @@ #' Validate clean cpi data #' -#' @param cpi clean cpi data, output via `pipfun::pip_cpi_clean` +#' @param cpi clean cpi data, output via `pip_cpi_clean` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq @@ -12,61 +12,60 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) report <- data_validation_report() - validate(cpi, name = "CPI output data validation") %>% + validate(cpi, name = "CPI output data validation") |> validate_if(is.character(country_code), - description = "`country_code` should be character") %>% + description = 
"`country_code` should be character") |> validate_if(is.integer(cpi_year), - description = "`cpi_year` should be integer") %>% + description = "`cpi_year` should be integer") |> validate_if(is.numeric(survey_year), - description = "`survey_year` should be numeric") %>% + description = "`survey_year` should be numeric") |> validate_if(is.numeric(cpi), - description = "`cpi` should be numeric") %>% + description = "`cpi` should be numeric") |> validate_if(is.numeric(ccf), - description = "`ccf` should be numeric") %>% + description = "`ccf` should be numeric") |> validate_if(is.character(survey_acronym), - description = "`survey_acronym` should be character") %>% + description = "`survey_acronym` should be character") |> validate_if(is.numeric(change_cpi2011), - description = "`change_cpi2011` should be numeric") %>% + description = "`change_cpi2011` should be numeric") |> validate_cols(in_set(c(0, 1)), change_cpi2011, - description = "`change_cpi2011` values within range") %>% + description = "`change_cpi2011` values within range") |> validate_if(is.character(cpi_domain), - description = "`cpi_domain` should be character") %>% + description = "`cpi_domain` should be character") |> validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, - description = "`cpi_domian` values within range") %>% + description = "`cpi_domian` values within range") |> validate_if(is.numeric(cpi_domain_value), - description = "`cpi_domain_value` should be numeric") %>% + description = "`cpi_domain_value` should be numeric") |> validate_cols(in_set(c(0, 1)), cpi_domain_value, - description = "`cpi_domain_value` values within range") %>% + description = "`cpi_domain_value` values within range") |> validate_if(is.numeric(cpi2017_unadj), - description = "`cpi2017_unadj` should be numeric") %>% + description = "`cpi2017_unadj` should be numeric") |> validate_if(is.numeric(cpi2011_unadj), - description = "`cpi2011_unadj` should be numeric") %>% + description = "`cpi2011_unadj` should be 
numeric") |> validate_if(is.numeric(cpi2011), - description = "`cpi2011` should be numeric") %>% + description = "`cpi2011` should be numeric") |> validate_if(is.numeric(cpi2017), - description = "`cpi2017` should be numeric") %>% + description = "`cpi2017` should be numeric") |> validate_if(is.numeric(cpi2011_SM22), - description = "`cpi2011_SM22` should be numeric") %>% + description = "`cpi2011_SM22` should be numeric") |> validate_if(is.numeric(cpi2017_SM22), - description = "`cpi2017_SM22` should be numeric") %>% + description = "`cpi2017_SM22` should be numeric") |> validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") %>% + description = "`cpi2005` should be logical") |> validate_if(is.character(cpi_data_level), - description = "c`pi_data_level` should be character") %>% + description = "`cpi_data_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level, - description = "`cpi_data_level` values within range") %>% + description = "`cpi_data_level` values within range") |> + validate_if(is.numeric(cpi2011_AM23), + description = "`cpi2011_AM23` should be numeric") |> + validate_if(is.numeric(cpi2017_AM23), + description = "`cpi2017_AM23` should be numeric") |> validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") %>% - validate_if(is.numeric(cpi2011_SM23), - description = "`cpi2011_SM23` should be numeric") %>% - validate_if(is.numeric(cpi2017_SM23), - description = "`cpi2017_SM23` should be numeric") %>% - validate_cols(not_na, country_code, cpi_year, survey_acronym, - cpi_data_level, - description = "no missing values in key variables") %>% + description = "`cpi_id` should be character") |> + validate_cols(not_na, country_code, cpi_year, survey_acronym, cpi_data_level, + description = "no missing values in key variables") |> validate_if(is_uniq(country_code, cpi_year, survey_acronym, cpi_data_level), - description = "no duplicate records in key variables") %>% 
+ description = "no duplicate records in key variables") |> add_results(report) validation_record <- get_results(report, unnest = FALSE) |> diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R index 98f45e1..b23b766 100644 --- a/R/cpi_validate_raw.R +++ b/R/cpi_validate_raw.R @@ -15,24 +15,22 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ validate(cpi, name = "CPI raw data validation") |> validate_if(is.character(region), description = "`region` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "NAC", "SAR", "SSA")), + region, description = "`region` values within range") |> validate_if(is.character(code), description = "`code` should be character") |> validate_if(is.character(countryname), description = "`countryname` should be character") |> - validate_if(is.character(survname), - description = "`survname` should be character") |> - validate_if(is.character(cpi_domain), - description = "`cpi_domain` should be character") |> - validate_if(is.character(version), - description = "`version` should be character") |> - validate_if(is.character(survey_coverage), - description = "`survey_coverage` should be character") |> - validate_if(is.character(cpi_id), - description = "`cpi_id` should be character") |> validate_if(is.numeric(year), description = "`year` should be numeric") |> + validate_if(is.character(survname), + description = "`survname` should be character") |> validate_if(is.numeric(ref_year), description = "`ref_year` should be numeric") |> + validate_if(is.character(cpi_domain), + description = "`cpi_domain` should be character") |> + validate_cols(in_set(c("National", "Urban/Rural")), + cpi_domain, description = "`cpi_domain` values within range") |> validate_if(is.numeric(cpi_domain_value), description = "`cpi_domain_value` should be numeric") |> validate_if(is.numeric(cpi2017_unadj), @@ -43,18 +41,34 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ description = 
"`cpi201`1 should be numeric") |> validate_if(is.numeric(cpi2017), description = "`cpi2017` should be numeric") |> + validate_if(is.character(version), + description = "`version` should be character") |> validate_if(is.numeric(comparability), description = "`comparability` should be numeric") |> validate_if(is.numeric(cur_adj), description = "`cur_adj` should be numeric") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("N", "R", "U", NA)), + survey_coverage, description = "`survey_coverage` values within range") |> validate_if(is.numeric(cpi2011_SM22), description = "`cpi2011_SM22` should be numeric") |> validate_if(is.numeric(comparable), description = "`comparable` should be numeric") |> validate_if(is.numeric(cpi2017_SM22), description = "`cpi2017_SM22` should be numeric") |> + validate_cols(is.logical, cpi2005, + description = "`cpi2005` should be logical") |> validate_if(is.numeric(cpi_data_level), description = "`cpi_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + cpi_data_level, description = "`cpi_data_level` values within range") |> + validate_if(is.numeric(ref_year_AM23), + description = "`ref_year_AM23` should be numeric") |> + validate_if(is.numeric(cpi2011_AM23), + description = "`cpi2011_AM23` should be numeric") |> + validate_if(is.numeric(cpi2017_AM23), + description = "`cpi2017_AM23` should be numeric") |> validate_if(is.numeric(change_cpi2017), description = "`change_cpi2017` should be numeric") |> validate_if(is.numeric(change_icp2017), @@ -63,18 +77,12 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ description = "`change_cpi2011` should be numeric") |> validate_if(is.numeric(change_icp2011), description = "`change_icp2011` should be numeric") |> - validate_cols(is.logical, cpi2005, - description = "`cpi2005` should be logical") |> + validate_if(is.character(cpi_id), + description = "`cpi_id` should be 
character") |> validate_cols(not_na, code, year, survname, cpi_data_level, description = "no missing values in key variables") |> validate_if(is_uniq(code, year, survname, cpi_data_level), description = "no duplicate records in key variables") |> - validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, - description = "`cpi_domian` values within range") |> - validate_cols(in_set(c("N", "R", "U", NA)), survey_coverage, - description = "`survey_coverage` values within range") |> - validate_cols(in_set(c(0, 1, 2)), cpi_data_level, - description = "`cpi_data_level` values within range") |> add_results(report) validation_record <- get_results(report, unnest = FALSE) |> diff --git a/R/gdm_validate_raw.R b/R/gdm_validate_raw.R index 7c7a512..0d3df0f 100644 --- a/R/gdm_validate_raw.R +++ b/R/gdm_validate_raw.R @@ -15,10 +15,14 @@ gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ validate(gdm, name = "GDM raw data validation") |> validate_if(is.character(Region), description = "`Region` should be character") |> + validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), + Region, description = "`Region` values within range") |> validate_if(is.character(countryName), description = "`countryName` should be character") |> validate_if(is.character(Coverage), description = "`Coverage` should be character") |> + validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), + Coverage, description = "`Coverage` values within range") |> validate_if(is.character(CountryCode), description = "`CountryCode` should be character") |> validate_if(is.numeric(SurveyTime), @@ -27,6 +31,8 @@ gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ description = "`CPI_Time` should be numeric") |> validate_if(is.character(DataType), description = "`DataType` should be character") |> + validate_cols(in_set(c("x", "X", "y", "Y")), + DataType, description = "`DataType` values within range") |> 
validate_if(is.numeric(SurveyMean_LCU), description = "`SurveyMean_LCU` should be numeric") |> validate_if(is.numeric(currency), @@ -44,10 +50,6 @@ gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ description = "no missing values in key variables") |> validate_if(is_uniq(CountryCode, Coverage, SurveyTime, DataType), description = "no duplicate records in key variables") |> - validate_cols(in_set(c("SSA", "ECA", "OHI", "LAC", "SAS", "EAP", "MNA")), - Region, description = "`Region` values within range") |> - validate_cols(in_set(c("National", "Urban", "Aggregated", "Rural", "rural", "urban")), - Coverage, description = "`Coverage` values within range") |> add_results(report) validation_record <- get_results(report, unnest = FALSE) |> diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R new file mode 100644 index 0000000..d24c6c0 --- /dev/null +++ b/R/gdp_validate_output.R @@ -0,0 +1,43 @@ +#' Validate output gdp data +#' +#' @param gdp output gdp data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ + + stopifnot("GDP output data is not loaded" = !is.null(gdp)) + + report <- data_validation_report() + + validate(gdp, name = "GDP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.character(gdp_data_level), + description = "`gdp_data_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + gdp_data_level, description = "`gdp_data_level` values within range") |> + validate_if(is.numeric(gdp), + description = "`gdp` should be numeric") |> + validate_if(is.character(gdp_domain), + description = "`gdp_domain` should be character") |> + 
validate_cols(in_set(c("national", "urban/rural")), + gdp_domain, description = "`gdp_domain` values within range") |> + validate_cols(not_na, country_code, year, gdp_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, gdp_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/mpd_validate_output.R b/R/mpd_validate_output.R index 6021b91..c5ffced 100644 --- a/R/mpd_validate_output.R +++ b/R/mpd_validate_output.R @@ -1,4 +1,4 @@ -#' Validate raw maddison data +#' Validate output maddison data #' #' @param mpd output mpd data #' @param detail has an option TRUE/FALSE, default value is FALSE @@ -6,7 +6,7 @@ #' @importFrom assertr in_set not_na is_uniq #' #' @export -mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ +mpd_validate_output <- function(mpd, detail = getOption("pipaux.detail.output")){ stopifnot("mpd/ maddison output data is not loaded" = !is.null(mpd)) diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R index e5daeaa..9674a1e 100644 --- a/R/pfw_validate_output.R +++ b/R/pfw_validate_output.R @@ -1,6 +1,6 @@ #' Validate clean pfw data #' -#' @param pfw clean pfw data, output via `pipfun::pip_pfw_clean` +#' @param pfw clean pfw data, output via `pip_pfw_clean` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq diff --git a/R/pip_countries.R b/R/pip_countries.R index 6332692..d1dd26d 100644 --- a/R/pip_countries.R +++ b/R/pip_countries.R @@ -26,6 +26,9 @@ pip_countries <- function(action = c("update", "load"), tag = tag ) + # validate country list raw data + cl_validate_raw(cl) + pfw <- load_aux(measure = "pfw", maindir = maindir, branch = 
branch) diff --git a/R/pip_country_list.R b/R/pip_country_list.R index f233b90..6125e69 100644 --- a/R/pip_country_list.R +++ b/R/pip_country_list.R @@ -33,6 +33,9 @@ pip_country_list <- function(action = c("update", "load"), tag = tag ) + # validate country list raw data + cl_validate_raw(cl) + # Save if (branch == "main") { branch <- "" diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R index 45d0c00..c2690e9 100644 --- a/R/pip_cpi_update.R +++ b/R/pip_cpi_update.R @@ -25,6 +25,8 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, tag = tag ) + # validate cpi raw data + cpi_validate_raw(cpi) # ____________________________________________________________________________ # Cleaning #### @@ -34,6 +36,9 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, maindir = maindir, branch = branch) + # validate cpi clean data before saving it + cpi_validate_output(cpi) + # Save if (branch == "main") { branch <- "" diff --git a/R/pip_gdm_update.R b/R/pip_gdm_update.R index f8f9386..af12d23 100644 --- a/R/pip_gdm_update.R +++ b/R/pip_gdm_update.R @@ -21,6 +21,8 @@ pip_gdm_update <- function(force = FALSE, branch = branch, tag = tag) + # validate gdm raw data + gdm_validate_raw(gdm) # ____________________________________________________________________________ # Transform dataset #### @@ -204,6 +206,8 @@ pip_gdm_update <- function(force = FALSE, # ---- Save and sign ---- + # validate gdm output data + gdm_validate_output(gdm) if (branch == "main") { branch <- "" diff --git a/R/pip_gdp_update.R b/R/pip_gdp_update.R index 3b34478..d63e135 100644 --- a/R/pip_gdp_update.R +++ b/R/pip_gdp_update.R @@ -63,6 +63,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, owner = owner, branch = branch ) + # validate sna data + sna_validate_raw(sna) sna_fy <- pipfun::load_from_gh( measure = "sna", @@ -70,7 +72,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch, filename = "sna_metadata" ) - + # validate sna_fy data + sna_fy_validate_raw(sna_fy) cl 
<- load_aux(maindir = maindir, measure = "country_list", diff --git a/R/pip_maddison.R b/R/pip_maddison.R index 7d0c4ad..7712c66 100644 --- a/R/pip_maddison.R +++ b/R/pip_maddison.R @@ -23,6 +23,11 @@ pip_maddison <- function(action = c("update", "load"), branch = branch, tag = tag ) + # validate raw data + mpd_validate_raw(mpd) + + # # validate output data + # mpd_validate_output(mpd) if (branch == "main") { branch <- "" diff --git a/R/pip_pce_update.R b/R/pip_pce_update.R index 0ddd878..5dde93c 100644 --- a/R/pip_pce_update.R +++ b/R/pip_pce_update.R @@ -39,13 +39,17 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch ) + # validate sna data + sna_validate_raw(sna) + sna_fy <- pipfun::load_from_gh( measure = "sna", owner = owner, branch = branch, filename = "sna_metadata" ) - + # validate sna_fy data + sna_fy_validate_raw(sna_fy) # ____________________________________________________________________________ # Clean PCE from WDI #### @@ -224,6 +228,9 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ## ---- Sign and save ---- + # validate pce output data + pce_validate_output(pce) + if (branch == "main") { branch <- "" } diff --git a/R/pip_pfw_update.R b/R/pip_pfw_update.R index 19dcffc..772bd79 100644 --- a/R/pip_pfw_update.R +++ b/R/pip_pfw_update.R @@ -17,11 +17,17 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, owner = owner, branch = branch, ext = "dta") + # validate pfw raw data + pfw_validate_raw(pfw) + # Clean data pfw <- pip_pfw_clean(pfw, maindir = maindir, branch = branch) + # validate pfw output data + pfw_validate_output(pfw) + # Save dataset if (branch == "main") { branch <- "" diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index 79e72cf..75a97d1 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -91,6 +91,8 @@ pip_pop_update <- function(force = FALSE, clean_names_from_wide() |> clean_from_wide() + # validate pop main raw data + popmain_validate_raw(pop_main) ### Ger special cases --------- spop <-
pipfun::load_from_gh( @@ -103,6 +105,8 @@ pip_pop_update <- function(force = FALSE, clean_names_from_wide() |> clean_from_wide() + # validate special cases pop raw data + spop_validate_raw(spop) pop <- joyn::joyn(pop_main, spop, by = c("country_code", "year", "pop_data_level"), @@ -166,6 +170,9 @@ pip_pop_update <- function(force = FALSE, # Save data --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # validate output pop data + pop_validate_output(pop) + # Save if (branch == "main") { branch <- "" diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R index dfb239e..a04470f 100644 --- a/R/pip_ppp_update.R +++ b/R/pip_ppp_update.R @@ -26,6 +26,8 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, tag = tag ) + # validate ppp raw data + ppp_validate_raw(ppp) # ____________________________________________________________________________ # cleaning #### @@ -58,6 +60,10 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, # ____________________________________________________________________________ # Saving #### + + # validate ppp output data + ppp_validate_output(ppp) + if (branch == "main") { branch <- "" } diff --git a/R/pip_wdi_update.R b/R/pip_wdi_update.R index da00521..9ec63ae 100644 --- a/R/pip_wdi_update.R +++ b/R/pip_wdi_update.R @@ -48,10 +48,15 @@ pip_wdi_update <- function(force = FALSE, new = c("country_code", "year") ) } + # validate wdi raw data + wdi_validate_raw(wdi) # _________________________________________________________________________ # Save and Return #### + # validate wdi output data + wdi_validate_output(wdi) + if (branch == "main") { branch <- "" } diff --git a/R/pip_weo.R b/R/pip_weo.R index 87343e7..c3c1ad0 100644 --- a/R/pip_weo.R +++ b/R/pip_weo.R @@ -33,11 +33,18 @@ pip_weo <- function(action = c("update", "load"), branch = branch, tag = tag ) + + # validate weo raw data + weo_validate_raw(weo) + dt <- pip_weo_clean(dt, maindir = maindir, branch = branch) # Save dataset + # validate weo output data +
weo_validate_output(weo) + if (branch == "main") { branch <- "" } diff --git a/R/popmain_validate_raw.R b/R/popmain_validate_raw.R new file mode 100644 index 0000000..2dac3dd --- /dev/null +++ b/R/popmain_validate_raw.R @@ -0,0 +1,39 @@ +#' Validate raw main pop data +#' +#' @param pop_main raw pop main data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ + + stopifnot("POP main raw data is not loaded" = !is.null(pop_main)) + + report <- data_validation_report() + + validate(pop_main, name = "POP main raw data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R index d336be5..d0dd002 100644 --- a/R/ppp_validate_output.R +++ b/R/ppp_validate_output.R @@ -6,7 +6,7 @@ #' @importFrom assertr in_set not_na is_uniq #' #' @export -ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.raw")){ 
+ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ stopifnot("PPP output data is not loaded" = !is.null(ppp)) diff --git a/R/sna_fy_validate_raw.R b/R/sna_fy_validate_raw.R new file mode 100644 index 0000000..5a3d8b7 --- /dev/null +++ b/R/sna_fy_validate_raw.R @@ -0,0 +1,38 @@ +#' Validate raw sna_fy data +#' +#' @param sna_fy raw sna_fy data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ + + stopifnot("sna_fy raw data is not loaded" = !is.null(sna_fy)) + + report <- data_validation_report() + + validate(sna_fy, name = "sna_fy raw data validation") |> + validate_if(is.character(Code), + description = "`Code` should be character") |> + validate_if(is.character(LongName), + description = "`LongName` should be character") |> + validate_if(is.character(SpecialNotes), + description = "`SpecialNotes` should be character") |> + validate_if(is.character(Month), + description = "`Month` should be character") |> + validate_cols(not_na, Code, Month, Day, + description = "no missing values in key variables") |> + # validate_if(is_uniq(Code, LongName), + # description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/sna_validate_raw.R b/R/sna_validate_raw.R new file mode 100644 index 0000000..26e9278 --- /dev/null +++ b/R/sna_validate_raw.R @@ -0,0 +1,48 @@ +#' Validate raw special national accounts (sna) data +#' +#' @param sna raw sna data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set 
not_na is_uniq +#' +#' @export +sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ + + stopifnot("SNA raw data is not loaded" = !is.null(sna)) + + report <- data_validation_report() + + validate(sna, name = "SNA raw data validation") |> + validate_if(is.character(countryname), + description = "`countryname` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_cols(in_set(c("National")), + coverage, description = "`coverage` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(GDP), + description = "`GDP` should be numeric") |> + validate_if(is.logical(PCE), + description = "`PCE` should be logical") |> + validate_if(is.character(sourceGDP), + description = "`sourceGDP` should be character") |> + validate_if(is.logical(sourcePCE), + description = "`sourcePCE` should be logical") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/spop_validate_raw.R b/R/spop_validate_raw.R new file mode 100644 index 0000000..c075567 --- /dev/null +++ b/R/spop_validate_raw.R @@ -0,0 +1,39 @@ +#' Validate raw special cases pop data +#' +#' @param spop raw special case pop data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +spop_validate_raw <- function(spop, detail = 
getOption("pipaux.detail.output")){ + + stopifnot("Special POP raw data is not loaded" = !is.null(spop)) + + report <- data_validation_report() + + validate(spop, name = "POP output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(pop_data_level), + description = "`pop_data_level` should be numeric") |> + validate_cols(in_set(c(0, 1, 2)), + pop_data_level, description = "`pop_data_level` values within range") |> + validate_if(is.numeric(pop), + description = "`pop` should be numeric") |> + validate_cols(not_na, country_code, year, pop_data_level, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year, pop_data_level), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/wdi_validate_output.R b/R/wdi_validate_output.R index 6868efa..01b1c1b 100644 --- a/R/wdi_validate_output.R +++ b/R/wdi_validate_output.R @@ -6,7 +6,7 @@ #' @importFrom assertr in_set not_na is_uniq #' #' @export -weo_validate_output <- function(weo, detail = getOption("pipaux.detail.raw")){ +weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ stopifnot("WEO output data is not loaded" = !is.null(weo)) diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R index 1e6d487..a8fdad8 100644 --- a/R/weo_validate_output.R +++ b/R/weo_validate_output.R @@ -6,7 +6,7 @@ #' @importFrom assertr in_set not_na is_uniq #' #' @export -wdi_validate_output <- function(wdi, detail = getOption("pipaux.detail.raw")){ +wdi_validate_output <- function(wdi, detail = getOption("pipaux.detail.output")){ stopifnot("WDI output data is not loaded" = 
!is.null(wdi)) From 7ad30527a3cf855a7707191b5d5b76e260490902 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Fri, 8 Mar 2024 23:04:53 -0500 Subject: [PATCH 06/23] DESCRIPTION and NAMESPACE updated --- DESCRIPTION | 5 ++++- NAMESPACE | 31 +++++++++++++++++++++++++++++++ man/cl_validate_raw.Rd | 16 ++++++++++++++++ man/countries_validate_output.Rd | 19 +++++++++++++++++++ man/cpi_validate_output.Rd | 16 ++++++++++++++++ man/cpi_validate_raw.Rd | 16 ++++++++++++++++ man/gdm_validate_output.Rd | 16 ++++++++++++++++ man/gdm_validate_raw.Rd | 16 ++++++++++++++++ man/gdp_validate_output.Rd | 16 ++++++++++++++++ man/get_error_validation.Rd | 14 ++++++++++++++ man/mpd_validate_output.Rd | 16 ++++++++++++++++ man/mpd_validate_raw.Rd | 16 ++++++++++++++++ man/pce_validate_output.Rd | 16 ++++++++++++++++ man/pfw_validate_output.Rd | 16 ++++++++++++++++ man/pfw_validate_raw.Rd | 16 ++++++++++++++++ man/pop_validate_output.Rd | 16 ++++++++++++++++ man/popmain_validate_raw.Rd | 16 ++++++++++++++++ man/ppp_validate_output.Rd | 16 ++++++++++++++++ man/ppp_validate_raw.Rd | 16 ++++++++++++++++ man/send_report.Rd | 11 +++++++++++ man/sna_fy_validate_raw.Rd | 16 ++++++++++++++++ man/sna_validate_raw.Rd | 16 ++++++++++++++++ man/spop_validate_raw.Rd | 16 ++++++++++++++++ man/wdi_validate_output.Rd | 16 ++++++++++++++++ man/wdi_validate_raw.Rd | 16 ++++++++++++++++ man/weo_validate_output.Rd | 16 ++++++++++++++++ man/weo_validate_raw.Rd | 16 ++++++++++++++++ 27 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 man/cl_validate_raw.Rd create mode 100644 man/countries_validate_output.Rd create mode 100644 man/cpi_validate_output.Rd create mode 100644 man/cpi_validate_raw.Rd create mode 100644 man/gdm_validate_output.Rd create mode 100644 man/gdm_validate_raw.Rd create mode 100644 man/gdp_validate_output.Rd create mode 100644 man/get_error_validation.Rd create mode 100644 man/mpd_validate_output.Rd create mode 100644 man/mpd_validate_raw.Rd create mode 100644
man/pce_validate_output.Rd create mode 100644 man/pfw_validate_output.Rd create mode 100644 man/pfw_validate_raw.Rd create mode 100644 man/pop_validate_output.Rd create mode 100644 man/popmain_validate_raw.Rd create mode 100644 man/ppp_validate_output.Rd create mode 100644 man/ppp_validate_raw.Rd create mode 100644 man/send_report.Rd create mode 100644 man/sna_fy_validate_raw.Rd create mode 100644 man/sna_validate_raw.Rd create mode 100644 man/spop_validate_raw.Rd create mode 100644 man/wdi_validate_output.Rd create mode 100644 man/wdi_validate_raw.Rd create mode 100644 man/weo_validate_output.Rd create mode 100644 man/weo_validate_raw.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0e1abef..9188803 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,7 +55,10 @@ Imports: config, collapse, data.validator, - assertr + assertr, + covr, + blastula, + rlang VignetteBuilder: knitr Remotes: github::PIP-Technical-Team/pipload@ongoing, diff --git a/NAMESPACE b/NAMESPACE index f4707f0..959e7dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,8 +2,21 @@ export("%>%") export(auto_aux_update) +export(cl_validate_raw) +export(countries_validate_output) +export(cpi_validate_output) +export(cpi_validate_raw) export(draw_model) +export(gdm_validate_output) +export(gdm_validate_raw) +export(gdp_validate_output) +export(get_error_validation) export(load_aux) +export(mpd_validate_output) +export(mpd_validate_raw) +export(pce_validate_output) +export(pfw_validate_output) +export(pfw_validate_raw) export(pip_censoring) export(pip_countries) export(pip_country_list) @@ -31,10 +44,28 @@ export(pip_wdi) export(pip_wdi_update) export(pip_weo) export(pip_weo_clean) +export(pop_validate_output) +export(popmain_validate_raw) +export(ppp_validate_output) +export(ppp_validate_raw) +export(send_report) +export(sna_fy_validate_raw) +export(sna_validate_raw) +export(spop_validate_raw) export(update_aux) +export(wdi_validate_output) +export(wdi_validate_raw) +export(weo_validate_output) 
+export(weo_validate_raw) +import(blastula) import(collapse, except = fdroplevels) import(data.table) import(data.table, except = fdroplevels) +import(data.validator) +import(rlang) +importFrom(assertr,in_set) +importFrom(assertr,is_uniq) +importFrom(assertr,not_na) importFrom(glue,glue) importFrom(lifecycle,deprecated) importFrom(magrittr,"%<>%") diff --git a/man/cl_validate_raw.Rd b/man/cl_validate_raw.Rd new file mode 100644 index 0000000..2b5f785 --- /dev/null +++ b/man/cl_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cl_validate_raw.R +\name{cl_validate_raw} +\alias{cl_validate_raw} +\title{Validate raw country list data} +\usage{ +cl_validate_raw(cl, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{cl}{raw country list data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw country list data +} diff --git a/man/countries_validate_output.Rd b/man/countries_validate_output.Rd new file mode 100644 index 0000000..25d0708 --- /dev/null +++ b/man/countries_validate_output.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countries_validate_output.R +\name{countries_validate_output} +\alias{countries_validate_output} +\title{Validate output countries data} +\usage{ +countries_validate_output( + countries, + detail = getOption("pipaux.detail.output") +) +} +\arguments{ +\item{countries}{output countries data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output countries data +} diff --git a/man/cpi_validate_output.Rd b/man/cpi_validate_output.Rd new file mode 100644 index 0000000..efa42de --- /dev/null +++ b/man/cpi_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cpi_validate_output.R +\name{cpi_validate_output} 
+\alias{cpi_validate_output} +\title{Validate clean cpi data} +\usage{ +cpi_validate_output(cpi, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{cpi}{clean cpi data, output via \code{pip_cpi_clean}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate clean cpi data +} diff --git a/man/cpi_validate_raw.Rd b/man/cpi_validate_raw.Rd new file mode 100644 index 0000000..e818e1c --- /dev/null +++ b/man/cpi_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cpi_validate_raw.R +\name{cpi_validate_raw} +\alias{cpi_validate_raw} +\title{Validate raw cpi data} +\usage{ +cpi_validate_raw(cpi, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{cpi}{raw cpi data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw cpi data +} diff --git a/man/gdm_validate_output.Rd b/man/gdm_validate_output.Rd new file mode 100644 index 0000000..abb40ae --- /dev/null +++ b/man/gdm_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gdm_validate_output.R +\name{gdm_validate_output} +\alias{gdm_validate_output} +\title{Validate clean gdm data} +\usage{ +gdm_validate_output(gdm, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{gdm}{clean gdm data, output via \code{pipfun::pip_gdm_clean}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate clean gdm data +} diff --git a/man/gdm_validate_raw.Rd b/man/gdm_validate_raw.Rd new file mode 100644 index 0000000..44171c2 --- /dev/null +++ b/man/gdm_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gdm_validate_raw.R +\name{gdm_validate_raw} +\alias{gdm_validate_raw} +\title{Validate raw gdm data} +\usage{ +gdm_validate_raw(gdm, detail = 
getOption("pipaux.detail.raw")) +} +\arguments{ +\item{gdm}{raw gdm data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw gdm data +} diff --git a/man/gdp_validate_output.Rd b/man/gdp_validate_output.Rd new file mode 100644 index 0000000..8c857a0 --- /dev/null +++ b/man/gdp_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gdp_validate_output.R +\name{gdp_validate_output} +\alias{gdp_validate_output} +\title{Validate output gdp data} +\usage{ +gdp_validate_output(gdp, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{gdp}{output gdp data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output gdp data +} diff --git a/man/get_error_validation.Rd b/man/get_error_validation.Rd new file mode 100644 index 0000000..559c64c --- /dev/null +++ b/man/get_error_validation.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_error_validation.R +\name{get_error_validation} +\alias{get_error_validation} +\title{Get validation report data validation error report} +\usage{ +get_error_validation(vlddata, detail) +} +\arguments{ +\item{vlddata}{validation data} +} +\description{ +Get validation report data validation error report +} diff --git a/man/mpd_validate_output.Rd b/man/mpd_validate_output.Rd new file mode 100644 index 0000000..14aca18 --- /dev/null +++ b/man/mpd_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mpd_validate_output.R +\name{mpd_validate_output} +\alias{mpd_validate_output} +\title{Validate output maddison data} +\usage{ +mpd_validate_output(mpd, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{mpd}{output mpd data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ 
+Validate output maddison data +} diff --git a/man/mpd_validate_raw.Rd b/man/mpd_validate_raw.Rd new file mode 100644 index 0000000..b612f26 --- /dev/null +++ b/man/mpd_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mpd_validate_raw.R +\name{mpd_validate_raw} +\alias{mpd_validate_raw} +\title{Validate raw maddison data} +\usage{ +mpd_validate_raw(mpd, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{mpd}{raw mpd data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw maddison data +} diff --git a/man/pce_validate_output.Rd b/man/pce_validate_output.Rd new file mode 100644 index 0000000..6c071f1 --- /dev/null +++ b/man/pce_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pce_validate_output.R +\name{pce_validate_output} +\alias{pce_validate_output} +\title{Validate output pce data} +\usage{ +pce_validate_output(pce, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{pce}{output pce data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output pce data +} diff --git a/man/pfw_validate_output.Rd b/man/pfw_validate_output.Rd new file mode 100644 index 0000000..a1a9563 --- /dev/null +++ b/man/pfw_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pfw_validate_output.R +\name{pfw_validate_output} +\alias{pfw_validate_output} +\title{Validate clean pfw data} +\usage{ +pfw_validate_output(pfw, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{pfw}{clean pfw data, output via \code{pip_pfw_clean}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate clean pfw data +} diff --git a/man/pfw_validate_raw.Rd b/man/pfw_validate_raw.Rd new file mode 100644 index 
0000000..bd3a413 --- /dev/null +++ b/man/pfw_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pfw_validate_raw.R +\name{pfw_validate_raw} +\alias{pfw_validate_raw} +\title{Validate raw pfw data} +\usage{ +pfw_validate_raw(pfw, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{pfw}{raw pfw data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw pfw data +} diff --git a/man/pop_validate_output.Rd b/man/pop_validate_output.Rd new file mode 100644 index 0000000..f1a63bf --- /dev/null +++ b/man/pop_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pop_validate_output.R +\name{pop_validate_output} +\alias{pop_validate_output} +\title{Validate output pop data} +\usage{ +pop_validate_output(pop, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{pop}{output pop data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output pop data +} diff --git a/man/popmain_validate_raw.Rd b/man/popmain_validate_raw.Rd new file mode 100644 index 0000000..4dbdbba --- /dev/null +++ b/man/popmain_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/popmain_validate_raw.R +\name{popmain_validate_raw} +\alias{popmain_validate_raw} +\title{Validate raw main pop data} +\usage{ +popmain_validate_raw(pop_main, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{pop_main}{raw pop main data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw main pop data +} diff --git a/man/ppp_validate_output.Rd b/man/ppp_validate_output.Rd new file mode 100644 index 0000000..8da5124 --- /dev/null +++ b/man/ppp_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by 
roxygen2: do not edit by hand +% Please edit documentation in R/ppp_validate_output.R +\name{ppp_validate_output} +\alias{ppp_validate_output} +\title{Validate output ppp data} +\usage{ +ppp_validate_output(ppp, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{ppp}{output ppp data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output ppp data +} diff --git a/man/ppp_validate_raw.Rd b/man/ppp_validate_raw.Rd new file mode 100644 index 0000000..41aaff4 --- /dev/null +++ b/man/ppp_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ppp_validate_raw.R +\name{ppp_validate_raw} +\alias{ppp_validate_raw} +\title{Validate raw ppp data} +\usage{ +ppp_validate_raw(ppp, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{ppp}{raw ppp data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw ppp data +} diff --git a/man/send_report.Rd b/man/send_report.Rd new file mode 100644 index 0000000..cabd5f0 --- /dev/null +++ b/man/send_report.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/send_report.R +\name{send_report} +\alias{send_report} +\title{Send an email that contains auxiliary data validation report} +\usage{ +send_report() +} +\description{ +Send an email that contains auxiliary data validation report +} diff --git a/man/sna_fy_validate_raw.Rd b/man/sna_fy_validate_raw.Rd new file mode 100644 index 0000000..74fb3c1 --- /dev/null +++ b/man/sna_fy_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sna_fy_validate_raw.R +\name{sna_fy_validate_raw} +\alias{sna_fy_validate_raw} +\title{Validate raw sna_fy data} +\usage{ +sna_fy_validate_raw(sna_fy, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{sna_fy}{raw sna_fy data, as 
loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw sna_fy data +} diff --git a/man/sna_validate_raw.Rd b/man/sna_validate_raw.Rd new file mode 100644 index 0000000..abdc3fa --- /dev/null +++ b/man/sna_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sna_validate_raw.R +\name{sna_validate_raw} +\alias{sna_validate_raw} +\title{Validate raw special national accounts (sna) data} +\usage{ +sna_validate_raw(sna, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{sna}{raw sna data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw special national accounts (sna) data +} diff --git a/man/spop_validate_raw.Rd b/man/spop_validate_raw.Rd new file mode 100644 index 0000000..8bc25e9 --- /dev/null +++ b/man/spop_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spop_validate_raw.R +\name{spop_validate_raw} +\alias{spop_validate_raw} +\title{Validate raw special cases pop data} +\usage{ +spop_validate_raw(spop, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{spop}{raw special case pop data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw special cases pop data +} diff --git a/man/wdi_validate_output.Rd b/man/wdi_validate_output.Rd new file mode 100644 index 0000000..03a163c --- /dev/null +++ b/man/wdi_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/weo_validate_output.R +\name{wdi_validate_output} +\alias{wdi_validate_output} +\title{Validate output wdi data} +\usage{ +wdi_validate_output(wdi, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{wdi}{output wdi data} + 
+\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output wdi data +} diff --git a/man/wdi_validate_raw.Rd b/man/wdi_validate_raw.Rd new file mode 100644 index 0000000..d97fc77 --- /dev/null +++ b/man/wdi_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wdi_validate_raw.R +\name{wdi_validate_raw} +\alias{wdi_validate_raw} +\title{Validate raw wdi data} +\usage{ +wdi_validate_raw(wdi, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{wdi}{raw wdi data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw wdi data +} diff --git a/man/weo_validate_output.Rd b/man/weo_validate_output.Rd new file mode 100644 index 0000000..68e06e8 --- /dev/null +++ b/man/weo_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wdi_validate_output.R +\name{weo_validate_output} +\alias{weo_validate_output} +\title{Validate output weo data} +\usage{ +weo_validate_output(weo, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{weo}{output weo data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output weo data +} diff --git a/man/weo_validate_raw.Rd b/man/weo_validate_raw.Rd new file mode 100644 index 0000000..9e79af0 --- /dev/null +++ b/man/weo_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/weo_validate_raw.R +\name{weo_validate_raw} +\alias{weo_validate_raw} +\title{Validate raw weo data} +\usage{ +weo_validate_raw(weo, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{weo}{raw weo data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw weo data +} From 
7fa88c215e93adee6ce7982bf8c7511ccad96758 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Sat, 9 Mar 2024 18:08:31 -0500 Subject: [PATCH 07/23] Adding validation rule for metadata file --- DESCRIPTION | 2 +- NAMESPACE | 3 ++ R/metadata_validate_output.R | 53 ++++++++++++++++++++++ R/metadata_validate_raw.R | 80 +++++++++++++++++++++++++++++++++ R/pip_metadata_update.R | 5 +++ R/pip_pl.R | 8 +++- R/pl_validate_output.R | 39 ++++++++++++++++ man/metadata_validate_output.Rd | 16 +++++++ man/metadata_validate_raw.Rd | 16 +++++++ man/pl_validate_output.Rd | 16 +++++++ 10 files changed, 235 insertions(+), 3 deletions(-) create mode 100644 R/metadata_validate_output.R create mode 100644 R/metadata_validate_raw.R create mode 100644 R/pl_validate_output.R create mode 100644 man/metadata_validate_output.Rd create mode 100644 man/metadata_validate_raw.Rd create mode 100644 man/pl_validate_output.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 9188803..8c0231a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,9 +54,9 @@ Imports: dm, config, collapse, + covr, data.validator, assertr, - covr, blastula, rlang VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 959e7dd..69940d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,8 @@ export(gdm_validate_raw) export(gdp_validate_output) export(get_error_validation) export(load_aux) +export(metadata_validate_output) +export(metadata_validate_raw) export(mpd_validate_output) export(mpd_validate_raw) export(pce_validate_output) @@ -44,6 +46,7 @@ export(pip_wdi) export(pip_wdi_update) export(pip_weo) export(pip_weo_clean) +export(pl_validate_output) export(pop_validate_output) export(popmain_validate_raw) export(ppp_validate_output) diff --git a/R/metadata_validate_output.R b/R/metadata_validate_output.R new file mode 100644 index 0000000..40cf63f --- /dev/null +++ b/R/metadata_validate_output.R @@ -0,0 +1,53 @@ +#' Validate output metadata data +#' +#' @param metadata metadata data +#' @param detail has an option TRUE/FALSE, 
default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ + + stopifnot("Metadata data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "Metadata output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.character(country_name), + description = "`country_name` should be character") |> + validate_if(is.numeric(reporting_year), + description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(survey_year), + description = "`survey_year` should be numeric") |> + validate_if(is.character(survey_title), + description = "`survey_title` should be character") |> + validate_if(is.character(survey_conductor), + description = "`survey_conductor` should be character") |> + validate_if(is.character(survey_coverage), + description = "`survey_coverage` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), + survey_coverage, description = "`survey_coverage` values within range") |> + validate_if(is.character(welfare_type), + description = "`welfare_type` should be character") |> + validate_cols(in_set(c("consumption", "income")), + welfare_type, description = "`welfare_type` values within range") |> + validate_if(is.character(distribution_type), + description = "`distribution_type` should be character") |> + validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), + distribution_type, description = "`distribution_type` values within range") |> + validate_cols(not_na, country_code, reporting_year, welfare_type, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, reporting_year, welfare_type), + description = "no duplicate records in key variables") |> + add_results(report) + + 
validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/metadata_validate_raw.R b/R/metadata_validate_raw.R new file mode 100644 index 0000000..840b225 --- /dev/null +++ b/R/metadata_validate_raw.R @@ -0,0 +1,80 @@ +#' Validate raw metadata data +#' +#' @param metadata raw metadata data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +metadata_validate_raw <- function(metadata, detail = getOption("pipaux.detail.raw")){ + + stopifnot("metadata raw data is not loaded" = !is.null(metadata)) + + report <- data_validation_report() + + validate(metadata, name = "metadata raw data validation") |> + validate_if(is.character(status), + description = "`status` should be character") |> + validate_if(is.character(reg), + description = "`reg` should be character") |> + validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAR", "SSA")), + reg, description = "`reg` values within range") |> + validate_if(is.numeric(id), + description = "`id` should be numeric") |> + validate_if(is.character(svy_id), + description = "`svy_id` should be character") |> + validate_if(is.character(link), + description = "`link` should be character") |> + validate_if(is.character(title), + description = "`title` should be character") |> + validate_if(is.character(data_access), + description = "`data_access` should be character") |> + validate_if(is.numeric(year_start), + description = "`year_start` should be numeric") |> + validate_if(is.numeric(year_end), + description = "`year_end` should be numeric") |> + validate_if(is.character(authoring_entity_name), + description = "`authoring_entity_name` should be character") |> + validate_if(is.character(authoring_entity_affiliation), + description = 
"`authoring_entity_affiliation` should be character") |> + validate_if(is.character(contact_email), + description = "`contact_email` should be character") |> + validate_if(is.character(contact_uri), + description = "`contact_uri` should be character") |> + validate_if(is.character(abstract), + description = "`abstract` should be character") |> + validate_if(is.character(collection_dates_cycle), + description = "`collection_dates_cycle` should be character") |> + validate_if(is.character(collection_dates_start), + description = "`collection_dates_start` should be character") |> + validate_if(is.character(collection_dates_end), + description = "`collection_dates_end` should be character") |> + validate_if(is.character(coverage), + description = "`coverage` should be character") |> + validate_if(is.character(sampling_procedure), + description = "`sampling_procedure` should be character") |> + validate_if(is.character(collection_mode), + description = "`collection_mode` should be character") |> + validate_if(is.character(coll_situation), + description = "coll_situation` should be character") |> + validate_if(is.character(weight), + description = "`weight` should be character") |> + validate_if(is.character(cleaning_operations), + description = "`cleaning_operations` should be character") |> + validate_if(is.character(coverage_notes), + description = "`coverage_notes` should be character") |> + validate_cols(not_na, svy_id, + description = "no missing values in key variables") |> + validate_if(is_uniq(svy_id), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} + diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R index 497dda4..56465fb 100644 --- a/R/pip_metadata_update.R +++ b/R/pip_metadata_update.R @@ -20,6 +20,9 @@ pip_metadata_update <- 
function(maindir = gls$PIP_DATA_DIR, branch = branch, tag = tag) + # validate raw metdata data + metadata_validate_raw(metadata = df) + # Load pfw pfw <- load_aux(measure = "pfw", maindir = maindir, @@ -100,6 +103,8 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, ## ............................................................................ ## Save #### + # validate raw metdata data + metadata_validate_output(metadata = df) if (branch == "main") { branch <- "" diff --git a/R/pip_pl.R b/R/pip_pl.R index 06ab67d..2f0753c 100644 --- a/R/pip_pl.R +++ b/R/pip_pl.R @@ -32,9 +32,13 @@ pip_pl <- function(action = c("update", "load"), dt <- purrr::map_df(dl,pip_pl_clean) # Save + + # validate pl clean data + pl_validate_output(pl = dt) + if (branch == "main") { - branch <- "" - } + branch <- "" + } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir saved <- pipfun::pip_sign_save( x = dt, diff --git a/R/pl_validate_output.R b/R/pl_validate_output.R new file mode 100644 index 0000000..43f07b5 --- /dev/null +++ b/R/pl_validate_output.R @@ -0,0 +1,39 @@ +#' Validate output pl data +#' +#' @param pl output pl data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ + + stopifnot("PL clean data is not loaded" = !is.null(pl)) + + report <- data_validation_report() + + validate(pl, name = "PL output data validation") |> + validate_if(is.character(name), + description = "`name` should be character") |> + validate_if(is.numeric(poverty_line), + description = "`poverty_line` should be numeric") |> + validate_if(is.logical(is_default), + description = "`is_default` should be logical") |> + validate_if(is.logical(is_visible), + description = "`is_visible` should be logical") |> + validate_if(is.integer(ppp_year), + description = "`ppp_year` should be numeric") |> + 
validate_cols(not_na, name, ppp_year, + description = "no missing values in key variables") |> + validate_if(is_uniq(name, ppp_year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/man/metadata_validate_output.Rd b/man/metadata_validate_output.Rd new file mode 100644 index 0000000..37fac0b --- /dev/null +++ b/man/metadata_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadata_validate_output.R +\name{metadata_validate_output} +\alias{metadata_validate_output} +\title{Validate output metadata data} +\usage{ +metadata_validate_output(metadata, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{metadata}{metadata data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output metadata data +} diff --git a/man/metadata_validate_raw.Rd b/man/metadata_validate_raw.Rd new file mode 100644 index 0000000..eec128f --- /dev/null +++ b/man/metadata_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metadata_validate_raw.R +\name{metadata_validate_raw} +\alias{metadata_validate_raw} +\title{Validate raw metadata data} +\usage{ +metadata_validate_raw(metadata, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{metadata}{raw metadata data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate raw metadata data +} diff --git a/man/pl_validate_output.Rd b/man/pl_validate_output.Rd new file mode 100644 index 0000000..f5e143c --- /dev/null +++ b/man/pl_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/pl_validate_output.R +\name{pl_validate_output} +\alias{pl_validate_output} +\title{Validate output pl data} +\usage{ +pl_validate_output(pl, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{pl}{output pl data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate output pl data +} From e0aefac34a972d6c7575b3b9f609e0e23dbb4d93 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Sat, 9 Mar 2024 23:20:32 -0500 Subject: [PATCH 08/23] Adding validation rule for npl and income group files --- NAMESPACE | 3 +++ R/incgroup_validate_output.R | 41 ++++++++++++++++++++++++++++++ R/npl_validate_output.R | 39 ++++++++++++++++++++++++++++ R/npl_validate_raw.R | 45 +++++++++++++++++++++++++++++++++ R/pip_income_groups.R | 3 +++ R/pip_npl.R | 6 +++++ man/incgroup_validate_output.Rd | 16 ++++++++++++ man/npl_validate_output.Rd | 16 ++++++++++++ man/npl_validate_raw.Rd | 16 ++++++++++++ 9 files changed, 185 insertions(+) create mode 100644 R/incgroup_validate_output.R create mode 100644 R/npl_validate_output.R create mode 100644 R/npl_validate_raw.R create mode 100644 man/incgroup_validate_output.Rd create mode 100644 man/npl_validate_output.Rd create mode 100644 man/npl_validate_raw.Rd diff --git a/NAMESPACE b/NAMESPACE index 69940d8..e7977af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,11 +11,14 @@ export(gdm_validate_output) export(gdm_validate_raw) export(gdp_validate_output) export(get_error_validation) +export(incgroup_validate_output) export(load_aux) export(metadata_validate_output) export(metadata_validate_raw) export(mpd_validate_output) export(mpd_validate_raw) +export(npl_validate_output) +export(npl_validate_raw) export(pce_validate_output) export(pfw_validate_output) export(pfw_validate_raw) diff --git a/R/incgroup_validate_output.R b/R/incgroup_validate_output.R new file mode 100644 index 0000000..aab9c39 --- /dev/null +++ b/R/incgroup_validate_output.R @@ -0,0 +1,41 @@ +#' Validate income group output data +#' 
+#' @param incgroup income group output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ + + stopifnot("Income group output data is not loaded" = !is.null(incgroup)) + + report <- data_validation_report() + + validate(incgroup, name = "Income group output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(year_data), + description = "`year_data` should be numeric") |> + validate_if(is.character(incgroup_historical), + description = "`incgroup_historical` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + incgroup_historical, description = "`incgroup_historical` values within range") |> + validate_if(is.character(fcv_historical), + description = "`fcv_historical` should be character") |> + validate_if(is.character(ssa_subregion_code), + description = "`ssa_subregion_code` should be character") |> + validate_cols(not_na, country_code, year_data, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, year_data), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/npl_validate_output.R b/R/npl_validate_output.R new file mode 100644 index 0000000..ee0f8fe --- /dev/null +++ b/R/npl_validate_output.R @@ -0,0 +1,39 @@ +#' Validate npl output data +#' +#' @param npl output data +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export 
+npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ + + stopifnot("NPL output data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL output data validation") |> + validate_if(is.character(country_code), + description = "`country_code` should be character") |> + validate_if(is.numeric(reporting_year), + description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(nat_headcount), + description = "`nat_headcount` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, country_code, reporting_year, + description = "no missing values in key variables") |> + validate_if(is_uniq(country_code, reporting_year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/npl_validate_raw.R b/R/npl_validate_raw.R new file mode 100644 index 0000000..00e8ae0 --- /dev/null +++ b/R/npl_validate_raw.R @@ -0,0 +1,45 @@ +#' Validate npl raw data +#' +#' @param npl raw npl data, as loaded via `pipfun::load_from_gh` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ + + stopifnot("NPL raw data is not loaded" = !is.null(npl)) + + report <- data_validation_report() + + validate(npl, name = "NPL raw data validation") |> + validate_if(is.character(region), + description = "`region` should be character") |> + # validate_cols(in_set(c("AFE", "AFW", "EAP", "ECA", "LAC", "MNA", "SAR")), + # region, description = 
"`region` values within range") |> + validate_if(is.character(countrycode), + description = "`countrycode` should be character") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc_nc), + description = "`vsi_pov_nahc_nc` should be numeric") |> + validate_if(is.numeric(vsi_pov_nahc), + description = "`vsi_pov_nahc` should be numeric") |> + validate_if(is.numeric(comparability), + description = "`comparability` should be numeric") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_cols(not_na, countrycode, year, + description = "no missing values in key variables") |> + validate_if(is_uniq(countrycode, year), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/pip_income_groups.R b/R/pip_income_groups.R index bdb893a..875a14e 100644 --- a/R/pip_income_groups.R +++ b/R/pip_income_groups.R @@ -52,6 +52,9 @@ pip_income_groups <- function(action = c("update", "load"), #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## save -------- + # validate income group output data + incgroup_validate_output(ig) + if (branch == "main") { branch <- "" } diff --git a/R/pip_npl.R b/R/pip_npl.R index c4af49f..72b60df 100644 --- a/R/pip_npl.R +++ b/R/pip_npl.R @@ -29,6 +29,9 @@ pip_npl <- function(action = c("update", "load"), ext = "dta") |> setDT() + # validate npl raw data + npl_validate_raw(npl) + setnames(x = npl, old = c("countrycode", "year", "vsi_pov_nahc_nc"), new = c("country_code", "reporting_year", "nat_headcount"), @@ -41,6 +44,9 @@ pip_npl <- function(action = c("update", "load"), #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## save -------- + # validate npl output data + npl_validate_output(npl) + if (branch == "main") { branch <- "" } diff 
--git a/man/incgroup_validate_output.Rd b/man/incgroup_validate_output.Rd new file mode 100644 index 0000000..a69160a --- /dev/null +++ b/man/incgroup_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/incgroup_validate_output.R +\name{incgroup_validate_output} +\alias{incgroup_validate_output} +\title{Validate income group output data} +\usage{ +incgroup_validate_output(incgroup, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{incgroup}{income group output data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate income group output data +} diff --git a/man/npl_validate_output.Rd b/man/npl_validate_output.Rd new file mode 100644 index 0000000..e74c484 --- /dev/null +++ b/man/npl_validate_output.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/npl_validate_output.R +\name{npl_validate_output} +\alias{npl_validate_output} +\title{Validate npl output data} +\usage{ +npl_validate_output(npl, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{npl}{output data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate npl output data +} diff --git a/man/npl_validate_raw.Rd b/man/npl_validate_raw.Rd new file mode 100644 index 0000000..ec2338c --- /dev/null +++ b/man/npl_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/npl_validate_raw.R +\name{npl_validate_raw} +\alias{npl_validate_raw} +\title{Validate npl raw data} +\usage{ +npl_validate_raw(npl, detail = getOption("pipaux.detail.raw")) +} +\arguments{ +\item{npl}{raw npl data, as loaded via \code{pipfun::load_from_gh}} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} +} +\description{ +Validate npl raw data +} From 3edaa7088411e7c51fbd3a6b0763b851662d5f01 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Wed, 13 
Mar 2024 10:35:40 -0400 Subject: [PATCH 09/23] Add 'detail' argument in higher level functions --- NAMESPACE | 2 +- R/get_error_validation.R | 1 - R/pip_countries.R | 8 ++++-- R/pip_country_list.R | 5 ++-- R/pip_cpi.R | 6 +++-- R/pip_cpi_update.R | 7 ++--- R/pip_gdm.R | 6 +++-- R/pip_gdm_update.R | 8 +++--- R/pip_gdp.R | 6 +++-- R/pip_gdp_update.R | 9 ++++--- R/pip_income_groups.R | 5 ++-- R/pip_maddison.R | 5 ++-- R/pip_metadata.R | 6 +++-- R/pip_metadata_update.R | 7 ++--- R/pip_npl.R | 7 ++--- R/pip_pce.R | 6 +++-- R/pip_pce_update.R | 9 ++++--- R/pip_pfw.R | 6 +++-- R/pip_pfw_update.R | 7 ++--- R/pip_pl.R | 5 ++-- R/pip_pop.R | 6 +++-- R/pip_pop_update.R | 12 +++++---- R/pip_ppp.R | 6 +++-- R/pip_ppp_update.R | 7 ++--- R/pip_wdi.R | 6 +++-- R/pip_wdi_update.R | 8 +++--- R/pip_weo.R | 9 ++++--- R/pop_validate_raw.R | 53 ++++++++++++++++++++++++++++++++++++++ R/spop_validate_raw.R | 2 +- R/wdi_validate_output.R | 35 ------------------------- R/weo_validate_output.R | 16 +++++------- man/pip_countries.Rd | 3 ++- man/pip_country_list.Rd | 3 ++- man/pip_cpi.Rd | 3 ++- man/pip_cpi_update.Rd | 3 ++- man/pip_gdm.Rd | 3 ++- man/pip_gdm_update.Rd | 3 ++- man/pip_gdp.Rd | 3 ++- man/pip_gdp_update.Rd | 3 ++- man/pip_income_groups.Rd | 3 ++- man/pip_maddison.Rd | 3 ++- man/pip_metadata.Rd | 3 ++- man/pip_metadata_update.Rd | 3 ++- man/pip_npl.Rd | 3 ++- man/pip_pce.Rd | 3 ++- man/pip_pce_update.Rd | 3 ++- man/pip_pfw.Rd | 3 ++- man/pip_pfw_update.Rd | 3 ++- man/pip_pl.Rd | 3 ++- man/pip_pop.Rd | 3 ++- man/pip_pop_update.Rd | 3 ++- man/pip_ppp.Rd | 3 ++- man/pip_ppp_update.Rd | 3 ++- man/pip_wdi.Rd | 3 ++- man/pip_wdi_update.Rd | 3 ++- man/pip_weo.Rd | 3 ++- man/pop_validate_raw.Rd | 16 ++++++++++++ man/wdi_validate_output.Rd | 16 ------------ man/weo_validate_output.Rd | 8 +++--- 59 files changed, 236 insertions(+), 160 deletions(-) create mode 100644 R/pop_validate_raw.R delete mode 100644 R/wdi_validate_output.R create mode 100644 man/pop_validate_raw.Rd delete mode 
100644 man/wdi_validate_output.Rd diff --git a/NAMESPACE b/NAMESPACE index e7977af..4f8ee37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ export(pip_weo) export(pip_weo_clean) export(pl_validate_output) export(pop_validate_output) +export(pop_validate_raw) export(popmain_validate_raw) export(ppp_validate_output) export(ppp_validate_raw) @@ -59,7 +60,6 @@ export(sna_fy_validate_raw) export(sna_validate_raw) export(spop_validate_raw) export(update_aux) -export(wdi_validate_output) export(wdi_validate_raw) export(weo_validate_output) export(weo_validate_raw) diff --git a/R/get_error_validation.R b/R/get_error_validation.R index c5f0c3c..aee7cfd 100644 --- a/R/get_error_validation.R +++ b/R/get_error_validation.R @@ -1,7 +1,6 @@ #' Get validation report data validation error report #' #' @param vlddata validation data -#' @import rlang #' #' @export get_error_validation <- function(vlddata, detail){ diff --git a/R/pip_countries.R b/R/pip_countries.R index d1dd26d..2f91d6c 100644 --- a/R/pip_countries.R +++ b/R/pip_countries.R @@ -10,7 +10,8 @@ pip_countries <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "countries" action <- match.arg(action) @@ -27,7 +28,7 @@ pip_countries <- function(action = c("update", "load"), ) # validate country list raw data - cl_validate_raw(cl) + cl_validate_raw(cl, detail = detail) pfw <- load_aux(measure = "pfw", maindir = maindir, @@ -48,6 +49,9 @@ pip_countries <- function(action = c("update", "load"), #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## save -------- + # validate country output data + countries_validate_output(countries, detail = detail) + if (branch == "main") { branch <- "" } diff --git a/R/pip_country_list.R b/R/pip_country_list.R index 6125e69..372a78e 100644 --- a/R/pip_country_list.R +++ b/R/pip_country_list.R @@ 
-17,7 +17,8 @@ pip_country_list <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) { measure <- "country_list" branch <- match.arg(branch) @@ -34,7 +35,7 @@ pip_country_list <- function(action = c("update", "load"), ) # validate country list raw data - cl_validate_raw(cl) + cl_validate_raw(cl, detail = detail) # Save if (branch == "main") { diff --git a/R/pip_cpi.R b/R/pip_cpi.R index 83ad978..55d96ce 100644 --- a/R/pip_cpi.R +++ b/R/pip_cpi.R @@ -16,7 +16,8 @@ pip_cpi <- function(action = c("update", "load"), force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { # ____________________________________________________________________________ # on.exit #### @@ -48,7 +49,8 @@ pip_cpi <- function(action = c("update", "load"), force = force, owner = owner, branch = branch, - tag = tag) + tag = tag, + detail = detail) } else { dt <- load_aux( diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R index c2690e9..c8927f1 100644 --- a/R/pip_cpi_update.R +++ b/R/pip_cpi_update.R @@ -6,7 +6,8 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { # ____________________________________________________________________________ # Set up #### @@ -26,7 +27,7 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, ) # validate cpi raw data - cpi_validate_raw(cpi) + cpi_validate_raw(cpi, detail = detail) # ____________________________________________________________________________ # Cleaning #### @@ -37,7 +38,7 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch) # 
validate cpi clean data before saving it - cpi_validate_output(cpi) + cpi_validate_output(cpi, detail = detail) # Save if (branch == "main") { diff --git a/R/pip_gdm.R b/R/pip_gdm.R index 447b991..70bafde 100644 --- a/R/pip_gdm.R +++ b/R/pip_gdm.R @@ -17,7 +17,8 @@ pip_gdm <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "gdm" branch <- match.arg(branch) @@ -29,7 +30,8 @@ pip_gdm <- function(action = c("update", "load"), maindir = maindir, owner = owner, branch = branch, - tag = tag) + tag = tag, + detail = detail) } else { dt <- load_aux( diff --git a/R/pip_gdm_update.R b/R/pip_gdm_update.R index af12d23..f7146ad 100644 --- a/R/pip_gdm_update.R +++ b/R/pip_gdm_update.R @@ -8,8 +8,8 @@ pip_gdm_update <- function(force = FALSE, owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) - ) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "gdm" branch <- match.arg(branch) @@ -22,7 +22,7 @@ pip_gdm_update <- function(force = FALSE, tag = tag) # validate gdm raw data - gdm_validate_raw(gdm) + gdm_validate_raw(gdm = df, detail = detail) # ____________________________________________________________________________ # Transform dataset #### @@ -207,7 +207,7 @@ pip_gdm_update <- function(force = FALSE, # ---- Save and sign ---- # validate gdm output data - gdm_validate_output(gdm) + gdm_validate_output(gdm = df, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_gdp.R b/R/pip_gdp.R index 855a7f4..7ab34f2 100644 --- a/R/pip_gdp.R +++ b/R/pip_gdp.R @@ -13,7 +13,8 @@ pip_gdp <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = "file") { + from = "file", + 
detail = getOption("pipaux.detail.raw")) { measure <- "gdp" branch <- match.arg(branch) @@ -26,7 +27,8 @@ pip_gdp <- function(action = c("update", "load"), owner = owner, branch = branch, tag = tag, - from = from) + from = from, + detail = detail) } else { dt <- load_aux( diff --git a/R/pip_gdp_update.R b/R/pip_gdp_update.R index d63e135..78ae89a 100644 --- a/R/pip_gdp_update.R +++ b/R/pip_gdp_update.R @@ -10,7 +10,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api")) { + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { branch <- match.arg(branch) measure <- "gdp" @@ -64,7 +65,7 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch ) # validate sna data - sna_validate_raw(sna) + sna_validate_raw(sna, detail = detail) sna_fy <- pipfun::load_from_gh( measure = "sna", @@ -73,7 +74,7 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, filename = "sna_metadata" ) # validate sna_fy data - sna_fy_validate_raw(sna_fy) + sna_fy_validate_raw(sna_fy, detail = detail) cl <- load_aux(maindir = maindir, measure = "country_list", @@ -277,6 +278,8 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, gdp <- gdp[country_code %in% cl$country_code] # ---- Save and sign ---- + # validate gdp output data + gdp_validate_output(gdp = gdp, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_income_groups.R b/R/pip_income_groups.R index 875a14e..748b4ef 100644 --- a/R/pip_income_groups.R +++ b/R/pip_income_groups.R @@ -11,7 +11,8 @@ pip_income_groups <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - class_branch = "master" + class_branch = "master", + detail = getOption("pipaux.detail.raw") ) { measure <- "income_groups" @@ -53,7 +54,7 @@ pip_income_groups <- function(action = 
c("update", "load"), ## save -------- # validate income group output data - incgroup_validate_output(ig) + incgroup_validate_output(incgroup = ig, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_maddison.R b/R/pip_maddison.R index 7712c66..8533cb9 100644 --- a/R/pip_maddison.R +++ b/R/pip_maddison.R @@ -11,7 +11,8 @@ pip_maddison <- function(action = c("update", "load"), force = FALSE, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "maddison" action <- match.arg(action) branch <- match.arg(branch) @@ -24,7 +25,7 @@ pip_maddison <- function(action = c("update", "load"), tag = tag ) # validate raw data - mpd_validate_raw(mpd) + mpd_validate_raw(mpd = mpd, detail = detail) # # validate output data # mpd_validate_output(mpd) diff --git a/R/pip_metadata.R b/R/pip_metadata.R index fc4473b..7488b91 100644 --- a/R/pip_metadata.R +++ b/R/pip_metadata.R @@ -10,7 +10,8 @@ pip_metadata <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "metadata" branch <- match.arg(branch) action <- match.arg(action) @@ -22,7 +23,8 @@ pip_metadata <- function(action = c("update", "load"), force = force, owner = owner, branch = branch, - tag = tag + tag = tag, + detail = detail ) } else { diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R index 56465fb..f813be1 100644 --- a/R/pip_metadata_update.R +++ b/R/pip_metadata_update.R @@ -8,7 +8,8 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "metadata" branch <- 
match.arg(branch) @@ -21,7 +22,7 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, tag = tag) # validate raw metdata data - metadata_validate_raw(metadata = df) + metadata_validate_raw(metadata = df, detail = detail) # Load pfw pfw <- load_aux(measure = "pfw", @@ -104,7 +105,7 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, ## ............................................................................ ## Save #### # validate raw metdata data - metadata_validate_output(metadata = df) + metadata_validate_output(metadata = df, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_npl.R b/R/pip_npl.R index 72b60df..58b8ccc 100644 --- a/R/pip_npl.R +++ b/R/pip_npl.R @@ -10,7 +10,8 @@ pip_npl <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## setup -------- @@ -30,7 +31,7 @@ pip_npl <- function(action = c("update", "load"), setDT() # validate npl raw data - npl_validate_raw(npl) + npl_validate_raw(npl = npl, detail = detail) setnames(x = npl, old = c("countrycode", "year", "vsi_pov_nahc_nc"), @@ -45,7 +46,7 @@ pip_npl <- function(action = c("update", "load"), ## save -------- # validate npl output data - npl_validate_output(npl) + npl_validate_output(npl = npl, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_pce.R b/R/pip_pce.R index e657e70..991a986 100644 --- a/R/pip_pce.R +++ b/R/pip_pce.R @@ -12,7 +12,8 @@ pip_pce <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api")) { + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { measure <- "pce" branch <- match.arg(branch) action <- match.arg(action) @@ -23,7 +24,8 @@ pip_pce <- 
function(action = c("update", "load"), owner = owner, branch = branch, tag = tag, - from = from) + from = from, + detail = detail) } else { dt <- load_aux( diff --git a/R/pip_pce_update.R b/R/pip_pce_update.R index 5dde93c..ba5a9c8 100644 --- a/R/pip_pce_update.R +++ b/R/pip_pce_update.R @@ -10,7 +10,8 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api")) { + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { measure <- "pce" branch <- match.arg(branch) from <- match.arg(from) @@ -40,7 +41,7 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ) # validate sna data - sna_validate_raw(sna) + sna_validate_raw(sna = sna, detail = detail) sna_fy <- pipfun::load_from_gh( measure = "sna", @@ -49,7 +50,7 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, filename = "sna_metadata" ) # validate sna_fy data - sna_fy_validate_raw(sna_fy) + sna_fy_validate_raw(sna_fy = sna_fy, detail = detail) # ____________________________________________________________________________ # Clean PCE from WDI #### @@ -229,7 +230,7 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, ## ---- Sign and save ---- # validate pce output data - pce_validate_output(pce) + pce_validate_output(pce = pce, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_pfw.R b/R/pip_pfw.R index 2a22664..9434390 100644 --- a/R/pip_pfw.R +++ b/R/pip_pfw.R @@ -14,7 +14,8 @@ pip_pfw <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "pfw" branch <- match.arg(branch) action <- match.arg(action) @@ -24,7 +25,8 @@ pip_pfw <- function(action = c("update", "load"), force = force, owner = owner, branch = branch, - tag = tag) + tag = 
tag, + detail = detail) } else { diff --git a/R/pip_pfw_update.R b/R/pip_pfw_update.R index 772bd79..e700e12 100644 --- a/R/pip_pfw_update.R +++ b/R/pip_pfw_update.R @@ -7,7 +7,8 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "pfw" branch <- match.arg(branch) @@ -18,7 +19,7 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch, ext = "dta") # validate pfw raw data - pfw_validate_raw(pfw) + pfw_validate_raw(pfw = pfw, detail = detail) # Clean data pfw <- pip_pfw_clean(pfw, @@ -26,7 +27,7 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, branch = branch) # validate pfw raw data - pfw_validate_output(pfw) + pfw_validate_output(pfw = pfw, detail = detail) # Save dataset if (branch == "main") { diff --git a/R/pip_pl.R b/R/pip_pl.R index 2f0753c..b97d472 100644 --- a/R/pip_pl.R +++ b/R/pip_pl.R @@ -10,7 +10,8 @@ pip_pl <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) { measure <- "pl" @@ -34,7 +35,7 @@ pip_pl <- function(action = c("update", "load"), # Save # validate pl clean data - pl_validate_output(pl = dt) + pl_validate_output(pl = dt, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_pop.R b/R/pip_pop.R index 7cee55f..65079e6 100644 --- a/R/pip_pop.R +++ b/R/pip_pop.R @@ -12,7 +12,8 @@ pip_pop <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "pop" from <- tolower(from) action <- match.arg(action) @@ -24,7 +25,8 @@ pip_pop 
<- function(action = c("update", "load"), maindir = maindir, owner = owner, branch = branch, - tag = tag ) + tag = tag, + detail = detail) } else { diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index 75a97d1..bd8a125 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -7,7 +7,8 @@ pip_pop_update <- function(force = FALSE, maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { # Check arguments from <- match.arg(from) @@ -35,7 +36,8 @@ pip_pop_update <- function(force = FALSE, return_wide = FALSE) |> setDT() - + # validate wb pop data + pop_validate_raw(pop = pop, detail = detail) # rename vars pop <- pop[, c("iso3c", "date", "indicator_id", "value")] @@ -92,7 +94,7 @@ pip_pop_update <- function(force = FALSE, clean_from_wide() # validate pop main raw data - popmain_validate_raw(pop_main) + popmain_validate_raw(pop_main = pop_main, detail = detail) ### Ger special cases --------- spop <- pipfun::load_from_gh( @@ -106,7 +108,7 @@ pip_pop_update <- function(force = FALSE, clean_from_wide() # validate special cases pop raw data - spop_validate_raw(spop) + spop_validate_raw(spop = spop, detail = detail) pop <- joyn::joyn(pop_main, spop, by = c("country_code", "year", "pop_data_level"), @@ -171,7 +173,7 @@ pip_pop_update <- function(force = FALSE, #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # validate output pop data - pop_validate_output(pop) + pop_validate_output(pop = pop, detail = detail) # Save if (branch == "main") { diff --git a/R/pip_ppp.R b/R/pip_ppp.R index 89f2b96..b09d283 100644 --- a/R/pip_ppp.R +++ b/R/pip_ppp.R @@ -11,7 +11,8 @@ pip_ppp <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), force = FALSE, - tag = branch) { + tag = branch, + detail = getOption("pipaux.detail.raw")) { # 
____________________________________________________________________________ # on.exit #### @@ -43,7 +44,8 @@ pip_ppp <- function(action = c("update", "load"), force = force, owner = owner, branch = branch, - tag = tag) + tag = tag, + detail = detail) } else { load_aux( diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R index a04470f..b95b7f7 100644 --- a/R/pip_ppp_update.R +++ b/R/pip_ppp_update.R @@ -6,7 +6,8 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { # ____________________________________________________________________________ @@ -27,7 +28,7 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, ) # validate ppp raw data - ppp_validate_raw(ppp) + ppp_validate_raw(ppp = ppp, detail = detail) # ____________________________________________________________________________ # cleaning #### @@ -62,7 +63,7 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, # Saving #### # validate ppp output data - ppp_validate_output(ppp) + ppp_validate_output(ppp = ppp, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pip_wdi.R b/R/pip_wdi.R index 7e5168c..08eda65 100644 --- a/R/pip_wdi.R +++ b/R/pip_wdi.R @@ -13,7 +13,8 @@ pip_wdi <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api")) { + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { measure <- "wdi" branch <- match.arg(branch) @@ -26,7 +27,8 @@ pip_wdi <- function(action = c("update", "load"), owner = owner, branch = branch, tag = tag, - from = from) + from = from, + detail = detail) } else { dt <- load_aux( diff --git a/R/pip_wdi_update.R b/R/pip_wdi_update.R index 9ec63ae..3c5ddc5 100644 --- a/R/pip_wdi_update.R +++ b/R/pip_wdi_update.R @@ -13,7 +13,8 @@ 
pip_wdi_update <- function(force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api")) { + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw")) { from <- match.arg(from) @@ -49,14 +50,11 @@ pip_wdi_update <- function(force = FALSE, ) } # validate wdi raw data - wdi_validate_raw(wdi) + wdi_validate_raw(wdi = wdi, detail = detail) # _________________________________________________________________________ # Save and Return #### - # validate wdi output data - wdi_validate_output(wdi) - if (branch == "main") { branch <- "" } diff --git a/R/pip_weo.R b/R/pip_weo.R index c3c1ad0..f8a3ce8 100644 --- a/R/pip_weo.R +++ b/R/pip_weo.R @@ -17,7 +17,8 @@ pip_weo <- function(action = c("update", "load"), owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch)) { + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw")) { measure <- "weo" branch <- match.arg(branch) action <- match.arg(action) @@ -35,15 +36,15 @@ pip_weo <- function(action = c("update", "load"), ) # validate weo raw data - weo_validate_raw(weo) + weo_validate_raw(weo = dt, detail = detail) dt <- pip_weo_clean(dt, maindir = maindir, branch = branch) # Save dataset - # validate weo raw data - weo_validate_output(weo) + # validate weo clean data + weo_validate_output(weo = dt, detail = detail) if (branch == "main") { branch <- "" diff --git a/R/pop_validate_raw.R b/R/pop_validate_raw.R new file mode 100644 index 0000000..8ddc02f --- /dev/null +++ b/R/pop_validate_raw.R @@ -0,0 +1,53 @@ +#' Validate pop raw data download from wdi +#' +#' @param spop raw pop data, as loaded via `wbstats::wb_data` +#' @param detail has an option TRUE/FALSE, default value is FALSE +#' @import data.validator +#' @importFrom assertr in_set not_na is_uniq +#' +#' @export +pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ + + 
stopifnot("WB POP raw data is not loaded" = !is.null(pop)) + + report <- data_validation_report() + + validate(pop, name = "WB POP raw data validation") |> + validate_if(is.character(indicator_id), + description = "`indicator_id` should be character") |> + validate_cols(in_set(c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL")), + indicator_id, description = "`indicator_id` values within range") |> + validate_if(is.character(indicator), + description = "`indicator` should be character") |> + validate_if(is.character(iso2c), + description = "`iso2c` should be character") |> + validate_if(is.character(iso3c), + description = "`iso3c` should be character") |> + validate_if(is.character(country), + description = "`country` should be character") |> + validate_if(is.numeric(date), + description = "`date` should be numeric") |> + validate_if(is.numeric(value), + description = "`value` should be numeric") |> + validate_if(is.character(unit), + description = "`unit` should be character") |> + validate_if(is.character(obs_status), + description = "`obs_status` should be character") |> + validate_if(is.character(footnote), + description = "`footnote` should be character") |> + validate_if(is_date(last_updated), + description = "`last_updated` should be date") |> + validate_cols(not_na, indicator_id, iso3c, date, + description = "no missing values in key variables") |> + validate_if(is_uniq(indicator_id, iso3c, date), + description = "no duplicate records in key variables") |> + add_results(report) + + validation_record <- get_results(report, unnest = FALSE) |> + setDT() + + if (any(validation_record[["type"]] == "error")){ + get_error_validation(validation_record, detail) + } + +} diff --git a/R/spop_validate_raw.R b/R/spop_validate_raw.R index c075567..b05b2c4 100644 --- a/R/spop_validate_raw.R +++ b/R/spop_validate_raw.R @@ -12,7 +12,7 @@ spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ report <- data_validation_report() - validate(spop, name = 
"POP output data validation") |> + validate(spop, name = "Special POP raw data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> validate_if(is.numeric(year), diff --git a/R/wdi_validate_output.R b/R/wdi_validate_output.R deleted file mode 100644 index 01b1c1b..0000000 --- a/R/wdi_validate_output.R +++ /dev/null @@ -1,35 +0,0 @@ -#' Validate output weo data -#' -#' @param weo output weo data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' -#' @export -weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ - - stopifnot("WEO output data is not loaded" = !is.null(weo)) - - report <- data_validation_report() - - validate(weo, name = "WEO output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(weo_gdp), - description = "`weo_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R index a8fdad8..fcde15a 100644 --- a/R/weo_validate_output.R +++ b/R/weo_validate_output.R @@ -1,26 +1,24 @@ -#' Validate output wdi data +#' Validate clean weo data #' -#' @param wdi output wdi data +#' @param weo clean weo data #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq #' #' @export -wdi_validate_output <- 
function(wdi, detail = getOption("pipaux.detail.output")){ +weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ - stopifnot("WDI output data is not loaded" = !is.null(wdi)) + stopifnot("WEO output data is not loaded" = !is.null(weo)) report <- data_validation_report() - validate(wdi, name = "WDI output data validation") |> + validate(weo, name = "WEO output data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> validate_if(is.numeric(year), description = "`year` should be numeric") |> - validate_if(is.numeric(NE.CON.PRVT.PC.KD), - description = "`NE.CON.PRVT.PC.KD` should be numeric") |> - validate_if(is.numeric(NY.GDP.PCAP.KD), - description = "`NY.GDP.PCAP.KD` should be numeric") |> + validate_if(is.numeric(weo_gdp), + description = "`weo_gdp` should be numeric") |> validate_cols(not_na, country_code, year, description = "no missing values in key variables") |> validate_if(is_uniq(country_code, year), diff --git a/man/pip_countries.Rd b/man/pip_countries.Rd index df445f8..ab5d795 100644 --- a/man/pip_countries.Rd +++ b/man/pip_countries.Rd @@ -10,7 +10,8 @@ pip_countries( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_country_list.Rd b/man/pip_country_list.Rd index fbc7794..6aa643f 100644 --- a/man/pip_country_list.Rd +++ b/man/pip_country_list.Rd @@ -10,7 +10,8 @@ pip_country_list( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_cpi.Rd b/man/pip_cpi.Rd index 1f93dda..219c79f 100644 --- a/man/pip_cpi.Rd +++ b/man/pip_cpi.Rd @@ -10,7 +10,8 @@ pip_cpi( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", 
"PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_cpi_update.Rd b/man/pip_cpi_update.Rd index ece197f..841c71c 100644 --- a/man/pip_cpi_update.Rd +++ b/man/pip_cpi_update.Rd @@ -9,7 +9,8 @@ pip_cpi_update( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_gdm.Rd b/man/pip_gdm.Rd index e1ad282..d980379 100644 --- a/man/pip_gdm.Rd +++ b/man/pip_gdm.Rd @@ -10,7 +10,8 @@ pip_gdm( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_gdm_update.Rd b/man/pip_gdm_update.Rd index 8bb6a68..8af9c6a 100644 --- a/man/pip_gdm_update.Rd +++ b/man/pip_gdm_update.Rd @@ -9,7 +9,8 @@ pip_gdm_update( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_gdp.Rd b/man/pip_gdp.Rd index 7ec0890..a3bf8c2 100644 --- a/man/pip_gdp.Rd +++ b/man/pip_gdp.Rd @@ -11,7 +11,8 @@ pip_gdp( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = "file" + from = "file", + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_gdp_update.Rd b/man/pip_gdp_update.Rd index b9651cb..037d12a 100644 --- a/man/pip_gdp_update.Rd +++ b/man/pip_gdp_update.Rd @@ -10,7 +10,8 @@ pip_gdp_update( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api") + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git 
a/man/pip_income_groups.Rd b/man/pip_income_groups.Rd index e0c2cf7..16cd767 100644 --- a/man/pip_income_groups.Rd +++ b/man/pip_income_groups.Rd @@ -10,7 +10,8 @@ pip_income_groups( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - class_branch = "master" + class_branch = "master", + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_maddison.Rd b/man/pip_maddison.Rd index 7fb55eb..8034a08 100644 --- a/man/pip_maddison.Rd +++ b/man/pip_maddison.Rd @@ -10,7 +10,8 @@ pip_maddison( force = FALSE, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_metadata.Rd b/man/pip_metadata.Rd index 203bbaa..efb6e22 100644 --- a/man/pip_metadata.Rd +++ b/man/pip_metadata.Rd @@ -10,7 +10,8 @@ pip_metadata( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_metadata_update.Rd b/man/pip_metadata_update.Rd index e1f5308..5c275ba 100644 --- a/man/pip_metadata_update.Rd +++ b/man/pip_metadata_update.Rd @@ -9,7 +9,8 @@ pip_metadata_update( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_npl.Rd b/man/pip_npl.Rd index a2da5db..5056be5 100644 --- a/man/pip_npl.Rd +++ b/man/pip_npl.Rd @@ -10,7 +10,8 @@ pip_npl( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pce.Rd b/man/pip_pce.Rd index f7d89b3..909f28a 100644 --- a/man/pip_pce.Rd +++ 
b/man/pip_pce.Rd @@ -11,7 +11,8 @@ pip_pce( maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api") + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pce_update.Rd b/man/pip_pce_update.Rd index fc5e702..513e0ca 100644 --- a/man/pip_pce_update.Rd +++ b/man/pip_pce_update.Rd @@ -10,7 +10,8 @@ pip_pce_update( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api") + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pfw.Rd b/man/pip_pfw.Rd index 7d3a7ab..d1986d1 100644 --- a/man/pip_pfw.Rd +++ b/man/pip_pfw.Rd @@ -10,7 +10,8 @@ pip_pfw( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pfw_update.Rd b/man/pip_pfw_update.Rd index dfdeb2c..3dc843f 100644 --- a/man/pip_pfw_update.Rd +++ b/man/pip_pfw_update.Rd @@ -9,7 +9,8 @@ pip_pfw_update( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pl.Rd b/man/pip_pl.Rd index 9a0733a..06c0c7e 100644 --- a/man/pip_pl.Rd +++ b/man/pip_pl.Rd @@ -10,7 +10,8 @@ pip_pl( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pop.Rd b/man/pip_pop.Rd index 8e2c974..f659b08 100644 --- a/man/pip_pop.Rd +++ b/man/pip_pop.Rd @@ -11,7 +11,8 @@ pip_pop( maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = 
match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pop_update.Rd b/man/pip_pop_update.Rd index d472d70..3216208 100644 --- a/man/pip_pop_update.Rd +++ b/man/pip_pop_update.Rd @@ -10,7 +10,8 @@ pip_pop_update( maindir = gls$PIP_DATA_DIR, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_ppp.Rd b/man/pip_ppp.Rd index 072a6af..e50fa35 100644 --- a/man/pip_ppp.Rd +++ b/man/pip_ppp.Rd @@ -10,7 +10,8 @@ pip_ppp( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), force = FALSE, - tag = branch + tag = branch, + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_ppp_update.Rd b/man/pip_ppp_update.Rd index a84e3e5..2faedb9 100644 --- a/man/pip_ppp_update.Rd +++ b/man/pip_ppp_update.Rd @@ -9,7 +9,8 @@ pip_ppp_update( force = FALSE, owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_wdi.Rd b/man/pip_wdi.Rd index 4a77bfa..259c609 100644 --- a/man/pip_wdi.Rd +++ b/man/pip_wdi.Rd @@ -11,7 +11,8 @@ pip_wdi( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api") + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_wdi_update.Rd b/man/pip_wdi_update.Rd index 26b08e5..20862f9 100644 --- a/man/pip_wdi_update.Rd +++ b/man/pip_wdi_update.Rd @@ -10,7 +10,8 @@ pip_wdi_update( owner = getOption("pipfun.ghowner"), branch = c("DEV", "PROD", "main"), tag = match.arg(branch), - from = c("gh", "file", "api") + from = c("gh", "file", "api"), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_weo.Rd b/man/pip_weo.Rd 
index 7c0d11f..dcc654c 100644 --- a/man/pip_weo.Rd +++ b/man/pip_weo.Rd @@ -10,7 +10,8 @@ pip_weo( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch) + tag = match.arg(branch), + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pop_validate_raw.Rd b/man/pop_validate_raw.Rd new file mode 100644 index 0000000..e81ab08 --- /dev/null +++ b/man/pop_validate_raw.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pop_validate_raw.R +\name{pop_validate_raw} +\alias{pop_validate_raw} +\title{Validate pop raw data download from wdi} +\usage{ +pop_validate_raw(pop, detail = getOption("pipaux.detail.output")) +} +\arguments{ +\item{detail}{has an option TRUE/FALSE, default value is FALSE} + +\item{spop}{raw pop data, as loaded via \code{wbstats::wb_data}} +} +\description{ +Validate pop raw data download from wdi +} diff --git a/man/wdi_validate_output.Rd b/man/wdi_validate_output.Rd deleted file mode 100644 index 03a163c..0000000 --- a/man/wdi_validate_output.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/weo_validate_output.R -\name{wdi_validate_output} -\alias{wdi_validate_output} -\title{Validate output wdi data} -\usage{ -wdi_validate_output(wdi, detail = getOption("pipaux.detail.output")) -} -\arguments{ -\item{wdi}{output wdi data} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Validate output wdi data -} diff --git a/man/weo_validate_output.Rd b/man/weo_validate_output.Rd index 68e06e8..633e54b 100644 --- a/man/weo_validate_output.Rd +++ b/man/weo_validate_output.Rd @@ -1,16 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wdi_validate_output.R +% Please edit documentation in R/weo_validate_output.R \name{weo_validate_output} \alias{weo_validate_output} -\title{Validate output weo 
data} +\title{Validate clean weo data} \usage{ weo_validate_output(weo, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{weo}{output weo data} +\item{weo}{clean weo data} \item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ -Validate output weo data +Validate clean weo data } From c8c6989fc1d1b6f1bb038ede2d0a61a805a45fd4 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 14 Mar 2024 13:58:47 -0400 Subject: [PATCH 10/23] Add 'detail' argument in higher level functions --- R/get_error_validation.R | 1 + R/pip_countries.R | 1 + R/pip_country_list.R | 1 + R/pip_cpi.R | 1 + R/pip_gdm.R | 1 + R/pip_gdp.R | 1 + R/pip_income_groups.R | 1 + R/pip_maddison.R | 1 + R/pip_metadata.R | 1 + R/pip_metadata_update.R | 1 + R/pip_npl.R | 1 + R/pip_pce.R | 1 + R/pip_pfw.R | 1 + R/pip_pl.R | 1 + R/pip_pop.R | 1 + R/pip_pop_update.R | 2 ++ R/pip_ppp.R | 1 + R/pip_wdi.R | 1 + R/pip_wdi_update.R | 1 + R/pip_weo.R | 1 + R/pop_validate_raw.R | 2 +- man/get_error_validation.Rd | 2 ++ man/pip_countries.Rd | 2 ++ man/pip_country_list.Rd | 2 ++ man/pip_cpi.Rd | 2 ++ man/pip_cpi_update.Rd | 2 ++ man/pip_gdm.Rd | 2 ++ man/pip_gdm_update.Rd | 2 ++ man/pip_gdp.Rd | 2 ++ man/pip_gdp_update.Rd | 2 ++ man/pip_income_groups.Rd | 2 ++ man/pip_maddison.Rd | 2 ++ man/pip_metadata.Rd | 2 ++ man/pip_metadata_update.Rd | 2 ++ man/pip_npl.Rd | 2 ++ man/pip_pce.Rd | 2 ++ man/pip_pce_update.Rd | 2 ++ man/pip_pfw.Rd | 2 ++ man/pip_pfw_update.Rd | 2 ++ man/pip_pl.Rd | 2 ++ man/pip_pop.Rd | 2 ++ man/pip_pop_update.Rd | 2 ++ man/pip_ppp.Rd | 2 ++ man/pip_wdi.Rd | 2 ++ man/pip_wdi_update.Rd | 2 ++ man/pip_weo.Rd | 2 ++ man/pop_validate_raw.Rd | 4 ++-- 47 files changed, 74 insertions(+), 3 deletions(-) diff --git a/R/get_error_validation.R b/R/get_error_validation.R index aee7cfd..ac8fd4a 100644 --- a/R/get_error_validation.R +++ b/R/get_error_validation.R @@ -1,6 +1,7 @@ #' Get validation report data validation error report #' #' @param vlddata validation data +#' @param 
detail has an option TRUE/FALSE, default value is FALSE #' #' @export get_error_validation <- function(vlddata, detail){ diff --git a/R/pip_countries.R b/R/pip_countries.R index 2f91d6c..62c1243 100644 --- a/R/pip_countries.R +++ b/R/pip_countries.R @@ -3,6 +3,7 @@ #' Update or load a dataset with countries. #' #' @inheritParams pip_cpi +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh #' @export pip_countries <- function(action = c("update", "load"), diff --git a/R/pip_country_list.R b/R/pip_country_list.R index 372a78e..3a19787 100644 --- a/R/pip_country_list.R +++ b/R/pip_country_list.R @@ -8,6 +8,7 @@ #' #' The dependency on the PCN Masterfile should be changed in the future. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_cpi.R b/R/pip_cpi.R index 55d96ce..6303ed2 100644 --- a/R/pip_cpi.R +++ b/R/pip_cpi.R @@ -7,6 +7,7 @@ #' memory. #' @param maindir character: Main directory of project. #' @param force logical: If TRUE data will be overwritten. +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh #' #' @export diff --git a/R/pip_gdm.R b/R/pip_gdm.R index 70bafde..3fc7dfc 100644 --- a/R/pip_gdm.R +++ b/R/pip_gdm.R @@ -10,6 +10,7 @@ #' The dependency on the PCN Masterfile should be changed in the future. #' #' @inheritParams pip_cpi +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh #' @export pip_gdm <- function(action = c("update", "load"), diff --git a/R/pip_gdp.R b/R/pip_gdp.R index 7ab34f2..b012729 100644 --- a/R/pip_gdp.R +++ b/R/pip_gdp.R @@ -2,6 +2,7 @@ #' #' Update or load GDP data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". 
Default is "gh". "file" diff --git a/R/pip_income_groups.R b/R/pip_income_groups.R index 748b4ef..29743ee 100644 --- a/R/pip_income_groups.R +++ b/R/pip_income_groups.R @@ -3,6 +3,7 @@ #' Update or load a dataset with historical income groups. The raw files are not #' available in the PIP-Technical-Team group but in the Povcalnet-team group. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_cpi #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_maddison.R b/R/pip_maddison.R index 8533cb9..d0cab00 100644 --- a/R/pip_maddison.R +++ b/R/pip_maddison.R @@ -2,6 +2,7 @@ #' #' Load or update data from the Maddison project. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_metadata.R b/R/pip_metadata.R index 7488b91..b82b52f 100644 --- a/R/pip_metadata.R +++ b/R/pip_metadata.R @@ -2,6 +2,7 @@ #' #' Update or load a dataset with survey metadata. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams load_raw_indicators #' @export diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R index f813be1..8bf7cf4 100644 --- a/R/pip_metadata_update.R +++ b/R/pip_metadata_update.R @@ -1,5 +1,6 @@ #' Update metadata file #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pipfun::load_from_gh #' @inheritParams pip_metadata #' @return logical. TRUE if saved correctly. 
FALSE if error happened diff --git a/R/pip_npl.R b/R/pip_npl.R index 58b8ccc..4de68e2 100644 --- a/R/pip_npl.R +++ b/R/pip_npl.R @@ -2,6 +2,7 @@ #' #' Update series of national poverty lines #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_cpi #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_pce.R b/R/pip_pce.R index 991a986..c19d26d 100644 --- a/R/pip_pce.R +++ b/R/pip_pce.R @@ -2,6 +2,7 @@ #' #' Load or update PCE data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_gdp #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh diff --git a/R/pip_pfw.R b/R/pip_pfw.R index 9434390..3c20bff 100644 --- a/R/pip_pfw.R +++ b/R/pip_pfw.R @@ -2,6 +2,7 @@ #' #' Load or update PIP Price Framework data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @param action character: Either "load" or "update". Default is "update". If #' "update" data will be updated on the system. If "load" data is loaded in memory. #' @param maindir character: Main directory of project. diff --git a/R/pip_pl.R b/R/pip_pl.R index b97d472..61c4c15 100644 --- a/R/pip_pl.R +++ b/R/pip_pl.R @@ -2,6 +2,7 @@ #' #' Update or load a dataset with poverty lines. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_pop.R b/R/pip_pop.R index 65079e6..4b0ec7e 100644 --- a/R/pip_pop.R +++ b/R/pip_pop.R @@ -2,6 +2,7 @@ #' #' Load or update population data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_cpi #' @inheritParams pipfun::load_from_gh #' @param from character: Source for population data. 
diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index bd8a125..953bf4d 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -1,6 +1,8 @@ #' Update POP #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @param from character: Source for population data. +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pop pip_pop_update <- function(force = FALSE, from = c("gh", "file", "api"), diff --git a/R/pip_ppp.R b/R/pip_ppp.R index b09d283..63f1eb8 100644 --- a/R/pip_ppp.R +++ b/R/pip_ppp.R @@ -2,6 +2,7 @@ #' #' Load or update PPP data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pip_wdi.R b/R/pip_wdi.R index 08eda65..af5073f 100644 --- a/R/pip_wdi.R +++ b/R/pip_wdi.R @@ -2,6 +2,7 @@ #' #' Update or load wdi data. #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @param from character: Either "gh", "file" or "api". Default is "gh". "file" diff --git a/R/pip_wdi_update.R b/R/pip_wdi_update.R index 3c5ddc5..8bdf833 100644 --- a/R/pip_wdi_update.R +++ b/R/pip_wdi_update.R @@ -2,6 +2,7 @@ #' #' GDP and HFCE data from WDI. It could be either from API or from file #' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_gdp #' @return data.table with gdp and pce variables #' @export diff --git a/R/pip_weo.R b/R/pip_weo.R index f8a3ce8..b51301e 100644 --- a/R/pip_weo.R +++ b/R/pip_weo.R @@ -9,6 +9,7 @@ #' `pip_weo()`. Hopefully in the future IMF will stop using an `.xls` file #' that's not really xls. 
#' +#' @param detail has an option TRUE/FALSE, default value is FALSE #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @export diff --git a/R/pop_validate_raw.R b/R/pop_validate_raw.R index 8ddc02f..01a59c8 100644 --- a/R/pop_validate_raw.R +++ b/R/pop_validate_raw.R @@ -1,6 +1,6 @@ #' Validate pop raw data download from wdi #' -#' @param spop raw pop data, as loaded via `wbstats::wb_data` +#' @param pop raw pop data, as loaded via `wbstats::wb_data` #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq diff --git a/man/get_error_validation.Rd b/man/get_error_validation.Rd index 559c64c..397ab01 100644 --- a/man/get_error_validation.Rd +++ b/man/get_error_validation.Rd @@ -8,6 +8,8 @@ get_error_validation(vlddata, detail) } \arguments{ \item{vlddata}{validation data} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Get validation report data validation error report diff --git a/man/pip_countries.Rd b/man/pip_countries.Rd index ab5d795..faa57d5 100644 --- a/man/pip_countries.Rd +++ b/man/pip_countries.Rd @@ -30,6 +30,8 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load a dataset with countries. 
diff --git a/man/pip_country_list.Rd b/man/pip_country_list.Rd index 6aa643f..cc724b7 100644 --- a/man/pip_country_list.Rd +++ b/man/pip_country_list.Rd @@ -29,6 +29,8 @@ pip_country_list( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \value{ logical if \code{action = "update"} or data.table if \code{action = "load"} diff --git a/man/pip_cpi.Rd b/man/pip_cpi.Rd index 219c79f..806c297 100644 --- a/man/pip_cpi.Rd +++ b/man/pip_cpi.Rd @@ -30,6 +30,8 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update PIP CPI data. diff --git a/man/pip_cpi_update.Rd b/man/pip_cpi_update.Rd index 841c71c..0e9254d 100644 --- a/man/pip_cpi_update.Rd +++ b/man/pip_cpi_update.Rd @@ -25,6 +25,8 @@ pip_cpi_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update CPI diff --git a/man/pip_gdm.Rd b/man/pip_gdm.Rd index d980379..232b43b 100644 --- a/man/pip_gdm.Rd +++ b/man/pip_gdm.Rd @@ -30,6 +30,8 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update grouped data means dataset from PovcalNet Masterfile. 
See diff --git a/man/pip_gdm_update.Rd b/man/pip_gdm_update.Rd index 8af9c6a..7fda4b1 100644 --- a/man/pip_gdm_update.Rd +++ b/man/pip_gdm_update.Rd @@ -25,6 +25,8 @@ pip_gdm_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update GDM data using the PovcalNet Masterfile. diff --git a/man/pip_gdp.Rd b/man/pip_gdp.Rd index a3bf8c2..9c49cd1 100644 --- a/man/pip_gdp.Rd +++ b/man/pip_gdp.Rd @@ -33,6 +33,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load GDP data. diff --git a/man/pip_gdp_update.Rd b/man/pip_gdp_update.Rd index 037d12a..16eb090 100644 --- a/man/pip_gdp_update.Rd +++ b/man/pip_gdp_update.Rd @@ -29,6 +29,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update GDP data using WDI, Maddison and Special cases. diff --git a/man/pip_income_groups.Rd b/man/pip_income_groups.Rd index 16cd767..100ad5e 100644 --- a/man/pip_income_groups.Rd +++ b/man/pip_income_groups.Rd @@ -28,6 +28,8 @@ memory.} \item{branch}{character: either "DEV" or "PROD". Refers to the branch that will be used to update either the development server or production.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load a dataset with historical income groups. 
The raw files are not diff --git a/man/pip_maddison.Rd b/man/pip_maddison.Rd index 8034a08..75b905a 100644 --- a/man/pip_maddison.Rd +++ b/man/pip_maddison.Rd @@ -29,6 +29,8 @@ pip_maddison( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update data from the Maddison project. diff --git a/man/pip_metadata.Rd b/man/pip_metadata.Rd index efb6e22..8d94cdb 100644 --- a/man/pip_metadata.Rd +++ b/man/pip_metadata.Rd @@ -29,6 +29,8 @@ pip_metadata( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load a dataset with survey metadata. diff --git a/man/pip_metadata_update.Rd b/man/pip_metadata_update.Rd index 5c275ba..53b34bc 100644 --- a/man/pip_metadata_update.Rd +++ b/man/pip_metadata_update.Rd @@ -25,6 +25,8 @@ pip_metadata_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \value{ logical. TRUE if saved correctly. 
FALSE if error happened diff --git a/man/pip_npl.Rd b/man/pip_npl.Rd index 5056be5..d73dd03 100644 --- a/man/pip_npl.Rd +++ b/man/pip_npl.Rd @@ -30,6 +30,8 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update series of national poverty lines diff --git a/man/pip_pce.Rd b/man/pip_pce.Rd index 909f28a..92aff3a 100644 --- a/man/pip_pce.Rd +++ b/man/pip_pce.Rd @@ -33,6 +33,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update PCE data. diff --git a/man/pip_pce_update.Rd b/man/pip_pce_update.Rd index 513e0ca..660a846 100644 --- a/man/pip_pce_update.Rd +++ b/man/pip_pce_update.Rd @@ -29,6 +29,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update PCE data using WDI and Special cases. diff --git a/man/pip_pfw.Rd b/man/pip_pfw.Rd index d1986d1..d2f8a6e 100644 --- a/man/pip_pfw.Rd +++ b/man/pip_pfw.Rd @@ -29,6 +29,8 @@ pip_pfw( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update PIP Price Framework data. 
diff --git a/man/pip_pfw_update.Rd b/man/pip_pfw_update.Rd index 3dc843f..966833a 100644 --- a/man/pip_pfw_update.Rd +++ b/man/pip_pfw_update.Rd @@ -25,6 +25,8 @@ pip_pfw_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update PFW diff --git a/man/pip_pl.Rd b/man/pip_pl.Rd index 06c0c7e..7c556e5 100644 --- a/man/pip_pl.Rd +++ b/man/pip_pl.Rd @@ -29,6 +29,8 @@ pip_pl( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load a dataset with poverty lines. diff --git a/man/pip_pop.Rd b/man/pip_pop.Rd index f659b08..a20a57a 100644 --- a/man/pip_pop.Rd +++ b/man/pip_pop.Rd @@ -33,6 +33,8 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update population data. 
diff --git a/man/pip_pop_update.Rd b/man/pip_pop_update.Rd index 3216208..054bd38 100644 --- a/man/pip_pop_update.Rd +++ b/man/pip_pop_update.Rd @@ -28,6 +28,8 @@ pip_pop_update( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update POP diff --git a/man/pip_ppp.Rd b/man/pip_ppp.Rd index e50fa35..6333c4d 100644 --- a/man/pip_ppp.Rd +++ b/man/pip_ppp.Rd @@ -29,6 +29,8 @@ will be used to update either the development server or production.} \item{force}{logical: If TRUE data will be overwritten.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Load or update PPP data. diff --git a/man/pip_wdi.Rd b/man/pip_wdi.Rd index 259c609..ea9a06d 100644 --- a/man/pip_wdi.Rd +++ b/man/pip_wdi.Rd @@ -33,6 +33,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". "file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load wdi data. diff --git a/man/pip_wdi_update.Rd b/man/pip_wdi_update.Rd index 20862f9..9f0bba3 100644 --- a/man/pip_wdi_update.Rd +++ b/man/pip_wdi_update.Rd @@ -29,6 +29,8 @@ will be used to update either the development server or production.} \item{from}{character: Either "gh", "file" or "api". Default is "gh". 
"file" and "gh" are synonymous} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \value{ data.table with gdp and pce variables diff --git a/man/pip_weo.Rd b/man/pip_weo.Rd index dcc654c..2d55d0b 100644 --- a/man/pip_weo.Rd +++ b/man/pip_weo.Rd @@ -29,6 +29,8 @@ pip_weo( will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} + +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Create a dataset with GDP data from World Economic Outlook. diff --git a/man/pop_validate_raw.Rd b/man/pop_validate_raw.Rd index e81ab08..34bd1bc 100644 --- a/man/pop_validate_raw.Rd +++ b/man/pop_validate_raw.Rd @@ -7,9 +7,9 @@ pop_validate_raw(pop, detail = getOption("pipaux.detail.output")) } \arguments{ -\item{detail}{has an option TRUE/FALSE, default value is FALSE} +\item{pop}{raw pop data, as loaded via \code{wbstats::wb_data}} -\item{spop}{raw pop data, as loaded via \code{wbstats::wb_data}} +\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Validate pop raw data download from wdi From 38719e3bcde7d56aa435f9d39f0784b5de63d9c1 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 18 Mar 2024 15:25:05 -0400 Subject: [PATCH 11/23] Unit test functions are added --- NAMESPACE | 2 - R/incgroup_validate_output.R | 8 ++ R/mpd_validate_output.R | 35 ------ R/send_report.R | 1 - R/sna_fy_validate_raw.R | 2 + man/mpd_validate_output.Rd | 16 --- tests/testthat/test-cl-validation.R | 39 +++++++ tests/testthat/test-countries-validation.R | 39 +++++++ tests/testthat/test-cpi-validation.R | 101 +++++++++++++++++ tests/testthat/test-gdm-validation.R | 98 ++++++++++++++++ tests/testthat/test-gdp-validation.R | 50 ++++++++ .../testthat/test-income-groups-validation.R | 35 ++++++ tests/testthat/test-maddison-validation.R | 20 ++++ tests/testthat/test-metadata-validation.R | 95 ++++++++++++++++ tests/testthat/test-npl-validation.R | 69 +++++++++++ 
tests/testthat/test-pce-validation.R | 50 ++++++++ tests/testthat/test-pfw-validation.R | 107 ++++++++++++++++++ tests/testthat/test-pl-validation.R | 34 ++++++ tests/testthat/test-pop-validation.R | 98 ++++++++++++++++ tests/testthat/test-ppp-validation.R | 96 ++++++++++++++++ tests/testthat/test-sna-fy-validation.R | 20 ++++ tests/testthat/test-sna-validation.R | 34 ++++++ tests/testthat/test-wdi-validation.R | 36 ++++++ tests/testthat/test-weo-validation.R | 51 +++++++++ 24 files changed, 1082 insertions(+), 54 deletions(-) delete mode 100644 R/mpd_validate_output.R delete mode 100644 man/mpd_validate_output.Rd create mode 100644 tests/testthat/test-cl-validation.R create mode 100644 tests/testthat/test-countries-validation.R create mode 100644 tests/testthat/test-cpi-validation.R create mode 100644 tests/testthat/test-gdm-validation.R create mode 100644 tests/testthat/test-gdp-validation.R create mode 100644 tests/testthat/test-income-groups-validation.R create mode 100644 tests/testthat/test-maddison-validation.R create mode 100644 tests/testthat/test-metadata-validation.R create mode 100644 tests/testthat/test-npl-validation.R create mode 100644 tests/testthat/test-pce-validation.R create mode 100644 tests/testthat/test-pfw-validation.R create mode 100644 tests/testthat/test-pl-validation.R create mode 100644 tests/testthat/test-pop-validation.R create mode 100644 tests/testthat/test-ppp-validation.R create mode 100644 tests/testthat/test-sna-fy-validation.R create mode 100644 tests/testthat/test-sna-validation.R create mode 100644 tests/testthat/test-wdi-validation.R create mode 100644 tests/testthat/test-weo-validation.R diff --git a/NAMESPACE b/NAMESPACE index 4f8ee37..5222cd1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,6 @@ export(incgroup_validate_output) export(load_aux) export(metadata_validate_output) export(metadata_validate_raw) -export(mpd_validate_output) export(mpd_validate_raw) export(npl_validate_output) export(npl_validate_raw) @@ 
-68,7 +67,6 @@ import(collapse, except = fdroplevels) import(data.table) import(data.table, except = fdroplevels) import(data.validator) -import(rlang) importFrom(assertr,in_set) importFrom(assertr,is_uniq) importFrom(assertr,not_na) diff --git a/R/incgroup_validate_output.R b/R/incgroup_validate_output.R index aab9c39..8a3ffb1 100644 --- a/R/incgroup_validate_output.R +++ b/R/incgroup_validate_output.R @@ -17,6 +17,14 @@ incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail description = "`country_code` should be character") |> validate_if(is.numeric(year_data), description = "`year_data` should be numeric") |> + validate_if(is.character(income_group), + description = "`income_group` should be character") |> + validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), + income_group, description = "`income_group` values within range") |> + validate_if(is.character(income_group_code), + description = "`income_group_code` should be character") |> + validate_cols(in_set(c("HIC", "LIC", "LMIC", "UMIC")), + income_group_code, description = "`income_group_code` values within range") |> validate_if(is.character(incgroup_historical), description = "`incgroup_historical` should be character") |> validate_cols(in_set(c("High income", "Low income", "Lower middle income", "Upper middle income")), diff --git a/R/mpd_validate_output.R b/R/mpd_validate_output.R deleted file mode 100644 index c5ffced..0000000 --- a/R/mpd_validate_output.R +++ /dev/null @@ -1,35 +0,0 @@ -#' Validate output maddison data -#' -#' @param mpd output mpd data -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @import data.validator -#' @importFrom assertr in_set not_na is_uniq -#' -#' @export -mpd_validate_output <- function(mpd, detail = getOption("pipaux.detail.output")){ - - stopifnot("mpd/ maddison output data is not loaded" = !is.null(mpd)) - - report <- data_validation_report() - - validate(mpd, name = "mdp 
output data validation") |> - validate_if(is.character(country_code), - description = "`country_code` should be character") |> - validate_if(is.numeric(year), - description = "`year` should be numeric") |> - validate_if(is.numeric(mpd_gdp), - description = "`mpd_gdp` should be numeric") |> - validate_cols(not_na, country_code, year, - description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year), - description = "no duplicate records in key variables") |> - add_results(report) - - validation_record <- get_results(report, unnest = FALSE) |> - setDT() - - if (any(validation_record[["type"]] == "error")){ - get_error_validation(validation_record, detail) - } - -} diff --git a/R/send_report.R b/R/send_report.R index 6e704f7..9f8e1cc 100644 --- a/R/send_report.R +++ b/R/send_report.R @@ -1,7 +1,6 @@ #' Send an email that contains auxiliary data validation report #' #' @import blastula -#' @import rlang #' #' @export send_report <- function(){ diff --git a/R/sna_fy_validate_raw.R b/R/sna_fy_validate_raw.R index 5a3d8b7..cb39c40 100644 --- a/R/sna_fy_validate_raw.R +++ b/R/sna_fy_validate_raw.R @@ -21,6 +21,8 @@ sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")) description = "`SpecialNotes` should be character") |> validate_if(is.character(Month), description = "`Month` should be character") |> + validate_if(is.numeric(Day), + description = "`Day` should be numeric") |> validate_cols(not_na, Code, Month, Day, description = "no missing values in key variables") |> # validate_if(is_uniq(Code, LongName), diff --git a/man/mpd_validate_output.Rd b/man/mpd_validate_output.Rd deleted file mode 100644 index 14aca18..0000000 --- a/man/mpd_validate_output.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mpd_validate_output.R -\name{mpd_validate_output} -\alias{mpd_validate_output} -\title{Validate output maddison data} -\usage{ -mpd_validate_output(mpd, 
detail = getOption("pipaux.detail.output")) -} -\arguments{ -\item{mpd}{output mpd data} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} -} -\description{ -Validate output maddison data -} diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R new file mode 100644 index 0000000..fc395a2 --- /dev/null +++ b/tests/testthat/test-cl-validation.R @@ -0,0 +1,39 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure = "country_list" + +test_that("cl_validate_raw() works identifying duplicate error", { + + cl <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + cl[, `:=` (country_code = fifelse(country_code == "ABW", + "ALB", country_code))] + + expect_error(cl_validate_raw(cl), "Duplicate error") + +}) + +test_that("cl_validate_raw() works identifying invalid value", { + + cl <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + cl[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", + "SSA", africa_split_code), + pcn_region_code = fifelse(pcn_region_code == "SSA", + "SAR", pcn_region_code), + region_code = fifelse(region_code == "SSA", + "SAR", region_code))] + + expect_error(cl_validate_raw(cl), "Invalid values") + +}) diff --git a/tests/testthat/test-countries-validation.R b/tests/testthat/test-countries-validation.R new file mode 100644 index 0000000..6c3c3d0 --- /dev/null +++ b/tests/testthat/test-countries-validation.R @@ -0,0 +1,39 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "countries" + +test_that("countries_validate_output() works identifying duplicate error", { + + countries <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + countries[, `:=` (country_code = fifelse(country_code == "ABW", + "ALB", country_code))] + + expect_error(countries_validate_output(countries), "Duplicate error") + 
+}) + +test_that("countries_validate_output() works identifying invalid value", { + + countries <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + countries[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", + "SSA", africa_split_code), + pcn_region_code = fifelse(pcn_region_code == "SSA", + "SAR", pcn_region_code), + region_code = fifelse(region_code == "SSA", + "SAR", region_code))] + + expect_error(countries_validate_output(countries), "Invalid values") + +}) diff --git a/tests/testthat/test-cpi-validation.R b/tests/testthat/test-cpi-validation.R new file mode 100644 index 0000000..a71a991 --- /dev/null +++ b/tests/testthat/test-cpi-validation.R @@ -0,0 +1,101 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "cpi" + +test_that("cpi_validate_raw() works identifying duplicate error", { + + cpi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + cpi[, `:=` (cpi_data_level = fifelse(cpi_data_level == 0, + 1, cpi_data_level))] + + expect_error(cpi_validate_raw(cpi), "Duplicate error") + +}) + + +test_that("cpi_validate_raw() works identifying type/ formatting error", { + + cpi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + cpi[, `:=` (year = as.character(year), + ref_year = as.character(ref_year), + cpi_domain_value = as.character(cpi_domain_value), + cpi2011 = as.character(cpi2011), + cpi2017 = as.character(cpi2017))] + + expect_error(cpi_validate_raw(cpi), "Type/ format error") + +}) + +test_that("cpi_validate_raw() works identifying invalid value", { + + cpi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] + + expect_error(cpi_validate_raw(cpi), "Invalid value in `cpi_domain`") + +}) + +test_that("cpi_validate_output() works identifying duplicate error", { + + cpi <-
load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + cpi[, `:=` (cpi_data_level = fifelse(cpi_data_level == "rural", + "urban", cpi_data_level))] + + expect_error(cpi_validate_output(cpi), "Duplicate error") + +}) + +test_that("cpi_validate_output() works identifying type/ formating error", { + + cpi <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + cpi[, `:=` (cpi_year = as.character(cpi_year), + survey_year = as.character(survey_year), + cpi_domain_value = as.character(cpi_domain_value), + cpi2011 = as.character(cpi2011), + cpi2017 = as.character(cpi2017))] + + expect_error(cpi_validate_output(cpi), "Type/ format error") + +}) + + +test_that("cpi_validate_output() works identifying invalid value", { + + cpi <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] + + expect_error(cpi_validate_output(cpi), "Invalid value in `cpi_domain`") + +}) diff --git a/tests/testthat/test-gdm-validation.R b/tests/testthat/test-gdm-validation.R new file mode 100644 index 0000000..502b431 --- /dev/null +++ b/tests/testthat/test-gdm-validation.R @@ -0,0 +1,98 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "gdm" + +test_that("gdm_validate_raw() works identifying duplicate error", { + + gdm <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + gdm[, `:=` (Coverage = fifelse(Coverage == "Urban", + "Rural", Coverage))] + + expect_error(gdm_validate_raw(gdm), "Duplicate error") + +}) + +test_that("gdm_validate_raw() works identifying type/ formating error", { + + gdm <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + gdm[, `:=` (SurveyTime = as.character(SurveyTime), + CPI_Time = as.character(CPI_Time), + SurveyMean_LCU = as.character(SurveyMean_LCU), + currency = 
as.character(currency), + SurveyMean_PPP = as.character(SurveyMean_PPP))] + + expect_error(gdm_validate_raw(gdm), "Type/ format error") + +}) + +test_that("gdm_validate_raw() works identifying invalid value", { + + gdm <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + gdm[, DataType := fifelse(DataType == "x", "i", DataType)] + + expect_error(gdm_validate_raw(gdm), "Invalid value in `DataType`") + +}) + +test_that("gdm_validate_output() works identifying duplicate error", { + + gdm <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + gdm[, `:=` (pop_data_level = fifelse(pop_data_level == "rural", + "urban", pop_data_level))] + + expect_error(gdm_validate_output(gdm), "Duplicate error") + +}) + +test_that("gdm_validate_output() works identifying type/ formating error", { + + gdm <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + gdm[, `:=` (surveyid_year = as.character(surveyid_year), + survey_year = as.character(survey_year), + survey_mean_lcu = as.character(survey_mean_lcu))] + + expect_error(gdm_validate_output(gdm), "Type/ format error") + +}) + +test_that("gdm_validate_output() works identifying invalid value", { + + gdm <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + gdm[, pop_data_level := fifelse(pop_data_level == "national", + "national1", pop_data_level)] + + expect_error(gdm_validate_output(gdm), "Invalid value in `pop_data_level`") + +}) diff --git a/tests/testthat/test-gdp-validation.R b/tests/testthat/test-gdp-validation.R new file mode 100644 index 0000000..68739e7 --- /dev/null +++ b/tests/testthat/test-gdp-validation.R @@ -0,0 +1,50 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "gdp" + +test_that("gdp_validate_output() works identifying duplicate error", { + + gdp <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = 
branch + ) + + gdp[, `:=` (gdp_data_level = fifelse(gdp_data_level == "rural", + "urban", gdp_data_level))] + + expect_error(gdp_validate_output(gdp), "Duplicate error") + +}) + +test_that("gdp_validate_output() works identifying type/ formatting error", { + + gdp <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + gdp[, `:=` (year = as.character(year), + gdp = as.character(gdp))] + + expect_error(gdp_validate_output(gdp), "Type/ format error") + +}) + +test_that("gdp_validate_output() works identifying invalid value", { + + gdp <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + gdp[, gdp_data_level := fifelse(gdp_data_level == "national", + "national1", gdp_data_level)] + + expect_error(gdp_validate_output(gdp), "Invalid value in `gdp_data_level`") + +}) diff --git a/tests/testthat/test-income-groups-validation.R b/tests/testthat/test-income-groups-validation.R new file mode 100644 index 0000000..ce90853 --- /dev/null +++ b/tests/testthat/test-income-groups-validation.R @@ -0,0 +1,35 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "income_groups" + +test_that("incgroup_validate_output() works identifying type/ formatting error", { + + incgroups <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + incgroups[, `:=` (year_data = as.character(year_data), + year = as.character(year))] + + expect_error(incgroup_validate_output(incgroups), "Type/ format error") + +}) + +test_that("incgroup_validate_output() works identifying invalid value", { + + incgroups <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + incgroups[, income_group_code := fifelse(income_group_code == "HIC", + "hic", income_group_code)] + + expect_error(incgroup_validate_output(incgroups), "Invalid value in `income_group_code`") + +}) diff --git a/tests/testthat/test-maddison-validation.R
b/tests/testthat/test-maddison-validation.R new file mode 100644 index 0000000..683b6e3 --- /dev/null +++ b/tests/testthat/test-maddison-validation.R @@ -0,0 +1,20 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "maddison" + +test_that("mpd_validate_raw() works identifying type/ formating error", { + + mpd <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + mpd[, `:=` (year = as.character(year), + mpd_gdp = as.character(mpd_gdp))] + + expect_error(mpd_validate_raw(mpd), "Type/ format error") + +}) diff --git a/tests/testthat/test-metadata-validation.R b/tests/testthat/test-metadata-validation.R new file mode 100644 index 0000000..720486d --- /dev/null +++ b/tests/testthat/test-metadata-validation.R @@ -0,0 +1,95 @@ + +## Initial parameters -------- + +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "metadata" + +test_that("metadata_validate_raw() works identifying duplicate error", { + + metadata <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + metadata[, `:=` (svy_id = fifelse(svy_id == "CNH_2005_URHS_v01_M", + "CNH_2008_URHS_v01_M", svy_id))] + + expect_error(metadata_validate_raw(metadata), "Duplicate error") + +}) + +test_that("metadata_validate_raw() works identifying type/ formating error", { + + metadata <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + metadata[, `:=` (year_start = as.character(year_start), + year_end = as.character(year_end))] + + expect_error(metadata_validate_raw(metadata), "Type/ format error") + +}) + +test_that("metadata_validate_raw() works identifying invalid value", { + + metadata <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + metadata[, reg := fifelse(reg == "SSA", "ssa", reg)] + + expect_error(metadata_validate_raw(metadata), "Invalid value in `reg`") + +}) + +test_that("metadata_validate_output() 
works identifying duplicate error", { + + metadata <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + metadata[, `:=` (welfare_type = fifelse(welfare_type == "consumption", + "income", welfare_type))] + + expect_error(metadata_validate_output(metadata), "Duplicate error") + +}) + +test_that("metadata_validate_output() works identifying type/ formating error", { + + metadata <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + metadata[, `:=` (reporting_year = as.character(reporting_year), + survey_year = as.character(survey_year))] + + expect_error(metadata_validate_output(metadata), "Type/ format error") + +}) + +test_that("metadata_validate_output() works identifying invalid value", { + + metadata <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + metadata[, survey_coverage := fifelse(survey_coverage == "national", + "national1", survey_coverage)] + + expect_error(metadata_validate_output(metadata), "Invalid value in `survey_coverage`") + +}) diff --git a/tests/testthat/test-npl-validation.R b/tests/testthat/test-npl-validation.R new file mode 100644 index 0000000..ab294fc --- /dev/null +++ b/tests/testthat/test-npl-validation.R @@ -0,0 +1,69 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "npl" + +test_that("npl_validate_raw() works identifying duplicate error", { + + npl <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + ext = "dta" + ) |> setDT() + + npl[, `:=` (year = fifelse((year == 2007 & countrycode == "AFG"), + 2011, year))] + + expect_error(npl_validate_raw(npl), "Duplicate error") + +}) + +test_that("npl_validate_raw() works identifying type/ formating error", { + + npl <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + npl[, `:=` (year = as.character(year), + comparability = as.character(comparability))] + + 
expect_error(npl_validate_raw(npl), "Type/ format error") + +}) + + +test_that("npl_validate_output() works identifying duplicate error", { + + npl <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + npl[, `:=` (reporting_year = fifelse((reporting_year == 2007 & country_code == "AFG"), + 2011, reporting_year))] + + expect_error(npl_validate_output(npl), "Duplicate error") + +}) + +test_that("npl_validate_output() works identifying type/ formating error", { + + npl <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + npl[, `:=` (reporting_year = as.character(reporting_year), + comparability = as.character(comparability), + nat_headcount = as.character(nat_headcount))] + + expect_error(npl_validate_output(npl), "Type/ format error") + +}) + diff --git a/tests/testthat/test-pce-validation.R b/tests/testthat/test-pce-validation.R new file mode 100644 index 0000000..f0dec16 --- /dev/null +++ b/tests/testthat/test-pce-validation.R @@ -0,0 +1,50 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "pce" + +test_that("pce_validate_output() works identifying duplicate error", { + + pce <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pce[, `:=` (pce_data_level = fifelse(pce_data_level == "rural", + "urban", pce_data_level))] + + expect_error(pce_validate_output(pce), "Duplicate error") + +}) + +test_that("pce_validate_output() works identifying type/ formating error", { + + pce <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pce[, `:=` (year = as.character(year), + pce = as.character(pce))] + + expect_error(pce_validate_output(pce), "Type/ format error") + +}) + +test_that("pce_validate_output() works identifying invalid value", { + + pce <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pce[, pce_data_level := 
fifelse(pce_data_level == "national", + "national1", pce_data_level)] + + expect_error(pce_validate_output(pce), "Invalid value in `pce_data_level`") + +}) diff --git a/tests/testthat/test-pfw-validation.R b/tests/testthat/test-pfw-validation.R new file mode 100644 index 0000000..15b7919 --- /dev/null +++ b/tests/testthat/test-pfw-validation.R @@ -0,0 +1,107 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "pfw" + +test_that("pfw_validate_raw() works identifying duplicate error", { + + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + + pfw[, `:=` (year = fifelse((year == 1981 & code == "CHN"), + 1984, year))] + + expect_error(pfw_validate_raw(pfw), "Duplicate error") + +}) + +test_that("pfw_validate_raw() works identifying type/ formating error", { + + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + + pfw[, `:=` (year = as.character(year), + gdp_domain = as.character(gdp_domain), + pce_domain = as.character(pce_domain), + pop_domain = as.character(pop_domain))] + + expect_error(pfw_validate_raw(pfw), "Type/ format error") + +}) + +test_that("pfw_validate_raw() works identifying invalid value", { + + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + + pfw[, datatype := fifelse(datatype == "c", "x", datatype)] + + expect_error(pfw_validate_raw(pfw), "Invalid value in `datatype`") + +}) + +test_that("pfw_validate_raw() works identifying invalid value", { + + pfw <- pipfun::load_from_gh(measure = measure, + owner = owner, + branch = branch, + ext = "dta") + + pfw[, pce_domain := fifelse(pce_domain == 1, 3, pce_domain)] + + expect_error(pfw_validate_raw(pfw), "Invalid value in `pce_domain`") + +}) + +test_that("pfw_validate_output() works identifying duplicate error", { + + pfw <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + 
pfw[, `:=` (welfare_type = fifelse(welfare_type == "consumption", + "income", welfare_type))] + + expect_error(pfw_validate_output(pfw), "Duplicate error") + +}) + +test_that("pfw_validate_output() works identifying type/ formating error", { + + pfw <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pfw[, `:=` (year = as.character(year), + gdp_domain = as.character(gdp_domain), + pce_domain = as.character(pce_domain), + pop_domain = as.character(pop_domain))] + + expect_error(pfw_validate_output(pfw), "Type/ format error") + +}) + +test_that("pfw_validate_output() works identifying invalid value", { + + pfw <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pfw[, pce_domain := fifelse(pce_domain == 1, 3, pce_domain)] + + expect_error(pfw_validate_output(pfw), "Invalid value in `pce_domain`") + +}) diff --git a/tests/testthat/test-pl-validation.R b/tests/testthat/test-pl-validation.R new file mode 100644 index 0000000..249f928 --- /dev/null +++ b/tests/testthat/test-pl-validation.R @@ -0,0 +1,34 @@ + +## Initial parameters -------- +branch <- "DEV" +measure <- "pl" + +test_that("pl_validate_output() works identifying duplicate error", { + + pl <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pl[, `:=` (ppp_year = fifelse(ppp_year == 2011, + 2017, ppp_year))] + + expect_error(pl_validate_output(pl), "Duplicate error") + +}) + +test_that("pl_validate_output() works identifying type/ formating error", { + + pl <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pl[, `:=` (poverty_line = as.character(poverty_line), + ppp_year = as.character(ppp_year))] + + expect_error(pl_validate_output(pl), "Type/ format error") + +}) diff --git a/tests/testthat/test-pop-validation.R b/tests/testthat/test-pop-validation.R new file mode 100644 index 0000000..a3ee6ba --- /dev/null +++ b/tests/testthat/test-pop-validation.R @@ -0,0 
+1,98 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "pop" + +test_that("pop_validate_raw() works identifying duplicate error", { + + pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") + pop <- wbstats::wb_data(indicator = pop_indicators, + country = "all", # this is new + lang = "en", + return_wide = FALSE) |> + setDT() + + pop[, `:=` (indicator_id = fifelse(indicator_id == "SP.RUR.TOTL", + "SP.URB.TOTL", indicator_id))] + + expect_error(pop_validate_raw(pop), "Duplicate error") + +}) + +test_that("pop_validate_raw() works identifying type/ formating error", { + + pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") + pop <- wbstats::wb_data(indicator = pop_indicators, + country = "all", # this is new + lang = "en", + return_wide = FALSE) |> + setDT() + + pop[, `:=` (date = as.character(date), + value = as.character(value))] + + expect_error(pop_validate_raw(pop), "Type/ format error") + +}) + +test_that("pop_validate_raw() works identifying invalid value", { + + pop_indicators <- c("SP.POP.TOTL", "SP.RUR.TOTL", "SP.URB.TOTL") + pop <- wbstats::wb_data(indicator = pop_indicators, + country = "all", # this is new + lang = "en", + return_wide = FALSE) |> + setDT() + + pop[, `:=` (indicator_id = fifelse(indicator_id == "SP.RUR.TOTL", + "SP.RUR.totl", indicator_id))] + + expect_error(pop_validate_raw(pop), "Invalid value in `indicator_id`") + +}) + +test_that("pop_validate_output() works identifying duplicate error", { + + pop <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pop[, `:=` (pop_data_level = fifelse((pop_data_level == "rural" & country_code == "ABW"), + "urban", pop_data_level))] + + expect_error(pop_validate_output(pop), "Duplicate error") + +}) + +test_that("pop_validate_output() works identifying type/ formating error", { + + pop <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pop[, `:=` 
(year = as.character(year), + pop = as.character(pop))] + + expect_error(pop_validate_output(pop), "Type/ format error") + +}) + +test_that("pop_validate_output() works identifying invalid value", { + + pop <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + pop[, pop_data_level := fifelse(pop_data_level == "national", + "national1", pop_data_level)] + + expect_error(pop_validate_output(pop), "Invalid value in `pop_data_level`") + +}) diff --git a/tests/testthat/test-ppp-validation.R b/tests/testthat/test-ppp-validation.R new file mode 100644 index 0000000..90a868b --- /dev/null +++ b/tests/testthat/test-ppp-validation.R @@ -0,0 +1,96 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "ppp" + +test_that("ppp_validate_raw() works identifying duplicate error", { + + ppp <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + ppp[, `:=` (CoverageType = fifelse(CoverageType == "Urban", + "Rural", CoverageType))] + + expect_error(ppp_validate_raw(ppp), "Duplicate error") + +}) + +test_that("ppp_validate_raw() works identifying type/ formating error", { + + ppp <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + ppp[, `:=` (ppp_2017_v1_v1 = as.character(ppp_2017_v1_v1), + ppp_2017_v1_v2 = as.character(ppp_2017_v1_v2), + ppp_2011_v2_v1 = as.character(ppp_2011_v2_v1), + ppp_2011_v2_v2 = as.character(ppp_2011_v2_v2))] + + expect_error(ppp_validate_raw(ppp), "Type/ format error") + +}) + +test_that("ppp_validate_raw() works identifying invalid value", { + + ppp <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + ppp[, ppp_domain := fifelse(ppp_domain == 1, 3, ppp_domain)] + + expect_error(ppp_validate_raw(ppp), "Invalid value in `ppp_domain`") + +}) + +test_that("ppp_validate_output() works identifying duplicate error", { + + ppp <- load_aux( + maindir = gls$PIP_DATA_DIR, + 
measure = measure, + branch = branch + ) + + ppp[, `:=` (ppp_data_level = fifelse(ppp_data_level == "rural", + "urban", ppp_data_level))] + + expect_error(ppp_validate_output(ppp), "Duplicate error") + +}) + +test_that("ppp_validate_output() works identifying type/ formating error", { + + ppp <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + ppp[, `:=` (ppp_year = as.character(ppp_year), + ppp = as.character(ppp))] + + expect_error(ppp_validate_output(ppp), "Type/ format error") + +}) + +test_that("ppp_validate_output() works identifying invalid value", { + + ppp <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + ppp[, ppp_data_level := fifelse(ppp_data_level == "national", + "national1", ppp_data_level)] + + expect_error(ppp_validate_output(ppp), "Invalid value in `ppp_data_level`") + +}) diff --git a/tests/testthat/test-sna-fy-validation.R b/tests/testthat/test-sna-fy-validation.R new file mode 100644 index 0000000..f34dc84 --- /dev/null +++ b/tests/testthat/test-sna-fy-validation.R @@ -0,0 +1,20 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "sna" + +test_that("sna_fy_validate_raw() works identifying type/ formating error", { + + sna_fy <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch, + filename = "sna_metadata" + ) + + sna_fy[, Day := as.character(Day)] + + expect_error(sna_fy_validate_raw(sna_fy), "Type/ format error") + +}) diff --git a/tests/testthat/test-sna-validation.R b/tests/testthat/test-sna-validation.R new file mode 100644 index 0000000..f0c9cca --- /dev/null +++ b/tests/testthat/test-sna-validation.R @@ -0,0 +1,34 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "sna" + +test_that("sna_validate_raw() works identifying type/ formating error", { + + sna <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch +
) + + sna[, `:=` (year = as.character(year), + GDP = as.character(GDP))] + + expect_error(sna_validate_raw(sna), "Type/ format error") + +}) + +test_that("sna_validate_raw() works identifying invalid value", { + + sna <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + sna[, coverage := fifelse(coverage == "National", "national1", coverage)] + + expect_error(sna_validate_raw(sna), "Invalid value in `coverage`") + +}) diff --git a/tests/testthat/test-wdi-validation.R b/tests/testthat/test-wdi-validation.R new file mode 100644 index 0000000..f59d4ac --- /dev/null +++ b/tests/testthat/test-wdi-validation.R @@ -0,0 +1,36 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "wdi" + +test_that("wdi_validate_raw() works identifying duplicate error", { + + wdi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + wdi[, `:=` (year = fifelse((year == 1960 & country_code == "ABW"), + 1961, year))] + + expect_error(wdi_validate_raw(wdi), "Duplicate error") + +}) + +test_that("wdi_validate_raw() works identifying type/ formating error", { + + wdi <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + wdi[, `:=` (year = as.character(year), + NE.CON.PRVT.PC.KD = as.character(NE.CON.PRVT.PC.KD), + NY.GDP.PCAP.KD = as.character(NY.GDP.PCAP.KD))] + + expect_error(wdi_validate_raw(wdi), "Type/ format error") + +}) diff --git a/tests/testthat/test-weo-validation.R b/tests/testthat/test-weo-validation.R new file mode 100644 index 0000000..a110c9e --- /dev/null +++ b/tests/testthat/test-weo-validation.R @@ -0,0 +1,51 @@ + +## Initial parameters -------- +branch <- "DEV" +owner <- getOption("pipfun.ghowner") +measure <- "weo" + +test_that("weo_validate_raw() works identifying duplicate error", { + + weo <- pipfun::load_from_gh( + measure = measure, + owner = owner, + branch = branch + ) + + weo[, `:=` (`WEO Subject Code` =
fifelse(`WEO Subject Code` == "NGDP", + "NGDPD", `WEO Subject Code`))] + + expect_error(weo_validate_raw(weo), "Duplicate error") + +}) + + +test_that("weo_validate_output() works identifying duplicate error", { + + weo <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + weo[, `:=` (year = fifelse(year == 1986 & country_code == "ABW", + 1987, year))] + + expect_error(weo_validate_output(weo), "Duplicate error") + +}) + +test_that("weo_validate_output() works identifying type/ formating error", { + + weo <- load_aux( + maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch + ) + + weo[, `:=` (year = as.character(year), + weo_gdp = as.character(weo_gdp))] + + expect_error(weo_validate_output(weo), "Type/ format error") + +}) From 29eb56eb79a78bc4119355bddefcc529ca197aec Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 28 Mar 2024 10:55:02 -0400 Subject: [PATCH 12/23] Update validation scripts --- NAMESPACE | 1 + R/auto_aux_update.R | 2 ++ R/cl_validate_raw.R | 18 ++++++++++++ R/clean_validation_report.R | 12 ++++++++ R/get_error_validation.R | 2 ++ R/send_report.R | 45 +++++++++++++++-------------- man/clean_validation_report.Rd | 11 +++++++ tests/testthat/test-cl-validation.R | 42 ++++++++++++++++++++------- 8 files changed, 101 insertions(+), 32 deletions(-) create mode 100644 R/clean_validation_report.R create mode 100644 man/clean_validation_report.Rd diff --git a/NAMESPACE b/NAMESPACE index e5082c6..0ba869f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export("%>%") export(auto_aux_update) export(cl_validate_raw) +export(clean_validation_report) export(countries_validate_output) export(cpi_validate_output) export(cpi_validate_raw) diff --git a/R/auto_aux_update.R b/R/auto_aux_update.R index 0a51100..51f94b6 100644 --- a/R/auto_aux_update.R +++ b/R/auto_aux_update.R @@ -29,6 +29,8 @@ auto_aux_update <- function(measure = NULL, ) } + # if there is validation report in the environment - remove it + 
clean_validation_report() assertthat::assert_that(Sys.getenv("GITHUB_PAT") != "", msg = "Enviroment variable `GITHUB_PAT` is empty. diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R index 4f7b3a0..6f03450 100644 --- a/R/cl_validate_raw.R +++ b/R/cl_validate_raw.R @@ -15,6 +15,24 @@ cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ validate(cl, name = "CL raw data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> + validate_cols(in_set(c("ABW", "AFG", "AGO", "ALB", "AND", "ARE", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE", + "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH", "BLR", "BLZ", "BMU", "BOL", + "BRA", "BRB", "BRN", "BTN", "BWA", "CAF", "CAN", "CHE", "CHI", "CHL", "CHN", "CIV", "CMR", + "COD", "COG", "COL", "COM", "CPV", "CRI", "CUB", "CUW", "CYM", "CYP", "CZE", "DEU", "DJI", + "DMA", "DNK", "DOM", "DZA", "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", + "FRO", "FSM", "GAB", "GBR", "GEO", "GHA", "GIB", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD", + "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN", "IMN", "IND", "IRL", + "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN", "KAZ", "KEN", "KGZ", "KHM", "KIR", + "KNA", "KOR", "KWT", "LAO", "LBN", "LBR", "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", + "LVA", "MAC", "MAF", "MAR", "MCO", "MDA", "MDG", "MDV", "MEX", "MHL", "MKD", "MLI", "MLT", + "MMR", "MNE", "MNG", "MNP", "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", + "NIC", "NLD", "NOR", "NPL", "NRU", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PLW", "PNG", + "POL", "PRI", "PRK", "PRT", "PRY", "PSE", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU", "SDN", + "SEN", "SGP", "SLB", "SLE", "SLV", "SMR", "SOM", "SRB", "SSD", "STP", "SUR", "SVK", "SVN", + "SWE", "SWZ", "SXM", "SYC", "SYR", "TCA", "TCD", "TGO", "THA", "TJK", "TKM", "TLS", "TON", + "TTO", "TUN", "TUR", "TUV", "TWN", "TZA", "UGA", "UKR", "URY", "USA", 
"UZB", "VCT", "VEN", + "VGB", "VIR", "VNM", "VUT", "WSM", "XKX", "YEM", "ZAF", "ZMB", "ZWE")), + country_code, description = "`country_code` values within range") |> validate_if(is.character(country_name), description = "`country_name` should be character") |> validate_if(is.character(africa_split), diff --git a/R/clean_validation_report.R b/R/clean_validation_report.R new file mode 100644 index 0000000..e15ee53 --- /dev/null +++ b/R/clean_validation_report.R @@ -0,0 +1,12 @@ +#' Remove data validation report from .pipaux environment variable +#' +#' @export +clean_validation_report <- function(){ + + if (rlang::env_has(.pipaux, "validation_report")){ + + # rlang::env_bind(.pipaux, validation_report = rlang::zap()) + rlang::env_unbind(.pipaux, "validation_report") + + } +} diff --git a/R/get_error_validation.R b/R/get_error_validation.R index ac8fd4a..f857e31 100644 --- a/R/get_error_validation.R +++ b/R/get_error_validation.R @@ -35,6 +35,8 @@ get_error_validation <- function(vlddata, detail){ rlang::env_poke(.pipaux, "validation_report", compiled_result) } + + cli::cli_inform("Validation report ('validation_report') has been added to the environment varaible (.pipaux).") } } diff --git a/R/send_report.R b/R/send_report.R index 9f8e1cc..a14b81f 100644 --- a/R/send_report.R +++ b/R/send_report.R @@ -7,27 +7,28 @@ send_report <- function(){ if (rlang::env_has(.pipaux, "validation_report")){ - fname <- file.path(tempdir(), "data_validation_report.csv") - - write.csv(.pipaux$validation_report, fname, row.names = FALSE) - - compose_email( - body = md(glue::glue( - - "Hello, - - The attched file contains auxiliary data validation report. 
- - Regards"))) |> - add_attachment(file = fname, filename = "data_validation_report") |> - smtp_send( - from = "tefera.degefu@outlook.com", - to = "tdegefu@worldbank.org", - subject = "Data validation report", - credentials = creds_envvar(user = "tefera.degefu@outlook.com", - pass_envvar = "SMTP_GPID_EMAIL", - provider = "outlook") - ) - + print(.pipaux$validation_report) + + # fname <- file.path(tempdir(), "data_validation_report.csv") + # + # write.csv(.pipaux$validation_report, fname, row.names = FALSE) + # + # compose_email( + # body = md(glue::glue( + # + # "Hello, + # + # The attched file contains auxiliary data validation report. + # + # Regards"))) |> + # add_attachment(file = fname, filename = "data_validation_report") |> + # smtp_send( + # from = "tefera.degefu@outlook.com", + # to = "tdegefu@worldbank.org", + # subject = "Data validation report", + # credentials = creds_envvar(user = "tefera.degefu@outlook.com", + # pass_envvar = "SMTP_GPID_EMAIL", + # provider = "outlook") + # ) } } diff --git a/man/clean_validation_report.Rd b/man/clean_validation_report.Rd new file mode 100644 index 0000000..2573e0c --- /dev/null +++ b/man/clean_validation_report.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_validation_report.R +\name{clean_validation_report} +\alias{clean_validation_report} +\title{Remove data validation report from .pipaux environment variable} +\usage{ +clean_validation_report() +} +\description{ +Remove data validation report from .pipaux environment variable +} diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index fc395a2..9a33081 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -6,11 +6,7 @@ measure = "country_list" test_that("cl_validate_raw() works identifying duplicate error", { - cl <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch - ) + cl <- 
pip_country_list_update(class_branch = "master") cl[, `:=` (country_code = fifelse(country_code == "ABW", "ALB", country_code))] @@ -21,11 +17,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { test_that("cl_validate_raw() works identifying invalid value", { - cl <- pipfun::load_from_gh( - measure = measure, - owner = owner, - branch = branch - ) + cl <- pip_country_list_update(class_branch = "master") cl[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", "SSA", africa_split_code), @@ -37,3 +29,33 @@ test_that("cl_validate_raw() works identifying invalid value", { expect_error(cl_validate_raw(cl), "Invalid values") }) + +test_that("cl_validate_raw() works identifying duplicate error", { + + cl <- load_aux(maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch) + + cl[, `:=` (country_code = fifelse(country_code == "ABW", + "ALB", country_code))] + + expect_error(cl_validate_raw(cl), "Duplicate error") + +}) + +test_that("cl_validate_raw() works identifying invalid value", { + + cl <- load_aux(maindir = gls$PIP_DATA_DIR, + measure = measure, + branch = branch) + + cl[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", + "SSA", africa_split_code), + pcn_region_code = fifelse(pcn_region_code == "SSA", + "SAR", pcn_region_code), + region_code = fifelse(region_code == "SSA", + "SAR", region_code))] + + expect_error(cl_validate_raw(cl), "Invalid values") + +}) From 4dba4f9c1c6cac70f00fede11542a0e8755ca3c4 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Tue, 9 Apr 2024 10:57:52 -0400 Subject: [PATCH 13/23] Merge branch 'tefera_dev' into tefera_update_branch From eb0d961cfc1a07936e90c0e93c553711df4f5a4f Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Wed, 10 Apr 2024 07:23:36 -0400 Subject: [PATCH 14/23] check the correct file --- R/pip_countries.R | 72 ----------------------------------------------- 1 file changed, 72 deletions(-) delete mode 100644 R/pip_countries.R diff --git a/R/pip_countries.R b/R/pip_countries.R 
deleted file mode 100644 index 8866dae..0000000 --- a/R/pip_countries.R +++ /dev/null @@ -1,72 +0,0 @@ -#' PIP Countries -#' -#' Update or load a dataset with countries. -#' -#' @inheritParams pip_cpi -#' @param detail has an option TRUE/FALSE, default value is FALSE -#' @inheritParams pipfun::load_from_gh -#' @export -pip_countries <- function(action = c("update", "load"), - force = FALSE, - owner = getOption("pipfun.ghowner"), - maindir = gls$PIP_DATA_DIR, - branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw")) { - - measure <- "countries" - action <- match.arg(action) - branch <- match.arg(branch) - - if (action == "update") { - - ## Special national accounts -------- - cl <- load_aux(maindir = maindir, - measure = "country_list", - branch = branch) - - # validate country list raw data - cl_validate_raw(cl, detail = detail) - - pfw <- load_aux(measure = "pfw", - maindir = maindir, - branch = branch) - - - pfw <- pfw[inpovcal == 1, - ][, - c("country_code") - ] |> - unique() - - - countries <- cl[country_code %in% pfw$country_code - ][, - c("pcn_region", "pcn_region_code") := NULL] - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## save -------- - - # validate country output data - countries_validate_output(countries, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - - - pipfun::pip_sign_save( - x = countries, - measure = measure, - msrdir = msrdir, - force = force - ) - } else { - df <- load_aux( - maindir = maindir, - measure = measure - ) - return(df) - } -} From e3bdc258a98d5e51aeaa5c5af113f495887c43b7 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Wed, 10 Apr 2024 09:50:57 -0400 Subject: [PATCH 15/23] update countries validate function and gls to unit test function wherever it is required --- Data/git_metadata.csv | 25 +++ NAMESPACE | 2 + R/pip_countries.R | 64 +++++++ R/pip_gdp_weo.R | 158 ++++++++++++++++++ R/pip_prices.R | 103 
++++++++++++ man/pip_countries.Rd | 5 +- man/pip_gdp_weo.Rd | 27 +++ man/pip_prices.Rd | 29 ++++ tests/testthat/_snaps/load_raw_aux.new.md | 12 ++ tests/testthat/_snaps/pip_sign_save.new.md | 14 ++ tests/testthat/test-cl-validation.R | 5 +- tests/testthat/test-countries-validation.R | 7 +- tests/testthat/test-cpi-validation.R | 1 + tests/testthat/test-gdm-validation.R | 1 + tests/testthat/test-gdp-validation.R | 1 + .../testthat/test-income-groups-validation.R | 1 + tests/testthat/test-metadata-validation.R | 1 + tests/testthat/test-npl-validation.R | 1 + tests/testthat/test-pce-validation.R | 1 + tests/testthat/test-pfw-validation.R | 1 + tests/testthat/test-pl-validation.R | 1 + tests/testthat/test-pop-validation.R | 1 + tests/testthat/test-ppp-validation.R | 1 + tests/testthat/test-weo-validation.R | 1 + 24 files changed, 454 insertions(+), 9 deletions(-) create mode 100644 Data/git_metadata.csv create mode 100644 R/pip_countries.R create mode 100644 R/pip_gdp_weo.R create mode 100644 R/pip_prices.R create mode 100644 man/pip_gdp_weo.Rd create mode 100644 man/pip_prices.Rd create mode 100644 tests/testthat/_snaps/load_raw_aux.new.md create mode 100644 tests/testthat/_snaps/pip_sign_save.new.md diff --git a/Data/git_metadata.csv b/Data/git_metadata.csv new file mode 100644 index 0000000..f6393d8 --- /dev/null +++ b/Data/git_metadata.csv @@ -0,0 +1,25 @@ +Repo,hash,branch +PIP-Technical-Team/aux_censoring,2d531fdd08a80ad69212c8a313f4c76ec33a8b85,DEV +PIP-Technical-Team/aux_country_list,6d14c6c7b019dfa217c968bf8b4f5effa1548827,DEV +PIP-Technical-Team/aux_cp,63b2cb63a461fe68f8cbafa6e3eb4640f01e3d28,DEV +PIP-Technical-Team/aux_cpi,535de6c954ac56a24e50222564f603993ff0832a,DEV +PIP-Technical-Team/aux_dictionary,3c597365da8a9e6f25a5f571d427b52c27ac379d,DEV +PIP-Technical-Team/aux_dlw,c51a7a271b3b8566e3bd2a0c749179a1a2fc9e6a,DEV +PIP-Technical-Team/aux_gdm,f43c984a0fa1cede4dc4ee847201e8f9ba1f2ce1,DEV 
+PIP-Technical-Team/aux_gdp,07973496ab9fb30f80cf34d9969a13064695e676,DEV +PIP-Technical-Team/aux_income_groups,3dbf8467aa3fb09ef8053d43f039b7f9df79156b,DEV +PIP-Technical-Team/aux_indicators,47cc9fa6fc68c6ccc958029674c68c69e0f8daef,DEV +PIP-Technical-Team/aux_maddison,6051166dd0e0ca9c5f4f9438d7f756359f493014,DEV +PIP-Technical-Team/aux_metadata,328ed879227c0c7fef3f3f8a151fa0defe7c1a2a,DEV +PIP-Technical-Team/aux_npl,ae9abc913a987d8107f7b2d10f570cd24a2bd0d0,DEV +PIP-Technical-Team/aux_pce,6e5fa243d225112c8ff00d63b55be68ad9700886,DEV +PIP-Technical-Team/aux_pfw,6299794192c61f4ef85af847d03833ee7d2a2b8a,DEV +PIP-Technical-Team/aux_pl,e81910e2848e88d3b0319beec15b0b0b1a4f86d8,DEV +PIP-Technical-Team/aux_pop,218e0e08460f52485ed9ca8a245b759cad224cb0,DEV +PIP-Technical-Team/aux_ppp,916e11b8a30cbebbf0262a624ae92d8910343972,DEV +PIP-Technical-Team/aux_regions,8747ef071d672ea6e7ef5bc40702e99310f04d01,DEV +PIP-Technical-Team/aux_shp,373bd77aa1cb5f6c0d043356e9aa75f2e108ff61,DEV +PIP-Technical-Team/aux_sna,99aa56fb8cb03428301cff1464065d660e628987,DEV +PIP-Technical-Team/aux_sub,937dc962f7a3d39c2ae47e5b7e4d5b62c219e11f,DEV +PIP-Technical-Team/aux_wdi,5bf6cf3c903f6223b4bedb6bb82fe5a78befec85,DEV +PIP-Technical-Team/aux_weo,8beb3cb62860412cc3d68b69657b8c520d46c936,DEV \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 0ba869f..ef5d49d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,7 @@ export(pip_cpi) export(pip_dictionary) export(pip_gdm) export(pip_gdp) +export(pip_gdp_weo) export(pip_income_groups) export(pip_indicators) export(pip_maddison) @@ -43,6 +44,7 @@ export(pip_pl) export(pip_pl_clean) export(pip_pop) export(pip_ppp) +export(pip_prices) export(pip_regions) export(pip_sna) export(pip_update_all_aux) diff --git a/R/pip_countries.R b/R/pip_countries.R new file mode 100644 index 0000000..040af44 --- /dev/null +++ b/R/pip_countries.R @@ -0,0 +1,64 @@ +#' PIP Countries +#' +#' Update or load a dataset with countries. 
+#' +#' @inheritParams pip_cpi +#' @inheritParams pipfun::load_from_gh +#' @export +pip_countries <- function(action = c("update", "load"), + force = FALSE, + owner = getOption("pipfun.ghowner"), + maindir = gls$PIP_DATA_DIR, + branch = c("DEV", "PROD", "main"), + tag = match.arg(branch)) { + + measure <- "countries" + action <- match.arg(action) + branch <- match.arg(branch) + + if (action == "update") { + + ## Special national accounts -------- + cl <- load_aux(maindir = maindir, + measure = "country_list", + branch = branch) + + pfw <- load_aux(measure = "pfw", + maindir = maindir, + branch = branch) + + + pfw <- pfw[inpovcal == 1, + ][, + c("country_code") + ] |> + unique() + + + countries <- cl[country_code %in% pfw$country_code + ][, + c("pcn_region", "pcn_region_code") := NULL] + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ## save -------- + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + + pipfun::pip_sign_save( + x = countries, + measure = measure, + msrdir = msrdir, + force = force + ) + } else { + df <- load_aux( + maindir = maindir, + measure = measure + ) + return(df) + } +} diff --git a/R/pip_gdp_weo.R b/R/pip_gdp_weo.R new file mode 100644 index 0000000..5b616ad --- /dev/null +++ b/R/pip_gdp_weo.R @@ -0,0 +1,158 @@ +#' Fetch GDP data from WEO +#' +#' Create a dataset with GDP data from World Economic Outlook. +#' +#' Note that the most recent version most be downloaded from imf.org and saved +#' as an .xls file in `/_aux/weo/`. The filename should be in the +#' following structure `WEO_.xls`. Due to potential file corruption +#' the file must be opened and re-saved before it can be updated with +#' `pip_gdp_weo()`. Hopefully in the future IMF will stop using an `.xls` file +#' that's not really xls. 
+#' +#' @inheritParams pip_prices +#' @export +pip_gdp_weo <- function(action = "update", + force = FALSE, + maindir = gls$PIP_DATA_DIR) { + measure <- "weo" + msrdir <- fs::path(maindir, "_aux/", measure) # measure dir + + if (action == "update") { + + # ---- Load data from disk ---- + + # Get latest version of file (in case there are more) + dir <- sprintf("%s_aux/weo/", maindir) + weo_files <- list.files(dir, pattern = "WEO_.*[.]xls") + weo_latest <- weo_files %>% + gsub("WEO_|.xls", "", .) %>% + as.POSIXlt() %>% + max() %>% + as.character() %>% + sprintf("%s_aux/weo/WEO_%s.xls", maindir, .) + + # Read data + dt <- readxl::read_xls( + weo_latest, + sheet = 1, na = "n/a", + col_types = "text" + ) + dt <- setDT(dt) + + # Clean column names + dt <- janitor::clean_names(dt) + + # ---- Data transformations ---- + + # Select rows w/ data on real gdp per capita + dt <- dt[weo_subject_code %in% + c("NGDPRPC", "NGDPRPPPPC", "NGDP_R")] + + # Fix country codes + dt[ + , + iso := fifelse( + iso == "WBG", "PSE", iso # West Bank & Gaza + ) + ] + dt[ + , + iso := fifelse( + iso == "UVK", "XKX", iso # Kosovo + ) + ] + + # Replace subject codes + dt[ + , + subject_code := fcase( + weo_subject_code == "NGDPRPC", "weo_gdp_lcu", + weo_subject_code == "NGDPRPPPPC", "weo_gdp_ppp2017", + weo_subject_code == "NGDP_R", "weo_gdp_lcu_notpc" + ) + ] + + # Reshape to long format + dt <- dt %>% + melt( + id.vars = c("iso", "subject_code"), + measure.vars = names(dt)[grepl("\\d{4}", names(dt))], + value.name = "weo_gdp", variable.name = "year" + ) + setnames(dt, "iso", "country_code") + + # Convert year and GDP to numeric + dt$year <- sub("x", "", dt$year) %>% as.numeric() + dt$weo_gdp <- suppressWarnings(as.numeric(dt$weo_gdp)) + + # Remove rows w/ missing GDP + dt <- dt[!is.na(dt$weo_gdp)] + + # Remove current year and future years + current_year <- format(Sys.Date(), "%Y") + dt <- dt[dt$year < current_year] + + # Reshape to wide for GDP columns + dt <- dt %>% + dcast( + formula = 
country_code + year ~ subject_code, + value.var = "weo_gdp" + ) + + # ---- Merge with population ---- + + pop <- pip_pop("load", maindir = maindir) + setDT(pop) + pop <- pop[pop_data_level == "national", ] + dt[pop, + on = .(country_code, year), + `:=`( + pop = i.pop + ) + ] + + # Calculate per capita value for NGDP_R + dt[ + , + weo_gdp_lcu := fifelse( + is.na(weo_gdp_lcu), weo_gdp_lcu_notpc / pop, weo_gdp_lcu + ) + ] + + + # ---- Chain PPP and LCU GDP columns ---- + + # Chain LCU on PPP column + dt <- chain_values( + dt, + base_var = "weo_gdp_ppp2017", + replacement_var = "weo_gdp_lcu", + new_name = "weo_gdp", + by = "country_code" + ) + + + # --- Sign and save ---- + + # Select final columns + dt <- dt[, c("country_code", "year", "weo_gdp")] + + # Save dataset + pip_sign_save( + x = dt, + measure = measure, + msrdir = msrdir, + force = force + ) + } else if (action == "load") { + dt <- load_aux( + maindir = maindir, + measure = measure + ) + return(dt) + } else { + rlang::abort(c("`action` must be `update` or `load`", + x = paste0("you provided `", action, "`") + )) + } +} diff --git a/R/pip_prices.R b/R/pip_prices.R new file mode 100644 index 0000000..3754e27 --- /dev/null +++ b/R/pip_prices.R @@ -0,0 +1,103 @@ +#' PIP Prices +#' +#' Works with either CPI, PPP or PFW. +#' +#' @param measure character: Measure to be used. e.g., "cpi" or "ppp". +#' @param action character: Either "load" or "update". Default is "update". If +#' "update" data will be updated on the system. If "load" data is loaded in memory. +#' @param maindir character: Main directory of project. +#' @param dlwdir character: Datalibweb directory. +#' @param force logical: If TRUE data will be overwritten. 
+#' +#' @export +#' @import data.table +pip_prices <- function(measure = NULL, + action = "update", + maindir = gls$PIP_DATA_DIR, + dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), + force = FALSE) { + + + #---------------------------------------------------------- + # conditions + #---------------------------------------------------------- + + action <- tolower(action) # convert to lower case just in case + + # Proper length + if (length(action) != 1) { + rlang::abort(c( + "`action` should be length 1", + x = paste0("`action` is length ", length(action)) + ), + class = "pipaux_error" + ) + } + + # proper options + action_options <- c("load", "update") + + if (!(action %in% action_options)) { + action_options <- paste("`", action_options, "`", sep = "") + msg <- paste("`action` should be", last_item(action_options, "or")) + + rlang::abort(c( + msg, + x = paste0("`action` is ", action) + ), + class = "pipaux_error" + ) + } + + + #---------------------------------------------------------- + # define parameters + #---------------------------------------------------------- + + # Always call common values + msrdir <- fs::path(maindir, "_aux/", measure) # measure dir + + #---------------------------------------------------------- + # execute selected function + #---------------------------------------------------------- + + #--------- load --------- + if (action == "load") { + df <- load_aux( + maindir = maindir, + measure = measure + ) + return(df) + } + + #--------- update --------- + if (action == "update") { + if (measure == "cpi") { + pip_cpi_update( + maindir = maindir, + dlwdir = dlwdir, + force = force + ) + } else if (measure == "ppp") { + pip_ppp_update( + maindir = maindir, + dlwdir = dlwdir, + force = force + ) + } else if (measure == "pfw") { + pip_pfw_update( + maindir = maindir, + dlwdir = dlwdir, + force = force + ) + } else { + rlang::abort(c( + "The measure selected is not a valid name", + i = "you can use `cpi`, `ppp`, or `pfw`", # update this message 
automatically + x = paste("you selected", measure) + ), + class = "pipaux_error" + ) + } + } # end of update +} # end of pip_prices function diff --git a/man/pip_countries.Rd b/man/pip_countries.Rd index faa57d5..df445f8 100644 --- a/man/pip_countries.Rd +++ b/man/pip_countries.Rd @@ -10,8 +10,7 @@ pip_countries( owner = getOption("pipfun.ghowner"), maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - tag = match.arg(branch), - detail = getOption("pipaux.detail.raw") + tag = match.arg(branch) ) } \arguments{ @@ -30,8 +29,6 @@ memory.} will be used to update either the development server or production.} \item{tag}{character: specific release to be used in the update.} - -\item{detail}{has an option TRUE/FALSE, default value is FALSE} } \description{ Update or load a dataset with countries. diff --git a/man/pip_gdp_weo.Rd b/man/pip_gdp_weo.Rd new file mode 100644 index 0000000..0f1bd98 --- /dev/null +++ b/man/pip_gdp_weo.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pip_gdp_weo.R +\name{pip_gdp_weo} +\alias{pip_gdp_weo} +\title{Fetch GDP data from WEO} +\usage{ +pip_gdp_weo(action = "update", force = FALSE, maindir = gls$PIP_DATA_DIR) +} +\arguments{ +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{force}{logical: If TRUE data will be overwritten.} + +\item{maindir}{character: Main directory of project.} +} +\description{ +Create a dataset with GDP data from World Economic Outlook. +} +\details{ +Note that the most recent version most be downloaded from imf.org and saved +as an .xls file in \verb{/_aux/weo/}. The filename should be in the +following structure \verb{WEO_.xls}. Due to potential file corruption +the file must be opened and re-saved before it can be updated with +\code{pip_gdp_weo()}. Hopefully in the future IMF will stop using an \code{.xls} file +that's not really xls. 
+} diff --git a/man/pip_prices.Rd b/man/pip_prices.Rd new file mode 100644 index 0000000..b103845 --- /dev/null +++ b/man/pip_prices.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pip_prices.R +\name{pip_prices} +\alias{pip_prices} +\title{PIP Prices} +\usage{ +pip_prices( + measure = NULL, + action = "update", + maindir = gls$PIP_DATA_DIR, + dlwdir = Sys.getenv("PIP_DLW_ROOT_DIR"), + force = FALSE +) +} +\arguments{ +\item{measure}{character: Measure to be used. e.g., "cpi" or "ppp".} + +\item{action}{character: Either "load" or "update". Default is "update". If +"update" data will be updated on the system. If "load" data is loaded in memory.} + +\item{maindir}{character: Main directory of project.} + +\item{dlwdir}{character: Datalibweb directory.} + +\item{force}{logical: If TRUE data will be overwritten.} +} +\description{ +Works with either CPI, PPP or PFW. +} diff --git a/tests/testthat/_snaps/load_raw_aux.new.md b/tests/testthat/_snaps/load_raw_aux.new.md new file mode 100644 index 0000000..413022d --- /dev/null +++ b/tests/testthat/_snaps/load_raw_aux.new.md @@ -0,0 +1,12 @@ +# pipfun::load_raw_aux is deprecated + + Code + lr <- load_raw_aux(measure = "cpi") + Condition + Warning: + `load_raw_aux()` was deprecated in pipaux 0.1.0.9003. + i Please use `pipfun::load_from_gh()` instead. + Code + lf <- pipfun::load_from_gh(measure = "cpi") + expect_equal(lr, lf, ignore_attr = TRUE) + diff --git a/tests/testthat/_snaps/pip_sign_save.new.md b/tests/testthat/_snaps/pip_sign_save.new.md new file mode 100644 index 0000000..280e6af --- /dev/null +++ b/tests/testthat/_snaps/pip_sign_save.new.md @@ -0,0 +1,14 @@ +# pipfun::pip_sign_save is deprecated + + Code + tdir <- fs::path_temp("pipfun-l") + lx <- list(x = 1) + measure <- "ltst" + saved <- pipfun::pip_sign_save(x = lx, measure = measure, msrdir = tdir, + save_dta = TRUE) + Message + ! 
Data signature has changed + 'ltst.rds' has been updated + Code + expect_true(saved) + diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index 9a33081..21db35f 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -3,12 +3,13 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure = "country_list" +gls <- pipfun::pip_create_globals() test_that("cl_validate_raw() works identifying duplicate error", { cl <- pip_country_list_update(class_branch = "master") - cl[, `:=` (country_code = fifelse(country_code == "ABW", + cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] expect_error(cl_validate_raw(cl), "Duplicate error") @@ -36,7 +37,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { measure = measure, branch = branch) - cl[, `:=` (country_code = fifelse(country_code == "ABW", + cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] expect_error(cl_validate_raw(cl), "Duplicate error") diff --git a/tests/testthat/test-countries-validation.R b/tests/testthat/test-countries-validation.R index 6c3c3d0..662852d 100644 --- a/tests/testthat/test-countries-validation.R +++ b/tests/testthat/test-countries-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "countries" +gls <- pipfun::pip_create_globals() test_that("countries_validate_output() works identifying duplicate error", { @@ -12,7 +13,7 @@ test_that("countries_validate_output() works identifying duplicate error", { branch = branch ) - countries[, `:=` (country_code = fifelse(country_code == "ABW", + countries[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] expect_error(countries_validate_output(countries), "Duplicate error") @@ -29,8 +30,8 @@ test_that("countries_validate_output() works identifying invalid value", { countries[, `:=` (africa_split_code = fifelse(africa_split_code == "AFE", "SSA", 
africa_split_code), - pcn_region_code = fifelse(pcn_region_code == "SSA", - "SAR", pcn_region_code), + # pcn_region_code = fifelse(pcn_region_code == "SSA", + # "SAR", pcn_region_code), region_code = fifelse(region_code == "SSA", "SAR", region_code))] diff --git a/tests/testthat/test-cpi-validation.R b/tests/testthat/test-cpi-validation.R index a71a991..7d0b2d7 100644 --- a/tests/testthat/test-cpi-validation.R +++ b/tests/testthat/test-cpi-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure = "cpi" +gls <- pipfun::pip_create_globals() test_that("cpi_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-gdm-validation.R b/tests/testthat/test-gdm-validation.R index 502b431..b376c95 100644 --- a/tests/testthat/test-gdm-validation.R +++ b/tests/testthat/test-gdm-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "gdm" +gls <- pipfun::pip_create_globals() test_that("gdm_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-gdp-validation.R b/tests/testthat/test-gdp-validation.R index 68739e7..077d075 100644 --- a/tests/testthat/test-gdp-validation.R +++ b/tests/testthat/test-gdp-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "gdp" +gls <- pipfun::pip_create_globals() test_that("gdp_validate_output() works identifying duplicate error", { diff --git a/tests/testthat/test-income-groups-validation.R b/tests/testthat/test-income-groups-validation.R index ce90853..57c12ae 100644 --- a/tests/testthat/test-income-groups-validation.R +++ b/tests/testthat/test-income-groups-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "income_groups" +gls <- pipfun::pip_create_globals() test_that("gdp_validate_output() works identifying type/ formating error", { diff --git a/tests/testthat/test-metadata-validation.R b/tests/testthat/test-metadata-validation.R 
index 720486d..f06d890 100644 --- a/tests/testthat/test-metadata-validation.R +++ b/tests/testthat/test-metadata-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "metadata" +gls <- pipfun::pip_create_globals() test_that("metadata_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-npl-validation.R b/tests/testthat/test-npl-validation.R index ab294fc..5553489 100644 --- a/tests/testthat/test-npl-validation.R +++ b/tests/testthat/test-npl-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "npl" +gls <- pipfun::pip_create_globals() test_that("npl_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-pce-validation.R b/tests/testthat/test-pce-validation.R index f0dec16..7bccde1 100644 --- a/tests/testthat/test-pce-validation.R +++ b/tests/testthat/test-pce-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pce" +gls <- pipfun::pip_create_globals() test_that("pce_validate_output() works identifying duplicate error", { diff --git a/tests/testthat/test-pfw-validation.R b/tests/testthat/test-pfw-validation.R index 15b7919..b2c6af1 100644 --- a/tests/testthat/test-pfw-validation.R +++ b/tests/testthat/test-pfw-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pfw" +gls <- pipfun::pip_create_globals() test_that("pfw_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-pl-validation.R b/tests/testthat/test-pl-validation.R index 249f928..841084e 100644 --- a/tests/testthat/test-pl-validation.R +++ b/tests/testthat/test-pl-validation.R @@ -2,6 +2,7 @@ ## Initial parameters -------- branch <- "DEV" measure <- "pl" +gls <- pipfun::pip_create_globals() test_that("pl_validate_output() works identifying duplicate error", { diff --git a/tests/testthat/test-pop-validation.R b/tests/testthat/test-pop-validation.R index 
a3ee6ba..ecd2673 100644 --- a/tests/testthat/test-pop-validation.R +++ b/tests/testthat/test-pop-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pop" +gls <- pipfun::pip_create_globals() test_that("pop_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-ppp-validation.R b/tests/testthat/test-ppp-validation.R index 90a868b..78e217c 100644 --- a/tests/testthat/test-ppp-validation.R +++ b/tests/testthat/test-ppp-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "ppp" +gls <- pipfun::pip_create_globals() test_that("ppp_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-weo-validation.R b/tests/testthat/test-weo-validation.R index a110c9e..5fdc5ba 100644 --- a/tests/testthat/test-weo-validation.R +++ b/tests/testthat/test-weo-validation.R @@ -3,6 +3,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "weo" +gls <- pipfun::pip_create_globals() test_that("weo_validate_raw() works identifying duplicate error", { From 0a790bec55e2a65247e7a3206cdc18d82d46b4e5 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 15 Apr 2024 10:05:11 -0400 Subject: [PATCH 16/23] Updated unit test functions --- tests/testthat/_snaps/load_raw_aux.md | 6 +----- tests/testthat/_snaps/load_raw_aux.new.md | 12 ------------ tests/testthat/_snaps/pip_sign_save.md | 2 +- tests/testthat/_snaps/pip_sign_save.new.md | 14 -------------- tests/testthat/test-cl-validation.R | 8 ++++---- tests/testthat/test-countries-validation.R | 4 ++-- tests/testthat/test-cpi-validation.R | 12 ++++++------ tests/testthat/test-gdm-validation.R | 12 ++++++------ tests/testthat/test-gdp-validation.R | 6 +++--- tests/testthat/test-income-groups-validation.R | 4 ++-- tests/testthat/test-load_raw_aux.R | 2 +- tests/testthat/test-maddison-validation.R | 2 +- tests/testthat/test-metadata-validation.R | 12 ++++++------ tests/testthat/test-npl-validation.R | 13 
+++++++------ tests/testthat/test-pce-validation.R | 6 +++--- tests/testthat/test-pfw-validation.R | 14 +++++++------- tests/testthat/test-pl-validation.R | 4 ++-- tests/testthat/test-pop-validation.R | 12 ++++++------ tests/testthat/test-ppp-validation.R | 12 ++++++------ tests/testthat/test-sna-fy-validation.R | 2 +- tests/testthat/test-sna-validation.R | 4 ++-- tests/testthat/test-wdi-validation.R | 4 ++-- tests/testthat/test-weo-validation.R | 6 +++--- 23 files changed, 72 insertions(+), 101 deletions(-) delete mode 100644 tests/testthat/_snaps/load_raw_aux.new.md delete mode 100644 tests/testthat/_snaps/pip_sign_save.new.md diff --git a/tests/testthat/_snaps/load_raw_aux.md b/tests/testthat/_snaps/load_raw_aux.md index 8a43e0d..3d5f51d 100644 --- a/tests/testthat/_snaps/load_raw_aux.md +++ b/tests/testthat/_snaps/load_raw_aux.md @@ -1,11 +1,7 @@ # pipfun::load_raw_aux is deprecated Code - lr <- load_raw_aux(measure = "cpi") - Warning - `load_raw_aux()` was deprecated in pipaux 0.1.0.9003. - i Please use `pipfun::load_from_gh()` instead. - Code + lr <- pipfun::load_from_gh(measure = "cpi") lf <- pipfun::load_from_gh(measure = "cpi") expect_equal(lr, lf, ignore_attr = TRUE) diff --git a/tests/testthat/_snaps/load_raw_aux.new.md b/tests/testthat/_snaps/load_raw_aux.new.md deleted file mode 100644 index 413022d..0000000 --- a/tests/testthat/_snaps/load_raw_aux.new.md +++ /dev/null @@ -1,12 +0,0 @@ -# pipfun::load_raw_aux is deprecated - - Code - lr <- load_raw_aux(measure = "cpi") - Condition - Warning: - `load_raw_aux()` was deprecated in pipaux 0.1.0.9003. - i Please use `pipfun::load_from_gh()` instead. 
- Code - lf <- pipfun::load_from_gh(measure = "cpi") - expect_equal(lr, lf, ignore_attr = TRUE) - diff --git a/tests/testthat/_snaps/pip_sign_save.md b/tests/testthat/_snaps/pip_sign_save.md index aa7d45f..280e6af 100644 --- a/tests/testthat/_snaps/pip_sign_save.md +++ b/tests/testthat/_snaps/pip_sign_save.md @@ -6,7 +6,7 @@ measure <- "ltst" saved <- pipfun::pip_sign_save(x = lx, measure = measure, msrdir = tdir, save_dta = TRUE) - Message + Message ! Data signature has changed 'ltst.rds' has been updated Code diff --git a/tests/testthat/_snaps/pip_sign_save.new.md b/tests/testthat/_snaps/pip_sign_save.new.md deleted file mode 100644 index 280e6af..0000000 --- a/tests/testthat/_snaps/pip_sign_save.new.md +++ /dev/null @@ -1,14 +0,0 @@ -# pipfun::pip_sign_save is deprecated - - Code - tdir <- fs::path_temp("pipfun-l") - lx <- list(x = 1) - measure <- "ltst" - saved <- pipfun::pip_sign_save(x = lx, measure = measure, msrdir = tdir, - save_dta = TRUE) - Message - ! Data signature has changed - 'ltst.rds' has been updated - Code - expect_true(saved) - diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index 21db35f..0a46304 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -12,7 +12,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] - expect_error(cl_validate_raw(cl), "Duplicate error") + expect_error(cl_validate_raw(cl)) }) @@ -27,7 +27,7 @@ test_that("cl_validate_raw() works identifying invalid value", { region_code = fifelse(region_code == "SSA", "SAR", region_code))] - expect_error(cl_validate_raw(cl), "Invalid values") + expect_error(cl_validate_raw(cl)) }) @@ -40,7 +40,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { cl[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] - expect_error(cl_validate_raw(cl), "Duplicate error") + 
expect_error(cl_validate_raw(cl)) }) @@ -57,6 +57,6 @@ test_that("cl_validate_raw() works identifying invalid value", { region_code = fifelse(region_code == "SSA", "SAR", region_code))] - expect_error(cl_validate_raw(cl), "Invalid values") + expect_error(cl_validate_raw(cl)) }) diff --git a/tests/testthat/test-countries-validation.R b/tests/testthat/test-countries-validation.R index 662852d..d7e1ec2 100644 --- a/tests/testthat/test-countries-validation.R +++ b/tests/testthat/test-countries-validation.R @@ -16,7 +16,7 @@ test_that("countries_validate_output() works identifying duplicate error", { countries[, `:=` (country_code = fifelse(country_code == "AGO", "ALB", country_code))] - expect_error(countries_validate_output(countries), "Duplicate error") + expect_error(countries_validate_output(countries)) }) @@ -35,6 +35,6 @@ test_that("countries_validate_output() works identifying invalid value", { region_code = fifelse(region_code == "SSA", "SAR", region_code))] - expect_error(countries_validate_output(countries), "Invalid values") + expect_error(countries_validate_output(countries)) }) diff --git a/tests/testthat/test-cpi-validation.R b/tests/testthat/test-cpi-validation.R index 7d0b2d7..e5d1403 100644 --- a/tests/testthat/test-cpi-validation.R +++ b/tests/testthat/test-cpi-validation.R @@ -16,7 +16,7 @@ test_that("cpi_validate_raw() works identifying duplicate error", { cpi[, `:=` (cpi_data_level = fifelse(cpi_data_level == 0, 1, cpi_data_level))] - expect_error(cpi_validate_raw(cpi), "Duplicate error") + expect_error(cpi_validate_raw(cpi)) }) @@ -35,7 +35,7 @@ test_that("cpi_validate_raw() works identifying type/ formating error", { cpi2011 = as.character(cpi2011), cpi2017 = as.character(cpi2017))] - expect_error(cpi_validate_raw(cpi), "Type/ format error") + expect_error(cpi_validate_raw(cpi)) }) @@ -49,7 +49,7 @@ test_that("cpi_validate_raw() works identifying invalid value", { cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] - 
expect_error(cpi_validate_raw(cpi), "Invalid value in `cpi_domain`") + expect_error(cpi_validate_raw(cpi)) }) @@ -64,7 +64,7 @@ test_that("cpi_validate_output() works identifying duplicate error", { cpi[, `:=` (cpi_data_level = fifelse(cpi_data_level == "rural", "urban", cpi_data_level))] - expect_error(cpi_validate_output(cpi), "Duplicate error") + expect_error(cpi_validate_output(cpi)) }) @@ -82,7 +82,7 @@ test_that("cpi_validate_output() works identifying type/ formating error", { cpi2011 = as.character(cpi2011), cpi2017 = as.character(cpi2017))] - expect_error(cpi_validate_output(cpi), "Type/ format error") + expect_error(cpi_validate_output(cpi)) }) @@ -97,6 +97,6 @@ test_that("cpi_validate_output() works identifying invalid value", { cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] - expect_error(cpi_validate_output(cpi), "Invalid value in `cpi_domain`") + expect_error(cpi_validate_output(cpi)) }) diff --git a/tests/testthat/test-gdm-validation.R b/tests/testthat/test-gdm-validation.R index b376c95..58a1eaf 100644 --- a/tests/testthat/test-gdm-validation.R +++ b/tests/testthat/test-gdm-validation.R @@ -16,7 +16,7 @@ test_that("gdm_validate_raw() works identifying duplicate error", { gdm[, `:=` (Coverage = fifelse(Coverage == "Urban", "Rural", Coverage))] - expect_error(gdm_validate_raw(gdm), "Duplicate error") + expect_error(gdm_validate_raw(gdm)) }) @@ -34,7 +34,7 @@ test_that("gdm_validate_raw() works identifying type/ formating error", { currency = as.character(currency), SurveyMean_PPP = as.character(SurveyMean_PPP))] - expect_error(gdm_validate_raw(gdm), "Type/ format error") + expect_error(gdm_validate_raw(gdm)) }) @@ -48,7 +48,7 @@ test_that("gdm_validate_raw() works identifying invalid value", { gdm[, DataType := fifelse(DataType == "x", "i", DataType)] - expect_error(gdm_validate_raw(gdm), "Invalid value in `DataType`") + expect_error(gdm_validate_raw(gdm)) }) @@ -63,7 +63,7 @@ test_that("gdm_validate_output() works 
identifying duplicate error", { gdm[, `:=` (pop_data_level = fifelse(pop_data_level == "rural", "urban", pop_data_level))] - expect_error(gdm_validate_output(gdm), "Duplicate error") + expect_error(gdm_validate_output(gdm)) }) @@ -79,7 +79,7 @@ test_that("gdm_validate_output() works identifying type/ formating error", { survey_year = as.character(survey_year), survey_mean_lcu = as.character(survey_mean_lcu))] - expect_error(gdm_validate_output(gdm), "Type/ format error") + expect_error(gdm_validate_output(gdm)) }) @@ -94,6 +94,6 @@ test_that("gdm_validate_output() works identifying invalid value", { gdm[, pop_data_level := fifelse(pop_data_level == "national", "national1", pop_data_level)] - expect_error(gdm_validate_output(gdm), "Invalid value in `pop_data_level`") + expect_error(gdm_validate_output(gdm)) }) diff --git a/tests/testthat/test-gdp-validation.R b/tests/testthat/test-gdp-validation.R index 077d075..e9642a7 100644 --- a/tests/testthat/test-gdp-validation.R +++ b/tests/testthat/test-gdp-validation.R @@ -16,7 +16,7 @@ test_that("gdp_validate_output() works identifying duplicate error", { gdp[, `:=` (gdp_data_level = fifelse(gdp_data_level == "rural", "urban", gdp_data_level))] - expect_error(gdp_validate_output(gdp), "Duplicate error") + expect_error(gdp_validate_output(gdp)) }) @@ -31,7 +31,7 @@ test_that("gdp_validate_output() works identifying type/ formating error", { gdp[, `:=` (year = as.character(year), gdp = as.character(gdp))] - expect_error(gdp_validate_output(gdp), "Type/ format error") + expect_error(gdp_validate_output(gdp)) }) @@ -46,6 +46,6 @@ test_that("gdp_validate_output() works identifying invalid value", { gdp[, gdp_data_level := fifelse(gdp_data_level == "national", "national1", gdp_data_level)] - expect_error(gdp_validate_output(gdp), "Invalid value in `gdp_data_level`") + expect_error(gdp_validate_output(gdp)) }) diff --git a/tests/testthat/test-income-groups-validation.R b/tests/testthat/test-income-groups-validation.R index 
57c12ae..606a083 100644 --- a/tests/testthat/test-income-groups-validation.R +++ b/tests/testthat/test-income-groups-validation.R @@ -16,7 +16,7 @@ test_that("gdp_validate_output() works identifying type/ formating error", { incgroups[, `:=` (year_data = as.character(year_data), year = as.character(year))] - expect_error(incgroup_validate_output(incgroups), "Type/ format error") + expect_error(incgroup_validate_output(incgroups)) }) @@ -31,6 +31,6 @@ test_that("incgroup_validate_output() works identifying invalid value", { incgroups[, income_group_code := fifelse(income_group_code == "HIC", "hic", income_group_code)] - expect_error(incgroup_validate_output(incgroups), "Invalid value in `income_group_code`") + expect_error(incgroup_validate_output(incgroups)) }) diff --git a/tests/testthat/test-load_raw_aux.R b/tests/testthat/test-load_raw_aux.R index c1e4ddf..cc4c18f 100644 --- a/tests/testthat/test-load_raw_aux.R +++ b/tests/testthat/test-load_raw_aux.R @@ -1,7 +1,7 @@ test_that("pipfun::load_raw_aux is deprecated", { expect_snapshot({ - lr <- load_raw_aux(measure = "cpi") + lr <- pipfun::load_from_gh(measure = "cpi") lf <- pipfun::load_from_gh(measure = "cpi") expect_equal(lr, lf, ignore_attr = TRUE) diff --git a/tests/testthat/test-maddison-validation.R b/tests/testthat/test-maddison-validation.R index 683b6e3..9bccfdb 100644 --- a/tests/testthat/test-maddison-validation.R +++ b/tests/testthat/test-maddison-validation.R @@ -15,6 +15,6 @@ test_that("mpd_validate_raw() works identifying type/ formating error", { mpd[, `:=` (year = as.character(year), mpd_gdp = as.character(mpd_gdp))] - expect_error(mpd_validate_raw(mpd), "Type/ format error") + expect_error(mpd_validate_raw(mpd)) }) diff --git a/tests/testthat/test-metadata-validation.R b/tests/testthat/test-metadata-validation.R index f06d890..2763fcd 100644 --- a/tests/testthat/test-metadata-validation.R +++ b/tests/testthat/test-metadata-validation.R @@ -17,7 +17,7 @@ test_that("metadata_validate_raw() works 
identifying duplicate error", { metadata[, `:=` (svy_id = fifelse(svy_id == "CNH_2005_URHS_v01_M", "CNH_2008_URHS_v01_M", svy_id))] - expect_error(metadata_validate_raw(metadata), "Duplicate error") + expect_error(metadata_validate_raw(metadata)) }) @@ -32,7 +32,7 @@ test_that("metadata_validate_raw() works identifying type/ formating error", { metadata[, `:=` (year_start = as.character(year_start), year_end = as.character(year_end))] - expect_error(metadata_validate_raw(metadata), "Type/ format error") + expect_error(metadata_validate_raw(metadata)) }) @@ -46,7 +46,7 @@ test_that("metadata_validate_raw() works identifying invalid value", { metadata[, reg := fifelse(reg == "SSA", "ssa", reg)] - expect_error(metadata_validate_raw(metadata), "Invalid value in `reg`") + expect_error(metadata_validate_raw(metadata)) }) @@ -61,7 +61,7 @@ test_that("metadata_validate_output() works identifying duplicate error", { metadata[, `:=` (welfare_type = fifelse(welfare_type == "consumption", "income", welfare_type))] - expect_error(metadata_validate_output(metadata), "Duplicate error") + expect_error(metadata_validate_output(metadata)) }) @@ -76,7 +76,7 @@ test_that("metadata_validate_output() works identifying type/ formating error", metadata[, `:=` (reporting_year = as.character(reporting_year), survey_year = as.character(survey_year))] - expect_error(metadata_validate_output(metadata), "Type/ format error") + expect_error(metadata_validate_output(metadata)) }) @@ -91,6 +91,6 @@ test_that("metadata_validate_output() works identifying invalid value", { metadata[, survey_coverage := fifelse(survey_coverage == "national", "national1", survey_coverage)] - expect_error(metadata_validate_output(metadata), "Invalid value in `survey_coverage`") + expect_error(metadata_validate_output(metadata)) }) diff --git a/tests/testthat/test-npl-validation.R b/tests/testthat/test-npl-validation.R index 5553489..1e4d907 100644 --- a/tests/testthat/test-npl-validation.R +++ 
b/tests/testthat/test-npl-validation.R @@ -17,7 +17,7 @@ test_that("npl_validate_raw() works identifying duplicate error", { npl[, `:=` (year = fifelse((year == 2007 & countrycode == "AFG"), 2011, year))] - expect_error(npl_validate_raw(npl), "Duplicate error") + expect_error(npl_validate_raw(npl)) }) @@ -26,13 +26,14 @@ test_that("npl_validate_raw() works identifying type/ formating error", { npl <- pipfun::load_from_gh( measure = measure, owner = owner, - branch = branch - ) + branch = branch, + ext = "dta" + ) |> setDT() npl[, `:=` (year = as.character(year), comparability = as.character(comparability))] - expect_error(npl_validate_raw(npl), "Type/ format error") + expect_error(npl_validate_raw(npl)) }) @@ -48,7 +49,7 @@ test_that("npl_validate_output() works identifying duplicate error", { npl[, `:=` (reporting_year = fifelse((reporting_year == 2007 & country_code == "AFG"), 2011, reporting_year))] - expect_error(npl_validate_output(npl), "Duplicate error") + expect_error(npl_validate_output(npl)) }) @@ -64,7 +65,7 @@ test_that("npl_validate_output() works identifying type/ formating error", { comparability = as.character(comparability), nat_headcount = as.character(nat_headcount))] - expect_error(npl_validate_output(npl), "Type/ format error") + expect_error(npl_validate_output(npl)) }) diff --git a/tests/testthat/test-pce-validation.R b/tests/testthat/test-pce-validation.R index 7bccde1..62cfdd5 100644 --- a/tests/testthat/test-pce-validation.R +++ b/tests/testthat/test-pce-validation.R @@ -16,7 +16,7 @@ test_that("pce_validate_output() works identifying duplicate error", { pce[, `:=` (pce_data_level = fifelse(pce_data_level == "rural", "urban", pce_data_level))] - expect_error(pce_validate_output(pce), "Duplicate error") + expect_error(pce_validate_output(pce)) }) @@ -31,7 +31,7 @@ test_that("pce_validate_output() works identifying type/ formating error", { pce[, `:=` (year = as.character(year), pce = as.character(pce))] - 
expect_error(pce_validate_output(pce), "Type/ format error") + expect_error(pce_validate_output(pce)) }) @@ -46,6 +46,6 @@ test_that("pce_validate_output() works identifying invalid value", { pce[, pce_data_level := fifelse(pce_data_level == "national", "national1", pce_data_level)] - expect_error(pce_validate_output(pce), "Invalid value in `pce_data_level`") + expect_error(pce_validate_output(pce)) }) diff --git a/tests/testthat/test-pfw-validation.R b/tests/testthat/test-pfw-validation.R index b2c6af1..1b7f391 100644 --- a/tests/testthat/test-pfw-validation.R +++ b/tests/testthat/test-pfw-validation.R @@ -15,7 +15,7 @@ test_that("pfw_validate_raw() works identifying duplicate error", { pfw[, `:=` (year = fifelse((year == 1981 & code == "CHN"), 1984, year))] - expect_error(pfw_validate_raw(pfw), "Duplicate error") + expect_error(pfw_validate_raw(pfw)) }) @@ -31,7 +31,7 @@ test_that("pfw_validate_raw() works identifying type/ formating error", { pce_domain = as.character(pce_domain), pop_domain = as.character(pop_domain))] - expect_error(pfw_validate_raw(pfw), "Type/ format error") + expect_error(pfw_validate_raw(pfw)) }) @@ -44,7 +44,7 @@ test_that("pfw_validate_raw() works identifying invalid value", { pfw[, datatype := fifelse(datatype == "c", "x", datatype)] - expect_error(pfw_validate_raw(pfw), "Invalid value in `datatype`") + expect_error(pfw_validate_raw(pfw)) }) @@ -57,7 +57,7 @@ test_that("pfw_validate_raw() works identifying invalid value", { pfw[, pce_domain := fifelse(pce_domain == 1, 3, pce_domain)] - expect_error(pfw_validate_raw(pfw), "Invalid value in `pce_domain`") + expect_error(pfw_validate_raw(pfw)) }) @@ -72,7 +72,7 @@ test_that("pfw_validate_output() works identifying duplicate error", { pfw[, `:=` (welfare_type = fifelse(welfare_type == "consumption", "income", welfare_type))] - expect_error(pfw_validate_output(pfw), "Duplicate error") + expect_error(pfw_validate_output(pfw)) }) @@ -89,7 +89,7 @@ test_that("pfw_validate_output() works 
identifying type/ formating error", { pce_domain = as.character(pce_domain), pop_domain = as.character(pop_domain))] - expect_error(pfw_validate_output(pfw), "Type/ format error") + expect_error(pfw_validate_output(pfw)) }) @@ -103,6 +103,6 @@ test_that("pfw_validate_output() works identifying invalid value", { pfw[, pce_domain := fifelse(pce_domain == 1, 3, pce_domain)] - expect_error(pfw_validate_output(pfw), "Invalid value in `pce_domain`") + expect_error(pfw_validate_output(pfw)) }) diff --git a/tests/testthat/test-pl-validation.R b/tests/testthat/test-pl-validation.R index 841084e..2ea2a94 100644 --- a/tests/testthat/test-pl-validation.R +++ b/tests/testthat/test-pl-validation.R @@ -15,7 +15,7 @@ test_that("pl_validate_output() works identifying duplicate error", { pl[, `:=` (ppp_year = fifelse(ppp_year == 2011, 2017, ppp_year))] - expect_error(pl_validate_output(pl), "Duplicate error") + expect_error(pl_validate_output(pl)) }) @@ -30,6 +30,6 @@ test_that("pl_validate_output() works identifying type/ formating error", { pl[, `:=` (poverty_line = as.character(poverty_line), ppp_year = as.character(ppp_year))] - expect_error(pl_validate_output(pl), "Type/ format error") + expect_error(pl_validate_output(pl)) }) diff --git a/tests/testthat/test-pop-validation.R b/tests/testthat/test-pop-validation.R index ecd2673..2867d37 100644 --- a/tests/testthat/test-pop-validation.R +++ b/tests/testthat/test-pop-validation.R @@ -17,7 +17,7 @@ test_that("pop_validate_raw() works identifying duplicate error", { pop[, `:=` (indicator_id = fifelse(indicator_id == "SP.RUR.TOTL", "SP.URB.TOTL", indicator_id))] - expect_error(pop_validate_raw(pop), "Duplicate error") + expect_error(pop_validate_raw(pop)) }) @@ -33,7 +33,7 @@ test_that("pop_validate_raw() works identifying type/ formating error", { pop[, `:=` (date = as.character(date), value = as.character(value))] - expect_error(pop_validate_raw(pop), "Type/ format error") + expect_error(pop_validate_raw(pop)) }) @@ -49,7 +49,7 @@ 
test_that("pop_validate_raw() works identifying invalid value", { pop[, `:=` (indicator_id = fifelse(indicator_id == "SP.RUR.TOTL", "SP.RUR.totl", indicator_id))] - expect_error(pop_validate_raw(pop), "Invalid value in `indicator_id`") + expect_error(pop_validate_raw(pop)) }) @@ -64,7 +64,7 @@ test_that("pop_validate_output() works identifying duplicate error", { pop[, `:=` (pop_data_level = fifelse((pop_data_level == "rural" & country_code == "ABW"), "urban", pop_data_level))] - expect_error(pop_validate_output(pop), "Duplicate error") + expect_error(pop_validate_output(pop)) }) @@ -79,7 +79,7 @@ test_that("pop_validate_output() works identifying type/ formating error", { pop[, `:=` (year = as.character(year), pop = as.character(pop))] - expect_error(pop_validate_output(pop), "Type/ format error") + expect_error(pop_validate_output(pop)) }) @@ -94,6 +94,6 @@ test_that("pop_validate_output() works identifying invalid value", { pop[, pop_data_level := fifelse(pop_data_level == "national", "national1", pop_data_level)] - expect_error(pop_validate_output(pop), "Invalid value in `pop_data_level`") + expect_error(pop_validate_output(pop)) }) diff --git a/tests/testthat/test-ppp-validation.R b/tests/testthat/test-ppp-validation.R index 78e217c..ea64396 100644 --- a/tests/testthat/test-ppp-validation.R +++ b/tests/testthat/test-ppp-validation.R @@ -16,7 +16,7 @@ test_that("ppp_validate_raw() works identifying duplicate error", { ppp[, `:=` (CoverageType = fifelse(CoverageType == "Urban", "Rural", CoverageType))] - expect_error(ppp_validate_raw(ppp), "Duplicate error") + expect_error(ppp_validate_raw(ppp)) }) @@ -33,7 +33,7 @@ test_that("ppp_validate_raw() works identifying type/ formating error", { ppp_2011_v2_v1 = as.character(ppp_2011_v2_v1), ppp_2011_v2_v2 = as.character(ppp_2011_v2_v2))] - expect_error(ppp_validate_raw(ppp), "Type/ format error") + expect_error(ppp_validate_raw(ppp)) }) @@ -47,7 +47,7 @@ test_that("ppp_validate_raw() works identifying invalid value", 
{ ppp[, ppp_domain := fifelse(ppp_domain == 1, 3, ppp_domain)] - expect_error(ppp_validate_raw(ppp), "Invalid value in `ppp_domain`") + expect_error(ppp_validate_raw(ppp)) }) @@ -62,7 +62,7 @@ test_that("ppp_validate_output() works identifying duplicate error", { ppp[, `:=` (ppp_data_level = fifelse(ppp_data_level == "rural", "urban", ppp_data_level))] - expect_error(ppp_validate_output(ppp), "Duplicate error") + expect_error(ppp_validate_output(ppp)) }) @@ -77,7 +77,7 @@ test_that("ppp_validate_output() works identifying type/ formating error", { ppp[, `:=` (ppp_year = as.character(ppp_year), ppp = as.character(ppp))] - expect_error(ppp_validate_output(ppp), "Type/ format error") + expect_error(ppp_validate_output(ppp)) }) @@ -92,6 +92,6 @@ test_that("ppp_validate_output() works identifying invalid value", { ppp[, ppp_data_level := fifelse(ppp_data_level == "national", "national1", ppp_data_level)] - expect_error(ppp_validate_output(ppp), "Invalid value in `ppp_data_level`") + expect_error(ppp_validate_output(ppp)) }) diff --git a/tests/testthat/test-sna-fy-validation.R b/tests/testthat/test-sna-fy-validation.R index f34dc84..be8d4d6 100644 --- a/tests/testthat/test-sna-fy-validation.R +++ b/tests/testthat/test-sna-fy-validation.R @@ -15,6 +15,6 @@ test_that("sna_validate_raw() works identifying type/ formating error", { sna_fy[, Day := as.character(Day)] - expect_error(sna_fy_validate_raw(sna_fy), "Type/ format error") + expect_error(sna_fy_validate_raw(sna_fy)) }) diff --git a/tests/testthat/test-sna-validation.R b/tests/testthat/test-sna-validation.R index f0c9cca..8df8ca2 100644 --- a/tests/testthat/test-sna-validation.R +++ b/tests/testthat/test-sna-validation.R @@ -15,7 +15,7 @@ test_that("sna_validate_raw() works identifying type/ formating error", { sna[, `:=` (year = as.character(year), GDP = as.character(GDP))] - expect_error(sna_validate_raw(sna), "Type/ format error") + expect_error(sna_validate_raw(sna)) }) @@ -29,6 +29,6 @@ 
test_that("sna_validate_raw() works identifying invalid value", { sna[, coverage := fifelse(coverage == "National", "national1", coverage)] - expect_error(sna_validate_raw(sna), "Invalid value in `DataType`") + expect_error(sna_validate_raw(sna)) }) diff --git a/tests/testthat/test-wdi-validation.R b/tests/testthat/test-wdi-validation.R index f59d4ac..ee20811 100644 --- a/tests/testthat/test-wdi-validation.R +++ b/tests/testthat/test-wdi-validation.R @@ -15,7 +15,7 @@ test_that("wdi_validate_raw() works identifying duplicate error", { wdi[, `:=` (year = fifelse((year == 1960 & country_code == "ABW"), 1961, year))] - expect_error(wdi_validate_raw(wdi), "Duplicate error") + expect_error(wdi_validate_raw(wdi)) }) @@ -31,6 +31,6 @@ test_that("wdi_validate_raw() works identifying type/ formating error", { NE.CON.PRVT.PC.KD = as.character(NE.CON.PRVT.PC.KD), NY.GDP.PCAP.KD = as.character(NY.GDP.PCAP.KD))] - expect_error(wdi_validate_raw(wdi), "Type/ format error") + expect_error(wdi_validate_raw(wdi)) }) diff --git a/tests/testthat/test-weo-validation.R b/tests/testthat/test-weo-validation.R index 5fdc5ba..08caece 100644 --- a/tests/testthat/test-weo-validation.R +++ b/tests/testthat/test-weo-validation.R @@ -16,7 +16,7 @@ test_that("weo_validate_raw() works identifying duplicate error", { weo[, `:=` (`WEO Subject Code` = fifelse(`WEO Subject Code` == "NGDP", "NGDPD", `WEO Subject Code`))] - expect_error(weo_validate_raw(weo), "Duplicate error") + expect_error(weo_validate_raw(weo)) }) @@ -32,7 +32,7 @@ test_that("weo_validate_output() works identifying duplicate error", { weo[, `:=` (year = fifelse(year == 1986 & country_code == "ABW", 1987, year))] - expect_error(weo_validate_output(weo), "Duplicate error") + expect_error(weo_validate_output(weo)) }) @@ -47,6 +47,6 @@ test_that("weo_validate_output() works identifying type/ formating error", { weo[, `:=` (year = as.character(year), weo_gdp = as.character(weo_gdp))] - expect_error(weo_validate_output(weo), "Type/ format 
error") + expect_error(weo_validate_output(weo)) }) From bca92ede57cc960351a67b4111cafb3b1406e7f7 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Tue, 7 May 2024 15:35:12 -0400 Subject: [PATCH 17/23] Update based on comments --- DESCRIPTION | 9 ++- Data/git_metadata.csv | 25 ------- R/cl_validate_raw.R | 26 +++----- R/countries_validate_output.R | 1 + R/cpi_validate_output.R | 3 + R/cpi_validate_raw.R | 1 + R/gdm_validate_output.R | 1 + R/gdm_validate_raw.R | 1 + R/gdp_validate_output.R | 1 + R/get_error_validation.R | 2 +- R/incgroup_validate_output.R | 1 + R/metadata_validate_output.R | 1 + R/metadata_validate_raw.R | 1 + R/mpd_validate_raw.R | 1 + R/npl_validate_output.R | 1 + R/npl_validate_raw.R | 1 + R/pce_validate_output.R | 1 + R/pfw_validate_output.R | 1 + R/pfw_validate_raw.R | 1 + R/pl_validate_output.R | 1 + R/pop_validate_output.R | 1 + R/pop_validate_raw.R | 1 + R/popmain_validate_raw.R | 1 + R/ppp_validate_output.R | 1 + R/ppp_validate_raw.R | 1 + R/sna_fy_validate_raw.R | 1 + R/sna_validate_raw.R | 1 + R/spop_validate_raw.R | 1 + R/wdi_validate_raw.R | 1 + R/weo_validate_output.R | 1 + R/weo_validate_raw.R | 108 +++---------------------------- man/cl_validate_raw.Rd | 1 + man/countries_validate_output.Rd | 1 + man/cpi_validate_output.Rd | 1 + man/cpi_validate_raw.Rd | 1 + man/gdm_validate_output.Rd | 1 + man/gdm_validate_raw.Rd | 1 + man/gdp_validate_output.Rd | 1 + man/incgroup_validate_output.Rd | 1 + man/metadata_validate_output.Rd | 1 + man/metadata_validate_raw.Rd | 1 + man/mpd_validate_raw.Rd | 1 + man/npl_validate_output.Rd | 1 + man/npl_validate_raw.Rd | 1 + man/pce_validate_output.Rd | 1 + man/pfw_validate_output.Rd | 1 + man/pfw_validate_raw.Rd | 1 + man/pl_validate_output.Rd | 1 + man/pop_validate_output.Rd | 1 + man/pop_validate_raw.Rd | 1 + man/popmain_validate_raw.Rd | 1 + man/ppp_validate_output.Rd | 1 + man/ppp_validate_raw.Rd | 1 + man/sna_fy_validate_raw.Rd | 1 + man/sna_validate_raw.Rd | 1 + man/spop_validate_raw.Rd | 1 + 
man/wdi_validate_raw.Rd | 1 + man/weo_validate_output.Rd | 1 + man/weo_validate_raw.Rd | 1 + 59 files changed, 80 insertions(+), 146 deletions(-) delete mode 100644 Data/git_metadata.csv diff --git a/DESCRIPTION b/DESCRIPTION index fe578fd..d0eadb1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,9 @@ Suggests: testthat (>= 3.0.0), knitr, rmarkdown, - covr + covr, + dm, + config Imports: haven, digest, @@ -50,11 +52,8 @@ Imports: httr, pipfun (>= 0.0.0.9007), lifecycle, - joyn, - dm, - config, collapse, - covr, + joyn, data.validator, assertr, blastula, diff --git a/Data/git_metadata.csv b/Data/git_metadata.csv deleted file mode 100644 index f6393d8..0000000 --- a/Data/git_metadata.csv +++ /dev/null @@ -1,25 +0,0 @@ -Repo,hash,branch -PIP-Technical-Team/aux_censoring,2d531fdd08a80ad69212c8a313f4c76ec33a8b85,DEV -PIP-Technical-Team/aux_country_list,6d14c6c7b019dfa217c968bf8b4f5effa1548827,DEV -PIP-Technical-Team/aux_cp,63b2cb63a461fe68f8cbafa6e3eb4640f01e3d28,DEV -PIP-Technical-Team/aux_cpi,535de6c954ac56a24e50222564f603993ff0832a,DEV -PIP-Technical-Team/aux_dictionary,3c597365da8a9e6f25a5f571d427b52c27ac379d,DEV -PIP-Technical-Team/aux_dlw,c51a7a271b3b8566e3bd2a0c749179a1a2fc9e6a,DEV -PIP-Technical-Team/aux_gdm,f43c984a0fa1cede4dc4ee847201e8f9ba1f2ce1,DEV -PIP-Technical-Team/aux_gdp,07973496ab9fb30f80cf34d9969a13064695e676,DEV -PIP-Technical-Team/aux_income_groups,3dbf8467aa3fb09ef8053d43f039b7f9df79156b,DEV -PIP-Technical-Team/aux_indicators,47cc9fa6fc68c6ccc958029674c68c69e0f8daef,DEV -PIP-Technical-Team/aux_maddison,6051166dd0e0ca9c5f4f9438d7f756359f493014,DEV -PIP-Technical-Team/aux_metadata,328ed879227c0c7fef3f3f8a151fa0defe7c1a2a,DEV -PIP-Technical-Team/aux_npl,ae9abc913a987d8107f7b2d10f570cd24a2bd0d0,DEV -PIP-Technical-Team/aux_pce,6e5fa243d225112c8ff00d63b55be68ad9700886,DEV -PIP-Technical-Team/aux_pfw,6299794192c61f4ef85af847d03833ee7d2a2b8a,DEV -PIP-Technical-Team/aux_pl,e81910e2848e88d3b0319beec15b0b0b1a4f86d8,DEV 
-PIP-Technical-Team/aux_pop,218e0e08460f52485ed9ca8a245b759cad224cb0,DEV -PIP-Technical-Team/aux_ppp,916e11b8a30cbebbf0262a624ae92d8910343972,DEV -PIP-Technical-Team/aux_regions,8747ef071d672ea6e7ef5bc40702e99310f04d01,DEV -PIP-Technical-Team/aux_shp,373bd77aa1cb5f6c0d043356e9aa75f2e108ff61,DEV -PIP-Technical-Team/aux_sna,99aa56fb8cb03428301cff1464065d660e628987,DEV -PIP-Technical-Team/aux_sub,937dc962f7a3d39c2ae47e5b7e4d5b62c219e11f,DEV -PIP-Technical-Team/aux_wdi,5bf6cf3c903f6223b4bedb6bb82fe5a78befec85,DEV -PIP-Technical-Team/aux_weo,8beb3cb62860412cc3d68b69657b8c520d46c936,DEV \ No newline at end of file diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R index 6f03450..693c391 100644 --- a/R/cl_validate_raw.R +++ b/R/cl_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ @@ -12,26 +13,17 @@ cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ report <- data_validation_report() + country_list <- pipfun::load_from_gh(measure = "pfw", + owner = getOption("pipfun.ghowner"), + branch = "DEV", + ext = "dta") + + country_list <- unique(country_list[, country_code]) + validate(cl, name = "CL raw data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> - validate_cols(in_set(c("ABW", "AFG", "AGO", "ALB", "AND", "ARE", "ARG", "ARM", "ASM", "ATG", "AUS", "AUT", "AZE", - "BDI", "BEL", "BEN", "BFA", "BGD", "BGR", "BHR", "BHS", "BIH", "BLR", "BLZ", "BMU", "BOL", - "BRA", "BRB", "BRN", "BTN", "BWA", "CAF", "CAN", "CHE", "CHI", "CHL", "CHN", "CIV", "CMR", - "COD", "COG", "COL", "COM", "CPV", "CRI", "CUB", "CUW", "CYM", "CYP", "CZE", "DEU", "DJI", - "DMA", "DNK", "DOM", "DZA", "ECU", "EGY", "ERI", "ESP", "EST", "ETH", "FIN", "FJI", "FRA", - "FRO", "FSM", "GAB", "GBR", 
"GEO", "GHA", "GIB", "GIN", "GMB", "GNB", "GNQ", "GRC", "GRD", - "GRL", "GTM", "GUM", "GUY", "HKG", "HND", "HRV", "HTI", "HUN", "IDN", "IMN", "IND", "IRL", - "IRN", "IRQ", "ISL", "ISR", "ITA", "JAM", "JOR", "JPN", "KAZ", "KEN", "KGZ", "KHM", "KIR", - "KNA", "KOR", "KWT", "LAO", "LBN", "LBR", "LBY", "LCA", "LIE", "LKA", "LSO", "LTU", "LUX", - "LVA", "MAC", "MAF", "MAR", "MCO", "MDA", "MDG", "MDV", "MEX", "MHL", "MKD", "MLI", "MLT", - "MMR", "MNE", "MNG", "MNP", "MOZ", "MRT", "MUS", "MWI", "MYS", "NAM", "NCL", "NER", "NGA", - "NIC", "NLD", "NOR", "NPL", "NRU", "NZL", "OMN", "PAK", "PAN", "PER", "PHL", "PLW", "PNG", - "POL", "PRI", "PRK", "PRT", "PRY", "PSE", "PYF", "QAT", "ROU", "RUS", "RWA", "SAU", "SDN", - "SEN", "SGP", "SLB", "SLE", "SLV", "SMR", "SOM", "SRB", "SSD", "STP", "SUR", "SVK", "SVN", - "SWE", "SWZ", "SXM", "SYC", "SYR", "TCA", "TCD", "TGO", "THA", "TJK", "TKM", "TLS", "TON", - "TTO", "TUN", "TUR", "TUV", "TWN", "TZA", "UGA", "UKR", "URY", "USA", "UZB", "VCT", "VEN", - "VGB", "VIR", "VNM", "VUT", "WSM", "XKX", "YEM", "ZAF", "ZMB", "ZWE")), + validate_cols(in_set(country_list), country_code, description = "`country_code` values within range") |> validate_if(is.character(country_name), description = "`country_name` should be character") |> diff --git a/R/countries_validate_output.R b/R/countries_validate_output.R index b29b109..baa016f 100644 --- a/R/countries_validate_output.R +++ b/R/countries_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export countries_validate_output <- function(countries, detail = getOption("pipaux.detail.output")){ diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 36b7363..423bf90 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' 
@importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")){ @@ -66,6 +67,8 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) validate_if(is_uniq(country_code, cpi_year, survey_acronym, cpi_data_level), description = "no duplicate records in key variables") |> + validate_if(is_uniq(country_code, cpi), + description = "no duplicate cpi values") |> add_results(report) validation_record <- get_results(report, unnest = FALSE) |> diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R index b23b766..7fda767 100644 --- a/R/cpi_validate_raw.R +++ b/R/cpi_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ diff --git a/R/gdm_validate_output.R b/R/gdm_validate_output.R index 4ae41aa..6c6e2c2 100644 --- a/R/gdm_validate_output.R +++ b/R/gdm_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")){ diff --git a/R/gdm_validate_raw.R b/R/gdm_validate_raw.R index 0d3df0f..c841013 100644 --- a/R/gdm_validate_raw.R +++ b/R/gdm_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export gdm_validate_raw <- function(gdm, detail = getOption("pipaux.detail.raw")){ diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R index d24c6c0..eda5ea7 100644 --- a/R/gdp_validate_output.R +++ b/R/gdp_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an 
option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")){ diff --git a/R/get_error_validation.R b/R/get_error_validation.R index f857e31..a6de5e6 100644 --- a/R/get_error_validation.R +++ b/R/get_error_validation.R @@ -36,7 +36,7 @@ get_error_validation <- function(vlddata, detail){ } - cli::cli_inform("Validation report ('validation_report') has been added to the environment varaible (.pipaux).") + cli::cli_inform("Validation report ({.field validation_report}) has been added to the environment varaible ({.field .pipaux}).") } } diff --git a/R/incgroup_validate_output.R b/R/incgroup_validate_output.R index 8a3ffb1..defeebf 100644 --- a/R/incgroup_validate_output.R +++ b/R/incgroup_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export incgroup_validate_output <- function(incgroup, detail = getOption("pipaux.detail.output")){ diff --git a/R/metadata_validate_output.R b/R/metadata_validate_output.R index 40cf63f..421fe4c 100644 --- a/R/metadata_validate_output.R +++ b/R/metadata_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail.output")){ diff --git a/R/metadata_validate_raw.R b/R/metadata_validate_raw.R index 840b225..32331fd 100644 --- a/R/metadata_validate_raw.R +++ b/R/metadata_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export metadata_validate_raw <- 
function(metadata, detail = getOption("pipaux.detail.raw")){ diff --git a/R/mpd_validate_raw.R b/R/mpd_validate_raw.R index 402e900..faa3411 100644 --- a/R/mpd_validate_raw.R +++ b/R/mpd_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export mpd_validate_raw <- function(mpd, detail = getOption("pipaux.detail.raw")){ diff --git a/R/npl_validate_output.R b/R/npl_validate_output.R index ee0f8fe..4980560 100644 --- a/R/npl_validate_output.R +++ b/R/npl_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")){ diff --git a/R/npl_validate_raw.R b/R/npl_validate_raw.R index 00e8ae0..0e16fcd 100644 --- a/R/npl_validate_raw.R +++ b/R/npl_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export npl_validate_raw <- function(npl, detail = getOption("pipaux.detail.raw")){ diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R index 63171bb..383da86 100644 --- a/R/pce_validate_output.R +++ b/R/pce_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")){ diff --git a/R/pfw_validate_output.R b/R/pfw_validate_output.R index 9674a1e..b7af2f6 100644 --- a/R/pfw_validate_output.R +++ b/R/pfw_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' 
@importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pfw_validate_output <- function(pfw, detail = getOption("pipaux.detail.output")){ diff --git a/R/pfw_validate_raw.R b/R/pfw_validate_raw.R index e45bfd2..69b7f66 100644 --- a/R/pfw_validate_raw.R +++ b/R/pfw_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pfw_validate_raw <- function(pfw, detail = getOption("pipaux.detail.raw")){ diff --git a/R/pl_validate_output.R b/R/pl_validate_output.R index 43f07b5..013483b 100644 --- a/R/pl_validate_output.R +++ b/R/pl_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pl_validate_output <- function(pl, detail = getOption("pipaux.detail.output")){ diff --git a/R/pop_validate_output.R b/R/pop_validate_output.R index b5bfae5..13971f4 100644 --- a/R/pop_validate_output.R +++ b/R/pop_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")){ diff --git a/R/pop_validate_raw.R b/R/pop_validate_raw.R index 01a59c8..377fc67 100644 --- a/R/pop_validate_raw.R +++ b/R/pop_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export pop_validate_raw <- function(pop, detail = getOption("pipaux.detail.output")){ diff --git a/R/popmain_validate_raw.R b/R/popmain_validate_raw.R index 2dac3dd..c6d5d20 100644 --- a/R/popmain_validate_raw.R +++ b/R/popmain_validate_raw.R @@ -4,6 +4,7 @@ #' 
@param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export popmain_validate_raw <- function(pop_main, detail = getOption("pipaux.detail.raw")){ diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R index d0dd002..e1b585b 100644 --- a/R/ppp_validate_output.R +++ b/R/ppp_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")){ diff --git a/R/ppp_validate_raw.R b/R/ppp_validate_raw.R index 046806f..6b562eb 100644 --- a/R/ppp_validate_raw.R +++ b/R/ppp_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export ppp_validate_raw <- function(ppp, detail = getOption("pipaux.detail.raw")){ diff --git a/R/sna_fy_validate_raw.R b/R/sna_fy_validate_raw.R index cb39c40..edfff25 100644 --- a/R/sna_fy_validate_raw.R +++ b/R/sna_fy_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export sna_fy_validate_raw <- function(sna_fy, detail = getOption("pipaux.detail.raw")){ diff --git a/R/sna_validate_raw.R b/R/sna_validate_raw.R index 26e9278..7433a4b 100644 --- a/R/sna_validate_raw.R +++ b/R/sna_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export sna_validate_raw <- function(sna, detail = getOption("pipaux.detail.raw")){ diff --git a/R/spop_validate_raw.R b/R/spop_validate_raw.R index 
b05b2c4..e73af29 100644 --- a/R/spop_validate_raw.R +++ b/R/spop_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export spop_validate_raw <- function(spop, detail = getOption("pipaux.detail.output")){ diff --git a/R/wdi_validate_raw.R b/R/wdi_validate_raw.R index a70a237..47fb1c8 100644 --- a/R/wdi_validate_raw.R +++ b/R/wdi_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export wdi_validate_raw <- function(wdi, detail = getOption("pipaux.detail.raw")){ diff --git a/R/weo_validate_output.R b/R/weo_validate_output.R index fcde15a..6392418 100644 --- a/R/weo_validate_output.R +++ b/R/weo_validate_output.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export weo_validate_output <- function(weo, detail = getOption("pipaux.detail.output")){ diff --git a/R/weo_validate_raw.R b/R/weo_validate_raw.R index 78a47ff..679ffb6 100644 --- a/R/weo_validate_raw.R +++ b/R/weo_validate_raw.R @@ -4,6 +4,7 @@ #' @param detail has an option TRUE/FALSE, default value is FALSE #' @import data.validator #' @importFrom assertr in_set not_na is_uniq +#' @keywords internal #' #' @export weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ @@ -33,104 +34,6 @@ weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ description = "`Scale` should be character") |> validate_if(is.character(`Country/Series-specific Notes`), description = "`Country/Series-specific Notes` should be character") |> - validate_if(is.character(`1980`), - description = "`1980` should be character") |> - validate_if(is.character(`1981`), - description 
= "`1981` should be character") |> - validate_if(is.character(`1982`), - description = "`1982` should be character") |> - validate_if(is.character(`1983`), - description = "`1983` should be character") |> - validate_if(is.character(`1984`), - description = "`1984` should be character") |> - validate_if(is.character(`1985`), - description = "`1985` should be character") |> - validate_if(is.character(`1986`), - description = "`1986` should be character") |> - validate_if(is.character(`1987`), - description = "`1987` should be character") |> - validate_if(is.character(`1988`), - description = "`1988` should be character") |> - validate_if(is.character(`1989`), - description = "`1989` should be character") |> - validate_if(is.character(`1990`), - description = "`1990` should be character") |> - validate_if(is.character(`1991`), - description = "`1991` should be character") |> - validate_if(is.character(`1992`), - description = "`1992` should be character") |> - validate_if(is.character(`1993`), - description = "`1993` should be character") |> - validate_if(is.character(`1994`), - description = "`1994` should be character") |> - validate_if(is.character(`1995`), - description = "`1995` should be character") |> - validate_if(is.character(`1996`), - description = "`1996` should be character") |> - validate_if(is.character(`1997`), - description = "`1997` should be character") |> - validate_if(is.character(`1998`), - description = "`1998` should be character") |> - validate_if(is.character(`1999`), - description = "`1999` should be character") |> - validate_if(is.character(`2000`), - description = "`2000` should be character") |> - validate_if(is.character(`2001`), - description = "`2001` should be character") |> - validate_if(is.character(`2002`), - description = "`2002` should be character") |> - validate_if(is.character(`2003`), - description = "`2003` should be character") |> - validate_if(is.character(`2004`), - description = "`2004` should be character") |> - 
validate_if(is.character(`2005`), - description = "`2005` should be character") |> - validate_if(is.character(`2006`), - description = "`2006` should be character") |> - validate_if(is.character(`2007`), - description = "`2007` should be character") |> - validate_if(is.character(`2008`), - description = "`2008` should be character") |> - validate_if(is.character(`2009`), - description = "`2009` should be character") |> - validate_if(is.character(`2010`), - description = "`2010` should be character") |> - validate_if(is.character(`2011`), - description = "`2011` should be character") |> - validate_if(is.character(`2012`), - description = "`2012` should be character") |> - validate_if(is.character(`2013`), - description = "`2013` should be character") |> - validate_if(is.character(`2014`), - description = "`2014` should be character") |> - validate_if(is.character(`2015`), - description = "`2015` should be character") |> - validate_if(is.character(`2016`), - description = "`2016` should be character") |> - validate_if(is.character(`2017`), - description = "`2017` should be character") |> - validate_if(is.character(`2018`), - description = "`2018` should be character") |> - validate_if(is.character(`2019`), - description = "`2019` should be character") |> - validate_if(is.character(`2020`), - description = "`2020` should be character") |> - validate_if(is.character(`2021`), - description = "`2021` should be character") |> - validate_if(is.character(`2022`), - description = "`2022` should be character") |> - validate_if(is.character(`2023`), - description = "`2023` should be character") |> - validate_if(is.character(`2024`), - description = "`2024` should be character") |> - validate_if(is.character(`2025`), - description = "`2025` should be character") |> - validate_if(is.character(`2026`), - description = "`2026` should be character") |> - validate_if(is.character(`2027`), - description = "`2027` should be character") |> - validate_if(is.character(`2028`), - 
description = "`2028` should be character") |> validate_if(is.numeric(`Estimates Start After`), description = "`Estimates Start After` should be numeric") |> validate_cols(not_na, ISO, `WEO Subject Code`, @@ -139,6 +42,15 @@ weo_validate_raw <- function(weo, detail = getOption("pipaux.detail.raw")){ description = "no duplicate records in key variables") |> add_results(report) + num_var_list <- grep("^[[:digit:]]", colnames(weo)) + + for (i in 1:length(num_var_list)) { + validate(weo, name = "WEO validation") |> + validate_cols(is.numeric, num_var_list[i], + description = "variables (with numeric var name) should be numeric") |> + add_results(report) + } + validation_record <- get_results(report, unnest = FALSE) |> setDT() diff --git a/man/cl_validate_raw.Rd b/man/cl_validate_raw.Rd index 2b5f785..21e4aff 100644 --- a/man/cl_validate_raw.Rd +++ b/man/cl_validate_raw.Rd @@ -14,3 +14,4 @@ cl_validate_raw(cl, detail = getOption("pipaux.detail.raw")) \description{ Validate raw country list data } +\keyword{internal} diff --git a/man/countries_validate_output.Rd b/man/countries_validate_output.Rd index 25d0708..14d1105 100644 --- a/man/countries_validate_output.Rd +++ b/man/countries_validate_output.Rd @@ -17,3 +17,4 @@ countries_validate_output( \description{ Validate output countries data } +\keyword{internal} diff --git a/man/cpi_validate_output.Rd b/man/cpi_validate_output.Rd index efa42de..ca705b9 100644 --- a/man/cpi_validate_output.Rd +++ b/man/cpi_validate_output.Rd @@ -14,3 +14,4 @@ cpi_validate_output(cpi, detail = getOption("pipaux.detail.output")) \description{ Validate clean cpi data } +\keyword{internal} diff --git a/man/cpi_validate_raw.Rd b/man/cpi_validate_raw.Rd index e818e1c..16e3cad 100644 --- a/man/cpi_validate_raw.Rd +++ b/man/cpi_validate_raw.Rd @@ -14,3 +14,4 @@ cpi_validate_raw(cpi, detail = getOption("pipaux.detail.raw")) \description{ Validate raw cpi data } +\keyword{internal} diff --git a/man/gdm_validate_output.Rd 
b/man/gdm_validate_output.Rd index abb40ae..dce34c7 100644 --- a/man/gdm_validate_output.Rd +++ b/man/gdm_validate_output.Rd @@ -14,3 +14,4 @@ gdm_validate_output(gdm, detail = getOption("pipaux.detail.output")) \description{ Validate clean gdm data } +\keyword{internal} diff --git a/man/gdm_validate_raw.Rd b/man/gdm_validate_raw.Rd index 44171c2..23cdb9a 100644 --- a/man/gdm_validate_raw.Rd +++ b/man/gdm_validate_raw.Rd @@ -14,3 +14,4 @@ gdm_validate_raw(gdm, detail = getOption("pipaux.detail.raw")) \description{ Validate raw gdm data } +\keyword{internal} diff --git a/man/gdp_validate_output.Rd b/man/gdp_validate_output.Rd index 8c857a0..b682f70 100644 --- a/man/gdp_validate_output.Rd +++ b/man/gdp_validate_output.Rd @@ -14,3 +14,4 @@ gdp_validate_output(gdp, detail = getOption("pipaux.detail.output")) \description{ Validate output gdp data } +\keyword{internal} diff --git a/man/incgroup_validate_output.Rd b/man/incgroup_validate_output.Rd index a69160a..e470eb1 100644 --- a/man/incgroup_validate_output.Rd +++ b/man/incgroup_validate_output.Rd @@ -14,3 +14,4 @@ incgroup_validate_output(incgroup, detail = getOption("pipaux.detail.output")) \description{ Validate income group output data } +\keyword{internal} diff --git a/man/metadata_validate_output.Rd b/man/metadata_validate_output.Rd index 37fac0b..811bb1c 100644 --- a/man/metadata_validate_output.Rd +++ b/man/metadata_validate_output.Rd @@ -14,3 +14,4 @@ metadata_validate_output(metadata, detail = getOption("pipaux.detail.output")) \description{ Validate output metadata data } +\keyword{internal} diff --git a/man/metadata_validate_raw.Rd b/man/metadata_validate_raw.Rd index eec128f..61e7cae 100644 --- a/man/metadata_validate_raw.Rd +++ b/man/metadata_validate_raw.Rd @@ -14,3 +14,4 @@ metadata_validate_raw(metadata, detail = getOption("pipaux.detail.raw")) \description{ Validate raw metadata data } +\keyword{internal} diff --git a/man/mpd_validate_raw.Rd b/man/mpd_validate_raw.Rd index b612f26..9e9071b 100644 
--- a/man/mpd_validate_raw.Rd +++ b/man/mpd_validate_raw.Rd @@ -14,3 +14,4 @@ mpd_validate_raw(mpd, detail = getOption("pipaux.detail.raw")) \description{ Validate raw maddison data } +\keyword{internal} diff --git a/man/npl_validate_output.Rd b/man/npl_validate_output.Rd index e74c484..aaafeda 100644 --- a/man/npl_validate_output.Rd +++ b/man/npl_validate_output.Rd @@ -14,3 +14,4 @@ npl_validate_output(npl, detail = getOption("pipaux.detail.output")) \description{ Validate npl output data } +\keyword{internal} diff --git a/man/npl_validate_raw.Rd b/man/npl_validate_raw.Rd index ec2338c..89de168 100644 --- a/man/npl_validate_raw.Rd +++ b/man/npl_validate_raw.Rd @@ -14,3 +14,4 @@ npl_validate_raw(npl, detail = getOption("pipaux.detail.raw")) \description{ Validate npl raw data } +\keyword{internal} diff --git a/man/pce_validate_output.Rd b/man/pce_validate_output.Rd index 6c071f1..007c847 100644 --- a/man/pce_validate_output.Rd +++ b/man/pce_validate_output.Rd @@ -14,3 +14,4 @@ pce_validate_output(pce, detail = getOption("pipaux.detail.output")) \description{ Validate output pce data } +\keyword{internal} diff --git a/man/pfw_validate_output.Rd b/man/pfw_validate_output.Rd index a1a9563..c2c29b1 100644 --- a/man/pfw_validate_output.Rd +++ b/man/pfw_validate_output.Rd @@ -14,3 +14,4 @@ pfw_validate_output(pfw, detail = getOption("pipaux.detail.output")) \description{ Validate clean pfw data } +\keyword{internal} diff --git a/man/pfw_validate_raw.Rd b/man/pfw_validate_raw.Rd index bd3a413..1eb915c 100644 --- a/man/pfw_validate_raw.Rd +++ b/man/pfw_validate_raw.Rd @@ -14,3 +14,4 @@ pfw_validate_raw(pfw, detail = getOption("pipaux.detail.raw")) \description{ Validate raw pfw data } +\keyword{internal} diff --git a/man/pl_validate_output.Rd b/man/pl_validate_output.Rd index f5e143c..6c21dbc 100644 --- a/man/pl_validate_output.Rd +++ b/man/pl_validate_output.Rd @@ -14,3 +14,4 @@ pl_validate_output(pl, detail = getOption("pipaux.detail.output")) \description{ Validate 
output pl data } +\keyword{internal} diff --git a/man/pop_validate_output.Rd b/man/pop_validate_output.Rd index f1a63bf..f35f76d 100644 --- a/man/pop_validate_output.Rd +++ b/man/pop_validate_output.Rd @@ -14,3 +14,4 @@ pop_validate_output(pop, detail = getOption("pipaux.detail.output")) \description{ Validate output pop data } +\keyword{internal} diff --git a/man/pop_validate_raw.Rd b/man/pop_validate_raw.Rd index 34bd1bc..2f903a8 100644 --- a/man/pop_validate_raw.Rd +++ b/man/pop_validate_raw.Rd @@ -14,3 +14,4 @@ pop_validate_raw(pop, detail = getOption("pipaux.detail.output")) \description{ Validate pop raw data download from wdi } +\keyword{internal} diff --git a/man/popmain_validate_raw.Rd b/man/popmain_validate_raw.Rd index 4dbdbba..1ce94bd 100644 --- a/man/popmain_validate_raw.Rd +++ b/man/popmain_validate_raw.Rd @@ -14,3 +14,4 @@ popmain_validate_raw(pop_main, detail = getOption("pipaux.detail.raw")) \description{ Validate raw main pop data } +\keyword{internal} diff --git a/man/ppp_validate_output.Rd b/man/ppp_validate_output.Rd index 8da5124..1295262 100644 --- a/man/ppp_validate_output.Rd +++ b/man/ppp_validate_output.Rd @@ -14,3 +14,4 @@ ppp_validate_output(ppp, detail = getOption("pipaux.detail.output")) \description{ Validate output ppp data } +\keyword{internal} diff --git a/man/ppp_validate_raw.Rd b/man/ppp_validate_raw.Rd index 41aaff4..2afdcdb 100644 --- a/man/ppp_validate_raw.Rd +++ b/man/ppp_validate_raw.Rd @@ -14,3 +14,4 @@ ppp_validate_raw(ppp, detail = getOption("pipaux.detail.raw")) \description{ Validate raw ppp data } +\keyword{internal} diff --git a/man/sna_fy_validate_raw.Rd b/man/sna_fy_validate_raw.Rd index 74fb3c1..f5d66d9 100644 --- a/man/sna_fy_validate_raw.Rd +++ b/man/sna_fy_validate_raw.Rd @@ -14,3 +14,4 @@ sna_fy_validate_raw(sna_fy, detail = getOption("pipaux.detail.raw")) \description{ Validate raw sna_fy data } +\keyword{internal} diff --git a/man/sna_validate_raw.Rd b/man/sna_validate_raw.Rd index abdc3fa..8fb8a5d 100644 --- 
a/man/sna_validate_raw.Rd +++ b/man/sna_validate_raw.Rd @@ -14,3 +14,4 @@ sna_validate_raw(sna, detail = getOption("pipaux.detail.raw")) \description{ Validate raw special national accounts (sna) data } +\keyword{internal} diff --git a/man/spop_validate_raw.Rd b/man/spop_validate_raw.Rd index 8bc25e9..0222f97 100644 --- a/man/spop_validate_raw.Rd +++ b/man/spop_validate_raw.Rd @@ -14,3 +14,4 @@ spop_validate_raw(spop, detail = getOption("pipaux.detail.output")) \description{ Validate raw special cases pop data } +\keyword{internal} diff --git a/man/wdi_validate_raw.Rd b/man/wdi_validate_raw.Rd index d97fc77..b56e209 100644 --- a/man/wdi_validate_raw.Rd +++ b/man/wdi_validate_raw.Rd @@ -14,3 +14,4 @@ wdi_validate_raw(wdi, detail = getOption("pipaux.detail.raw")) \description{ Validate raw wdi data } +\keyword{internal} diff --git a/man/weo_validate_output.Rd b/man/weo_validate_output.Rd index 633e54b..3c24bdf 100644 --- a/man/weo_validate_output.Rd +++ b/man/weo_validate_output.Rd @@ -14,3 +14,4 @@ weo_validate_output(weo, detail = getOption("pipaux.detail.output")) \description{ Validate clean weo data } +\keyword{internal} diff --git a/man/weo_validate_raw.Rd b/man/weo_validate_raw.Rd index 9e79af0..17462dc 100644 --- a/man/weo_validate_raw.Rd +++ b/man/weo_validate_raw.Rd @@ -14,3 +14,4 @@ weo_validate_raw(weo, detail = getOption("pipaux.detail.raw")) \description{ Validate raw weo data } +\keyword{internal} From a5438ea48d617c1d8125b41f8e6bccf15cf64d77 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Tue, 3 Sep 2024 08:50:27 -0400 Subject: [PATCH 18/23] Validation functions are updated --- NAMESPACE | 3 + R/aux_data_files.R | 136 ++++++++++++++++++++++++++++++++++++++++ R/cl_validate_raw.R | 7 ++- R/cpi_validate_output.R | 30 ++++++--- R/cpi_validate_raw.R | 12 ++-- R/load_aux.R | 25 +++++++- R/merge_aux_files.R | 123 ++++++++++++++++++++++++++++++++++++ R/pip_aux_labels.R | 15 +++-- R/pip_countries.R | 4 ++ R/pip_country_list.R | 9 ++- R/pip_cpi_update.R | 9 +++ 
R/pip_gdm_update.R | 30 ++++++--- R/pip_gdp_update.R | 8 +++ R/pip_income_groups.R | 6 ++ R/pip_maddison.R | 7 +++ R/pip_metadata_update.R | 8 +++ R/pip_npl.R | 8 +++ R/pip_pce_update.R | 8 +++ R/pip_pfw_key.R | 40 ++++++++++++ R/pip_pfw_update.R | 3 + R/pip_pop_update.R | 9 +++ R/pip_ppp.R | 6 +- R/pip_ppp_update.R | 19 ++++++ R/pip_regions.R | 6 ++ R/pip_wdi_update.R | 6 ++ R/pip_weo.R | 6 ++ R/ppp_validate_raw.R | 4 +- man/aux_data.Rd | 17 +++++ man/load_aux.Rd | 5 +- man/merge_aux_files.Rd | 34 ++++++++++ man/pip_country_list.Rd | 3 +- man/pip_pfw_key.Rd | 14 +++++ man/pip_ppp.Rd | 3 +- 33 files changed, 581 insertions(+), 42 deletions(-) create mode 100644 R/aux_data_files.R create mode 100644 R/merge_aux_files.R create mode 100644 R/pip_pfw_key.R create mode 100644 man/aux_data.Rd create mode 100644 man/merge_aux_files.Rd create mode 100644 man/pip_pfw_key.Rd diff --git a/NAMESPACE b/NAMESPACE index ef5d49d..7101096 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(auto_aux_update) +export(aux_data) export(cl_validate_raw) export(clean_validation_report) export(countries_validate_output) @@ -14,6 +15,7 @@ export(gdp_validate_output) export(get_error_validation) export(incgroup_validate_output) export(load_aux) +export(merge_aux_files) export(metadata_validate_output) export(metadata_validate_raw) export(mpd_validate_raw) @@ -40,6 +42,7 @@ export(pip_missing_data) export(pip_npl) export(pip_pce) export(pip_pfw) +export(pip_pfw_key) export(pip_pl) export(pip_pl_clean) export(pip_pop) diff --git a/R/aux_data_files.R b/R/aux_data_files.R new file mode 100644 index 0000000..415aa56 --- /dev/null +++ b/R/aux_data_files.R @@ -0,0 +1,143 @@
+#' Attach key values to an auxiliary file
+#'
+#' Renames the year and data-level columns of an auxiliary table to the common
+#' names (`surveyid_year`, `reporting_level`) where such columns exist, and sets
+#' the key columns that correspond to the measure. The measure is identified by
+#' the *name* of the object supplied as `aux_file` (captured with
+#' `substitute()`), so call it with a bare object named after the measure,
+#' e.g. `aux_data(cpi)`.
+#'
+#' @param aux_file auxiliary file, supplied as an object named `pfw`, `cpi`,
+#'   `gdp`, `gdm`, `pce`, `pop`, `ppp`, `maddison`, `weo`, `npl`, `countries`,
+#'   `country_list`, `regions`, `income_groups` or `metadata`
+#'
+#' @return data.table with key values. `regions`, and objects with any other
+#'   name, are returned unchanged; `metadata` raises an error because no key
+#'   columns are defined for it.
+#' @export
+aux_data <- function(aux_file){
+
+  # Name of the object passed by the caller. Captured once, up front: after
+  # `aux_file` is reassigned below, substitute() would return its value rather
+  # than the caller's symbol.
+  aux_name <- deparse1(substitute(aux_file))
+
+  # list of possible auxiliary keys --------------------------------------------
+  keycolsg0 <- c("country_code") # countries, country_list
+  keycolsg1 <- c("country_code", "surveyid_year") # maddison, weo, npl, income_group
+  keycolsg2 <- c("country_code", "surveyid_year", "reporting_level") # gdp, pce, pop
+  keycolsg3 <- c("country_code", "surveyid_year", "reporting_level") # gdm
+  keycolsg4 <- c("country_code", "surveyid_year", "survey_acronym", "reporting_level") # cpi
+  keypfw    <- c("country_code", "surveyid_year", "year", "survey_acronym",
+                 "reporting_level")
+
+  # list of all the auxiliary files
+  aux_file_names <- c("pfw", "cpi", "gdp", "gdm", "pce", "pop", "ppp", "maddison",
+                      "weo", "npl", "countries", "country_list", "regions",
+                      "income_groups", "metadata")
+
+  # unknown object name: nothing to attach
+  if (!(aux_name %chin% aux_file_names)) {
+    return(aux_file)
+  }
+
+  if (aux_name == "pfw") {
+
+    # pfw ---------------------------------------------------------------------
+    # key the object that was passed in, not a global that happens to be
+    # called `pfw`
+    setkeyv(aux_file,
+            c("country_code", "survey_year", "survey_acronym", "cpi_domain"))
+
+    # generate a dataset that can be used to add reporting_level variable to pfw data
+    pfw_key <- pip_pfw_key()
+
+    aux_file <- pfw_key[aux_file] |>
+      setkeyv(keypfw)
+
+  } else if (aux_name == "ppp") {
+
+    # ppp --------------------------------------------------------------------
+    # filter ppp based on default ppp value
+    aux_file <- aux_file[ppp_default == TRUE,
+                         .(country_code, ppp_year, ppp, ppp_data_level)]
+
+    setnames(aux_file, "ppp_data_level", "reporting_level",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, c("country_code", "reporting_level"))
+
+  } else if (aux_name == "cpi") {
+
+    # cpi --------------------------------------------------------------------
+    # rename cpi_year to surveyid_year and cpi_data_level to reporting_level
+    setnames(aux_file,
+             c("cpi_year", "cpi_data_level"),
+             c("surveyid_year", "reporting_level"),
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg4)
+
+  } else if (aux_name == "gdm") {
+
+    # gdm --------------------------------------------------------------------
+    # NOTE(review): the column name here was garbled ("pop_daaux_file ta_level"),
+    # so the rename was silently skipped; `pop_data_level` is the presumed
+    # intent - confirm against the gdm table
+    setnames(aux_file, "pop_data_level", "reporting_level",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg3)
+
+  } else if (aux_name == "npl") {
+
+    # npl --------------------------------------------------------------------
+    setnames(aux_file, "reporting_year", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name == "income_groups") {
+
+    # income_groups ----------------------------------------------------------
+    # rename year_data into surveyid_year
+    setnames(aux_file, "year_data", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name %chin% c("countries", "country_list")) {
+
+    # countries, country_list ------------------------------------------------
+    setkeyv(aux_file, keycolsg0)
+
+  } else if (aux_name == "metadata") {
+
+    # metadata ---------------------------------------------------------------
+    # no key was ever declared for metadata (the code referenced an undefined
+    # `keycolsg5`), so fail with an explicit message instead
+    stop("key columns for `metadata` are not defined", call. = FALSE)
+
+  } else if (aux_name %chin% c("maddison", "weo")) {
+
+    # auxiliary datasets - group 1 (maddison and weo) ------------------------
+    setnames(aux_file, "year", "surveyid_year",
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg1)
+
+  } else if (aux_name %chin% c("gdp", "pop", "pce")) {
+
+    # auxiliary datasets - group 2 (gdp, pop, pce) ---------------------------
+    setnames(aux_file,
+             c(paste0(aux_name, "_data_level"), "year"),
+             c("reporting_level", "surveyid_year"),
+             skip_absent = TRUE)
+
+    setkeyv(aux_file, keycolsg2)
+
+  }
+
+  aux_file
+}
+
diff --git a/R/cl_validate_raw.R b/R/cl_validate_raw.R index 693c391..3dda7ae 100644 --- a/R/cl_validate_raw.R +++ b/R/cl_validate_raw.R @@ -13,18 +13,19 @@ cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")){ report <- data_validation_report() + # country_list <- pipload::pip_load_aux("pfw") country_list <- pipfun::load_from_gh(measure = "pfw", owner = getOption("pipfun.ghowner"), branch = "DEV", ext = "dta") - country_list <- unique(country_list[, country_code]) + country_list <- unique(country_list[, code]) validate(cl, name = "CL raw data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> - validate_cols(in_set(country_list), - country_code, description = "`country_code` values within range") |> + # validate_cols(in_set(country_list), + # country_code, description = "`country_code` values within range") |> validate_if(is.character(country_name), description = "`country_name` should be character") |> validate_if(is.character(africa_split), diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 423bf90..a8af6f9 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -46,20 +46,20 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) description = "`cpi2011` should be numeric") |> validate_if(is.numeric(cpi2017), description = "`cpi2017` should be numeric") |> - validate_if(is.numeric(cpi2011_SM22), - description = "`cpi2011_SM22` should be numeric") |> - validate_if(is.numeric(cpi2017_SM22), - description = "`cpi2017_SM22` should be numeric") |> + # validate_if(is.numeric(cpi2011_SM22), + # description = "`cpi2011_SM22` should be numeric") |> + # validate_if(is.numeric(cpi2017_SM22), + # description = "`cpi2017_SM22` should be numeric") |> validate_cols(is.logical, cpi2005, description = "`cpi2005` should be logical") |> validate_if(is.character(cpi_data_level),
description = "`cpi_data_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level, description = "`cpi_data_level` values within range") |> - validate_if(is.numeric(cpi2011_AM23), - description = "`cpi2011_AM23` should be numeric") |> - validate_if(is.numeric(cpi2017_AM23), - description = "`cpi2017_AM23` should be numeric") |> + # validate_if(is.numeric(cpi2011_AM23), + # description = "`cpi2011_AM23` should be numeric") |> + # validate_if(is.numeric(cpi2017_AM23), + # description = "`cpi2017_AM23` should be numeric") |> validate_if(is.character(cpi_id), description = "`cpi_id` should be character") |> validate_cols(not_na, country_code, cpi_year, survey_acronym, cpi_data_level, @@ -67,10 +67,22 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) validate_if(is_uniq(country_code, cpi_year, survey_acronym, cpi_data_level), description = "no duplicate records in key variables") |> - validate_if(is_uniq(country_code, cpi), + validate_if(is_uniq(country_code, cpi_year, survey_acronym, + cpi_data_level), description = "no duplicate cpi values") |> add_results(report) + num_var_list1 <- grep("cpi2011_", colnames(cpi)) + num_var_list2 <- grep("cpi2017_", colnames(cpi)) + num_var_list <- c(num_var_list1, num_var_list2) + + for (i in 1:length(num_var_list)) { + validate(cpi, name = "CPI validation") |> + validate_cols(is.numeric, num_var_list[i], + description = "variables (with numeric var name) should be numeric") |> + add_results(report) + } + validation_record <- get_results(report, unnest = FALSE) |> setDT() diff --git a/R/cpi_validate_raw.R b/R/cpi_validate_raw.R index 7fda767..2441e6a 100644 --- a/R/cpi_validate_raw.R +++ b/R/cpi_validate_raw.R @@ -64,12 +64,12 @@ cpi_validate_raw <- function(cpi, detail = getOption("pipaux.detail.raw")){ description = "`cpi_data_level` should be numeric") |> validate_cols(in_set(c(0, 1, 2)), cpi_data_level, description = "`cpi_data_level` values within 
range") |> - validate_if(is.numeric(ref_year_AM23), - description = "`ref_year_AM23` should be numeric") |> - validate_if(is.numeric(cpi2011_AM23), - description = "`cpi2011_AM23` should be numeric") |> - validate_if(is.numeric(cpi2017_AM23), - description = "`cpi2011_AM23` should be numeric") |> + validate_if(is.numeric(ref_year_SM24), + description = "`ref_year_SM24` should be numeric") |> + validate_if(is.numeric(cpi2011_SM24), + description = "`cpi2011_SM24` should be numeric") |> + validate_if(is.numeric(cpi2017_SM24), + description = "`cpi2011_SM24` should be numeric") |> validate_if(is.numeric(change_cpi2017), description = "`change_cpi2017` should be numeric") |> validate_if(is.numeric(change_icp2017), diff --git a/R/load_aux.R b/R/load_aux.R index 1fded25..8f858b9 100644 --- a/R/load_aux.R +++ b/R/load_aux.R @@ -3,11 +3,14 @@ #' @inheritParams pip_pfw #' @inheritParams pipfun::load_from_gh #' @param apply_label logical: If TRUE, predefined labels will applied. +#' @param ppp_defaults logical: If TRUE, wider format ppp data will be returned +#' #' @export load_aux <- function(measure, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - apply_label = TRUE) { + apply_label = TRUE, + ppp_defaults = TRUE) { branch <- match.arg(branch) if (branch == "main") { @@ -41,6 +44,26 @@ load_aux <- function(measure, if (inherits(df, "data.frame")) { setDT(df) } + + if (measure == "ppp" & ppp_defaults == TRUE) { + + df <- df[ppp_default_by_year == TRUE, + .(country_code, ppp_year, ppp, reporting_level)] |> + dcast(country_code + reporting_level ~ ppp_year, + value.var = "ppp") + + num_var_list <- grep("^[[:digit:]]", names(df)) + var_names <- names(df)[num_var_list] + setnames(df, + var_names, + paste(rep("ppp", length(var_names)), var_names, sep = "_")) + + setattr(df, "aux_name", "ppp") + setattr(df, + "aux_key", + c("country_code", "reporting_level")) + } + return(df) } diff --git a/R/merge_aux_files.R b/R/merge_aux_files.R new file mode 100644 index 
0000000..6444e78 --- /dev/null +++ b/R/merge_aux_files.R @@ -0,0 +1,105 @@
+#' Merge auxiliary datasets
+#'
+#' Joins two auxiliary tables on the columns that their `aux_key` attributes
+#' have in common. Both inputs must carry the `aux_name` and `aux_key`
+#' attributes. An input whose `aux_name` is `"pfw"` is first joined with the
+#' output of `pip_pfw_key()`.
+#'
+#' @param aux_data1 auxiliary data one
+#' @param aux_data2 auxiliary data two
+#' @param keep merge/ join type, the default is left join, options (left, right, full, using, master, inner)
+#' @param ... currently not used
+#'
+#' @return data.table with key information
+#' @export
+#'
+#' @examples
+#' pfw <- pip_pfw(action = "load")
+#' ppp <- pip_ppp(action = "load")
+#' pfw_ppp <- merge_aux_files(pfw, ppp)
+#'
+#' cpi <- pip_cpi(action = "load")
+#' cpi <- cpi[, -c("cpi_domain")]
+#' pfw_cpi <- merge_aux_files(cpi, pfw, keep = "right")
+#'
+merge_aux_files <- function(aux_data1,
+                            aux_data2,
+                            keep = c("left", "right", "full",
+                                     "using", "master", "inner"),
+                            ...
+){
+
+  keep <- match.arg(keep)
+
+  stopifnot("First data is empty" = !is.null(aux_data1))
+  stopifnot("Second data is empty" = !is.null(aux_data2))
+
+  # extract measure name and dataset key's
+  measure1 <- attr(aux_data1, "aux_name")
+  measure2 <- attr(aux_data2, "aux_name")
+
+  # without a name the checks below cannot run (previously this surfaced as
+  # "object 'measure1' not found")
+  if (is.null(measure1) || is.null(measure2)) {
+    cli::cli_abort("Both auxiliary files need an `aux_name` attribute")
+  }
+
+  if (measure1 == "pfw" || measure2 == "pfw"){
+
+    # generate a dataset that can be used to add reporting_level variable to pfw data
+    pfw_key <- pip_pfw_key()
+
+    # join the pfw key onto a pfw table and tag the result with its key and name
+    add_pfw_key <- function(pfw_data) {
+      keyed <- pfw_key[pfw_data,
+                       on = .(country_code, survey_year, survey_acronym, cpi_domain_var)]
+
+      setattr(keyed,
+              "aux_key",
+              c("country_code", "year", "reporting_level",
+                "survey_acronym", "welfare_type"))
+
+      setattr(keyed, "aux_name", "pfw")
+
+      keyed
+    }
+
+    if (measure1 == "pfw") aux_data1 <- add_pfw_key(aux_data1)
+    if (measure2 == "pfw") aux_data2 <- add_pfw_key(aux_data2)
+
+  }
+
+  key_aux_data1 <- attr(aux_data1, "aux_key")
+  key_aux_data2 <- attr(aux_data2, "aux_key")
+
+  int_key <- intersect(key_aux_data1, key_aux_data2)
+
+  if (length(int_key) == 0) {
+    cli::cli_abort("Auxiliary files have no key variables in common")
+  }
+
+  # "1" when `int_key` identifies the rows of a table, "m" otherwise
+  mtype1 <- if (isTRUE(joyn::is_id(aux_data1, int_key))) "1" else "m"
+  mtype2 <- if (isTRUE(joyn::is_id(aux_data2, int_key))) "1" else "m"
+
+  mtype <- paste(mtype1, mtype2, sep = ":")
+
+  if (mtype == "m:m") {
+    cli::cli_abort("Auxiliary files shouldn't have `m:m` relationship")
+  }
+
+  mdata <- joyn::joyn(aux_data1,
+                      aux_data2,
+                      by = int_key,
+                      match_type = mtype,
+                      keep = keep)
+
+  # setattr(), not attr(): the third positional argument of attr() is `exact`,
+  # so the earlier attr(mdata, "aux_key", ...) call never stored the key
+  setattr(mdata, "aux_key", union(key_aux_data1, key_aux_data2))
+
+  setattr(mdata, "aux_name", paste(measure1, measure2, sep = "_"))
+
+  mdata
+
+}
diff --git a/R/pip_aux_labels.R b/R/pip_aux_labels.R index a94b168..c757424 100644 --- a/R/pip_aux_labels.R +++ b/R/pip_aux_labels.R @@ -10,7 +10,8 @@ pip_aux_labels <- function(x, measure) { # Label variables # attr(x$survey_year, "label") <- "Survey decimal year" #"Proportion of first year of survey" attr(x$cpi_domain, "label") <- "CPI domain to join with microdata" - attr(x$cpi_data_level, "label") <- "Values to use as keys to join with cpi_domain_var" + # attr(x$cpi_data_level, "label") <- "Values to use as keys to join with cpi_domain_var" + attr(x$reporting, "label") <- "Values to use as keys to join with cpi_domain_var" # attr(x$cpi_year, "label") <- "CPI year" #"Year of survey ID" attr(x$ccf, "label") <- "Currency conversion factor" attr(x$cpi, "label") <- "Consumer Price Index (Based on 2011)."
@@ -19,7 +20,8 @@ pip_aux_labels <- function(x, measure) { # Label variables attr(x$ppp_domain, "label") <- "PPP domain to join with microdata" - attr(x$ppp_data_level, "label") <- "Values to use as keys to join with ppp_domain_var" + # attr(x$ppp_data_level, "label") <- "Values to use as keys to join with ppp_domain_var" + attr(x$reporting, "label") <- "Values to use as keys to join with ppp_domain_var" attr(x$ppp, "label") <- paste0( "Purchasing Power Parity (", ppp_year, "2011 ICP round)" @@ -43,19 +45,22 @@ pip_aux_labels <- function(x, measure) { # Label Variables attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" - attr(x$gdp_data_level, "label") <- "Values to use as keys to join with gdp_domain_var" + # attr(x$gdp_data_level, "label") <- "Values to use as keys to join with gdp_domain_var" + attr(x$reporting, "label") <- "Values to use as keys to join with gdp_domain_var" attr(x$gdp, "label") <- "GDP per capita (constant 2010 US$)" attr(x$gdp_domain, "label") <- "GDP domain to join with microdata" } else if (measure == "pce") { attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" - attr(x$pce_data_level, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" + # attr(x$pce_data_level, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" + attr(x$reporting, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" attr(x$pce, "label") <- "Households and NPISHs Final consumption expenditure per capita (constant 2010 US$)" attr(x$pce_domain, "label") <- "PCE domain to join with microdata" } else if (measure == "pop") { attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" - attr(x$pop_data_level, "label") <- "Values to use as keys to join with pop_domain_var" + # attr(x$pop_data_level, "label") <- "Values to use as keys to join with pop_domain_var" + attr(x$reporting, "label") <- "Values to use as keys to 
join with pop_domain_var" attr(x$pop, "label") <- "Population" attr(x$pop_domain, "label") <- "Population domain to join with microdata" } else { diff --git a/R/pip_countries.R b/R/pip_countries.R index 040af44..009f7a9 100644 --- a/R/pip_countries.R +++ b/R/pip_countries.R @@ -47,6 +47,10 @@ pip_countries <- function(action = c("update", "load"), } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + setattr(countries, "aux_name", "countries") + setattr(countries, + "aux_key", + c("country_code")) pipfun::pip_sign_save( x = countries, diff --git a/R/pip_country_list.R b/R/pip_country_list.R index cbfc7e1..bd9266a 100644 --- a/R/pip_country_list.R +++ b/R/pip_country_list.R @@ -17,7 +17,8 @@ pip_country_list <- function(action = c("update", "load"), maindir = gls$PIP_DATA_DIR, force = FALSE, branch = c("DEV", "PROD", "main"), - class_branch = "master" + class_branch = "master", + detail = getOption("pipaux.detail.raw") ) { measure <- "country_list" branch <- match.arg(branch) @@ -36,6 +37,12 @@ pip_country_list <- function(action = c("update", "load"), branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(cl, "aux_name", "country_list") + setattr(cl, + "aux_key", + c("country_code")) + saved <- pipfun::pip_sign_save( x = cl, measure = measure, diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R index c8927f1..c1883f8 100644 --- a/R/pip_cpi_update.R +++ b/R/pip_cpi_update.R @@ -46,6 +46,15 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(cpi, "aux_name", "cpi") + setattr(cpi, + "aux_key", + c("country_code", "year", "reporting_level", "survey_acronym")) + saved <- pipfun::pip_sign_save( x = cpi, measure = measure, diff --git a/R/pip_gdm_update.R b/R/pip_gdm_update.R index f7146ad..9e203ed 100644 --- 
a/R/pip_gdm_update.R +++ b/R/pip_gdm_update.R @@ -86,11 +86,11 @@ pip_gdm_update <- function(force = FALSE, ## ............................................................................ ## Merge with PFW #### - pip_pfw(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag) + # pip_pfw(maindir = maindir, + # force = force, + # owner = owner, + # branch = branch, + # tag = tag) pfw <- load_aux(measure = "pfw", maindir = maindir, @@ -192,11 +192,11 @@ pip_gdm_update <- function(force = FALSE, ## ............................................................................ ## Remove any non-WDI countries #### - pip_country_list(maindir = maindir, - force = force, - owner = owner, - branch = branch, - tag = tag) + # pip_country_list(maindir = maindir, + # force = force, + # owner = owner, + # branch = branch, + # tag = tag) cl <- load_aux(measure = "country_list", maindir = maindir, @@ -213,6 +213,16 @@ pip_gdm_update <- function(force = FALSE, branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + df <- df |> setnames(c("surveyid_year", "pop_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(df, "aux_name", "gdm") + setattr(df, + "aux_key", + c("country_code", "year", "reporting_level", "welfare_type")) + saved <- pipfun::pip_sign_save( x = df, measure = measure, diff --git a/R/pip_gdp_update.R b/R/pip_gdp_update.R index 78ae89a..cf38d03 100644 --- a/R/pip_gdp_update.R +++ b/R/pip_gdp_update.R @@ -286,6 +286,14 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + gdp <- gdp |> setnames("gdp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(gdp, "aux_name", "gdp") + setattr(gdp, + "aux_key", + c("country_code", "year", "reporting_level")) + saved <- pipfun::pip_sign_save( x = gdp, measure = measure, diff --git a/R/pip_income_groups.R b/R/pip_income_groups.R index 29743ee..d3410bc 100644 --- 
a/R/pip_income_groups.R +++ b/R/pip_income_groups.R @@ -62,6 +62,12 @@ pip_income_groups <- function(action = c("update", "load"), } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + ig <- ig |> setnames("year_data", "year", skip_absent=TRUE) + + setattr(ig, "aux_name", "income_groups") + setattr(ig, + "aux_key", + c("country_code", "year")) saved <- pipfun::pip_sign_save( x = ig, diff --git a/R/pip_maddison.R b/R/pip_maddison.R index d0cab00..d5d4501 100644 --- a/R/pip_maddison.R +++ b/R/pip_maddison.R @@ -35,6 +35,13 @@ pip_maddison <- function(action = c("update", "load"), branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(mpd, "aux_name", "maddison") + + setattr(mpd, + "aux_key", + c("country_code", "year")) + saved <- pipfun::pip_sign_save( x = mpd, measure = measure, diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R index 8bf7cf4..fe6a309 100644 --- a/R/pip_metadata_update.R +++ b/R/pip_metadata_update.R @@ -112,6 +112,14 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) + + setattr(df, "aux_name", "metadata") + setattr(df, + "aux_key", + c("country_code", "year", "welfare_type")) + saved <- pipfun::pip_sign_save( x = df, measure = measure, diff --git a/R/pip_npl.R b/R/pip_npl.R index 4de68e2..4bc5421 100644 --- a/R/pip_npl.R +++ b/R/pip_npl.R @@ -54,6 +54,14 @@ pip_npl <- function(action = c("update", "load"), } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + npl <- npl |> setnames("reporting_year", "year", + skip_absent=TRUE) + + setattr(npl, "aux_name", "npl") + setattr(npl, + "aux_key", + c("country_code", "year")) + saved <- pipfun::pip_sign_save( x = npl, measure = measure, diff --git a/R/pip_pce_update.R b/R/pip_pce_update.R index ba5a9c8..7e26eeb 100644 --- a/R/pip_pce_update.R +++ 
b/R/pip_pce_update.R @@ -237,6 +237,14 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + pce <- pce |> setnames("pce_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pce, "aux_name", "pce") + setattr(pce, + "aux_key", + c("country_code", "year", "reporting_level")) + saved <- pipfun::pip_sign_save( x = pce, measure = measure, diff --git a/R/pip_pfw_key.R b/R/pip_pfw_key.R new file mode 100644 index 0000000..8a6ff79 --- /dev/null +++ b/R/pip_pfw_key.R
@@ -0,0 +1,46 @@
+#' Generate a dataset that contains pfw keys
+#'
+#' @return data.table with the pfw key columns plus `reporting_level` from cpi
+#' @export
+#'
+pip_pfw_key <- function(){
+
+  # pfw_temp <- pipload::pip_load_aux("pfw")
+
+  pfw_temp <- pip_pfw(action = "load",
+                      branch = "DEV",
+                      maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing")
+
+  pfw_key_options <- pfw_temp[, .(country_code,
+                                  survey_year,
+                                  survey_acronym,
+                                  welfare_type,
+                                  cpi_domain_var)]
+
+
+  # cpi_temp <- pipload::pip_load_aux("cpi")
+
+  cpi_temp <- pip_cpi(action = "load",
+                      branch = "DEV",
+                      maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing")
+
+  cpi_temp <- cpi_temp[, cpi_domain_var :=
+                         fifelse(reporting_level == "urban" &
+                                   cpi_domain_value == 1, "urban", "")]
+
+  cpi_temp <- cpi_temp[, .(country_code, survey_year, survey_acronym,
+                           cpi_domain_var, reporting_level)]
+
+  # attach reporting_level to each pfw key combination
+  pfw_key <- cpi_temp[pfw_key_options, on = .(country_code, survey_year,
+                                              survey_acronym, cpi_domain_var)]
+
+  # Report duplicated keys instead of computing the check and discarding it.
+  key_vars <- c("country_code", "survey_year", "survey_acronym", "cpi_domain_var")
+  if (any(duplicated(pfw_key, by = key_vars))) {
+    warning("pfw key has duplicated records in: ",
+            paste(key_vars, collapse = ", "), call. = FALSE)
+  }
+
+  return(pfw_key)
+}
diff --git a/R/pip_pfw_update.R b/R/pip_pfw_update.R index e700e12..f499269 100644 --- a/R/pip_pfw_update.R +++ b/R/pip_pfw_update.R @@ -34,6 +34,9 @@ pip_pfw_update <- function(maindir = gls$PIP_DATA_DIR, branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(pfw, "aux_name", "pfw") + saved <- pipfun::pip_sign_save( x = pfw, measure = 
measure, diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index 953bf4d..292cafa 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -182,6 +182,15 @@ pip_pop_update <- function(force = FALSE, branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + pop <- pop |> setnames("pop_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pop, "aux_name", "pop") + setattr(pop, + "aux_key", + c("country_code", "year", "reporting_level")) + saved <- pipfun::pip_sign_save( x = pop, measure = measure, diff --git a/R/pip_ppp.R b/R/pip_ppp.R index 63f1eb8..e8fe3a7 100644 --- a/R/pip_ppp.R +++ b/R/pip_ppp.R @@ -13,7 +13,8 @@ pip_ppp <- function(action = c("update", "load"), branch = c("DEV", "PROD", "main"), force = FALSE, tag = branch, - detail = getOption("pipaux.detail.raw")) { + detail = getOption("pipaux.detail.raw"), + ppp_defaults = TRUE) { # ____________________________________________________________________________ # on.exit #### @@ -52,7 +53,8 @@ pip_ppp <- function(action = c("update", "load"), load_aux( maindir = maindir, measure = measure, - branch = branch + branch = branch, + ppp_defaults = ppp_defaults ) } diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R index b95b7f7..d7f05ff 100644 --- a/R/pip_ppp_update.R +++ b/R/pip_ppp_update.R @@ -68,7 +68,17 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, if (branch == "main") { branch <- "" } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + ppp <- ppp |> setnames("ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(ppp, "aux_name", "ppp") + setattr(ppp, + "aux_key", + c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. 
+ saved <- pipfun::pip_sign_save( x = ppp, measure = measure, @@ -86,6 +96,15 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, data.table::setnames(x = ppp_vintage, old = c("release_version", "adaptation_version"), new = c("ppp_rv", "ppp_av")) + + ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(ppp_vintage, "aux_name", "ppp") + setattr(ppp_vintage, + "aux_key", + c("country_code", "reporting_level")) + # Save pipfun::pip_sign_save( x = ppp_vintage, diff --git a/R/pip_regions.R b/R/pip_regions.R index d59ed09..c8d5244 100644 --- a/R/pip_regions.R +++ b/R/pip_regions.R @@ -81,6 +81,12 @@ pip_regions <- function(action = c("update", "load"), branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(dt, "aux_name", "regions") + setattr(dt, + "aux_key", + c("region_code")) + saved <- pipfun::pip_sign_save( x = dt, measure = measure, diff --git a/R/pip_wdi_update.R b/R/pip_wdi_update.R index 8bdf833..6c02d46 100644 --- a/R/pip_wdi_update.R +++ b/R/pip_wdi_update.R @@ -60,6 +60,12 @@ pip_wdi_update <- function(force = FALSE, branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(wdi, "aux_name", "wdi") + setattr(wdi, + "aux_key", + c("country_code", "year")) + saved <- pipfun::pip_sign_save( x = wdi, measure = measure, diff --git a/R/pip_weo.R b/R/pip_weo.R index b51301e..ea20984 100644 --- a/R/pip_weo.R +++ b/R/pip_weo.R @@ -51,6 +51,12 @@ pip_weo <- function(action = c("update", "load"), branch <- "" } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + + setattr(dt, "aux_name", "weo") + setattr(dt, + "aux_key", + c("country_code", "year")) + cat('\nDir : ', msrdir) saved <- pipfun::pip_sign_save( x = dt, diff --git a/R/ppp_validate_raw.R b/R/ppp_validate_raw.R index 6b562eb..cc5cd5d 100644 --- a/R/ppp_validate_raw.R +++ b/R/ppp_validate_raw.R @@ -62,9 +62,9 @@ ppp_validate_raw <- function(ppp, detail = 
getOption("pipaux.detail.raw")){ description = "`note_may192020` should be character") |> validate_if(is.character(ppp_2017_v1_v2_note), description = "`ppp_2017_v1_v2_note` should be character") |> - validate_cols(not_na, code, CoverageType, + validate_cols(not_na, code, CoverageType, datalevel, description = "no missing values in key variables") |> - validate_if(is_uniq(code, CoverageType), + validate_if(is_uniq(code, CoverageType, datalevel), description = "no duplicate records in key variables") |> add_results(report) diff --git a/man/aux_data.Rd b/man/aux_data.Rd new file mode 100644 index 0000000..2919fb9 --- /dev/null +++ b/man/aux_data.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aux_data_files.R +\name{aux_data} +\alias{aux_data} +\title{Attache key values into auxiliary file} +\usage{ +aux_data(aux_file) +} +\arguments{ +\item{aux_file}{auxiliary file} +} +\value{ +data.table with key values +} +\description{ +Attache key values into auxiliary file +} diff --git a/man/load_aux.Rd b/man/load_aux.Rd index 23d6602..16180eb 100644 --- a/man/load_aux.Rd +++ b/man/load_aux.Rd @@ -8,7 +8,8 @@ load_aux( measure, maindir = gls$PIP_DATA_DIR, branch = c("DEV", "PROD", "main"), - apply_label = TRUE + apply_label = TRUE, + ppp_defaults = TRUE ) } \arguments{ @@ -20,6 +21,8 @@ load_aux( will be used to update either the development server or production.} \item{apply_label}{logical: If TRUE, predefined labels will applied.} + +\item{ppp_defaults}{logical: If TRUE, wider format ppp data will be returned} } \description{ Load any auxiliary data diff --git a/man/merge_aux_files.Rd b/man/merge_aux_files.Rd new file mode 100644 index 0000000..6bdc9cc --- /dev/null +++ b/man/merge_aux_files.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/merge_aux_files.R +\name{merge_aux_files} +\alias{merge_aux_files} +\title{Merge auxiliary datasets} +\usage{ +merge_aux_files( + 
aux_data1, + aux_data2, + keep = c("left", "right", "full", "using", "master", "inner"), + ... +) +} +\arguments{ +\item{aux_data1}{auxiliary data two} + +\item{keep}{merge/ join type, the default is left join, options (left, right, full, using, master, inner)} +} +\value{ +data.table with key information +} +\description{ +Merge auxiliary datasets +} +\examples{ +pfw <- pip_pfw(action = "load") +ppp <- pip_ppp(action = "load") +pfw_ppp <- merge_aux_files(pfw, ppp) + +cpi <- pip_cpi(action = "cpi") +cpi <- cpi[, -c("cpi_domain")] +pfw_cpi <- merge_aux_files(cpi, pfw, keep = "right") + +} diff --git a/man/pip_country_list.Rd b/man/pip_country_list.Rd index 5b583e6..70eb8d4 100644 --- a/man/pip_country_list.Rd +++ b/man/pip_country_list.Rd @@ -9,7 +9,8 @@ pip_country_list( maindir = gls$PIP_DATA_DIR, force = FALSE, branch = c("DEV", "PROD", "main"), - class_branch = "master" + class_branch = "master", + detail = getOption("pipaux.detail.raw") ) } \arguments{ diff --git a/man/pip_pfw_key.Rd b/man/pip_pfw_key.Rd new file mode 100644 index 0000000..5afcf90 --- /dev/null +++ b/man/pip_pfw_key.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pip_pfw_key.R +\name{pip_pfw_key} +\alias{pip_pfw_key} +\title{Generate a dataset that contains pfw keys} +\usage{ +pip_pfw_key() +} +\value{ +data.table +} +\description{ +Generate a dataset that contains pfw keys +} diff --git a/man/pip_ppp.Rd b/man/pip_ppp.Rd index 6333c4d..12840dc 100644 --- a/man/pip_ppp.Rd +++ b/man/pip_ppp.Rd @@ -11,7 +11,8 @@ pip_ppp( branch = c("DEV", "PROD", "main"), force = FALSE, tag = branch, - detail = getOption("pipaux.detail.raw") + detail = getOption("pipaux.detail.raw"), + ppp_defaults = TRUE ) } \arguments{ From 31b29096a5965fc2cee1975267b23bbe53bb7c51 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Wed, 4 Sep 2024 08:04:33 -0400 Subject: [PATCH 19/23] Change function name from merge_aux_files to merger_aux --- .Rbuildignore | 1 + NAMESPACE | 2 +- 
R/{merge_aux_files.R => merger_aux.R} | 14 ++-- man/{merge_aux_files.Rd => merger_aux.Rd} | 12 +-- .../articles/utilizing_the_merge_function.Rmd | 77 +++++++++++++++++++ 5 files changed, 92 insertions(+), 14 deletions(-) rename R/{merge_aux_files.R => merger_aux.R} (89%) rename man/{merge_aux_files.Rd => merger_aux.Rd} (73%) create mode 100644 vignettes/articles/utilizing_the_merge_function.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 9853ffd..e5740bc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ tmp/ .vscode/ ^codecov\.yml$ ^data-raw$ +^vignettes/articles$ diff --git a/NAMESPACE b/NAMESPACE index 7101096..923c1dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,7 @@ export(gdp_validate_output) export(get_error_validation) export(incgroup_validate_output) export(load_aux) -export(merge_aux_files) +export(merger_aux) export(metadata_validate_output) export(metadata_validate_raw) export(mpd_validate_raw) diff --git a/R/merge_aux_files.R b/R/merger_aux.R similarity index 89% rename from R/merge_aux_files.R rename to R/merger_aux.R index 6444e78..3d9179d 100644 --- a/R/merge_aux_files.R +++ b/R/merger_aux.R @@ -10,17 +10,17 @@ #' @examples #' pfw <- pip_pfw(action = "load") #' ppp <- pip_ppp(action = "load") -#' pfw_ppp <- merge_aux_files(pfw, ppp) +#' pfw_ppp <- merger_aux(pfw, ppp) #' #' cpi <- pip_cpi(action = "cpi") #' cpi <- cpi[, -c("cpi_domain")] -#' pfw_cpi <- merge_aux_files(cpi, pfw, keep = "right") +#' pfw_cpi <- merger_aux(cpi, pfw, keep = "right") #' -merge_aux_files <- function(aux_data1, - aux_data2, - keep = c("left", "right", "full", - "using", "master", "inner"), - ... +merger_aux <- function(aux_data1, + aux_data2, + keep = c("left", "right", "full", + "using", "master", "inner"), + ... 
){ keep <- match.arg(keep) diff --git a/man/merge_aux_files.Rd b/man/merger_aux.Rd similarity index 73% rename from man/merge_aux_files.Rd rename to man/merger_aux.Rd index 6bdc9cc..b465c44 100644 --- a/man/merge_aux_files.Rd +++ b/man/merger_aux.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/merge_aux_files.R -\name{merge_aux_files} -\alias{merge_aux_files} +% Please edit documentation in R/merger_aux.R +\name{merger_aux} +\alias{merger_aux} \title{Merge auxiliary datasets} \usage{ -merge_aux_files( +merger_aux( aux_data1, aux_data2, keep = c("left", "right", "full", "using", "master", "inner"), @@ -25,10 +25,10 @@ Merge auxiliary datasets \examples{ pfw <- pip_pfw(action = "load") ppp <- pip_ppp(action = "load") -pfw_ppp <- merge_aux_files(pfw, ppp) +pfw_ppp <- merger_aux(pfw, ppp) cpi <- pip_cpi(action = "cpi") cpi <- cpi[, -c("cpi_domain")] -pfw_cpi <- merge_aux_files(cpi, pfw, keep = "right") +pfw_cpi <- merger_aux(cpi, pfw, keep = "right") } diff --git a/vignettes/articles/utilizing_the_merge_function.Rmd b/vignettes/articles/utilizing_the_merge_function.Rmd new file mode 100644 index 0000000..eedb4ff --- /dev/null +++ b/vignettes/articles/utilizing_the_merge_function.Rmd @@ -0,0 +1,77 @@ +--- +title: "Utilizing the merge function" +output: rmarkdown::html_vignette +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} + +library(pipaux) + +``` + +# How to use the `merger_aux` + +To efficiently merge auxiliary files, you can utilize the merger_aux() from pipaux package. This function requires three arguments: the two data files you want to merge and the type of merge to perform. The merge type has six options: left, right, full, using, master, and inner, with "left" being the default. + +Here’s a breakdown of how it works: + +1. Arguments: + +- Data Files: You need to provide two auxiliary datasets that you want to merge. 
These are the primary inputs for the function. +- Merge Type: This argument specifies the nature of the merge operation. You can choose from six different merge types: + - **Left**: Includes all records from the left dataset and matches records from the right dataset where possible. + + - **Right**: Includes all records from the right dataset and matches records from the left dataset where possible. + + - **Full**: Includes all records from both datasets, with matching records where available and missing values filled with `NA`. + + - **Using**: Same result as **Right**: the *using* dataset is the right (second) one. + + - **Master**: Same result as **Left**: the *master* dataset is the left (first) one. + + - **Inner**: Includes only the records that are present in both datasets. + +2. Default Option: If you don’t specify a merge type, the function defaults to "left", meaning it will keep all records from the left dataset and include matching records from the right dataset where available. + +**Note:** Keys/IDs and names of auxiliary data files (such as *cpi*, *ppp*, *pfw*, *pop*, *gdp*, *gdm*, etc.) are embedded within the data files as attributes. The `merger_aux()` function utilizes these IDs to create suitable key variables for merging the data files and generating an ID for the combined data file. Additionally, the function produces a new auxiliary file name by combining the names of the files being merged. + +# Examples + +### 1. 
Merge pfw and cpi files + +```{r} + +# load pfw data from the testing folder +pfw <- pip_pfw(action = "load", + branch = "DEV", + maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + +# load cpi +cpi <- pip_cpi(action = "load", + branch = "DEV", + maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + +cpi <- cpi[, -c("cpi_domain")] + +pfw_cpi <- merger_aux(pfw, cpi) + +``` + +```{r} + +# load ppp wide format data +ppp <- pip_ppp(action = "load", + branch = "DEV", + maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + +pfw_cpi_ppp <- merger_aux(pfw_cpi, ppp) + +``` + From 90f7b449eba6748e16914c3a9cda20a851b77469 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 5 Sep 2024 08:24:58 -0400 Subject: [PATCH 20/23] pip_weo_clean() has been updated --- R/merger_aux.R | 1 + R/pip_pfw_key.R | 4 +- R/pip_weo_clean.R | 2 +- .../articles/utilizing_the_merge_function.Rmd | 72 +++++++++++++++++-- 4 files changed, 70 insertions(+), 9 deletions(-) diff --git a/R/merger_aux.R b/R/merger_aux.R index 3d9179d..b2bf56e 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -15,6 +15,7 @@ #' cpi <- pip_cpi(action = "cpi") #' cpi <- cpi[, -c("cpi_domain")] #' pfw_cpi <- merger_aux(cpi, pfw, keep = "right") +#' cpi_pfw <- merger_aux(cpi, pfw) #' merger_aux <- function(aux_data1, aux_data2, diff --git a/R/pip_pfw_key.R b/R/pip_pfw_key.R index 8a6ff79..0e24321 100644 --- a/R/pip_pfw_key.R +++ b/R/pip_pfw_key.R @@ -9,7 +9,7 @@ pip_pfw_key <- function(){ pfw_temp <- pip_pfw(action = "load", branch = "DEV", - maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + maindir = "Q:/Team/Tefera/pip/PIP-Data_QA") pfw_key_options <- pfw_temp[, .(country_code, survey_year, @@ -22,7 +22,7 @@ pip_pfw_key <- function(){ cpi_temp <- pip_cpi(action = "load", branch = "DEV", - maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + maindir = "Q:/Team/Tefera/pip/PIP-Data_QA") cpi_temp <- cpi_temp[, cpi_domain_var := fifelse(reporting_level == 
"urban" & diff --git a/R/pip_weo_clean.R b/R/pip_weo_clean.R index d9e07ad..120fa27 100644 --- a/R/pip_weo_clean.R +++ b/R/pip_weo_clean.R @@ -96,7 +96,7 @@ pip_weo_clean <- function(dt, branch = branch) setDT(pop) - pop <- pop[pop_data_level == "national", ] + pop <- pop[reporting_level == "national", ] #pop_data_level = reporting_level dt[pop, on = .(country_code, year), `:=`( diff --git a/vignettes/articles/utilizing_the_merge_function.Rmd b/vignettes/articles/utilizing_the_merge_function.Rmd index eedb4ff..fac9775 100644 --- a/vignettes/articles/utilizing_the_merge_function.Rmd +++ b/vignettes/articles/utilizing_the_merge_function.Rmd @@ -14,11 +14,13 @@ knitr::opts_chunk$set( library(pipaux) +temp_fld <- "Q:/Team/Tefera/pip/PIP-Data_QA" + ``` # How to use the `merger_aux` -To efficiently merge auxiliary files, you can utilize the merger_aux() from pipaux package. This function requires three arguments: the two data files you want to merge and the type of merge to perform. The merge type has six options: left, right, full, using, master, and inner, with "left" being the default. +To efficiently merge auxiliary files, you can utilize the merger_aux() from pipaux package. This function requires three arguments: the two data files you want to merge and the type of merge to perform. The merge type has six options: **left**, **right**, **full**, **using**, **master**, and **inner**, with *left* being the default. 
Here’s a breakdown of how it works: @@ -51,27 +53,85 @@ Here’s a breakdown of how it works: # load pfw data from the testing folder pfw <- pip_pfw(action = "load", branch = "DEV", - maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + maindir = temp_fld) # load cpi cpi <- pip_cpi(action = "load", branch = "DEV", - maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + maindir = temp_fld) cpi <- cpi[, -c("cpi_domain")] +# a) keep "left" pfw_cpi <- merger_aux(pfw, cpi) +# b) keep "inner" +cpi_pfw <- merger_aux(pfw, cpi, keep = "inner") + ``` +### 2. Merge cpi and pop files + ```{r} +# load cpi +cpi <- pip_cpi(action = "load", + branch = "DEV", + maindir = temp_fld) + # load ppp wide format data -ppp <- pip_ppp(action = "load", +pop <- pip_ppp(action = "load", + branch = "DEV", + maindir = temp_fld) + +cpi_pop <- merger_aux(cpi, pop) + +``` + +### 3. Add pop data to pfw_cpi + +This example is going to show how to merge a compiled data (generated in example 1a) with pop data. + +```{r} + +# load pop data +pop <- pip_pop(action = "load", branch = "DEV", - maindir = "//w1wbgencifs01/pip/PIP-Data_Testing/pipaux_Testing") + maindir = temp_fld) -pfw_cpi_ppp <- merger_aux(pfw_cpi, ppp) +pop_pfw_cpi <- merger_aux(pop, + pfw_cpi, + keep = "right") ``` +### 4. Add gdm data to cpi_pfw +This example is going to show how to merge a compiled data (generated in example 1b) with gdm data. + +```{r} + +# load pop data +gdm <- pip_gdm(action = "load", + branch = "DEV", + maindir = temp_fld) + +gdm_cpi_pfw <- merger_aux(gdm, + cpi_pfw, + keep = "right") + +``` + +### 5. Add ppp data to cpi_pfw + +This example is going to show how to merge a compiled data (generated in example 1b) with ppp data. 
+ +```{r} + +# load ppp data +ppp <- pip_ppp(action = "load", + branch = "DEV", + maindir = temp_fld) + +cpi_pfw_ppp <- merger_aux(cpi_pfw, ppp) + +``` From c7dd7aeea6f019c25b7d3820981c4ec377c34c70 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Mon, 9 Sep 2024 09:07:04 -0400 Subject: [PATCH 21/23] Temporary folder loacation has been updated --- R/merger_aux.R | 6 +-- .../articles/utilizing_the_merge_function.Rmd | 50 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/R/merger_aux.R b/R/merger_aux.R index b2bf56e..06ffe4f 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -8,11 +8,11 @@ #' @export #' #' @examples -#' pfw <- pip_pfw(action = "load") -#' ppp <- pip_ppp(action = "load") +#' pfw <- load_aux("pfw") +#' ppp <- load_aux("ppp") #' pfw_ppp <- merger_aux(pfw, ppp) #' -#' cpi <- pip_cpi(action = "cpi") +#' cpi <- load_aux("cpi") #' cpi <- cpi[, -c("cpi_domain")] #' pfw_cpi <- merger_aux(cpi, pfw, keep = "right") #' cpi_pfw <- merger_aux(cpi, pfw) diff --git a/vignettes/articles/utilizing_the_merge_function.Rmd b/vignettes/articles/utilizing_the_merge_function.Rmd index fac9775..92e6d0d 100644 --- a/vignettes/articles/utilizing_the_merge_function.Rmd +++ b/vignettes/articles/utilizing_the_merge_function.Rmd @@ -14,7 +14,7 @@ knitr::opts_chunk$set( library(pipaux) -temp_fld <- "Q:/Team/Tefera/pip/PIP-Data_QA" +temp_fld <- "Y:/tefera_pipaux_test" ``` @@ -51,14 +51,14 @@ Here’s a breakdown of how it works: ```{r} # load pfw data from the testing folder -pfw <- pip_pfw(action = "load", - branch = "DEV", - maindir = temp_fld) - +pfw <- load_aux("pfw", + branch = "DEV", + maindir = temp_fld) # load cpi -cpi <- pip_cpi(action = "load", - branch = "DEV", - maindir = temp_fld) + +cpi <- load_aux("cpi", + branch = "DEV", + maindir = temp_fld) cpi <- cpi[, -c("cpi_domain")] @@ -75,14 +75,14 @@ cpi_pfw <- merger_aux(pfw, cpi, keep = "inner") ```{r} # load cpi -cpi <- pip_cpi(action = "load", - branch = "DEV", - maindir = temp_fld) +cpi <- 
load_aux("cpi", + branch = "DEV", + maindir = temp_fld) -# load ppp wide format data -pop <- pip_ppp(action = "load", - branch = "DEV", - maindir = temp_fld) +# load pop wide format data +pop <- load_aux("pop", + branch = "DEV", + maindir = temp_fld) cpi_pop <- merger_aux(cpi, pop) @@ -95,9 +95,9 @@ This example is going to show how to merge a compiled data (generated in example ```{r} # load pop data -pop <- pip_pop(action = "load", - branch = "DEV", - maindir = temp_fld) +pop <- load_aux("pop", + branch = "DEV", + maindir = temp_fld) pop_pfw_cpi <- merger_aux(pop, pfw_cpi, @@ -110,10 +110,10 @@ This example is going to show how to merge a compiled data (generated in example ```{r} -# load pop data -gdm <- pip_gdm(action = "load", - branch = "DEV", - maindir = temp_fld) +# load gdm data +gdm <- load_aux("gdm", + branch = "DEV", + maindir = temp_fld) gdm_cpi_pfw <- merger_aux(gdm, cpi_pfw, @@ -128,9 +128,9 @@ This example is going to show how to merge a compiled data (generated in example ```{r} # load ppp data -ppp <- pip_ppp(action = "load", - branch = "DEV", - maindir = temp_fld) +ppp <- load_aux("ppp", + branch = "DEV", + maindir = temp_fld) cpi_pfw_ppp <- merger_aux(cpi_pfw, ppp) From 0a4f936c885f9c4d66a354fd5fe196d0eeb76920 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 12 Sep 2024 09:52:30 -0400 Subject: [PATCH 22/23] test file for merger_aux() added --- tests/testthat/test-merger_aux.R | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/testthat/test-merger_aux.R diff --git a/tests/testthat/test-merger_aux.R b/tests/testthat/test-merger_aux.R new file mode 100644 index 0000000..3a5b894 --- /dev/null +++ b/tests/testthat/test-merger_aux.R
@@ -0,0 +1,31 @@
+temp_fld <- "Y:/tefera_pipaux_test"
+
+# Load one auxiliary file from the local test folder.
+load_test_aux <- function(measure) {
+  load_aux(measure,
+           branch = "DEV",
+           maindir = temp_fld)
+}
+
+test_that("Merge pfw and cpi without errors", {
+
+  skip_if_not(dir.exists(temp_fld), "test data folder is not available")
+
+  pfw <- load_test_aux("pfw")
+  cpi <- load_test_aux("cpi")
+  cpi <- cpi[, -c("cpi_domain")]
+
+  expect_no_error(merger_aux(pfw, cpi))
+
+})
+
+test_that("Merge pfw and ppp without errors", {
+
+  skip_if_not(dir.exists(temp_fld), "test data folder is not available")
+
+  pfw <- load_test_aux("pfw")
+  ppp <- load_test_aux("ppp")
+
+  expect_no_error(merger_aux(pfw, ppp))
+
+})
From add9f52d7ee12b80347d9936201f6c1ff082ae2b Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 19 Sep 2024 08:10:13 -0400 Subject: [PATCH 23/23] Validating, test, and pip functions are updated --- R/cpi_validate_output.R | 22 +++++++------- R/gdm_validate_output.R | 16 +++++----- R/gdp_validate_output.R | 10 +++---- R/metadata_validate_output.R | 8 ++--- R/npl_validate_output.R | 8 ++--- R/pce_validate_output.R | 10 +++---- R/pip_cpi_update.R | 19 ++++++------ R/pip_gdm_update.R | 18 ++++++------ R/pip_gdp_update.R | 16 +++++----- R/pip_income_groups.R | 13 ++++----- R/pip_metadata_update.R | 14 ++++----- R/pip_npl.R | 15 +++++----- R/pip_pce_update.R | 15 +++++----- R/pip_pl.R | 2 +- R/pip_pop_update.R | 15 +++++----- R/pip_ppp_update.R | 29 +++++++++---------- R/pip_weo.R | 9 +++--- R/pop_validate_output.R | 10 +++---- R/ppp_validate_output.R | 10 +++---- tests/testthat/test-cl-validation.R | 3 +- tests/testthat/test-countries-validation.R | 7 +++-- tests/testthat/test-cpi-validation.R | 13 +++++---- tests/testthat/test-gdm-validation.R | 17 ++++++----- tests/testthat/test-gdp-validation.R | 15 +++++----- .../testthat/test-income-groups-validation.R | 5 ++-- tests/testthat/test-maddison-validation.R | 2 ++ tests/testthat/test-metadata-validation.R | 9 +++--- tests/testthat/test-npl-validation.R | 11 +++---- tests/testthat/test-pce-validation.R | 15 +++++----- tests/testthat/test-pfw-validation.R | 7 +++-- tests/testthat/test-pl-validation.R | 5 ++-- tests/testthat/test-pop-validation.R | 15 +++++----- tests/testthat/test-ppp-validation.R | 24 ++++++++------- tests/testthat/test-sna-fy-validation.R | 2 ++ tests/testthat/test-sna-validation.R | 2 ++ tests/testthat/test-wdi-validation.R | 2 ++ tests/testthat/test-weo-validation.R | 5 ++-- 37 files changed, 219 
insertions(+), 199 deletions(-) diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index a8af6f9..2c2102b 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -16,8 +16,8 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) validate(cpi, name = "CPI output data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> - validate_if(is.integer(cpi_year), - description = "`cpi_year` should be integer") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> validate_if(is.numeric(survey_year), description = "`survey_year` should be numeric") |> validate_if(is.numeric(cpi), @@ -52,23 +52,23 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) # description = "`cpi2017_SM22` should be numeric") |> validate_cols(is.logical, cpi2005, description = "`cpi2005` should be logical") |> - validate_if(is.character(cpi_data_level), - description = "`cpi_data_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), cpi_data_level, - description = "`cpi_data_level` values within range") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> # validate_if(is.numeric(cpi2011_AM23), # description = "`cpi2011_AM23` should be numeric") |> # validate_if(is.numeric(cpi2017_AM23), # description = "`cpi2017_AM23` should be numeric") |> validate_if(is.character(cpi_id), description = "`cpi_id` should be character") |> - validate_cols(not_na, country_code, cpi_year, survey_acronym, cpi_data_level, + validate_cols(not_na, country_code, year, survey_acronym, reporting_level, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, cpi_year, survey_acronym, - cpi_data_level), + 
validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), description = "no duplicate records in key variables") |> - validate_if(is_uniq(country_code, cpi_year, survey_acronym, - cpi_data_level), + validate_if(is_uniq(country_code, year, survey_acronym, + reporting_level), description = "no duplicate cpi values") |> add_results(report) diff --git a/R/gdm_validate_output.R b/R/gdm_validate_output.R index 6c6e2c2..92f61dc 100644 --- a/R/gdm_validate_output.R +++ b/R/gdm_validate_output.R @@ -18,8 +18,8 @@ gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")) description = "`survey_id` should be character") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> - validate_if(is.integer(surveyid_year), - description = "`surveyid_year` should be integer") |> + validate_if(is.integer(year), + description = "`year` should be integer") |> validate_if(is.numeric(survey_year), description = "`survey_year` should be numeric") |> validate_if(is.character(welfare_type), @@ -34,17 +34,17 @@ gdm_validate_output <- function(gdm, detail = getOption("pipaux.detail.output")) description = "`distribution_type` values within range") |> validate_if(is.character(gd_type), description = "`gd_type` should be character") |> - validate_if(is.character(pop_data_level), - description = "`pop_data_level` should be character") |> - validate_cols(in_set(c("national", "rural", "urban")), pop_data_level, - description = "`pop_data_level` values within range") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> + validate_cols(in_set(c("national", "rural", "urban")), reporting_level, + description = "`reporting_level` values within range") |> validate_if(is.character(pcn_source_file), description = "`pcn_source_file` should be character") |> validate_if(is.character(pcn_survey_id), description = "`pcn_survey_id` should be character") |> - 
validate_cols(not_na, country_code, surveyid_year, pop_data_level, + validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, surveyid_year, pop_data_level), + validate_if(is_uniq(country_code, year, reporting_level), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R index eda5ea7..2a80d72 100644 --- a/R/gdp_validate_output.R +++ b/R/gdp_validate_output.R @@ -18,19 +18,19 @@ gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")) description = "`country_code` should be character") |> validate_if(is.numeric(year), description = "`year` should be numeric") |> - validate_if(is.character(gdp_data_level), - description = "`gdp_data_level` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), - gdp_data_level, description = "`gdp_data_level` values within range") |> + reporting_level, description = "`reporting_level` values within range") |> validate_if(is.numeric(gdp), description = "`gdp` should be numeric") |> validate_if(is.character(gdp_domain), description = "`gdp_domain` should be character") |> validate_cols(in_set(c("national", "urban/rural")), gdp_domain, description = "`gdp_domain` values within range") |> - validate_cols(not_na, country_code, year, gdp_data_level, + validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, gdp_data_level), + validate_if(is_uniq(country_code, year, reporting_level), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/metadata_validate_output.R b/R/metadata_validate_output.R index 421fe4c..9976802 100644 --- a/R/metadata_validate_output.R +++ 
b/R/metadata_validate_output.R @@ -18,8 +18,8 @@ metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail description = "`country_code` should be character") |> validate_if(is.character(country_name), description = "`country_name` should be character") |> - validate_if(is.numeric(reporting_year), - description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> validate_if(is.numeric(survey_year), description = "`survey_year` should be numeric") |> validate_if(is.character(survey_title), @@ -38,9 +38,9 @@ metadata_validate_output <- function(metadata, detail = getOption("pipaux.detail description = "`distribution_type` should be character") |> validate_cols(in_set(c("aggregated", "group", "micro", "micro, imputed", NA)), distribution_type, description = "`distribution_type` values within range") |> - validate_cols(not_na, country_code, reporting_year, welfare_type, + validate_cols(not_na, country_code, year, welfare_type, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, reporting_year, welfare_type), + validate_if(is_uniq(country_code, year, welfare_type), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/npl_validate_output.R b/R/npl_validate_output.R index 4980560..f4a3939 100644 --- a/R/npl_validate_output.R +++ b/R/npl_validate_output.R @@ -16,17 +16,17 @@ npl_validate_output <- function(npl, detail = getOption("pipaux.detail.output")) validate(npl, name = "NPL output data validation") |> validate_if(is.character(country_code), description = "`country_code` should be character") |> - validate_if(is.numeric(reporting_year), - description = "`reporting_year` should be numeric") |> + validate_if(is.numeric(year), + description = "`year` should be numeric") |> validate_if(is.numeric(nat_headcount), description = "`nat_headcount` should be numeric") |> validate_if(is.numeric(comparability), 
description = "`comparability` should be numeric") |> validate_if(is.character(footnote), description = "`footnote` should be character") |> - validate_cols(not_na, country_code, reporting_year, + validate_cols(not_na, country_code, year, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, reporting_year), + validate_if(is_uniq(country_code, year), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R index 383da86..7566b69 100644 --- a/R/pce_validate_output.R +++ b/R/pce_validate_output.R @@ -20,17 +20,17 @@ pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")) description = "`year` should be numeric") |> validate_if(is.numeric(pce), description = "`pce` should be numeric") |> - validate_if(is.character(pce_data_level), - description = "`pce_data_level` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), - pce_data_level, description = "`pce_data_level` values within range") |> + reporting_level, description = "`reporting_level` values within range") |> validate_if(is.character(pce_domain), description = "`pce_domain` should be character") |> validate_cols(in_set(c("national", "urban/rural")), pce_domain, description = "`pce_domain` values within range") |> - validate_cols(not_na, country_code, year, pce_data_level, + validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pce_data_level), + validate_if(is_uniq(country_code, year, reporting_level), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R index c1883f8..b6401a5 100644 --- a/R/pip_cpi_update.R +++ b/R/pip_cpi_update.R @@ -37,15 +37,7 @@ 
pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, maindir = maindir, branch = branch) - # validate cpi clean data before saving it - cpi_validate_output(cpi, detail = detail) - - # Save - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - + # changae cpi_year and cpi_data_level to year and reporting_level cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), c("year", "reporting_level"), skip_absent=TRUE) @@ -55,6 +47,15 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, "aux_key", c("country_code", "year", "reporting_level", "survey_acronym")) + # validate cpi clean data before saving it + cpi_validate_output(cpi, detail = detail) + + # Save + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + saved <- pipfun::pip_sign_save( x = cpi, measure = measure, diff --git a/R/pip_gdm_update.R b/R/pip_gdm_update.R index 9e203ed..572e114 100644 --- a/R/pip_gdm_update.R +++ b/R/pip_gdm_update.R @@ -206,6 +206,15 @@ pip_gdm_update <- function(force = FALSE, # ---- Save and sign ---- + df <- df |> setnames(c("surveyid_year", "pop_data_level"), + c("year", "reporting_level"), + skip_absent=TRUE) + + setattr(df, "aux_name", "gdm") + setattr(df, + "aux_key", + c("country_code", "year", "reporting_level", "welfare_type")) + # validate gdm output data gdm_validate_output(gdm = df, detail = detail) @@ -214,15 +223,6 @@ pip_gdm_update <- function(force = FALSE, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - df <- df |> setnames(c("surveyid_year", "pop_data_level"), - c("year", "reporting_level"), - skip_absent=TRUE) - - setattr(df, "aux_name", "gdm") - setattr(df, - "aux_key", - c("country_code", "year", "reporting_level", "welfare_type")) - saved <- pipfun::pip_sign_save( x = df, measure = measure, diff --git a/R/pip_gdp_update.R b/R/pip_gdp_update.R index cf38d03..aba1544 100644 --- a/R/pip_gdp_update.R +++ 
b/R/pip_gdp_update.R @@ -278,14 +278,6 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, gdp <- gdp[country_code %in% cl$country_code] # ---- Save and sign ---- - # validate gdp output data - gdp_validate_output(gdp = gdp, detail = detail) - - if (branch == "main") { - branch <- "" - } - msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - gdp <- gdp |> setnames("gdp_data_level", "reporting_level", skip_absent=TRUE) @@ -294,6 +286,14 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, "aux_key", c("country_code", "year", "reporting_level")) + # validate gdp output data + gdp_validate_output(gdp = gdp, detail = detail) + + if (branch == "main") { + branch <- "" + } + msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir + saved <- pipfun::pip_sign_save( x = gdp, measure = measure, diff --git a/R/pip_income_groups.R b/R/pip_income_groups.R index d3410bc..5b43c71 100644 --- a/R/pip_income_groups.R +++ b/R/pip_income_groups.R @@ -53,6 +53,12 @@ pip_income_groups <- function(action = c("update", "load"), #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## save -------- + # ig <- ig |> setnames("year_data", "year", skip_absent=TRUE) + + setattr(ig, "aux_name", "income_groups") + setattr(ig, + "aux_key", + c("country_code", "year")) # validate income group output data incgroup_validate_output(incgroup = ig, detail = detail) @@ -62,13 +68,6 @@ pip_income_groups <- function(action = c("update", "load"), } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - ig <- ig |> setnames("year_data", "year", skip_absent=TRUE) - - setattr(ig, "aux_name", "income_groups") - setattr(ig, - "aux_key", - c("country_code", "year")) - saved <- pipfun::pip_sign_save( x = ig, measure = measure, diff --git a/R/pip_metadata_update.R b/R/pip_metadata_update.R index fe6a309..22e80e6 100644 --- a/R/pip_metadata_update.R +++ b/R/pip_metadata_update.R @@ -105,6 +105,13 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, ## 
............................................................................ ## Save #### + df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) + + setattr(df, "aux_name", "metadata") + setattr(df, + "aux_key", + c("country_code", "year", "welfare_type")) + # validate raw metdata data metadata_validate_output(metadata = df, detail = detail) @@ -113,13 +120,6 @@ pip_metadata_update <- function(maindir = gls$PIP_DATA_DIR, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - df <- df |> setnames("reporting_year", "year", skip_absent=TRUE) - - setattr(df, "aux_name", "metadata") - setattr(df, - "aux_key", - c("country_code", "year", "welfare_type")) - saved <- pipfun::pip_sign_save( x = df, measure = measure, diff --git a/R/pip_npl.R b/R/pip_npl.R index 4bc5421..53fdf2f 100644 --- a/R/pip_npl.R +++ b/R/pip_npl.R @@ -45,6 +45,13 @@ pip_npl <- function(action = c("update", "load"), #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## save -------- + npl <- npl |> setnames("reporting_year", "year", + skip_absent=TRUE) + + setattr(npl, "aux_name", "npl") + setattr(npl, + "aux_key", + c("country_code", "year")) # validate npl output data npl_validate_output(npl = npl, detail = detail) @@ -54,14 +61,6 @@ pip_npl <- function(action = c("update", "load"), } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - npl <- npl |> setnames("reporting_year", "year", - skip_absent=TRUE) - - setattr(npl, "aux_name", "npl") - setattr(npl, - "aux_key", - c("country_code", "year")) - saved <- pipfun::pip_sign_save( x = npl, measure = measure, diff --git a/R/pip_pce_update.R b/R/pip_pce_update.R index 7e26eeb..4b289a2 100644 --- a/R/pip_pce_update.R +++ b/R/pip_pce_update.R @@ -228,6 +228,13 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, pce <- pce[country_code %in% cl$country_code] ## ---- Sign and save ---- + pce <- pce |> setnames("pce_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pce, "aux_name", "pce") + setattr(pce, + 
"aux_key", + c("country_code", "year", "reporting_level")) # validate pce output data pce_validate_output(pce = pce, detail = detail) @@ -237,14 +244,6 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - pce <- pce |> setnames("pce_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(pce, "aux_name", "pce") - setattr(pce, - "aux_key", - c("country_code", "year", "reporting_level")) - saved <- pipfun::pip_sign_save( x = pce, measure = measure, diff --git a/R/pip_pl.R b/R/pip_pl.R index 61c4c15..1b467e5 100644 --- a/R/pip_pl.R +++ b/R/pip_pl.R @@ -33,7 +33,7 @@ pip_pl <- function(action = c("update", "load"), dt <- purrr::map_df(dl,pip_pl_clean) - # Save + # Save # validate pl clean data pl_validate_output(pl = dt, detail = detail) diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index 292cafa..229235d 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -173,6 +173,13 @@ pip_pop_update <- function(force = FALSE, #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Save data --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + pop <- pop |> setnames("pop_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(pop, "aux_name", "pop") + setattr(pop, + "aux_key", + c("country_code", "year", "reporting_level")) # validate output pop data pop_validate_output(pop = pop, detail = detail) @@ -183,14 +190,6 @@ pip_pop_update <- function(force = FALSE, } msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - pop <- pop |> setnames("pop_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(pop, "aux_name", "pop") - setattr(pop, - "aux_key", - c("country_code", "year", "reporting_level")) - saved <- pipfun::pip_sign_save( x = pop, measure = measure, diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R index d7f05ff..0d442ec 100644 --- a/R/pip_ppp_update.R +++ b/R/pip_ppp_update.R @@ -61,6 +61,13 @@ pip_ppp_update <- 
function(maindir = gls$PIP_DATA_DIR, # ____________________________________________________________________________ # Saving #### + ppp <- ppp |> setnames("ppp_data_level", "reporting_level", + skip_absent=TRUE) + + setattr(ppp, "aux_name", "ppp") + setattr(ppp, + "aux_key", + c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. # validate ppp output data ppp_validate_output(ppp = ppp, detail = detail) @@ -71,14 +78,6 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - ppp <- ppp |> setnames("ppp_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(ppp, "aux_name", "ppp") - setattr(ppp, - "aux_key", - c("country_code", "reporting_level")) # this is going to be key variables only when PPP default year selected. - saved <- pipfun::pip_sign_save( x = ppp, measure = measure, @@ -97,13 +96,13 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, old = c("release_version", "adaptation_version"), new = c("ppp_rv", "ppp_av")) - ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", - skip_absent=TRUE) - - setattr(ppp_vintage, "aux_name", "ppp") - setattr(ppp_vintage, - "aux_key", - c("country_code", "reporting_level")) + # ppp_vintage <- ppp_vintage |> setnames("ppp_data_level", "reporting_level", + # skip_absent=TRUE) + # + # setattr(ppp_vintage, "aux_name", "ppp") + # setattr(ppp_vintage, + # "aux_key", + # c("country_code", "reporting_level")) # Save pipfun::pip_sign_save( diff --git a/R/pip_weo.R b/R/pip_weo.R index ea20984..da7a229 100644 --- a/R/pip_weo.R +++ b/R/pip_weo.R @@ -44,6 +44,10 @@ pip_weo <- function(action = c("update", "load"), branch = branch) # Save dataset + setattr(dt, "aux_name", "weo") + setattr(dt, + "aux_key", + c("country_code", "year")) # validate weo clean data weo_validate_output(weo = dt, detail = detail) @@ -52,11 +56,6 @@ pip_weo <- function(action = c("update", "load"), } 
msrdir <- fs::path(maindir, "_aux", branch, measure) # measure dir - setattr(dt, "aux_name", "weo") - setattr(dt, - "aux_key", - c("country_code", "year")) - cat('\nDir : ', msrdir) saved <- pipfun::pip_sign_save( x = dt, diff --git a/R/pop_validate_output.R b/R/pop_validate_output.R index 13971f4..036e06c 100644 --- a/R/pop_validate_output.R +++ b/R/pop_validate_output.R @@ -18,19 +18,19 @@ pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")) description = "`country_code` should be character") |> validate_if(is.numeric(year), description = "`year` should be numeric") |> - validate_if(is.character(pop_data_level), - description = "`pop_data_level` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), - pop_data_level, description = "`pop_data_level` values within range") |> + reporting_level, description = "`reporting_level` values within range") |> validate_if(is.numeric(pop), description = "`pop` should be numeric") |> validate_if(is.character(pop_domain), description = "`pop_domain` should be character") |> validate_cols(in_set(c("national", "urban/rural")), pop_domain, description = "`pop_domain` values within range") |> - validate_cols(not_na, country_code, year, pop_data_level, + validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> - validate_if(is_uniq(country_code, year, pop_data_level), + validate_if(is_uniq(country_code, year, reporting_level), description = "no duplicate records in key variables") |> add_results(report) diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R index e1b585b..b0c4b12 100644 --- a/R/ppp_validate_output.R +++ b/R/ppp_validate_output.R @@ -32,15 +32,15 @@ ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")) description = "`ppp_domain` should be character") |> 
validate_cols(in_set(c("1", "2")), ppp_domain, description = "`ppp_domain` values within range") |> - validate_if(is.character(ppp_data_level), - description = "`ppp_data_level` should be character") |> + validate_if(is.character(reporting_level), + description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), - ppp_data_level, description = "`ppp_data_level` values within range") |> - validate_cols(not_na, country_code, ppp_year, ppp_data_level, + reporting_level, description = "`reporting_level` values within range") |> + validate_cols(not_na, country_code, ppp_year, reporting_level, adaptation_version, release_version, description = "no missing values in key variables") |> validate_if(is_uniq(country_code, ppp_year, - ppp_data_level, adaptation_version, release_version), + reporting_level, adaptation_version, release_version), description = "no duplicate records in key variables") |> add_results(report) diff --git a/tests/testthat/test-cl-validation.R b/tests/testthat/test-cl-validation.R index 0a46304..ff442f3 100644 --- a/tests/testthat/test-cl-validation.R +++ b/tests/testthat/test-cl-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure = "country_list" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("cl_validate_raw() works identifying duplicate error", { @@ -46,7 +47,7 @@ test_that("cl_validate_raw() works identifying duplicate error", { test_that("cl_validate_raw() works identifying invalid value", { - cl <- load_aux(maindir = gls$PIP_DATA_DIR, + cl <- load_aux(maindir = temp_fld, #fldgls$PIP_DATA_DIR, measure = measure, branch = branch) diff --git a/tests/testthat/test-countries-validation.R b/tests/testthat/test-countries-validation.R index d7e1ec2..2abbbf2 100644 --- a/tests/testthat/test-countries-validation.R +++ b/tests/testthat/test-countries-validation.R @@ -3,12 +3,13 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") 
measure <- "countries" -gls <- pipfun::pip_create_globals() +gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("countries_validate_output() works identifying duplicate error", { countries <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -23,7 +24,7 @@ test_that("countries_validate_output() works identifying duplicate error", { test_that("countries_validate_output() works identifying invalid value", { countries <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-cpi-validation.R b/tests/testthat/test-cpi-validation.R index e5d1403..8ee84ef 100644 --- a/tests/testthat/test-cpi-validation.R +++ b/tests/testthat/test-cpi-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure = "cpi" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("cpi_validate_raw() works identifying duplicate error", { @@ -56,13 +57,13 @@ test_that("cpi_validate_raw() works identifying invalid value", { test_that("cpi_validate_output() works identifying duplicate error", { cpi <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - cpi[, `:=` (cpi_data_level = fifelse(cpi_data_level == "rural", - "urban", cpi_data_level))] + cpi[, `:=` (reporting_level = fifelse(reporting_level == "rural", + "urban", reporting_level))] expect_error(cpi_validate_output(cpi)) @@ -71,12 +72,12 @@ test_that("cpi_validate_output() works identifying duplicate error", { test_that("cpi_validate_output() works identifying type/ formating error", { cpi <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - cpi[, `:=` (cpi_year = as.character(cpi_year), + cpi[, `:=` (year = as.character(year), survey_year = 
as.character(survey_year), cpi_domain_value = as.character(cpi_domain_value), cpi2011 = as.character(cpi2011), @@ -90,7 +91,7 @@ test_that("cpi_validate_output() works identifying type/ formating error", { test_that("cpi_validate_output() works identifying invalid value", { cpi <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-gdm-validation.R b/tests/testthat/test-gdm-validation.R index 58a1eaf..a4cde61 100644 --- a/tests/testthat/test-gdm-validation.R +++ b/tests/testthat/test-gdm-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "gdm" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("gdm_validate_raw() works identifying duplicate error", { @@ -55,13 +56,13 @@ test_that("gdm_validate_raw() works identifying invalid value", { test_that("gdm_validate_output() works identifying duplicate error", { gdm <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - gdm[, `:=` (pop_data_level = fifelse(pop_data_level == "rural", - "urban", pop_data_level))] + gdm[, `:=` (reporting_level = fifelse(reporting_level == "rural", + "urban", reporting_level))] expect_error(gdm_validate_output(gdm)) @@ -70,12 +71,12 @@ test_that("gdm_validate_output() works identifying duplicate error", { test_that("gdm_validate_output() works identifying type/ formating error", { gdm <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - gdm[, `:=` (surveyid_year = as.character(surveyid_year), + gdm[, `:=` (year = as.character(year), survey_year = as.character(survey_year), survey_mean_lcu = as.character(survey_mean_lcu))] @@ -86,13 +87,13 @@ test_that("gdm_validate_output() works identifying type/ formating error", { test_that("gdm_validate_output() works identifying invalid value", { gdm 
<- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - gdm[, pop_data_level := fifelse(pop_data_level == "national", - "national1", pop_data_level)] + gdm[, reporting_level := fifelse(reporting_level == "national", + "national1", reporting_level)] expect_error(gdm_validate_output(gdm)) diff --git a/tests/testthat/test-gdp-validation.R b/tests/testthat/test-gdp-validation.R index e9642a7..b67415c 100644 --- a/tests/testthat/test-gdp-validation.R +++ b/tests/testthat/test-gdp-validation.R @@ -4,17 +4,18 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "gdp" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("gdp_validate_output() works identifying duplicate error", { gdp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - gdp[, `:=` (gdp_data_level = fifelse(gdp_data_level == "rural", - "urban", gdp_data_level))] + gdp[, `:=` (reporting_level = fifelse(reporting_level == "rural", + "urban", reporting_level))] expect_error(gdp_validate_output(gdp)) @@ -23,7 +24,7 @@ test_that("gdp_validate_output() works identifying duplicate error", { test_that("gdp_validate_output() works identifying type/ formating error", { gdp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -38,13 +39,13 @@ test_that("gdp_validate_output() works identifying type/ formating error", { test_that("gdp_validate_output() works identifying invalid value", { gdp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - gdp[, gdp_data_level := fifelse(gdp_data_level == "national", - "national1", gdp_data_level)] + gdp[, reporting_level := fifelse(reporting_level == "national", + "national1", reporting_level)] expect_error(gdp_validate_output(gdp)) diff --git 
a/tests/testthat/test-income-groups-validation.R b/tests/testthat/test-income-groups-validation.R index 606a083..1919c1c 100644 --- a/tests/testthat/test-income-groups-validation.R +++ b/tests/testthat/test-income-groups-validation.R @@ -4,11 +4,12 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "income_groups" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("gdp_validate_output() works identifying type/ formating error", { incgroups <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -23,7 +24,7 @@ test_that("gdp_validate_output() works identifying type/ formating error", { test_that("incgroup_validate_output() works identifying invalid value", { incgroups <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-maddison-validation.R b/tests/testthat/test-maddison-validation.R index 9bccfdb..0bd9c93 100644 --- a/tests/testthat/test-maddison-validation.R +++ b/tests/testthat/test-maddison-validation.R @@ -3,6 +3,8 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "maddison" +gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("mpd_validate_raw() works identifying type/ formating error", { diff --git a/tests/testthat/test-metadata-validation.R b/tests/testthat/test-metadata-validation.R index 2763fcd..267c48d 100644 --- a/tests/testthat/test-metadata-validation.R +++ b/tests/testthat/test-metadata-validation.R @@ -5,6 +5,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "metadata" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("metadata_validate_raw() works identifying duplicate error", { @@ -53,7 +54,7 @@ test_that("metadata_validate_raw() works identifying invalid value", { test_that("metadata_validate_output() works identifying duplicate 
error", { metadata <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -68,12 +69,12 @@ test_that("metadata_validate_output() works identifying duplicate error", { test_that("metadata_validate_output() works identifying type/ formating error", { metadata <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - metadata[, `:=` (reporting_year = as.character(reporting_year), + metadata[, `:=` (year = as.character(year), survey_year = as.character(survey_year))] expect_error(metadata_validate_output(metadata)) @@ -83,7 +84,7 @@ test_that("metadata_validate_output() works identifying type/ formating error", test_that("metadata_validate_output() works identifying invalid value", { metadata <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-npl-validation.R b/tests/testthat/test-npl-validation.R index 1e4d907..cf05f50 100644 --- a/tests/testthat/test-npl-validation.R +++ b/tests/testthat/test-npl-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "npl" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("npl_validate_raw() works identifying duplicate error", { @@ -41,13 +42,13 @@ test_that("npl_validate_raw() works identifying type/ formating error", { test_that("npl_validate_output() works identifying duplicate error", { npl <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - npl[, `:=` (reporting_year = fifelse((reporting_year == 2007 & country_code == "AFG"), - 2011, reporting_year))] + npl[, `:=` (year = fifelse((year == 2007 & country_code == "AFG"), + 2011, year))] expect_error(npl_validate_output(npl)) @@ -56,12 +57,12 @@ test_that("npl_validate_output() works identifying duplicate 
error", { test_that("npl_validate_output() works identifying type/ formating error", { npl <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - npl[, `:=` (reporting_year = as.character(reporting_year), + npl[, `:=` (year = as.character(year), comparability = as.character(comparability), nat_headcount = as.character(nat_headcount))] diff --git a/tests/testthat/test-pce-validation.R b/tests/testthat/test-pce-validation.R index 62cfdd5..02f132a 100644 --- a/tests/testthat/test-pce-validation.R +++ b/tests/testthat/test-pce-validation.R @@ -4,17 +4,18 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pce" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("pce_validate_output() works identifying duplicate error", { pce <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - pce[, `:=` (pce_data_level = fifelse(pce_data_level == "rural", - "urban", pce_data_level))] + pce[, `:=` (reporting_level = fifelse(reporting_level == "rural", + "urban", reporting_level))] expect_error(pce_validate_output(pce)) @@ -23,7 +24,7 @@ test_that("pce_validate_output() works identifying duplicate error", { test_that("pce_validate_output() works identifying type/ formating error", { pce <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -38,13 +39,13 @@ test_that("pce_validate_output() works identifying type/ formating error", { test_that("pce_validate_output() works identifying invalid value", { pce <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - pce[, pce_data_level := fifelse(pce_data_level == "national", - "national1", pce_data_level)] + pce[, reporting_level := fifelse(reporting_level == "national", + "national1", reporting_level)] 
expect_error(pce_validate_output(pce)) diff --git a/tests/testthat/test-pfw-validation.R b/tests/testthat/test-pfw-validation.R index 1b7f391..f5e55a5 100644 --- a/tests/testthat/test-pfw-validation.R +++ b/tests/testthat/test-pfw-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pfw" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("pfw_validate_raw() works identifying duplicate error", { @@ -64,7 +65,7 @@ test_that("pfw_validate_raw() works identifying invalid value", { test_that("pfw_validate_output() works identifying duplicate error", { pfw <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -79,7 +80,7 @@ test_that("pfw_validate_output() works identifying duplicate error", { test_that("pfw_validate_output() works identifying type/ formating error", { pfw <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -96,7 +97,7 @@ test_that("pfw_validate_output() works identifying type/ formating error", { test_that("pfw_validate_output() works identifying invalid value", { pfw <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-pl-validation.R b/tests/testthat/test-pl-validation.R index 2ea2a94..52fb835 100644 --- a/tests/testthat/test-pl-validation.R +++ b/tests/testthat/test-pl-validation.R @@ -3,11 +3,12 @@ branch <- "DEV" measure <- "pl" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("pl_validate_output() works identifying duplicate error", { pl <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -22,7 +23,7 @@ test_that("pl_validate_output() works identifying duplicate error", { test_that("pl_validate_output() works identifying type/ 
formating error", { pl <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) diff --git a/tests/testthat/test-pop-validation.R b/tests/testthat/test-pop-validation.R index 2867d37..6099507 100644 --- a/tests/testthat/test-pop-validation.R +++ b/tests/testthat/test-pop-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "pop" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("pop_validate_raw() works identifying duplicate error", { @@ -56,13 +57,13 @@ test_that("pop_validate_raw() works identifying invalid value", { test_that("pop_validate_output() works identifying duplicate error", { pop <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - pop[, `:=` (pop_data_level = fifelse((pop_data_level == "rural" & country_code == "ABW"), - "urban", pop_data_level))] + pop[, `:=` (reporting_level = fifelse((reporting_level == "rural" & country_code == "ABW"), + "urban", reporting_level))] expect_error(pop_validate_output(pop)) @@ -71,7 +72,7 @@ test_that("pop_validate_output() works identifying duplicate error", { test_that("pop_validate_output() works identifying type/ formating error", { pop <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -86,13 +87,13 @@ test_that("pop_validate_output() works identifying type/ formating error", { test_that("pop_validate_output() works identifying invalid value", { pop <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) - pop[, pop_data_level := fifelse(pop_data_level == "national", - "national1", pop_data_level)] + pop[, reporting_level := fifelse(reporting_level == "national", + "national1", reporting_level)] expect_error(pop_validate_output(pop)) diff --git 
a/tests/testthat/test-ppp-validation.R b/tests/testthat/test-ppp-validation.R index ea64396..7fbea69 100644 --- a/tests/testthat/test-ppp-validation.R +++ b/tests/testthat/test-ppp-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "ppp" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("ppp_validate_raw() works identifying duplicate error", { @@ -54,13 +55,14 @@ test_that("ppp_validate_raw() works identifying invalid value", { test_that("ppp_validate_output() works identifying duplicate error", { ppp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, - branch = branch + branch = branch, + ppp_defaults = FALSE ) - ppp[, `:=` (ppp_data_level = fifelse(ppp_data_level == "rural", - "urban", ppp_data_level))] + ppp[, `:=` (reporting_level = fifelse(reporting_level == "rural", + "urban", reporting_level))] expect_error(ppp_validate_output(ppp)) @@ -69,9 +71,10 @@ test_that("ppp_validate_output() works identifying duplicate error", { test_that("ppp_validate_output() works identifying type/ formating error", { ppp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, - branch = branch + branch = branch, + ppp_defaults = FALSE ) ppp[, `:=` (ppp_year = as.character(ppp_year), @@ -84,13 +87,14 @@ test_that("ppp_validate_output() works identifying type/ formating error", { test_that("ppp_validate_output() works identifying invalid value", { ppp <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, - branch = branch + branch = branch, + ppp_defaults = FALSE ) - ppp[, ppp_data_level := fifelse(ppp_data_level == "national", - "national1", ppp_data_level)] + ppp[, reporting_level := fifelse(reporting_level == "national", + "national1", reporting_level)] expect_error(ppp_validate_output(ppp)) diff --git a/tests/testthat/test-sna-fy-validation.R 
b/tests/testthat/test-sna-fy-validation.R index be8d4d6..9098cde 100644 --- a/tests/testthat/test-sna-fy-validation.R +++ b/tests/testthat/test-sna-fy-validation.R @@ -3,6 +3,8 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "sna" +gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("sna_validate_raw() works identifying type/ formating error", { diff --git a/tests/testthat/test-sna-validation.R b/tests/testthat/test-sna-validation.R index 8df8ca2..b333ffb 100644 --- a/tests/testthat/test-sna-validation.R +++ b/tests/testthat/test-sna-validation.R @@ -3,6 +3,8 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "sna" +gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("sna_validate_raw() works identifying type/ formating error", { diff --git a/tests/testthat/test-wdi-validation.R b/tests/testthat/test-wdi-validation.R index ee20811..c7245dc 100644 --- a/tests/testthat/test-wdi-validation.R +++ b/tests/testthat/test-wdi-validation.R @@ -3,6 +3,8 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "wdi" +gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("wdi_validate_raw() works identifying duplicate error", { diff --git a/tests/testthat/test-weo-validation.R b/tests/testthat/test-weo-validation.R index 08caece..164a313 100644 --- a/tests/testthat/test-weo-validation.R +++ b/tests/testthat/test-weo-validation.R @@ -4,6 +4,7 @@ branch <- "DEV" owner <- getOption("pipfun.ghowner") measure <- "weo" gls <- pipfun::pip_create_globals() +temp_fld <- "Y:/tefera_pipaux_test" test_that("weo_validate_raw() works identifying duplicate error", { @@ -24,7 +25,7 @@ test_that("weo_validate_raw() works identifying duplicate error", { test_that("weo_validate_output() works identifying duplicate error", { weo <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch ) @@ -39,7 +40,7 
@@ test_that("weo_validate_output() works identifying duplicate error", { test_that("weo_validate_output() works identifying type/ formating error", { weo <- load_aux( - maindir = gls$PIP_DATA_DIR, + maindir = temp_fld, #gls$PIP_DATA_DIR, measure = measure, branch = branch )