-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tefera update branch #100
Tefera update branch #100
Changes from all commits
31c6afc
9ec8dea
82ec25c
3295449
fd3693c
a439e4d
7ad3052
7fa88c2
e0aefac
3edaa70
c8c6989
38719e3
0056f16
29eb56e
4dba4f9
eb0d961
e3bdc25
0a790be
bca92ed
a5438ea
31b2909
90f7b44
c7dd7ae
0a4f936
add9f52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ tmp/ | |
.vscode/ | ||
^codecov\.yml$ | ||
^data-raw$ | ||
^vignettes/articles$ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,28 @@ | |
|
||
export("%>%") | ||
export(auto_aux_update) | ||
export(aux_data) | ||
export(cl_validate_raw) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason to export all the validation functions? I think they are meant to be used internally. If they are, you should use `#' @keywords internal`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As you have suggested, I added `#' @keywords internal` to the roxygen chunk of all the data validation functions. Thanks |
||
export(clean_validation_report) | ||
export(countries_validate_output) | ||
export(cpi_validate_output) | ||
export(cpi_validate_raw) | ||
export(draw_model) | ||
export(gdm_validate_output) | ||
export(gdm_validate_raw) | ||
export(gdp_validate_output) | ||
export(get_error_validation) | ||
export(incgroup_validate_output) | ||
export(load_aux) | ||
export(merger_aux) | ||
export(metadata_validate_output) | ||
export(metadata_validate_raw) | ||
export(mpd_validate_raw) | ||
export(npl_validate_output) | ||
export(npl_validate_raw) | ||
export(pce_validate_output) | ||
export(pfw_validate_output) | ||
export(pfw_validate_raw) | ||
export(pip_censoring) | ||
export(pip_countries) | ||
export(pip_country_list) | ||
|
@@ -12,6 +32,7 @@ export(pip_cpi) | |
export(pip_dictionary) | ||
export(pip_gdm) | ||
export(pip_gdp) | ||
export(pip_gdp_weo) | ||
export(pip_income_groups) | ||
export(pip_indicators) | ||
export(pip_maddison) | ||
|
@@ -21,21 +42,41 @@ export(pip_missing_data) | |
export(pip_npl) | ||
export(pip_pce) | ||
export(pip_pfw) | ||
export(pip_pfw_key) | ||
export(pip_pl) | ||
export(pip_pl_clean) | ||
export(pip_pop) | ||
export(pip_ppp) | ||
export(pip_prices) | ||
export(pip_regions) | ||
export(pip_sna) | ||
export(pip_update_all_aux) | ||
export(pip_wdi) | ||
export(pip_wdi_update) | ||
export(pip_weo) | ||
export(pip_weo_clean) | ||
export(pl_validate_output) | ||
export(pop_validate_output) | ||
export(pop_validate_raw) | ||
export(popmain_validate_raw) | ||
export(ppp_validate_output) | ||
export(ppp_validate_raw) | ||
export(send_report) | ||
export(sna_fy_validate_raw) | ||
export(sna_validate_raw) | ||
export(spop_validate_raw) | ||
export(update_aux) | ||
export(wdi_validate_raw) | ||
export(weo_validate_output) | ||
export(weo_validate_raw) | ||
import(blastula) | ||
import(collapse, except = fdroplevels) | ||
import(data.table) | ||
import(data.table, except = fdroplevels) | ||
import(data.validator) | ||
importFrom(assertr,in_set) | ||
importFrom(assertr,is_uniq) | ||
importFrom(assertr,not_na) | ||
importFrom(glue,glue) | ||
importFrom(lifecycle,deprecated) | ||
importFrom(magrittr,"%<>%") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Package-level environment for state shared between functions (for example,
# `clean_validation_report()` removes a `validation_report` binding from it).
# Its parent is the empty environment, so lookups never fall through to any
# other environment.
.pipaux <- new.env(parent = emptyenv()) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#' Attach key values to an auxiliary file
#'
#' Renames the year and data-level columns of an auxiliary table to the common
#' names `surveyid_year` and `reporting_level` (when those columns are present)
#' and sets the data.table key used for that table.
#'
#' The table is recognised by the *name of the object* passed as `aux_file`,
#' so the function must be called with a bare symbol named after the
#' auxiliary measure, e.g. `aux_data(cpi)`. Recognised names are `pfw`, `ppp`,
#' `cpi`, `gdm`, `npl`, `income_groups`, `countries`, `country_list`,
#' `metadata`, `maddison`, `weo`, `gdp`, `pop` and `pce`. Any other name
#' (including `regions`) is returned unchanged.
#'
#' Renaming and keying are done by reference with `setnames()` and
#' `setkeyv()`, so the caller's table is modified too. The exceptions are
#' `ppp`, where a filtered copy is returned, and `pfw`, where the input is
#' re-keyed and then joined into a new table.
#'
#' @param aux_file auxiliary file: a data.table passed as a bare symbol whose
#'   name identifies the auxiliary measure
#'
#' @return data.table with key values
#' @export
#'
#' @examples
#' \dontrun{
#' # `cpi` must be a data.table holding the cpi auxiliary data
#' aux_data(cpi)
#' }
aux_data <- function(aux_file) {

  # Capture the caller's object name once, before `aux_file` is reassigned:
  # afterwards substitute() would yield the value rather than the symbol.
  # deparse1() always gives a single string, even for long expressions.
  aux_name <- deparse1(substitute(aux_file))

  # list of possible auxiliary keys --------------------------------------------
  # countries, country_list
  key_country <- "country_code"
  # maddison, weo, npl, income_groups
  key_year <- c("country_code", "surveyid_year")
  # gdp, pce, pop, gdm
  key_level <- c("country_code", "surveyid_year", "reporting_level")
  # cpi
  key_cpi <- c("country_code", "surveyid_year", "survey_acronym",
               "reporting_level")
  # pfw
  key_pfw <- c("country_code", "surveyid_year", "year", "survey_acronym",
               "reporting_level")

  switch(
    aux_name,

    # pfw ----------------------------------------------------------------------
    pfw = {
      # key the supplied table (not a global `pfw`) so the join below matches
      setkeyv(aux_file,
              c("country_code", "survey_year", "survey_acronym", "cpi_domain"))

      # dataset used to add the reporting_level variable to pfw data
      pfw_key <- pip_pfw_key()

      aux_file <- setkeyv(pfw_key[aux_file], key_pfw)
    },

    # ppp ----------------------------------------------------------------------
    ppp = {
      # keep only the default ppp values of the supplied table
      aux_file <- aux_file[ppp_default == TRUE,
                           .(country_code, ppp_year, ppp, ppp_data_level)]

      setnames(aux_file, "ppp_data_level", "reporting_level",
               skip_absent = TRUE)

      setkeyv(aux_file, c("country_code", "reporting_level"))
    },

    # cpi ----------------------------------------------------------------------
    cpi = {
      # cpi_year -> surveyid_year and cpi_data_level -> reporting_level
      setnames(aux_file,
               c("cpi_year", "cpi_data_level"),
               c("surveyid_year", "reporting_level"),
               skip_absent = TRUE)

      setkeyv(aux_file, key_cpi)
    },

    # gdm ----------------------------------------------------------------------
    gdm = {
      # The original column name here was the garbled string
      # "pop_daaux_file ta_level"; with skip_absent = TRUE it was a silent no-op.
      setnames(aux_file, "pop_data_level", "reporting_level",
               skip_absent = TRUE)

      setkeyv(aux_file, key_level)
    },

    # npl ----------------------------------------------------------------------
    npl = {
      setnames(aux_file, "reporting_year", "surveyid_year",
               skip_absent = TRUE)

      setkeyv(aux_file, key_year)
    },

    # income_groups ------------------------------------------------------------
    income_groups = {
      # rename year_data into surveyid_year
      setnames(aux_file, "year_data", "surveyid_year",
               skip_absent = TRUE)

      setkeyv(aux_file, key_year)
    },

    # countries and country_list -----------------------------------------------
    countries = ,
    country_list = {
      setkeyv(aux_file, key_country)
    },

    # metadata -----------------------------------------------------------------
    metadata = {
      # NOTE(review): `keycolsg5` is not defined anywhere in this function, so
      # this branch fails with "object 'keycolsg5' not found" unless it exists
      # in an enclosing scope. TODO: define the metadata key columns.
      setkeyv(aux_file, keycolsg5)
    },

    # auxiliary datasets - group 1 (maddison and weo) --------------------------
    maddison = ,
    weo = {
      setnames(aux_file, "year", "surveyid_year",
               skip_absent = TRUE)

      setkeyv(aux_file, key_year)
    },

    # auxiliary datasets - group 2 (gdp, pop, pce) -----------------------------
    gdp = ,
    pop = ,
    pce = {
      aux_data_level <- paste0(aux_name, "_data_level")

      setnames(aux_file,
               c(aux_data_level, "year"),
               c("reporting_level", "surveyid_year"),
               skip_absent = TRUE)

      setkeyv(aux_file, key_level)
    }
  )

  # Names without a branch above (e.g. `regions`) fall through unchanged.
  aux_file
}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#' Validate raw country list data
#'
#' Runs a set of `data.validator` checks on the raw country list: column types,
#' allowed values for the region / Africa-split / world columns, and no missing
#' or duplicated `country_code`. If any check is recorded with type `"error"`,
#' the results are passed to `get_error_validation()`.
#'
#' @param cl raw country list data, as loaded via `pipfun::load_from_gh`
#' @param detail has an option TRUE/FALSE, default value is FALSE
#'   (taken from `getOption("pipaux.detail.raw")`); forwarded to
#'   `get_error_validation()`
#' @return The value of `get_error_validation()` when at least one check
#'   failed; otherwise `NULL`, invisibly.
#' @import data.validator
#' @importFrom assertr in_set not_na is_uniq
#' @keywords internal
#'
#' @export
cl_validate_raw <- function(cl, detail = getOption("pipaux.detail.raw")) {

  stopifnot("Country list raw data is not loaded" = !is.null(cl))

  report <- data_validation_report()

  # The `country_code` range check is currently disabled. The pfw download
  # that only served that check is disabled with it, so validation does not
  # make a network request (and fail offline) for a value it never uses.
  # To re-enable, restore both pieces:
  #
  # country_list <- pipfun::load_from_gh(measure = "pfw",
  #                                      owner   = getOption("pipfun.ghowner"),
  #                                      branch  = "DEV",
  #                                      ext     = "dta")
  # country_list <- unique(country_list[, code])
  #
  # validate_cols(in_set(country_list),
  #               country_code, description = "`country_code` values within range") |>

  validate(cl, name = "CL raw data validation") |>
    validate_if(is.character(country_code),
                description = "`country_code` should be character") |>
    validate_if(is.character(country_name),
                description = "`country_name` should be character") |>
    validate_if(is.character(africa_split),
                description = "`africa_split` should be character") |>
    validate_cols(in_set(c("Eastern and Southern Africa", "Western and Central Africa", NA)),
                  africa_split, description = "`africa_split` values within range") |>
    validate_if(is.character(africa_split_code),
                description = "`africa_split_code` should be character") |>
    validate_cols(in_set(c("AFE", "AFW", NA)),
                  africa_split_code, description = "`africa_split_code` values within range") |>
    validate_if(is.character(pcn_region),
                description = "`pcn_region` should be character") |>
    validate_if(is.character(pcn_region_code),
                description = "`pcn_region_code` should be character") |>
    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
                  pcn_region_code, description = "`pcn_region_code` values within range") |>
    validate_if(is.character(region),
                description = "`region` should be character") |>
    validate_if(is.character(region_code),
                description = "`region_code` should be character") |>
    validate_cols(in_set(c("EAP", "ECA", "LAC", "MNA", "OHI", "SAS", "SSA")),
                  region_code, description = "`region_code` values within range") |>
    validate_if(is.character(world),
                description = "`world` should be character") |>
    validate_cols(in_set(c("World")),
                  world, description = "`world` values within range") |>
    validate_if(is.character(world_code),
                description = "`world_code` should be character") |>
    validate_cols(in_set(c("WLD")),
                  world_code, description = "`world_code` values within range") |>
    validate_cols(not_na, country_code,
                  description = "no missing values in key variables") |>
    validate_if(is_uniq(country_code),
                description = "no duplicate records in key variables") |>
    add_results(report)

  # One row per check; converted by reference to a data.table.
  validation_record <- get_results(report, unnest = FALSE) |>
    setDT()

  if (any(validation_record[["type"]] == "error")) {
    get_error_validation(validation_record, detail)
  }

}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#' Drop the stored data validation report
#'
#' Unbinds `validation_report` from the `.pipaux` environment when that
#' binding exists; does nothing otherwise.
#'
#' @export
clean_validation_report <- function() {

  report_name <- "validation_report"

  # Nothing stored: leave the environment alone.
  if (!rlang::env_has(.pipaux, report_name)) {
    return(invisible(NULL))
  }

  rlang::env_unbind(.pipaux, report_name)
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need all these dependencies? For example, do we need `covr`? Could it be moved to `Suggests`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved covr package to suggests section.