From f917e687b07bba7835d28d0f38ba1b4ca9a56bda Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Thu, 12 Dec 2024 14:48:47 -0500 Subject: [PATCH 1/2] update pip_pfw_key() and merger_aux() functions --- NAMESPACE | 3 +- R/merger_aux.R | 17 ++++---- R/pip_pfw_key.R | 1 - man/merger_aux.Rd | 17 ++++---- man/save_aux_to_gh.Rd | 41 +++++++++++++++++++ tests/testthat/test-merger_aux.R | 2 - .../articles/utilizing_the_merge_function.Rmd | 17 ++++---- 7 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 man/save_aux_to_gh.Rd diff --git a/NAMESPACE b/NAMESPACE index 3db08a9..da2a7d5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,10 +5,10 @@ export(auto_aux_update) export(aux_data) export(cl_validate_raw) export(clean_validation_report) +export(convert_df_to_base64) export(countries_validate_output) export(cpi_validate_output) export(cpi_validate_raw) -export(convert_df_to_base64) export(draw_model) export(gdm_validate_output) export(gdm_validate_raw) @@ -64,6 +64,7 @@ export(pop_validate_raw) export(popmain_validate_raw) export(ppp_validate_output) export(ppp_validate_raw) +export(save_aux_to_gh) export(send_report) export(sna_fy_validate_raw) export(sna_validate_raw) diff --git a/R/merger_aux.R b/R/merger_aux.R index 06ffe4f..66616f6 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -2,7 +2,8 @@ #' #' @param aux_data1 auxiliary data one #' @param aux_data1 auxiliary data two -#' @param keep merge/ join type, the default is left join, options (left, right, full, using, master, inner) +#' @param merge_type merge/ join type, the default is left join, options (left, right, full, using, master, inner) +#' @param commn_vars option to keep or retain common variables, default is TRUE #' #' @return data.table with key information #' @export @@ -13,19 +14,19 @@ #' pfw_ppp <- merger_aux(pfw, ppp) #' #' cpi <- load_aux("cpi") -#' cpi <- cpi[, -c("cpi_domain")] -#' pfw_cpi <- merger_aux(cpi, pfw, keep = "right") +#' pfw_cpi <- merger_aux(cpi, pfw, merge_type = "right", 
commn_vars = FALSE) #' cpi_pfw <- merger_aux(cpi, pfw) #' merger_aux <- function(aux_data1, aux_data2, - keep = c("left", "right", "full", + merge_type = c("left", "right", "full", "using", "master", "inner"), + commn_vars = TRUE, ... ){ - keep <- match.arg(keep) - print(keep) + merge_type <- match.arg(merge_type) + print(merge_type) stopifnot("First data is empty" = !is.null(aux_data1)) stopifnot("Second data is empty" = !is.null(aux_data2)) @@ -111,8 +112,8 @@ merger_aux <- function(aux_data1, aux_data2, by = int_key, match_type = mtype, - keep = keep) - + keep = merge_type, + keep_common_vars = commn_vars) attr(mdata, "aux_key", union(attr(aux_data1, "aux_key"), attr(aux_data2, "aux_key"))) diff --git a/R/pip_pfw_key.R b/R/pip_pfw_key.R index 955b27a..9d752b9 100644 --- a/R/pip_pfw_key.R +++ b/R/pip_pfw_key.R @@ -10,7 +10,6 @@ pip_pfw_key <- function(){ pfw_key_options <- pfw_temp[, .(country_code, survey_year, survey_acronym, - welfare_type, cpi_domain_var)] diff --git a/man/merger_aux.Rd b/man/merger_aux.Rd index b465c44..e339861 100644 --- a/man/merger_aux.Rd +++ b/man/merger_aux.Rd @@ -7,14 +7,17 @@ merger_aux( aux_data1, aux_data2, - keep = c("left", "right", "full", "using", "master", "inner"), + merge_type = c("left", "right", "full", "using", "master", "inner"), + commn_vars = TRUE, ... 
) } \arguments{ \item{aux_data1}{auxiliary data two} -\item{keep}{merge/ join type, the default is left join, options (left, right, full, using, master, inner)} +\item{merge_type}{merge/ join type, the default is left join, options (left, right, full, using, master, inner)} + +\item{commn_vars}{option to keep or retain common variables, default is TRUE} } \value{ data.table with key information @@ -23,12 +26,12 @@ data.table with key information Merge auxiliary datasets } \examples{ -pfw <- pip_pfw(action = "load") -ppp <- pip_ppp(action = "load") +pfw <- load_aux("pfw") +ppp <- load_aux("ppp") pfw_ppp <- merger_aux(pfw, ppp) -cpi <- pip_cpi(action = "cpi") -cpi <- cpi[, -c("cpi_domain")] -pfw_cpi <- merger_aux(cpi, pfw, keep = "right") +cpi <- load_aux("cpi") +pfw_cpi <- merger_aux(cpi, pfw, merge_type = "right", commn_vars = FALSE) +cpi_pfw <- merger_aux(cpi, pfw) } diff --git a/man/save_aux_to_gh.Rd b/man/save_aux_to_gh.Rd new file mode 100644 index 0000000..57edd8d --- /dev/null +++ b/man/save_aux_to_gh.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{save_aux_to_gh} +\alias{save_aux_to_gh} +\title{SAve auxiliary file to Github Repo} +\usage{ +save_aux_to_gh( + df, + measure, + owner = getOption("pipfun.ghowner"), + repo = paste0("aux_", measure), + branch = "DEV", + tag = branch, + filename = measure, + ext = "csv", + ... +) +} +\arguments{ +\item{df}{A dataframe object} + +\item{owner}{character: Github repo owner. Default is +\code{getOption("pipfun.ghowner")}} + +\item{repo}{character: name of the repo} + +\item{branch}{character: either "DEV" or "PROD". Refers to the branch that +will be used to update either the development server or production.} + +\item{filename}{character: Name of file name without the ".csv" extension. +Default is \code{measure}} + +\item{ext}{character: Extension of \code{filename}. 
Default "csv"} + +\item{...}{parameters to be passed to the loading functions depending of the +extension used} +} +\description{ +Sometimes we need to save auxiliary files to Github repo. +This function allows for this. +} diff --git a/tests/testthat/test-merger_aux.R b/tests/testthat/test-merger_aux.R index 3a5b894..83ded34 100644 --- a/tests/testthat/test-merger_aux.R +++ b/tests/testthat/test-merger_aux.R @@ -8,8 +8,6 @@ cpi <- load_aux("cpi", branch = "DEV", maindir = temp_fld) -cpi <- cpi[, -c("cpi_domain")] - ppp <- load_aux("ppp", branch = "DEV", maindir = temp_fld) diff --git a/vignettes/articles/utilizing_the_merge_function.Rmd b/vignettes/articles/utilizing_the_merge_function.Rmd index 92e6d0d..6ef899f 100644 --- a/vignettes/articles/utilizing_the_merge_function.Rmd +++ b/vignettes/articles/utilizing_the_merge_function.Rmd @@ -40,7 +40,7 @@ Here’s a breakdown of how it works: - **Inner**: Includes only the records that are present in both datasets. -2. Default Option: If you don’t specify a merge type, the function defaults to "left", meaning it will keep all records from the left dataset and include matching records from the right dataset where available. +2. Default Options: i) If you don’t specify a merge type, the function defaults to "left", meaning it will keep all records from the left dataset and include matching records from the right dataset where available. ii) If there are any common variables, the default behavior is to retain them (with different names) in the merged dataset. Otherwise, set keep_common_vars = FALSE to retain only the common variables from one of the datasets. **Note:** Keys/IDs and names of auxiliary data files (such as *cpi*, *ppp*, *pfw*, *pop*, *gdp*, *gdm*, etc.) are embedded within the data files as attributes. The `merger_aux()` function utilizes these IDs to create suitable key variables for merging the data files and generating an ID for the combined data file. 
Additionally, the function produces a new auxiliary file name by combining the names of the files being merged. @@ -60,13 +60,12 @@ cpi <- load_aux("cpi", branch = "DEV", maindir = temp_fld) -cpi <- cpi[, -c("cpi_domain")] +# a) merge type "left" +pfw_cpi <- merger_aux(pfw, cpi) # keeps the common variable from both datasets +pfw_cpi <- merger_aux(pfw, cpi, common_vars = FALSE) # keeps only pfw common variable -# a) keep "left" -pfw_cpi <- merger_aux(pfw, cpi) - -# b) keep "inner" -cpi_pfw <- merger_aux(pfw, cpi, keep = "inner") +# b) merge type "inner" +cpi_pfw <- merger_aux(pfw, cpi, merge_type = "inner") ``` @@ -101,7 +100,7 @@ pop <- load_aux("pop", pop_pfw_cpi <- merger_aux(pop, pfw_cpi, - keep = "right") + merge_type = "right") ``` ### 4. Add gdm data to cpi_pfw @@ -117,7 +116,7 @@ gdm <- load_aux("gdm", gdm_cpi_pfw <- merger_aux(gdm, cpi_pfw, - keep = "right") + merge_type = "right") ``` From 29f6c0b77ba485773c2ee8bbe3979501679938f4 Mon Sep 17 00:00:00 2001 From: Tefera19 Date: Wed, 18 Dec 2024 09:38:48 -0500 Subject: [PATCH 2/2] remove *_domain variables from output auxiliary files and update function to reflect these changes --- NAMESPACE | 1 - R/aux_data_files.R | 136 ------------------ R/cpi_validate_output.R | 8 +- R/gdp_validate_output.R | 8 +- R/merger_aux.R | 2 +- R/pce_validate_output.R | 8 +- R/pip_aux_labels.R | 20 +-- R/pip_cpi_update.R | 3 + R/pip_gdp_update.R | 3 + R/pip_pce_update.R | 3 + R/pip_pop_update.R | 4 + R/pip_ppp_update.R | 4 + R/pop_validate_output.R | 8 +- R/ppp_validate_output.R | 8 +- man/aux_data.Rd | 17 --- man/merger_aux.Rd | 2 +- tests/testthat/test-cpi-validation.R | 26 ++-- .../articles/utilizing_the_merge_function.Rmd | 6 +- 18 files changed, 65 insertions(+), 202 deletions(-) delete mode 100644 R/aux_data_files.R delete mode 100644 man/aux_data.Rd diff --git a/NAMESPACE b/NAMESPACE index da2a7d5..1521cf4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ export("%>%") export(auto_aux_update) -export(aux_data)
export(cl_validate_raw) export(clean_validation_report) export(convert_df_to_base64) diff --git a/R/aux_data_files.R b/R/aux_data_files.R deleted file mode 100644 index 415aa56..0000000 --- a/R/aux_data_files.R +++ /dev/null @@ -1,136 +0,0 @@ -#' Attache key values into auxiliary file -#' -#' @param aux_file auxiliary file -#' -#' @return data.table with key values -#' @export -#' -#' @examples -aux_data <- function(aux_file){ - - # list of possible auxiliary keys -------------------------------------------- - keycolsg0 <- c("country_code") # countries, country_list - keycolsg1 <- c("country_code", "surveyid_year") # maddison, weo, npl, income_group - keycolsg2 <- c("country_code", "surveyid_year", "reporting_level") # gdp, pce, pop - keycolsg3 <- c("country_code", "surveyid_year", "reporting_level") # gdm - keycolsg4 <- c("country_code", "surveyid_year", "survey_acronym", "reporting_level") # cpi "survey_acronym" - keypfw <- c("country_code", "surveyid_year", "year", "survey_acronym", - "reporting_level") - - # list of all the auxiliary files - aux_file_names <- c("pfw", "cpi", "gdp", "gdm", "pce", "pop", "ppp", "maddison", - "weo", "npl", "countries", "country_list", "regions", - "income_groups", "metadata") - - if (deparse(substitute(aux_file)) %chin% aux_file_names) { - - # pfw --------------------------------------------------------------------- - if (deparse(substitute(aux_file)) == "pfw"){ - - setkeyv(pfw, - c("country_code", "survey_year", "survey_acronym", "cpi_domain")) - - # generate a dataset that can be used to add reporting_level variable to pfw data - pfw_key <- pip_pfw_key() - - aux_file <- pfw_key[aux_file] |> - setkeyv(keypfw) - } else if (deparse(substitute(aux_file)) == "ppp"){ - - # ppp -------------------------------------------------------------------- - # filter ppp based on defualt ppp value - aux_file <- ppp[ppp_default == TRUE, - .(country_code, ppp_year, ppp, ppp_data_level)] - - setnames(aux_file, "ppp_data_level", "reporting_level", - 
skip_absent=TRUE) - - setkeyv(aux_file, c("country_code", "reporting_level")) - - } else if (deparse(substitute(aux_file)) == "cpi"){ - - # cpi -------------------------------------------------------------------- - # rename two variables cpi_year to surveyid_year and cpi_data_level to reporting_level - aux_file <- aux_file |> - setnames(c("cpi_year", "cpi_data_level"), - c("surveyid_year", "reporting_level"), - skip_absent=TRUE) - - setkeyv(aux_file, c("country_code", "surveyid_year", "survey_acronym", "reporting_level")) #keycolsg4) - - } else if (deparse(substitute(aux_file)) == "gdm"){ - - # gdm -------------------------------------------------------------------- - aux_file <- aux_file |> - setnames("pop_daaux_file ta_level", "reporting_level", - skip_absent=TRUE) - - setkeyv(aux_file, keycolsg3) - - } else if (deparse(substitute(aux_file)) == "npl"){ - - # npl -------------------------------------------------------------------- - aux_file <- aux_file |> - setnames("reporting_year", "surveyid_year", - skip_absent=TRUE) - - setkeyv(aux_file, keycolsg1) - - } else if (deparse(substitute(aux_file)) == "income_groups"){ - - # income_groups ------------------------------------------------------------ - # rename year_data into surveyid_year - aux_file <- aux_file |> - setnames("year_data", "surveyid_year", - skip_absent=TRUE) - - setkeyv(aux_file, keycolsg1) - - } else if (deparse(substitute(aux_file)) == "countries"){ - - # countries ---------------------------------------------------------------- - setkeyv(aux_file, keycolsg0) - - } else if (deparse(substitute(aux_file)) == "country_list"){ - - # country_list-------------------------------------------------------------- - setkeyv(aux_file, keycolsg0) - - } else if (deparse(substitute(aux_file)) == "metadata"){ - - # metadata ----------------------------------------------------------------- - setkeyv(aux_file, keycolsg5) - - } else if (deparse(substitute(aux_file)) %chin% c("maddison", "weo")){ - - # auxiliary 
datasets - group 1 (maddison and weo) -------------------------- - aux_file |> - setnames("year", "surveyid_year", - skip_absent=TRUE) - - setkeyv(aux_file, keycolsg1) - - } else if (deparse(substitute(aux_file)) %chin% c("gdp", "pop", "pce")){ - - # auxiliary datasets - group 2 (gdp, pop, pce) --------------------------- - aux_data_level <- paste0(deparse(substitute(aux_file)), "_data_level") - - aux_file |> - setnames(c(aux_data_level, "year"), - c("reporting_level", "surveyid_year"), - skip_absent=TRUE) - - setkeyv(aux_file, keycolsg2) - - } - - return(aux_file) - - } else { - - return(aux_file) - - } - -} - diff --git a/R/cpi_validate_output.R b/R/cpi_validate_output.R index 2c2102b..3b80919 100644 --- a/R/cpi_validate_output.R +++ b/R/cpi_validate_output.R @@ -30,10 +30,10 @@ cpi_validate_output <- function(cpi, detail = getOption("pipaux.detail.output")) description = "`change_cpi2011` should be numeric") |> validate_cols(in_set(c(0, 1)), change_cpi2011, description = "`change_cpi2011` values within range") |> - validate_if(is.character(cpi_domain), - description = "`cpi_domain` should be character") |> - validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, - description = "`cpi_domian` values within range") |> + # validate_if(is.character(cpi_domain), + # description = "`cpi_domain` should be character") |> + # validate_cols(in_set(c("National", "Urban/Rural")), cpi_domain, + # description = "`cpi_domian` values within range") |> validate_if(is.numeric(cpi_domain_value), description = "`cpi_domain_value` should be numeric") |> validate_cols(in_set(c(0, 1)), cpi_domain_value, diff --git a/R/gdp_validate_output.R b/R/gdp_validate_output.R index 2a80d72..14052a8 100644 --- a/R/gdp_validate_output.R +++ b/R/gdp_validate_output.R @@ -24,10 +24,10 @@ gdp_validate_output <- function(gdp, detail = getOption("pipaux.detail.output")) reporting_level, description = "`reporting_level` values within range") |> validate_if(is.numeric(gdp), description = "`gdp` 
should be numeric") |> - validate_if(is.character(gdp_domain), - description = "`gdp_domain` should be character") |> - validate_cols(in_set(c("national", "urban/rural")), - gdp_domain, description = "`gdp_domain` values within range") |> + # validate_if(is.character(gdp_domain), + # description = "`gdp_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # gdp_domain, description = "`gdp_domain` values within range") |> validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> validate_if(is_uniq(country_code, year, reporting_level), diff --git a/R/merger_aux.R b/R/merger_aux.R index 66616f6..9b7e16b 100644 --- a/R/merger_aux.R +++ b/R/merger_aux.R @@ -21,7 +21,7 @@ merger_aux <- function(aux_data1, aux_data2, merge_type = c("left", "right", "full", "using", "master", "inner"), - commn_vars = TRUE, + commn_vars = FALSE, ... ){ diff --git a/R/pce_validate_output.R b/R/pce_validate_output.R index 7566b69..dd91bad 100644 --- a/R/pce_validate_output.R +++ b/R/pce_validate_output.R @@ -24,10 +24,10 @@ pce_validate_output <- function(pce, detail = getOption("pipaux.detail.output")) description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), reporting_level, description = "`reporting_level` values within range") |> - validate_if(is.character(pce_domain), - description = "`pce_domain` should be character") |> - validate_cols(in_set(c("national", "urban/rural")), - pce_domain, description = "`pce_domain` values within range") |> + # validate_if(is.character(pce_domain), + # description = "`pce_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pce_domain, description = "`pce_domain` values within range") |> validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> validate_if(is_uniq(country_code, year, reporting_level), diff --git 
a/R/pip_aux_labels.R b/R/pip_aux_labels.R index c757424..c4a4303 100644 --- a/R/pip_aux_labels.R +++ b/R/pip_aux_labels.R @@ -9,9 +9,9 @@ pip_aux_labels <- function(x, measure) { # Label variables # attr(x$survey_year, "label") <- "Survey decimal year" #"Proportion of first year of survey" - attr(x$cpi_domain, "label") <- "CPI domain to join with microdata" + # attr(x$cpi_domain, "label") <- "CPI domain to join with microdata" # attr(x$cpi_data_level, "label") <- "Values to use as keys to join with cpi_domain_var" - attr(x$reporting, "label") <- "Values to use as keys to join with cpi_domain_var" + # attr(x$reporting, "label") <- "Values to use as keys to join with cpi_domain_var" # attr(x$cpi_year, "label") <- "CPI year" #"Year of survey ID" attr(x$ccf, "label") <- "Currency conversion factor" attr(x$cpi, "label") <- "Consumer Price Index (Based on 2011)." @@ -19,9 +19,9 @@ pip_aux_labels <- function(x, measure) { ppp_year <- unique(x[x$ppp_default == TRUE, "ppp_year"]) # Label variables - attr(x$ppp_domain, "label") <- "PPP domain to join with microdata" + # attr(x$ppp_domain, "label") <- "PPP domain to join with microdata" # attr(x$ppp_data_level, "label") <- "Values to use as keys to join with ppp_domain_var" - attr(x$reporting, "label") <- "Values to use as keys to join with ppp_domain_var" + # attr(x$reporting, "label") <- "Values to use as keys to join with ppp_domain_var" attr(x$ppp, "label") <- paste0( "Purchasing Power Parity (", ppp_year, "2011 ICP round)" @@ -46,23 +46,23 @@ pip_aux_labels <- function(x, measure) { attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" # attr(x$gdp_data_level, "label") <- "Values to use as keys to join with gdp_domain_var" - attr(x$reporting, "label") <- "Values to use as keys to join with gdp_domain_var" + # attr(x$reporting, "label") <- "Values to use as keys to join with gdp_domain_var" attr(x$gdp, "label") <- "GDP per capita (constant 2010 US$)" - attr(x$gdp_domain, "label") <- "GDP domain 
to join with microdata" + # attr(x$gdp_domain, "label") <- "GDP domain to join with microdata" } else if (measure == "pce") { attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" # attr(x$pce_data_level, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" - attr(x$reporting, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" + # attr(x$reporting, "label") <- "Values to use as keys to join with \n pce_domain_var in microdata" attr(x$pce, "label") <- "Households and NPISHs Final consumption expenditure per capita (constant 2010 US$)" - attr(x$pce_domain, "label") <- "PCE domain to join with microdata" + # attr(x$pce_domain, "label") <- "PCE domain to join with microdata" } else if (measure == "pop") { attr(x$country_code, "label") <- "Country code" attr(x$year, "label") <- "Year" # attr(x$pop_data_level, "label") <- "Values to use as keys to join with pop_domain_var" - attr(x$reporting, "label") <- "Values to use as keys to join with pop_domain_var" + # attr(x$reporting, "label") <- "Values to use as keys to join with pop_domain_var" attr(x$pop, "label") <- "Population" - attr(x$pop_domain, "label") <- "Population domain to join with microdata" + # attr(x$pop_domain, "label") <- "Population domain to join with microdata" } else { cli::cli_inform("no labels available for measure {.code {measure}}") } diff --git a/R/pip_cpi_update.R b/R/pip_cpi_update.R index ea7afb3..7e55397 100644 --- a/R/pip_cpi_update.R +++ b/R/pip_cpi_update.R @@ -38,6 +38,9 @@ pip_cpi_update <- function(maindir = gls$PIP_DATA_DIR, maindir = maindir, branch = branch) + # drop cpi_domain + cpi <- cpi[, -c("cpi_domain")] + # changae cpi_year and cpi_data_level to year and reporting_level cpi <- cpi |> setnames(c("cpi_year", "cpi_data_level"), c("year", "reporting_level"), diff --git a/R/pip_gdp_update.R b/R/pip_gdp_update.R index e87ac05..b3c69c8 100644 --- a/R/pip_gdp_update.R +++ b/R/pip_gdp_update.R @@ -319,6 
+319,9 @@ pip_gdp_update <- function(maindir = gls$PIP_DATA_DIR, # Remove any non-WDI countries gdp <- gdp[country_code %in% cl$country_code] + # drop gdp_domain + gdp <- gdp[, -c("gdp_domain")] + # ---- Save and sign ---- gdp <- gdp |> setnames("gdp_data_level", "reporting_level", skip_absent=TRUE) diff --git a/R/pip_pce_update.R b/R/pip_pce_update.R index f9c5b14..2d1bf5e 100644 --- a/R/pip_pce_update.R +++ b/R/pip_pce_update.R @@ -229,6 +229,9 @@ pip_pce_update <- function(maindir = gls$PIP_DATA_DIR, pce <- pce[country_code %in% cl$country_code] + # drop pce_domain + pce <- pce[, -c("pce_domain")] + ## ---- Sign and save ---- pce <- pce |> setnames("pce_data_level", "reporting_level", skip_absent=TRUE) diff --git a/R/pip_pop_update.R b/R/pip_pop_update.R index 35a9c4b..f55fc0a 100644 --- a/R/pip_pop_update.R +++ b/R/pip_pop_update.R @@ -179,6 +179,10 @@ pip_pop_update <- function(force = FALSE, #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Save data --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # drop pop_domain + pop <- pop[, -c("pop_domain")] + pop <- pop |> setnames("pop_data_level", "reporting_level", skip_absent=TRUE) diff --git a/R/pip_ppp_update.R b/R/pip_ppp_update.R index e400338..6320ad3 100644 --- a/R/pip_ppp_update.R +++ b/R/pip_ppp_update.R @@ -62,6 +62,10 @@ pip_ppp_update <- function(maindir = gls$PIP_DATA_DIR, # ____________________________________________________________________________ # Saving #### + + # drop ppp_domain + ppp <- ppp[, -c("ppp_domain")] + ppp <- ppp |> setnames("ppp_data_level", "reporting_level", skip_absent=TRUE) diff --git a/R/pop_validate_output.R b/R/pop_validate_output.R index 036e06c..db0deaa 100644 --- a/R/pop_validate_output.R +++ b/R/pop_validate_output.R @@ -24,10 +24,10 @@ pop_validate_output <- function(pop, detail = getOption("pipaux.detail.output")) reporting_level, description = "`reporting_level` values within range") |> validate_if(is.numeric(pop), description =
"`pop` should be numeric") |> - validate_if(is.character(pop_domain), - description = "`pop_domain` should be character") |> - validate_cols(in_set(c("national", "urban/rural")), - pop_domain, description = "`pop_domain` values within range") |> + # validate_if(is.character(pop_domain), + # description = "`pop_domain` should be character") |> + # validate_cols(in_set(c("national", "urban/rural")), + # pop_domain, description = "`pop_domain` values within range") |> validate_cols(not_na, country_code, year, reporting_level, description = "no missing values in key variables") |> validate_if(is_uniq(country_code, year, reporting_level), diff --git a/R/ppp_validate_output.R b/R/ppp_validate_output.R index b0c4b12..af7fafb 100644 --- a/R/ppp_validate_output.R +++ b/R/ppp_validate_output.R @@ -28,10 +28,10 @@ ppp_validate_output <- function(ppp, detail = getOption("pipaux.detail.output")) description = "`ppp_default` should be numeric") |> validate_if(is.logical(ppp_default_by_year), description = "`ppp_default_by_year` should be numeric") |> - validate_if(is.character(ppp_domain), - description = "`ppp_domain` should be character") |> - validate_cols(in_set(c("1", "2")), - ppp_domain, description = "`ppp_domain` values within range") |> + # validate_if(is.character(ppp_domain), + # description = "`ppp_domain` should be character") |> + # validate_cols(in_set(c("1", "2")), + # ppp_domain, description = "`ppp_domain` values within range") |> validate_if(is.character(reporting_level), description = "`reporting_level` should be character") |> validate_cols(in_set(c("national", "rural", "urban")), diff --git a/man/aux_data.Rd b/man/aux_data.Rd deleted file mode 100644 index 2919fb9..0000000 --- a/man/aux_data.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aux_data_files.R -\name{aux_data} -\alias{aux_data} -\title{Attache key values into auxiliary file} -\usage{ -aux_data(aux_file) -} -\arguments{ 
-\item{aux_file}{auxiliary file} -} -\value{ -data.table with key values -} -\description{ -Attache key values into auxiliary file -} diff --git a/man/merger_aux.Rd b/man/merger_aux.Rd index e339861..cf5993e 100644 --- a/man/merger_aux.Rd +++ b/man/merger_aux.Rd @@ -8,7 +8,7 @@ merger_aux( aux_data1, aux_data2, merge_type = c("left", "right", "full", "using", "master", "inner"), - commn_vars = TRUE, + commn_vars = FALSE, ... ) } diff --git a/tests/testthat/test-cpi-validation.R b/tests/testthat/test-cpi-validation.R index c242b3e..1ac3425 100644 --- a/tests/testthat/test-cpi-validation.R +++ b/tests/testthat/test-cpi-validation.R @@ -91,16 +91,16 @@ test_that("cpi_validate_output() works identifying type/ formating error", { }) -test_that("cpi_validate_output() works identifying invalid value", { - - cpi <- load_aux( - maindir = temp_fld, #gls$PIP_DATA_DIR, - measure = measure, - branch = branch - ) - - cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] - - expect_error(cpi_validate_output(cpi)) - -}) +# test_that("cpi_validate_output() works identifying invalid value", { +# +# cpi <- load_aux( +# maindir = temp_fld, #gls$PIP_DATA_DIR, +# measure = measure, +# branch = branch +# ) +# +# cpi[, cpi_domain := fifelse(cpi_domain == "National", "National1", cpi_domain)] +# +# expect_error(cpi_validate_output(cpi)) +# +# }) diff --git a/vignettes/articles/utilizing_the_merge_function.Rmd b/vignettes/articles/utilizing_the_merge_function.Rmd index 6ef899f..493a2f3 100644 --- a/vignettes/articles/utilizing_the_merge_function.Rmd +++ b/vignettes/articles/utilizing_the_merge_function.Rmd @@ -40,7 +40,7 @@ Here’s a breakdown of how it works: - **Inner**: Includes only the records that are present in both datasets. -2. Default Options: i) If you don’t specify a merge type, the function defaults to "left", meaning it will keep all records from the left dataset and include matching records from the right dataset where available. 
ii) If there are any common variables, the default behavior is to retain them (with different names) in the merged dataset. Otherwise, set keep_common_vars = FALSE to retain only the common variables from one of the datasets. +2. Default Options: i) If you don’t specify a merge type, the function defaults to "left", meaning it will keep all records from the left dataset and include matching records from the right dataset where available. ii) If there are any common variables, the default behavior is to retain only the common variables from one of the datasets. Otherwise, set commn_vars = TRUE to retain them from both datasets (with different names) in the merged dataset. **Note:** Keys/IDs and names of auxiliary data files (such as *cpi*, *ppp*, *pfw*, *pop*, *gdp*, *gdm*, etc.) are embedded within the data files as attributes. The `merger_aux()` function utilizes these IDs to create suitable key variables for merging the data files and generating an ID for the combined data file. Additionally, the function produces a new auxiliary file name by combining the names of the files being merged. @@ -61,8 +61,8 @@ cpi <- load_aux("cpi", maindir = temp_fld) # a) merge type "left" -pfw_cpi <- merger_aux(pfw, cpi) # keeps the common variable from both datasets -pfw_cpi <- merger_aux(pfw, cpi, common_vars = FALSE) # keeps only pfw common variable +pfw_cpi <- merger_aux(pfw, cpi) # keeps only pfw common variable +pfw_cpi <- merger_aux(pfw, cpi, commn_vars = TRUE) # keeps the common variable from both datasets # b) merge type "inner" cpi_pfw <- merger_aux(pfw, cpi, merge_type = "inner")