From 7450c8e53b2e09883cd3c9b960e54bc5590281f5 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 30 Apr 2024 11:45:21 -0400 Subject: [PATCH 01/23] inital rm copy --- R/dplyr-joins.R | 30 +++++++++++++++--------------- R/fix_by_vars.R | 6 ++++++ R/joyn-merge.R | 4 ---- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index 0273b01b..eb729572 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -59,8 +59,8 @@ left_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) + # x <- copy(x) + # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -198,8 +198,8 @@ right_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) + # x <- copy(x) + # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -340,8 +340,8 @@ full_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) + # x <- copy(x) + # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -488,8 +488,8 @@ inner_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) + # x <- copy(x) + # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -629,8 +629,8 @@ anti_join <- function( clear_joynenv() # Argument checks --------------------------------- - x <- copy(x) - y <- copy(y) + # x <- copy(x) + # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -838,25 +838,25 @@ arguments_checks <- function(x, y, by, copy, keep, suffix, na_matches, multiple, #' @keywords internal set_col_names <- function(x, y, by, suffix, jn_type) { - x_1 <- copy(x) - y_1 <- copy(y) + #x_1 <- copy(x) + #y_1 <- copy(y) # If joining by different variables if (length(grep(pattern = "==?", x = by, value = TRUE)) != 0) { if (jn_type == "right") { - by_x_names <- fix_by_vars(by = by, x_1, y_1)$xby + by_x_names <- fix_by_vars(by = by, x, y)$xby } else if (jn_type == "left" | jn_type == "full" | jn_type == "inner") { - by_y_names <- fix_by_vars(by = by, x_1, y_1)$yby + by_y_names <- fix_by_vars(by = by, x, y)$yby } } # If joining by common var else { - by_y_names <- by_x_names <- fix_by_vars(by = by, x_1, y_1)$by + by_y_names <- by_x_names <- fix_by_vars(by = by, x, y)$by } # Add key vars with suffix to x and y diff --git a/R/fix_by_vars.R b/R/fix_by_vars.R index 97045927..00f6df67 100644 --- a/R/fix_by_vars.R +++ b/R/fix_by_vars.R @@ -16,6 +16,12 @@ fix_by_vars <- function(by, x, y) { yby <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp)) newkeys <- paste0("keyby", 1:length(xby)) + # x <- frename(x, + # newkeys, + # cols = which(names(x) %in% xby)) + # y <- frename(y, + # newkeys, + # cols = which(names(y) %in% yby)) setnames(x, xby, newkeys) setnames(y, yby, newkeys) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 87d24c6f..9a03a47b 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -214,9 +214,6 @@ joyn <- function(x, # Initial parameters --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ start_joyn <- Sys.time() - x <- copy(x) - y <- copy(y) - ## X and Y ----------- check_xy(x,y) @@ -261,7 +258,6 @@ joyn <- function(x, tx <- mts[1] ty <- mts[2] - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Variables to keep in y --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 130437d913111f90bbf082638996229ba89e9eb0 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 13:13:43 -0400 Subject: [PATCH 02/23] fix dplyr-like joins renaming & copying issue --- R/dplyr-joins.R | 173 +++++++++++++++++++++++++----- R/fix_by_vars.R | 6 -- R/joyn-merge.R | 23 ++-- tests/testthat/test-dplyr-joins.R | 33 ++++-- 4 files changed, 186 insertions(+), 49 deletions(-) diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index eb729572..6818473c 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -59,8 +59,6 @@ left_join <- function( clear_joynenv() # Argument checks --------------------------------- - # x <- copy(x) - # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -91,6 +89,19 @@ left_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + if (keep == TRUE) { jn_type <- "left" modified_cols <- set_col_names(x = x, @@ -121,6 +132,20 @@ left_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -134,6 +159,7 @@ left_join <- function( get_vars(lj, reportvar) <- NULL } + # return lj } @@ -198,8 +224,6 @@ right_join <- function( clear_joynenv() # Argument checks --------------------------------- - # x <- copy(x) - # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -211,6 +235,7 @@ right_join <- function( choices = c("drop", "error")) + args_check <- arguments_checks(x = x, y = y, by = by, @@ -230,6 +255,19 @@ right_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + if (keep == TRUE) { jn_type <- "right" modified_cols <- set_col_names(x = x, @@ -260,6 +298,20 @@ right_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -340,8 +392,6 @@ full_join <- function( clear_joynenv() # Argument checks --------------------------------- - # x <- copy(x) - # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -372,6 +422,19 @@ full_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + if (keep == TRUE) { jn_type <- "full" modified_cols <- set_col_names(x = x, @@ -403,6 +466,20 @@ full_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys---------------------------------------- if (unmatched == "error") { @@ -488,8 +565,6 @@ inner_join <- function( clear_joynenv() # Argument checks --------------------------------- - # x <- copy(x) - # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -520,6 +595,19 @@ inner_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + if (keep == TRUE) { jn_type <- "inner" modified_cols <- set_col_names(x = x, @@ -550,6 +638,20 @@ inner_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # Unmatched Keys --------------------------------------- if (unmatched == "error") { check_unmatched_keys(x = x, @@ -629,8 +731,6 @@ anti_join <- function( clear_joynenv() # Argument checks --------------------------------- - # x <- copy(x) - # y <- copy(y) na_matches <- match.arg(na_matches, choices = c("na","never")) multiple <- match.arg(multiple, @@ -658,6 +758,19 @@ anti_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + if (keep == TRUE) { jn_type <- "anti" modified_cols <- set_col_names(x = x, @@ -688,6 +801,20 @@ anti_join <- function( ... ) + # Change names back------------------------------------ + if (any(grepl(pattern = "keyby", x = names(x)))) { + data.table::setnames(x, + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) + } + if (any(grepl(pattern = "keyby", x = names(y)))) { + data.table::setnames(y, + old = names(y)[grepl(pattern = "keyby", + x = names(y))], + new = ybynames) + } + # # Unmatched Keys --------------------------------------- if (dropreport == T) { get_vars(aj, reportvar) <- NULL @@ -838,25 +965,23 @@ arguments_checks <- function(x, y, by, copy, keep, suffix, na_matches, multiple, #' @keywords internal set_col_names <- function(x, y, by, suffix, jn_type) { - #x_1 <- copy(x) - #y_1 <- copy(y) - # If joining by different variables - if (length(grep(pattern = "==?", x = by, value = TRUE)) != 0) { + byexp <- grep(pattern = "==?", x = by, value = TRUE) + if (length(byexp) != 0) { if (jn_type == "right") { - by_x_names <- fix_by_vars(by = by, x, y)$xby + by_x_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\1", byexp)) } else if (jn_type == "left" | jn_type == "full" | jn_type == "inner") { - by_y_names <- fix_by_vars(by = by, x, y)$yby + by_y_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp)) } } # If joining by common var else { - by_y_names <- by_x_names <- fix_by_vars(by = by, x, y)$by + by_y_names <- by_x_names <- by } # Add key vars with suffix to x and y @@ -903,24 +1028,16 @@ check_unmatched_keys <- function(x, y, out, by, jn_type) { # Left table -------------------------------------------------------- if (jn_type %in% c("left", "inner", "anti")) { - use_y_input <- process_by_vector(by = by, input = "right") - use_y_out <- process_by_vector(by = by, input = "left") + use_y_input <- process_by_vector(by = by, input = "right") # id2 + use_y_out <- process_by_vector(by = by, input = "left") # id1 if (length(grep("==?", by, value = TRUE)) != 0) { if (any(use_y_out %in% colnames(y))) { - - store_msg( - type = "warn", - warn = paste(cli::symbol$warn, "\nWarning:"), - pale = "\nUnmatched = error not active for this joyn -unmatched keys are not detected" - ) + cli::cli_warn("`Unmatched = error` not active for this joyn -unmatched keys are not detected") } else { - data.table::setnames(y, - new = use_y_out, - old = use_y_input) if (unmatched_keys(x = y, by = use_y_out, diff --git a/R/fix_by_vars.R b/R/fix_by_vars.R index 00f6df67..97045927 100644 --- a/R/fix_by_vars.R +++ b/R/fix_by_vars.R @@ -16,12 +16,6 @@ fix_by_vars <- function(by, x, y) { yby <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp)) newkeys <- paste0("keyby", 1:length(xby)) - # x <- frename(x, - # newkeys, - # cols = which(names(x) %in% xby)) - # y <- frename(y, - # newkeys, - # cols = which(names(y) %in% yby)) setnames(x, xby, newkeys) setnames(y, yby, newkeys) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 9a03a47b..cdd8c03c 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -227,6 +227,7 @@ joyn <- function(x, # the resulting table should have the same class as the x table. class_x <- class(x) + ynames <- copy(names(y)) # If match type is m:m we need to convert to data.table if (match_type == "m:m") { @@ -425,7 +426,6 @@ joyn <- function(x, .xreport = NULL, .yreport = NULL) - if (sort) { setorderv(x, by, na.last = na.last) setattr(x, 'sorted', by) @@ -433,13 +433,21 @@ joyn <- function(x, ## Rename by variables ----- - if (!is.null(fixby$xby)) { - data.table::setnames(x, fixby$tempkey, fixby$xby) - by <- fixby$xby - # not necessary - # setnames(y, fixby$tempkey, fixby$yby) + if (any(fixby$tempkey %in% names(x))) { + data.table::setnames(x, + old = fixby$tempkey, + new = fixby$xby) + } + if (any(fixby$tempkey %in% names(y))) { + data.table::setnames(y, + old = fixby$tempkey, + new = fixby$yby) + + if (all(names(y) %in% ynames)) { + colorderv(y, + neworder = ynames) + } } - ## convert to characters if chosen ------- if (reporttype == "character") { @@ -502,6 +510,7 @@ joyn <- function(x, } setattr(x, "class", class_x) + x } diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index 88842891..b6bedb74 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -156,7 +156,6 @@ test_that("LEFT JOIN - Conducts left join", { }) - test_that("LEFT JOIN - no id given", { jn1 <- left_join( x2, @@ -170,6 +169,7 @@ test_that("LEFT JOIN - no id given", { expect_equal(jn1, jn2) }) + test_that("LEFT JOIN - copy given", { jn1 <- joyn::left_join( x2, @@ -268,6 +268,7 @@ test_that("LEFT JOIN - incorrectly specified arguments give errors", { }) + test_that("LEFT JOIN - argument `keep` preserves keys in output", { jn <- left_join( x = x1, @@ -315,6 +316,7 @@ test_that("LEFT JOIN - argument `keep` preserves keys in output", { }) + test_that("LEFT JOIN - update values works", { x2a <- x2 x2a$x <- 1:5 @@ -343,6 +345,7 @@ test_that("LEFT JOIN - update values works", { }) + test_that("LEFT JOIN - reportvar works", { jn <- left_join( x1, @@ -378,15 +381,17 @@ test_that("LEFT JOIN - reportvar works", { + test_that("LEFT JOIN - unmatched throws error", { - left_join(x = x4, + left_join(x = x4, y = y4, relationship = "many-to-many", by = "id2", unmatched = "error") |> expect_no_error() + left_join(x = x4, y = y4, relationship = "many-to-many", @@ -394,6 +399,7 @@ test_that("LEFT JOIN - unmatched throws error", { unmatched = "error") |> expect_error() + left_join(x = x1, y = y1, relationship = "many-to-one", @@ -401,32 +407,34 @@ test_that("LEFT JOIN - unmatched throws error", { unmatched = "error") |> expect_error() + left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id2", "x"), unmatched = "error") |> expect_error() - +################################################################################################################################################ left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id2=id"), unmatched = "error") |> - expect_no_error() + expect_warning() left_join(x = x4, y = y4, relationship = "many-to-many", by = c("id1=id2", "id2=id"), unmatched = "error") |> - expect_no_error() + expect_warning() }) + test_that("LEFT JOIN - NA matches", { jn <- left_join( @@ -539,6 +547,7 @@ test_that("RIGHT JOIN - no id given", { by = NULL )) }) + test_that ("RIGHT JOIN - when copy TRUE get warning message", { clear_joynenv() joyn::right_join( @@ -699,6 +708,7 @@ test_that("RIGHT JOIN - reportvar works", { reportvar = FALSE)) }) + test_that("RIGHT JOIN - NA matches", { jn <- right_join( @@ -724,6 +734,9 @@ test_that("RIGHT JOIN - NA matches", { expect_contains("warn") }) + + +### ERROR 1 --> change in y4$id2 test_that("RIGHT JOIN - unmatched error", { right_join(x = x4, @@ -747,14 +760,14 @@ test_that("RIGHT JOIN - unmatched error", { unmatched = "error") |> expect_error() - right_join(x = x4, + right_join(x = x4, y = y4, relationship = "one-to-one", by = c("id2", "x"), unmatched = "error") |> expect_error() - right_join(x = x4, + right_join(x = x4, y = y4, relationship = "many-to-one", by = c("id1=id2", "id2=id"), @@ -1077,6 +1090,7 @@ test_that("FULL JOIN - (correctly) incorrectly specified arguments give (no) err }) + test_that("FULL JOIN - argument `keep` preserves keys in output", { jn <- full_join( x = x1, @@ -1621,6 +1635,8 @@ test_that("INNER JOIN - NA matches", { }) test_that("INNER JOIN - unmatched error", { + # HERE, the of y4 have gotten switched, so id1 --> id2. + # problem must be in the ordering of the setnames() somewhere inner_join(x = x4, y = y4, relationship = "many-to-many", @@ -1654,7 +1670,8 @@ test_that("INNER JOIN - unmatched error", { relationship = "many-to-one", by = c("id1=id2", "id2=id"), unmatched = "error") |> - expect_error() + expect_error() |> + expect_warning() }) From 56cb06309703a77e5d6c6e94eeb4e8e039e41794 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 15:59:52 -0400 Subject: [PATCH 03/23] correct input names in joyn function --- R/joyn-merge.R | 58 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index cdd8c03c..52499975 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -227,8 +227,23 @@ joyn <- function(x, # the resulting table should have the same class as the x table. class_x <- class(x) + + # ensure input names can be restored + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) ynames <- copy(names(y)) + # maintain name that is bound to original inputs + x_original <- x + y_original <- y + # If match type is m:m we need to convert to data.table if (match_type == "m:m") { x <- as.data.table(x) @@ -281,8 +296,7 @@ joyn <- function(x, # include report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars ZP ------------------------------------- - #yvars_w <- c(newyvars, ".yreport") # working yvars + yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars x <- x |> ftransform(.xreport = 1) y <- y |> @@ -416,7 +430,6 @@ joyn <- function(x, } - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Display results and cleaning --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -433,18 +446,39 @@ joyn <- function(x, ## Rename by variables ----- - if (any(fixby$tempkey %in% names(x))) { + # in output + if (any(grepl(pattern = "keyby", x = names(x)))) { data.table::setnames(x, - old = fixby$tempkey, - new = fixby$xby) + old = names(x)[grepl(pattern = "keyby", + x = names(x))], + new = xbynames) } - if (any(fixby$tempkey %in% names(y))) { - data.table::setnames(y, - old = fixby$tempkey, - new = fixby$yby) - if (all(names(y) %in% ynames)) { - colorderv(y, + + # Change names back for inputs------------------------------ + if (any(grepl(pattern = "keyby", x = names(x_original)))) { + + knames <- names(x_original)[grepl(pattern = "keyby", + x = names(x_original))] + knames <- knames[order(knames)] + + data.table::setnames(x_original, + old = knames, + new = xbynames) + } + + if (any(grepl(pattern = "keyby", x = names(y_original)))) { + + knames <- names(y_original)[grepl(pattern = "keyby", + x = names(y_original))] + knames <- knames[order(knames)] + + data.table::setnames(y_original, + old = knames, + new = ybynames) + + if (all(names(y_original) %in% ynames)) { + colorderv(y_original, neworder = ynames) } } From 5f56d925b85f5b6803a22230d4d254dec1c91a8f Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 16:00:14 -0400 Subject: [PATCH 04/23] test changing of input data frames --- tests/testthat/test-dplyr-joins.R | 58 +++++++++++++++++++++++++++++++ tests/testthat/test-joyn.R | 58 +++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index b6bedb74..13a5e259 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -2047,3 +2047,61 @@ test_that("ANTI JOIN - NA matches", { }) + + + +test_that("joyn() - input data unchanged", { + + expect_equal(x1, + data.table(id = c(1L, 1L, 2L, 3L, NA_integer_), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = 11:15)) + + expect_equal(y1, + data.table(id = c(1,2, 4), + y = c(11L, 15L, 16))) + + expect_equal(x2, + data.table(id = c(1, 4, 2, 3, NA), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + + expect_equal(y2, + data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + + expect_equal(x3, + data.table(id = c("c","b", "d"), + v = 8:10, + foo = c(4,2, 7))) + + expect_equal(y3, + data.table(id = c("c","b", "c", "a"), + y = c(11L, 15L, 18L, 20L))) + + expect_equal(x4, + data.table(id1 = c(1, 1, 2, 3, 3), + id2 = c(1, 1, 2, 3, 4), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + + expect_equal(y4, + data.table(id = c(1, 2, 5, 6, 3), + id2 = c(1, 1, 2, 3, 4), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + + # changed + expect_equal(x5, + data.table(id = c(1L, 1L, 2L, 3L, NA_integer_, NA_integer_), + t = c(1L, 2L, 1L, 2L, NA_integer_, 4L), + x = 11:16)) + + expect_equal(y5, + data.table(id = c(1,2, 4, NA_integer_, NA_integer_), + y = c(11L, 15L, 16, 17L, 18L))) + +}) + diff --git a/tests/testthat/test-joyn.R b/tests/testthat/test-joyn.R index 208eb929..a0f1fbc5 100644 --- a/tests/testthat/test-joyn.R +++ b/tests/testthat/test-joyn.R @@ -43,6 +43,9 @@ x5 = data.table(id = c(1, 2, 5, 6, 3), y = c(11L, 15L, 20L, 13L, 10L), x = c(16:18, NA, NA)) +#------------------------------------------------------------------------------- +# TESTS ------------------------------------------------------------------------ +#------------------------------------------------------------------------------- test_that( @@ -72,6 +75,8 @@ test_that("all types of by argument raise no error", { match_type = "m:m") |> expect_no_error() + + # THIS ONE joyn(x = x4, y = y4, by = c("id1 = id", "id2"), @@ -439,7 +444,7 @@ test_that("match types work", { }) - +########################################################################################### test_that("Update NAs", { # update NAs in x variable form x jn <- joyn(x2, @@ -765,12 +770,61 @@ test_that("anti join warning for update values", { r2) +}) -}) +# Test all input data is unchanged + + +test_that("joyn() - input data unchanged", { + + expect_equal(x1, + data.table(id = c(1L, 1L, 2L, 3L, NA_integer_), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = 11:15)) + expect_equal(y1, + data.table(id = c(1,2, 4), + y = c(11L, 15L, 16))) + expect_equal(x2, + data.table(id = c(1, 4, 2, 3, NA), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + expect_equal(y2, + data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + expect_equal(x3, + data.table(id = c("c","b", "d"), + v = 8:10, + foo = c(4,2, 7))) + + expect_equal(y3, + data.table(id = c("c","b", "c", "a"), + y = c(11L, 15L, 18L, 20L))) + + expect_equal(x4, + data.table(id1 = c(1, 1, 2, 3, 3), + id2 = c(1, 1, 2, 3, 4), + t = c(1L, 2L, 1L, 2L, NA_integer_), + x = c(16, 12, NA, NA, 15))) + + expect_equal(y4, + data.table(id = c(1, 2, 5, 6, 3), + id2 = c(1, 1, 2, 3, 4), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:20))) + + expect_equal(x5, + data.table(id = c(1, 2, 5, 6, 3), + yd = c(1, 2, 5, 6, 3), + y = c(11L, 15L, 20L, 13L, 10L), + x = c(16:18, NA, NA))) + +}) From e59560482708fdad60612e5a437224d7e1cc2561 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 16:32:55 -0400 Subject: [PATCH 05/23] rm copy() and modify by reference --- R/update_na_vals.R | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/R/update_na_vals.R b/R/update_na_vals.R index 580e44a6..1e2ec44b 100644 --- a/R/update_na_vals.R +++ b/R/update_na_vals.R @@ -31,7 +31,7 @@ update_na_values <- function(dt, is_data_table <- inherits(dt, "data.table") # Add util vars #### - dt_1 <- copy(dt) + dt_1 <- dt dt_1 <- dt_1 |> ftransform(#use_util_reportvar = get(reportvar), # create variable for var.x and var.y is NA @@ -60,20 +60,24 @@ update_na_values <- function(dt, if (is_data_table) { - dt_1[get(reportvar) == 4, - (x.var) := mget(y.var)] - - dt_1[get(reportvar) == 5, - eval(x.var) := mget(y.var)] + dt_1[dt_1[[reportvar]] == 4, + x.var] <- dt_1[dt_1[[reportvar]] == 4, + y.var, + with = FALSE] +# + dt_1[dt_1[[reportvar]] == 5, + x.var] <- dt_1[dt_1[[reportvar]] == 5, + y.var, + with = FALSE] } else { - to_replace <- which(dt_1[[reportvar]] %in% c(4, 5)) dt_1[to_replace, x.var] <- dt_1[to_replace, y.var] } # Remove util vars #### - get_vars(dt_1, c("varx_na", "vary_na")) <- NULL + get_vars(dt_1, + c("varx_na", "vary_na")) <- NULL # Return dt_1 From 2c18b7e80ef66d4fdd0fd717e99da68ae22e405b Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 16:43:28 -0400 Subject: [PATCH 06/23] Increment version number to 0.2.0.9001 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 33d97190..1fc2b2a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9000 +Version: 0.2.0.9001 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", From f4a6d6ca0cffcd97aa39c8f936d91468afb80795 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 16:44:04 -0400 Subject: [PATCH 07/23] Increment version number to 0.2.0.9002 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1fc2b2a5..e3d91a4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9001 +Version: 0.2.0.9002 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", From 3aa311b77689911d4bdbff59809bfa1b7deee2b6 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 17:24:09 -0400 Subject: [PATCH 08/23] replace data.table merge for m:m with collapse --- R/joyn_workhorse.R | 56 +++++++++++----------------------------------- 1 file changed, 13 insertions(+), 43 deletions(-) diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index 09a78015..f1561782 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -7,8 +7,7 @@ #' @param y data object, "right" or "using" #' @param by atomic character vector: key specifying join #' @param match_type atomic character vector of length 1: either "1:1" (default) -#' "1:m", "m:1", or "m:m". If "m:m" then executes `data.table::merge.data.table` -#' in the backend, otherwise uses `collapse::join()` +#' "1:m", "m:1", or "m:m". Relies on `collapse::join()` #' @param suffixes atomic character vector: give suffixes to columns common to both #' `x` and `y` #' @return data object of same class as `x` @@ -73,30 +72,18 @@ joyn_workhorse <- function( # not m:m => use collapse::join() dt_result <- tryCatch( expr = { - source_pkg <- if (match_type == "m:m") "data.table::merge" else "collapse::join" - if (match_type == "m:m") { - data.table::merge.data.table( - x = x, - y = y, - by = by, - all = TRUE, - sort = FALSE, - suffixes = suffixes, - allow.cartesian = TRUE - ) - - } else { - collapse::join( x = x, - y = y, - how = "full", - on = by, - multiple = TRUE, # matches row in x with m in y - validate = "m:m", # no checks performed - suffix = suffixes, # data.table suffixes - keep.col.order = TRUE, - verbose = 0, - column = NULL) - } + source_pkg <- "collapse::join" + + collapse::join(x = x, + y = y, + how = "full", + on = by, + multiple = TRUE, # matches row in x with m in y + validate = "m:m", # no checks performed + suffix = suffixes, # data.table suffixes + keep.col.order = TRUE, + verbose = 0, + column = NULL) }, # end of expr section error = function(e) { @@ -123,22 +110,6 @@ joyn_workhorse <- function( ) } - # This is inefficient but it is the only way to return the table when - # there is a warning - - if (match_type == "m:m") { - data.table::merge.data.table( - x = x, - y = y, - by = by, - all = TRUE, - sort = FALSE, - suffixes = suffixes, - allow.cartesian = TRUE - ) |> - suppressWarnings() - - } else { collapse::join( x = x, y = y, how = "full", @@ -150,7 +121,6 @@ joyn_workhorse <- function( verbose = 0, column = NULL) |> suppressWarnings() - } } From f4c2d37d8b976664e2b29d74995eba01debbe013 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 17:24:23 -0400 Subject: [PATCH 09/23] update documentation --- R/joyn-merge.R | 2 +- man/clear_joynenv.Rd | 2 +- man/joyn-package.Rd | 1 + man/joyn.Rd | 5 +---- man/joyn_report.Rd | 2 +- man/joyn_workhorse.Rd | 3 +-- man/msg_type_dt.Rd | 2 +- man/store_msg.Rd | 2 +- man/style.Rd | 2 +- man/type_choices.Rd | 2 +- 10 files changed, 10 insertions(+), 13 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 52499975..a340594f 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -29,7 +29,7 @@ #' observations that matched in both tables and the ones that did not match in #' y. The ones in x will be discarded. If *"inner"*, it only keeps the #' observations that matched both tables. Note that if, for example, a `keep = -#' "left"`, the `joyn()` function still executes a full join under the hood +#' "left", the `joyn()` function still executes a full join under the hood #' and then filters so that only rows the output table is a left join. This #' behaviour, while inefficient, allows all the diagnostics and checks #' conducted by `joyn`. diff --git a/man/clear_joynenv.Rd b/man/clear_joynenv.Rd index 869526bc..52badba2 100644 --- a/man/clear_joynenv.Rd +++ b/man/clear_joynenv.Rd @@ -23,8 +23,8 @@ print(joyn:::joyn_msgs_exist()) } \seealso{ Messages functions -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/joyn-package.Rd b/man/joyn-package.Rd index e9bed97a..884ed073 100644 --- a/man/joyn-package.Rd +++ b/man/joyn-package.Rd @@ -3,6 +3,7 @@ \docType{package} \name{joyn-package} \alias{joyn-package} +\alias{_PACKAGE} \title{joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features} \description{ Tool for diagnosing table joins. It combines the speed of `collapse` and `data.table`, the flexibility of `dplyr`, and the diagnosis and features of the `merge` command in `Stata`. diff --git a/man/joyn.Rd b/man/joyn.Rd index af320f28..eab18ee1 100644 --- a/man/joyn.Rd +++ b/man/joyn.Rd @@ -57,10 +57,7 @@ that matched in both tables and the ones that did not match in x. The ones in y will be discarded. If \emph{"right"} or \emph{"using"}, it keeps the observations that matched in both tables and the ones that did not match in y. The ones in x will be discarded. If \emph{"inner"}, it only keeps the -observations that matched both tables. Note that if, for example, a \code{keep = "left"}, the \code{joyn()} function still executes a full join under the hood -and then filters so that only rows the output table is a left join. This -behaviour, while inefficient, allows all the diagnostics and checks -conducted by \code{joyn}.} +observations that matched both tables. Note that if, for example, a \verb{keep = "left", the }joyn()\verb{function still executes a full join under the hood and then filters so that only rows the output table is a left join. This behaviour, while inefficient, allows all the diagnostics and checks conducted by}joyn`.} \item{y_vars_to_keep}{character: Vector of variable names in \code{y} that will be kept after the merge. If TRUE (the default), it keeps all the brings all diff --git a/man/joyn_report.Rd b/man/joyn_report.Rd index e8e3c4f6..60e5f200 100644 --- a/man/joyn_report.Rd +++ b/man/joyn_report.Rd @@ -31,8 +31,8 @@ joyn_report(verbose = TRUE) \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/joyn_workhorse.Rd b/man/joyn_workhorse.Rd index 95c3bee7..9bd5fb53 100644 --- a/man/joyn_workhorse.Rd +++ b/man/joyn_workhorse.Rd @@ -20,8 +20,7 @@ joyn_workhorse( \item{by}{atomic character vector: key specifying join} \item{match_type}{atomic character vector of length 1: either "1:1" (default) -"1:m", "m:1", or "m:m". If "m:m" then executes \code{data.table::merge.data.table} -in the backend, otherwise uses \code{collapse::join()}} +"1:m", "m:1", or "m:m". Relies on \code{collapse::join()}} \item{suffixes}{atomic character vector: give suffixes to columns common to both \code{x} and \code{y}} diff --git a/man/msg_type_dt.Rd b/man/msg_type_dt.Rd index db5b54fe..21114ce6 100644 --- a/man/msg_type_dt.Rd +++ b/man/msg_type_dt.Rd @@ -15,8 +15,8 @@ convert style of joyn message to data frame containing type and message \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/store_msg.Rd b/man/store_msg.Rd index 6a1de9b0..ed455461 100644 --- a/man/store_msg.Rd +++ b/man/store_msg.Rd @@ -29,8 +29,8 @@ joyn:::store_msg("warn", \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{style}()}, diff --git a/man/style.Rd b/man/style.Rd index 14ca8db5..78477723 100644 --- a/man/style.Rd +++ b/man/style.Rd @@ -22,8 +22,8 @@ https://github.com/r-lib/pkgbuild/blob/3ba537ab8a6ac07d3fe11c17543677d2a0786be6/ \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/type_choices.Rd b/man/type_choices.Rd index a158471a..9a2cdfe7 100644 --- a/man/type_choices.Rd +++ b/man/type_choices.Rd @@ -15,8 +15,8 @@ Choice of messages \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, From 310f5f7042ffc8b0645996717d3ae440cdde030c Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 17:24:40 -0400 Subject: [PATCH 10/23] Increment version number to 0.2.0.9003 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3d91a4f..c33447a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9002 +Version: 0.2.0.9003 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", From 0884746b980357116b4971088b4ed694aa25fa82 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 13 May 2024 17:51:41 -0400 Subject: [PATCH 11/23] update news --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index d9bd34ed..187ebba1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,12 @@ # joyn (development version) + * Add `anti_join()` function. * Add `unmask_joyn()` function to unmask `joyn` functions that mask `dplyr` equivalents. +* Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`. + # joyn 0.2.0 From bd126f8b03c34e6983e9cca7096689294929d75e Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 13 May 2024 18:00:30 -0400 Subject: [PATCH 12/23] Include more in news closes #58 --- DESCRIPTION | 2 +- NEWS.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c33447a7..2924e337 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Imports: data.table, cli, utils, - collapse (>= 2.0.9), + collapse (>= 2.0.13), lifecycle Depends: R (>= 2.10) diff --git a/NEWS.md b/NEWS.md index 187ebba1..51909a1a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,9 @@ * Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`. +* improve ineffciencies in deep copies with `m:m` joins + +* Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58). # joyn 0.2.0 From 6c243833fe025e90c86ea0102d67cd6421480284 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 13 May 2024 18:19:18 -0400 Subject: [PATCH 13/23] document --- man/clear_joynenv.Rd | 2 +- man/joyn-package.Rd | 1 - man/joyn_report.Rd | 2 +- man/msg_type_dt.Rd | 2 +- man/store_msg.Rd | 2 +- man/style.Rd | 2 +- man/type_choices.Rd | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/man/clear_joynenv.Rd b/man/clear_joynenv.Rd index 52badba2..869526bc 100644 --- a/man/clear_joynenv.Rd +++ b/man/clear_joynenv.Rd @@ -23,8 +23,8 @@ print(joyn:::joyn_msgs_exist()) } \seealso{ Messages functions -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/joyn-package.Rd b/man/joyn-package.Rd index 884ed073..e9bed97a 100644 --- a/man/joyn-package.Rd +++ b/man/joyn-package.Rd @@ -3,7 +3,6 @@ \docType{package} \name{joyn-package} \alias{joyn-package} -\alias{_PACKAGE} \title{joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features} \description{ Tool for diagnosing table joins. It combines the speed of `collapse` and `data.table`, the flexibility of `dplyr`, and the diagnosis and features of the `merge` command in `Stata`. diff --git a/man/joyn_report.Rd b/man/joyn_report.Rd index 60e5f200..e8e3c4f6 100644 --- a/man/joyn_report.Rd +++ b/man/joyn_report.Rd @@ -31,8 +31,8 @@ joyn_report(verbose = TRUE) \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/msg_type_dt.Rd b/man/msg_type_dt.Rd index 21114ce6..db5b54fe 100644 --- a/man/msg_type_dt.Rd +++ b/man/msg_type_dt.Rd @@ -15,8 +15,8 @@ convert style of joyn message to data frame containing type and message \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/store_msg.Rd b/man/store_msg.Rd index ed455461..6a1de9b0 100644 --- a/man/store_msg.Rd +++ b/man/store_msg.Rd @@ -29,8 +29,8 @@ joyn:::store_msg("warn", \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{style}()}, diff --git a/man/style.Rd b/man/style.Rd index 78477723..14ca8db5 100644 --- a/man/style.Rd +++ b/man/style.Rd @@ -22,8 +22,8 @@ https://github.com/r-lib/pkgbuild/blob/3ba537ab8a6ac07d3fe11c17543677d2a0786be6/ \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/type_choices.Rd b/man/type_choices.Rd index 9a2cdfe7..a158471a 100644 --- a/man/type_choices.Rd +++ b/man/type_choices.Rd @@ -15,8 +15,8 @@ Choice of messages \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, From 878803733145c281107ea4f43da847326567b506 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 22:32:53 -0400 Subject: [PATCH 14/23] change joyn() input names on.exit --- R/joyn-merge.R | 61 ++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index a340594f..0fdf0db2 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -254,6 +254,40 @@ joyn <- function(x, fixby <- check_by_vars(by, x, y) by <- fixby$by + # Change names back on exit + # Change names back for inputs------------------------------ + on.exit( + expr = { + if (any(grepl(pattern = "keyby", x = names(x_original)))) { + + knames <- names(x_original)[grepl(pattern = "keyby", + x = names(x_original))] + knames <- knames[order(knames)] + + data.table::setnames(x_original, + old = knames, + new = xbynames) + } + + if (any(grepl(pattern = "keyby", x = names(y_original)))) { + + knames <- names(y_original)[grepl(pattern = "keyby", + x = names(y_original))] + knames <- knames[order(knames)] + + data.table::setnames(y_original, + old = knames, + new = ybynames) + + if (all(names(y_original) %in% ynames)) { + colorderv(y_original, + neworder = ynames) + } + } + }, + add = TRUE + ) + ## Check suffixes ------------- check_suffixes(suffixes) @@ -455,33 +489,6 @@ joyn <- function(x, } - # Change names back for inputs------------------------------ - if (any(grepl(pattern = "keyby", x = names(x_original)))) { - - knames <- names(x_original)[grepl(pattern = "keyby", - x = names(x_original))] - knames <- knames[order(knames)] - - data.table::setnames(x_original, - old = knames, - new = xbynames) - } - - if (any(grepl(pattern = "keyby", x = names(y_original)))) { - - knames <- names(y_original)[grepl(pattern = "keyby", - x = names(y_original))] - knames <- knames[order(knames)] - - data.table::setnames(y_original, - old = knames, - new = ybynames) - - if (all(names(y_original) %in% ynames)) { - colorderv(y_original, - neworder = ynames) - } - } ## convert to characters if chosen ------- if (reporttype == "character") { From e128c3aef039f05d77c39d7096dc076e1fd3341d Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Mon, 13 May 2024 22:33:46 -0400 Subject: [PATCH 15/23] rm as.data.table --- R/joyn-merge.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 0fdf0db2..30bdc63e 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -244,12 +244,6 @@ joyn <- function(x, x_original <- x y_original <- y - # If match type is m:m we need to convert to data.table - if (match_type == "m:m") { - x <- as.data.table(x) - y <- as.data.table(y) - } - ## Modify BY when is expression --------- fixby <- check_by_vars(by, x, y) by <- fixby$by From 0d7604d511ea2bb65737dbe504170fe60b240aed Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 14 May 2024 00:15:23 -0400 Subject: [PATCH 16/23] change update values for data.tables --- tests/testthat/test-update_na_vals.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-update_na_vals.R b/tests/testthat/test-update_na_vals.R index ab46faba..0a145a17 100644 --- a/tests/testthat/test-update_na_vals.R +++ b/tests/testthat/test-update_na_vals.R @@ -112,7 +112,8 @@ test_that("update_na_vals -update values of one var", { ) # Check not updated values - dt[is.na(x.x) | is.na(x.y) | !.joyn == 3] |> fselect((id:x.y)) |> + dt |> + fsubset(is.na(x.x) | is.na(x.y) | !.joyn == 3) |> fselect((id:x.y)) |> expect_equal(res[!.joyn == 5,] |> fselect((id:x.y))) expect_true(!any(4 %in% res$.joyn)) From c715c2c6601274919ee32ff83df5fa4fdfea5eda Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 14 May 2024 00:15:37 -0400 Subject: [PATCH 17/23] change update values for data.tables --- R/update_na_vals.R | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/R/update_na_vals.R b/R/update_na_vals.R index 1e2ec44b..d6ecd102 100644 --- a/R/update_na_vals.R +++ b/R/update_na_vals.R @@ -57,22 +57,19 @@ update_na_values <- function(dt, } # Replace values #### - if (is_data_table) { - dt_1[dt_1[[reportvar]] == 4, - x.var] <- dt_1[dt_1[[reportvar]] == 4, - y.var, - with = FALSE] -# - dt_1[dt_1[[reportvar]] == 5, - x.var] <- dt_1[dt_1[[reportvar]] == 5, - y.var, - with = FALSE] + gv(dt_1[get(reportvar) == 4], + x.var) <- gv(dt_1[get(reportvar) == 4], + y.var) + + gv(dt_1[get(reportvar) == 5], + x.var) <- gv(dt_1[get(reportvar) == 5], + y.var) } else { - to_replace <- which(dt_1[[reportvar]] %in% c(4, 5)) - dt_1[to_replace, x.var] <- dt_1[to_replace, y.var] + to_replace <- which(dt_1[[reportvar]] %in% c(4, 5)) + dt_1[to_replace, x.var] <- dt_1[to_replace, y.var] } # Remove util vars #### From 717a4b7e1d8a12e8521257003ed90391341bc3a1 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 14 May 2024 13:10:40 -0400 Subject: [PATCH 18/23] remove unnecessary line of code --- R/update_na_vals.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/update_na_vals.R b/R/update_na_vals.R index d6ecd102..108bca39 100644 --- a/R/update_na_vals.R +++ b/R/update_na_vals.R @@ -31,8 +31,8 @@ update_na_values <- function(dt, is_data_table <- inherits(dt, "data.table") # Add util vars #### - dt_1 <- dt - dt_1 <- dt_1 |> + + dt_1 <- dt |> ftransform(#use_util_reportvar = get(reportvar), # create variable for var.x and var.y is NA # TRUE if NOT NA From 780782fd05d336d60a88aa7063010390c0f721a2 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 21 May 2024 21:29:55 -0400 Subject: [PATCH 19/23] modify test giving warning --- tests/testthat/test-dplyr-joins.R | 70 +------------------------------ 1 file changed, 2 insertions(+), 68 deletions(-) diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index 13a5e259..fc6e9494 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -547,7 +547,6 @@ test_that("RIGHT JOIN - no id given", { by = NULL )) }) - test_that ("RIGHT JOIN - when copy TRUE get warning message", { clear_joynenv() joyn::right_join( @@ -708,7 +707,6 @@ test_that("RIGHT JOIN - reportvar works", { reportvar = FALSE)) }) - test_that("RIGHT JOIN - NA matches", { jn <- right_join( @@ -734,9 +732,6 @@ test_that("RIGHT JOIN - NA matches", { expect_contains("warn") }) - - -### ERROR 1 --> change in y4$id2 test_that("RIGHT JOIN - unmatched error", { right_join(x = x4, @@ -760,14 +755,14 @@ test_that("RIGHT JOIN - unmatched error", { unmatched = "error") |> expect_error() - right_join(x = x4, + right_join(x = x4, y = y4, relationship = "one-to-one", by = c("id2", "x"), unmatched = "error") |> expect_error() - right_join(x = x4, + right_join(x = x4, y = y4, relationship = "many-to-one", by = c("id1=id2", "id2=id"), @@ -1090,7 +1085,6 @@ test_that("FULL JOIN - (correctly) incorrectly specified arguments give (no) err }) - test_that("FULL JOIN - argument `keep` preserves keys in output", { jn <- full_join( x = x1, @@ -1635,8 +1629,6 @@ test_that("INNER JOIN - NA matches", { }) test_that("INNER JOIN - unmatched error", { - # HERE, the of y4 have gotten switched, so id1 --> id2. - # problem must be in the ordering of the setnames() somewhere inner_join(x = x4, y = y4, relationship = "many-to-many", @@ -2047,61 +2039,3 @@ test_that("ANTI JOIN - NA matches", { }) - - - -test_that("joyn() - input data unchanged", { - - expect_equal(x1, - data.table(id = c(1L, 1L, 2L, 3L, NA_integer_), - t = c(1L, 2L, 1L, 2L, NA_integer_), - x = 11:15)) - - expect_equal(y1, - data.table(id = c(1,2, 4), - y = c(11L, 15L, 16))) - - expect_equal(x2, - data.table(id = c(1, 4, 2, 3, NA), - t = c(1L, 2L, 1L, 2L, NA_integer_), - x = c(16, 12, NA, NA, 15))) - - expect_equal(y2, - data.table(id = c(1, 2, 5, 6, 3), - yd = c(1, 2, 5, 6, 3), - y = c(11L, 15L, 20L, 13L, 10L), - x = c(16:20))) - - expect_equal(x3, - data.table(id = c("c","b", "d"), - v = 8:10, - foo = c(4,2, 7))) - - expect_equal(y3, - data.table(id = c("c","b", "c", "a"), - y = c(11L, 15L, 18L, 20L))) - - expect_equal(x4, - data.table(id1 = c(1, 1, 2, 3, 3), - id2 = c(1, 1, 2, 3, 4), - t = c(1L, 2L, 1L, 2L, NA_integer_), - x = c(16, 12, NA, NA, 15))) - - expect_equal(y4, - data.table(id = c(1, 2, 5, 6, 3), - id2 = c(1, 1, 2, 3, 4), - y = c(11L, 15L, 20L, 13L, 10L), - x = c(16:20))) - - # changed - expect_equal(x5, - data.table(id = c(1L, 1L, 2L, 3L, NA_integer_, NA_integer_), - t = c(1L, 2L, 1L, 2L, NA_integer_, 4L), - x = 11:16)) - - expect_equal(y5, - data.table(id = c(1,2, 4, NA_integer_, NA_integer_), - y = c(11L, 15L, 16, 17L, 18L))) - -}) - From 44cc5ebe716111a1a3590b95137d426d8778dbb2 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 21 May 2024 22:02:42 -0400 Subject: [PATCH 20/23] add fn correct_names() for repetitive code --- R/dplyr-joins.R | 92 ++++++++++++++++--------------------------------- R/joyn-merge.R | 18 +++++----- R/utils.R | 36 ++++++++++++++++++- 3 files changed, 73 insertions(+), 73 deletions(-) diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index 6818473c..f0c5609d 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -89,18 +89,12 @@ left_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - - xbynames <- xbynames[order(fmatch(xbynames, names(x)))] - ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames if (keep == TRUE) { jn_type <- "left" @@ -255,18 +249,12 @@ right_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - - xbynames <- xbynames[order(fmatch(xbynames, names(x)))] - ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames if (keep == TRUE) { jn_type <- "right" @@ -422,18 +410,12 @@ full_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - - xbynames <- xbynames[order(fmatch(xbynames, names(x)))] - ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames if (keep == TRUE) { jn_type <- "full" @@ -595,18 +577,12 @@ inner_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - - xbynames <- xbynames[order(fmatch(xbynames, names(x)))] - ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames if (keep == TRUE) { jn_type <- "inner" @@ -758,18 +734,12 @@ anti_join <- function( dropreport <- args_check$dropreport # Column names ----------------------------------- - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - - xbynames <- xbynames[order(fmatch(xbynames, names(x)))] - ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + correct_names <- correct_names(by = by, + x = x, + y = y) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames if (keep == TRUE) { jn_type <- "anti" @@ -1169,5 +1139,3 @@ process_by_vector <- function(by, input = c("left", "right")) { - - diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 30bdc63e..979db2b6 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -229,16 +229,14 @@ joyn <- function(x, class_x <- class(x) # ensure input names can be restored - byexp <- grep(pattern = "==?", - x = by, - value = TRUE) - xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\1", - byexp)) - ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", - "\\3", - byexp)) - ynames <- copy(names(y)) + correct_names <- correct_names(by = by, + x = x, + y = y, + order = FALSE) + byexp <- correct_names$byexp + xbynames <- correct_names$xbynames + ybynames <- correct_names$ybynames + ynames <- copy(names(y)) # maintain name that is bound to original inputs x_original <- x diff --git a/R/utils.R b/R/utils.R index a1080c15..6a72bee3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -312,7 +312,7 @@ detach_package <- function(pkg_name) { search_item <- paste("package", pkg_name, sep = ":") - if(search_item %in% search()) { + if (search_item %in% search()) { detach(search_item, unload = TRUE, @@ -320,3 +320,37 @@ detach_package <- function(pkg_name) { } } + + + + +#' Function used to correct names in input data frames using `by` argument +#' +#' @param by `by` argument parsed from higher level function +#' @param x left data frame +#' @param y right data frame +#' +#' @return list +#' @keywords internal +correct_names <- function(by, x, y, order = TRUE) { + byexp <- grep(pattern = "==?", + x = by, + value = TRUE) + xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\1", + byexp)) + ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", + "\\3", + byexp)) + + if (order) { + xbynames <- xbynames[order(fmatch(xbynames, names(x)))] + ybynames <- ybynames[order(fmatch(ybynames, names(y)))] + } + + out <- list(byexp = byexp, + xbynames = xbynames, + ybynames = ybynames) + out +} + From de552caf4b8d339983edf50fb49cca15d4ec3e6b Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 21 May 2024 22:03:03 -0400 Subject: [PATCH 21/23] documentation --- man/clear_joynenv.Rd | 2 +- man/correct_names.Rd | 22 ++++++++++++++++++++++ man/joyn-package.Rd | 1 + man/joyn_report.Rd | 2 +- man/msg_type_dt.Rd | 2 +- man/store_msg.Rd | 2 +- man/style.Rd | 2 +- man/type_choices.Rd | 2 +- 8 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 man/correct_names.Rd diff --git a/man/clear_joynenv.Rd b/man/clear_joynenv.Rd index 869526bc..52badba2 100644 --- a/man/clear_joynenv.Rd +++ b/man/clear_joynenv.Rd @@ -23,8 +23,8 @@ print(joyn:::joyn_msgs_exist()) } \seealso{ Messages functions -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/correct_names.Rd b/man/correct_names.Rd new file mode 100644 index 00000000..411d8710 --- /dev/null +++ b/man/correct_names.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{correct_names} +\alias{correct_names} +\title{Function used to correct names in input data frames using \code{by} argument} +\usage{ +correct_names(by, x, y, order = TRUE) +} +\arguments{ +\item{by}{\code{by} argument parsed from higher level function} + +\item{x}{left data frame} + +\item{y}{right data frame} +} +\value{ +list +} +\description{ +Function used to correct names in input data frames using \code{by} argument +} +\keyword{internal} diff --git a/man/joyn-package.Rd b/man/joyn-package.Rd index e9bed97a..884ed073 100644 --- a/man/joyn-package.Rd +++ b/man/joyn-package.Rd @@ -3,6 +3,7 @@ \docType{package} \name{joyn-package} \alias{joyn-package} +\alias{_PACKAGE} \title{joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features} \description{ Tool for diagnosing table joins. It combines the speed of `collapse` and `data.table`, the flexibility of `dplyr`, and the diagnosis and features of the `merge` command in `Stata`. diff --git a/man/joyn_report.Rd b/man/joyn_report.Rd index e8e3c4f6..60e5f200 100644 --- a/man/joyn_report.Rd +++ b/man/joyn_report.Rd @@ -31,8 +31,8 @@ joyn_report(verbose = TRUE) \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/msg_type_dt.Rd b/man/msg_type_dt.Rd index db5b54fe..21114ce6 100644 --- a/man/msg_type_dt.Rd +++ b/man/msg_type_dt.Rd @@ -15,8 +15,8 @@ convert style of joyn message to data frame containing type and message \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/store_msg.Rd b/man/store_msg.Rd index 6a1de9b0..ed455461 100644 --- a/man/store_msg.Rd +++ b/man/store_msg.Rd @@ -29,8 +29,8 @@ joyn:::store_msg("warn", \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{style}()}, diff --git a/man/style.Rd b/man/style.Rd index 14ca8db5..78477723 100644 --- a/man/style.Rd +++ b/man/style.Rd @@ -22,8 +22,8 @@ https://github.com/r-lib/pkgbuild/blob/3ba537ab8a6ac07d3fe11c17543677d2a0786be6/ \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/type_choices.Rd b/man/type_choices.Rd index a158471a..9a2cdfe7 100644 --- a/man/type_choices.Rd +++ b/man/type_choices.Rd @@ -15,8 +15,8 @@ Choice of messages \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msg}()}, \code{\link{joyn_msgs_exist}()}, +\code{\link{joyn_msg}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, From 72d4195453cb7d1320614b919d96dea31a8cfc77 Mon Sep 17 00:00:00 2001 From: zander-prinsloo Date: Tue, 21 May 2024 22:34:58 -0400 Subject: [PATCH 22/23] correct tests for sorting & attributes --- tests/testthat/test-dplyr-joins.R | 16 +++++++++------- tests/testthat/test-merge-data.table.R | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index e86c5f9a..9d478d9b 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -870,7 +870,8 @@ test_that("FULL JOIN - Conducts full join", { x4, y4, by = c("id1 = id2"), - relationship = "many-to-many" + relationship = "many-to-many", + sort = TRUE ) #dplyr::full_join(x4, y4, by = dplyr::join_by(id1 == id2), relationship = "many-to-many") jn_dplyr <- dplyr::full_join( @@ -879,9 +880,9 @@ test_that("FULL JOIN - Conducts full join", { by = dplyr::join_by(id1 == id2), relationship = "many-to-many" ) - attr(jn_dplyr, "sorted") <- "id1" - attr(jn_joyn, - "sorted") <- "id" + # attr(jn_dplyr, "sorted") <- "id1" + # attr(jn_joyn, + # "sorted") <- "id" expect_equal( jn |> fselect(-get(reportvar)), jn_dplyr, @@ -1703,13 +1704,14 @@ test_that("ANTI JOIN - Conducts ANTI join", { jn_joyn <- anti_join( x = x1, y = y1, - by = "id" + by = "id", + sort = TRUE ) jn_dplyr <- dplyr::anti_join( - x1, y1, by = "id" + x1, y1, by = "id", s ) - setorder(jn_dplyr, na.last = F) + attr( jn_dplyr, "sorted" diff --git a/tests/testthat/test-merge-data.table.R b/tests/testthat/test-merge-data.table.R index c391cfa0..952f758c 100644 --- a/tests/testthat/test-merge-data.table.R +++ b/tests/testthat/test-merge-data.table.R @@ -380,7 +380,7 @@ test_that("FULL JOIN - Conducts full join", { by.y = "id2", all = TRUE ) - + attr(jn_dt, 'sorted') <- NULL expect_equal( jn |> fselect(-get(reportvar)), From fc7177f0edcf2ef172cff86304f93fc4039b1210 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Thu, 23 May 2024 09:32:26 -0400 Subject: [PATCH 23/23] document --- man/clear_joynenv.Rd | 2 +- man/joyn-package.Rd | 1 - man/joyn_report.Rd | 2 +- man/msg_type_dt.Rd | 2 +- man/store_msg.Rd | 2 +- man/style.Rd | 2 +- man/type_choices.Rd | 2 +- 7 files changed, 6 insertions(+), 7 deletions(-) diff --git a/man/clear_joynenv.Rd b/man/clear_joynenv.Rd index 52badba2..869526bc 100644 --- a/man/clear_joynenv.Rd +++ b/man/clear_joynenv.Rd @@ -23,8 +23,8 @@ print(joyn:::joyn_msgs_exist()) } \seealso{ Messages functions -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/joyn-package.Rd b/man/joyn-package.Rd index 884ed073..e9bed97a 100644 --- a/man/joyn-package.Rd +++ b/man/joyn-package.Rd @@ -3,7 +3,6 @@ \docType{package} \name{joyn-package} \alias{joyn-package} -\alias{_PACKAGE} \title{joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features} \description{ Tool for diagnosing table joins. It combines the speed of `collapse` and `data.table`, the flexibility of `dplyr`, and the diagnosis and features of the `merge` command in `Stata`. diff --git a/man/joyn_report.Rd b/man/joyn_report.Rd index 60e5f200..e8e3c4f6 100644 --- a/man/joyn_report.Rd +++ b/man/joyn_report.Rd @@ -31,8 +31,8 @@ joyn_report(verbose = TRUE) \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/msg_type_dt.Rd b/man/msg_type_dt.Rd index 21114ce6..db5b54fe 100644 --- a/man/msg_type_dt.Rd +++ b/man/msg_type_dt.Rd @@ -15,8 +15,8 @@ convert style of joyn message to data frame containing type and message \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{store_msg}()}, \code{\link{style}()}, diff --git a/man/store_msg.Rd b/man/store_msg.Rd index ed455461..6a1de9b0 100644 --- a/man/store_msg.Rd +++ b/man/store_msg.Rd @@ -29,8 +29,8 @@ joyn:::store_msg("warn", \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{style}()}, diff --git a/man/style.Rd b/man/style.Rd index 78477723..14ca8db5 100644 --- a/man/style.Rd +++ b/man/style.Rd @@ -22,8 +22,8 @@ https://github.com/r-lib/pkgbuild/blob/3ba537ab8a6ac07d3fe11c17543677d2a0786be6/ \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()}, diff --git a/man/type_choices.Rd b/man/type_choices.Rd index 9a2cdfe7..a158471a 100644 --- a/man/type_choices.Rd +++ b/man/type_choices.Rd @@ -15,8 +15,8 @@ Choice of messages \seealso{ Messages functions \code{\link{clear_joynenv}()}, -\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_msg}()}, +\code{\link{joyn_msgs_exist}()}, \code{\link{joyn_report}()}, \code{\link{msg_type_dt}()}, \code{\link{store_msg}()},