Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collapse mm #61

Merged
merged 25 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7450c8e
inital rm copy
zander-prinsloo Apr 30, 2024
130437d
fix dplyr-like joins renaming & copying issue
zander-prinsloo May 13, 2024
56cb063
correct input names in joyn function
zander-prinsloo May 13, 2024
5f56d92
test changing of input data frames
zander-prinsloo May 13, 2024
e595604
rm copy() and modify by reference
zander-prinsloo May 13, 2024
2c18b7e
Increment version number to 0.2.0.9001
zander-prinsloo May 13, 2024
f4a6d6c
Increment version number to 0.2.0.9002
zander-prinsloo May 13, 2024
8965130
Merge branch 'DEV' into rm_copy
zander-prinsloo May 13, 2024
3aa311b
replace data.table merge for m:m with collapse
zander-prinsloo May 13, 2024
f4c2d37
update documentation
zander-prinsloo May 13, 2024
310f5f7
Increment version number to 0.2.0.9003
zander-prinsloo May 13, 2024
0884746
update news
randrescastaneda May 13, 2024
bd126f8
Include more in news
randrescastaneda May 13, 2024
6c24383
document
randrescastaneda May 13, 2024
8788037
change joyn() input names on.exit
zander-prinsloo May 14, 2024
e128c3a
rm as.data.table
zander-prinsloo May 14, 2024
0d7604d
change update values for data.tables
zander-prinsloo May 14, 2024
c715c2c
change update values for data.tables
zander-prinsloo May 14, 2024
717a4b7
remove unnecessary line of code
randrescastaneda May 14, 2024
780782f
modify test giving warning
zander-prinsloo May 22, 2024
44cc5eb
add fn correct_names() for repetitive code
zander-prinsloo May 22, 2024
de552ca
documentation
zander-prinsloo May 22, 2024
d7a045b
Merge branch 'DEV' into collapse_mm
zander-prinsloo May 22, 2024
72d4195
correct tests for sorting & attributes
zander-prinsloo May 22, 2024
fc7177f
document
randrescastaneda May 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: joyn
Type: Package
Title: Tool for Diagnosis of Tables Joins and Complementary Join Features
Version: 0.2.0.9001
Version: 0.2.0.9003
Authors@R: c(person(given = "R.Andres",
family = "Castaneda",
email = "[email protected]",
Expand Down Expand Up @@ -37,7 +37,7 @@ Imports:
data.table,
cli,
utils,
collapse (>= 2.0.9),
collapse (>= 2.0.13),
lifecycle
Depends:
R (>= 2.10)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
# joyn (development version)


* Add `anti_join()` function.

* Add `unmask_joyn()` function to unmask `joyn` functions that mask `dplyr` equivalents.

* Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`.

* improve ineffciencies in deep copies with `m:m` joins

* Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58).

# joyn 0.2.0

Expand Down
173 changes: 145 additions & 28 deletions R/dplyr-joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ left_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -91,6 +89,19 @@ left_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
byexp <- grep(pattern = "==?",
x = by,
value = TRUE)
xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\1",
byexp))
ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\3",
byexp))

xbynames <- xbynames[order(fmatch(xbynames, names(x)))]
ybynames <- ybynames[order(fmatch(ybynames, names(y)))]

if (keep == TRUE) {
jn_type <- "left"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -121,6 +132,20 @@ left_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand All @@ -134,6 +159,7 @@ left_join <- function(
get_vars(lj, reportvar) <- NULL
}


# return
lj
}
Expand Down Expand Up @@ -198,8 +224,6 @@ right_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand All @@ -211,6 +235,7 @@ right_join <- function(
choices = c("drop",
"error"))


args_check <- arguments_checks(x = x,
y = y,
by = by,
Expand All @@ -230,6 +255,19 @@ right_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
byexp <- grep(pattern = "==?",
x = by,
value = TRUE)
xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\1",
byexp))
ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\3",
byexp))

xbynames <- xbynames[order(fmatch(xbynames, names(x)))]
ybynames <- ybynames[order(fmatch(ybynames, names(y)))]

if (keep == TRUE) {
jn_type <- "right"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -260,6 +298,20 @@ right_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand Down Expand Up @@ -340,8 +392,6 @@ full_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -372,6 +422,19 @@ full_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
byexp <- grep(pattern = "==?",
x = by,
value = TRUE)
xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\1",
byexp))
ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\3",
byexp))

xbynames <- xbynames[order(fmatch(xbynames, names(x)))]
ybynames <- ybynames[order(fmatch(ybynames, names(y)))]

if (keep == TRUE) {
jn_type <- "full"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -403,6 +466,20 @@ full_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys----------------------------------------
if (unmatched == "error") {

Expand Down Expand Up @@ -488,8 +565,6 @@ inner_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -520,6 +595,19 @@ inner_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
byexp <- grep(pattern = "==?",
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these lines of code should be converted to a function that can be called everywhere. Right now, the same code is repeated in multiple places.

x = by,
value = TRUE)
xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\1",
byexp))
ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\3",
byexp))

xbynames <- xbynames[order(fmatch(xbynames, names(x)))]
ybynames <- ybynames[order(fmatch(ybynames, names(y)))]

if (keep == TRUE) {
jn_type <- "inner"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -550,6 +638,20 @@ inner_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# Unmatched Keys ---------------------------------------
if (unmatched == "error") {
check_unmatched_keys(x = x,
Expand Down Expand Up @@ -629,8 +731,6 @@ anti_join <- function(
clear_joynenv()

# Argument checks ---------------------------------
x <- copy(x)
y <- copy(y)
na_matches <- match.arg(na_matches,
choices = c("na","never"))
multiple <- match.arg(multiple,
Expand Down Expand Up @@ -658,6 +758,19 @@ anti_join <- function(
dropreport <- args_check$dropreport

# Column names -----------------------------------
byexp <- grep(pattern = "==?",
x = by,
value = TRUE)
xbynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\1",
byexp))
ybynames <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)",
"\\3",
byexp))

xbynames <- xbynames[order(fmatch(xbynames, names(x)))]
ybynames <- ybynames[order(fmatch(ybynames, names(y)))]

if (keep == TRUE) {
jn_type <- "anti"
modified_cols <- set_col_names(x = x,
Expand Down Expand Up @@ -688,6 +801,20 @@ anti_join <- function(
...
)

# Change names back------------------------------------
if (any(grepl(pattern = "keyby", x = names(x)))) {
data.table::setnames(x,
old = names(x)[grepl(pattern = "keyby",
x = names(x))],
new = xbynames)
}
if (any(grepl(pattern = "keyby", x = names(y)))) {
data.table::setnames(y,
old = names(y)[grepl(pattern = "keyby",
x = names(y))],
new = ybynames)
}

# # Unmatched Keys ---------------------------------------
if (dropreport == T) {
get_vars(aj, reportvar) <- NULL
Expand Down Expand Up @@ -838,25 +965,23 @@ arguments_checks <- function(x, y, by, copy, keep, suffix, na_matches, multiple,
#' @keywords internal
set_col_names <- function(x, y, by, suffix, jn_type) {

x_1 <- copy(x)
y_1 <- copy(y)

# If joining by different variables
if (length(grep(pattern = "==?", x = by, value = TRUE)) != 0) {
byexp <- grep(pattern = "==?", x = by, value = TRUE)
if (length(byexp) != 0) {

if (jn_type == "right") {
by_x_names <- fix_by_vars(by = by, x_1, y_1)$xby
by_x_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\1", byexp))
}

else if (jn_type == "left" | jn_type == "full" | jn_type == "inner") {
by_y_names <- fix_by_vars(by = by, x_1, y_1)$yby
by_y_names <- trimws(gsub("([^=]+)(\\s*==?\\s*)([^=]+)", "\\3", byexp))
}

}

# If joining by common var
else {
by_y_names <- by_x_names <- fix_by_vars(by = by, x_1, y_1)$by
by_y_names <- by_x_names <- by
}

# Add key vars with suffix to x and y
Expand Down Expand Up @@ -903,24 +1028,16 @@ check_unmatched_keys <- function(x, y, out, by, jn_type) {
# Left table --------------------------------------------------------
if (jn_type %in% c("left", "inner", "anti")) {

use_y_input <- process_by_vector(by = by, input = "right")
use_y_out <- process_by_vector(by = by, input = "left")
use_y_input <- process_by_vector(by = by, input = "right") # id2
use_y_out <- process_by_vector(by = by, input = "left") # id1

if (length(grep("==?", by, value = TRUE)) != 0) {

if (any(use_y_out %in% colnames(y))) {

store_msg(
type = "warn",
warn = paste(cli::symbol$warn, "\nWarning:"),
pale = "\nUnmatched = error not active for this joyn -unmatched keys are not detected"
)
cli::cli_warn("`Unmatched = error` not active for this joyn -unmatched keys are not detected")
}

else {
data.table::setnames(y,
new = use_y_out,
old = use_y_input)

if (unmatched_keys(x = y,
by = use_y_out,
Expand Down
Loading
Loading