From 34a744eaa05a8c57ef49ead365551ee2dfc232b9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 6 Apr 2022 22:46:01 +0200 Subject: [PATCH 01/24] add fix --- R/data.table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 473cf6e766..2bf1174033 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1324,7 +1324,8 @@ replace_dot_alias = function(e) { ## check key on i as well! ichk = is.data.table(i) && haskey(i) && identical(head(key(i), length(leftcols)), names_i[leftcols]) # i has the correct key, #3061 - if (keylen && (ichk || is.logical(i) || (.Call(CisOrderedSubset, irows, nrow(x)) && ((roll == FALSE) || length(irows) == 1L)))) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE + if (keylen && (ichk || is.logical(i) || (.Call(CisOrderedSubset, irows, nrow(x)) && ((roll == FALSE) || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE + && (identical(vapply_1c(ans[icolsAns[leftcols]], typeof), vapply_1c(ans[xcolsAns[rightcols]], typeof)) || is.sorted(ans, by=head(key(x),keylen)))))) #5361 merging on different key types, check if really sorted (e.g., character + factors) setattr(ans,"sorted",head(key(x),keylen)) } setattr(ans, "class", class(x)) # retain class that inherits from data.table, #64 From 80b0b91347579843db94f29c2f457a516d6fcc70 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 6 Apr 2022 22:46:10 +0200 Subject: [PATCH 02/24] add test --- inst/tests/tests.Rraw | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f453b96208..fb58d7bde0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7132,6 +7132,15 @@ test(1483.3, merge(x,y,by="country",all=TRUE), data.table(country=factor(c("US", setkey(y) test(1483.4, y[x], data.table(country="US", key="country")) +# 5361 merge on character and factor should only have key(x) if result is really sorted +x = data.table(x=c("c", "b", "a")) +y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a")), key="x") +test(1483.5, x[y, on="x"], x) +test(1483.6, y[x, on="x"], x) +x = data.table(x=c("c", "b", "a"), key="x") +y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a"))) +test(1483.7, x[y, on="x"], x) +test(1483.8, y[x, on="x"], data.table(x=c("a", "b", "c"))) # NULL items should be removed when making data.table from list, #842 # Original fix for #842 added a branch in as.data.table.list() using point() # Then PR#3471 moved logic from data.table() into as.data.table.list() and now removes NULL items up front, so longer need for the branch From 1f6c41e9b7a3fa48d9f4d0dcab2a7eb94ef39f3f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 6 Apr 2022 22:52:20 +0200 Subject: [PATCH 03/24] add more tests --- inst/tests/tests.Rraw | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fb58d7bde0..0fb73ac2c4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7135,12 +7135,18 @@ test(1483.4, y[x], data.table(country="US", key="country")) # 5361 merge on character and factor should only have key(x) if result is really sorted x = data.table(x=c("c", "b", "a")) y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a")), key="x") -test(1483.5, x[y, on="x"], x) -test(1483.6, y[x, on="x"], x) +test(1483.51, x[y, on="x"], x) +test(1483.52, y[x, on="x"], x) +test(1483.53, merge(x, y, by="x"), data.table(x=c("a", "b", "c"), key="x")) +test(1483.54, merge(y, x, by="x"), data.table(x=c("a", "b", "c"), key="x")) + x = data.table(x=c("c", "b", "a"), key="x") y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a"))) -test(1483.7, x[y, on="x"], x) -test(1483.8, y[x, on="x"], data.table(x=c("a", "b", "c"))) +test(1483.61, x[y, on="x"], x) +test(1483.62, y[x, on="x"], data.table(x=c("a", "b", "c"))) +test(1483.63, merge(x, y, by="x"), data.table(x=c("a", "b", "c"), key="x")) +test(1483.64, merge(y, x, by="x"), data.table(x=c("a", "b", "c"), key="x")) + # NULL items should be removed when making data.table from list, #842 # Original fix for #842 added a branch in as.data.table.list() using point() # Then PR#3471 moved logic from data.table() into as.data.table.list() and now removes NULL items up front, so longer need for the branch From 4ab82c18c2c4e4455f3dea199674bcbee4bc5527 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 6 Apr 2022 22:58:13 +0200 Subject: [PATCH 04/24] add NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 19fc575e0d..352b5bccc5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,6 +550,8 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. +54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `k` is `character` in `X` and a `factor` in `Y` (or vice-versa) and keyed in `X`, [[#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : From 5a3e3c771682b75e364977b88798fa1b4f902d25 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Apr 2022 21:40:17 -0700 Subject: [PATCH 05/24] typo --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 352b5bccc5..54c08c9630 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,7 +550,7 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. -54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `k` is `character` in `X` and a `factor` in `Y` (or vice-versa) and keyed in `X`, [[#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `k` is `character` in `X` and a `factor` in `Y` (or vice-versa) and keyed in `X`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ## NOTES From 8d70e6db69d34556ef5ec14d0aa5b2d1642cea89 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Apr 2022 21:42:23 -0700 Subject: [PATCH 06/24] sentence structure --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 54c08c9630..eabccfdcc7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,7 +550,7 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. -54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `k` is `character` in `X` and a `factor` in `Y` (or vice-versa) and keyed in `X`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is `character` in `X` and `factor` in `Y` (or vice-versa), [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ## NOTES From b1f4892945b6648a746ff680cda29cd019db2525 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 7 Apr 2022 10:18:47 +0200 Subject: [PATCH 07/24] state bug more precisely --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index eabccfdcc7..a27ba522b0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,7 +550,7 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. -54. `X[Y, on="k"]` and `merge(X, Y, by="k")` could return a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is `character` in `X` and `factor` in `Y` (or vice-versa), [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +54. `X[Y, on="k"]` and `merge(X, Y, by="k")` returned a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is a `factor`, whose levels are not sorted, in `X` and `character` in `Y`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ## NOTES From 2183cbb3a9d870cbeed02064e2c22fb2abe8f4aa Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 7 Apr 2022 12:17:54 +0200 Subject: [PATCH 08/24] unwield lengthy if with ws --- R/data.table.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 2bf1174033..683a70a2fb 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1324,9 +1324,21 @@ replace_dot_alias = function(e) { ## check key on i as well! ichk = is.data.table(i) && haskey(i) && identical(head(key(i), length(leftcols)), names_i[leftcols]) # i has the correct key, #3061 - if (keylen && (ichk || is.logical(i) || (.Call(CisOrderedSubset, irows, nrow(x)) && ((roll == FALSE) || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE - && (identical(vapply_1c(ans[icolsAns[leftcols]], typeof), vapply_1c(ans[xcolsAns[rightcols]], typeof)) || is.sorted(ans, by=head(key(x),keylen)))))) #5361 merging on different key types, check if really sorted (e.g., character + factors) - setattr(ans,"sorted",head(key(x),keylen)) + if ( + keylen + && ( + ichk + || is.logical(i) + || ( + .Call(CisOrderedSubset, irows, nrow(x)) + && (!roll || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE + && ( #5361 merging on keyed factor with character, check if resulting character is really sorted + identical(vapply_1c(i[,leftcols,with=FALSE], typeof), vapply_1c(x[,rightcols,with=FALSE], typeof)) # can only be not identical + || is.sorted(ans, by=head(key(x), keylen)) + ) + ) + ) + ) setattr(ans,"sorted",head(key(x),keylen)) } setattr(ans, "class", class(x)) # retain class that inherits from data.table, #64 setattr(ans, "row.names", .set_row_names(length(ans[[1L]]))) From f42d498e7b59b5c6e88a4bd7a33decd37383b803 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 7 Apr 2022 12:54:20 +0200 Subject: [PATCH 09/24] more tests --- inst/tests/tests.Rraw | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0fb73ac2c4..85019f3f69 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7133,19 +7133,28 @@ setkey(y) test(1483.4, y[x], data.table(country="US", key="country")) # 5361 merge on character and factor should only have key(x) if result is really sorted -x = data.table(x=c("c", "b", "a")) -y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a")), key="x") +let = c("a", "b", "c") +rlet = c("c", "b", "a") +x = data.table(x=rlet) +y = data.table(x=factor(let, levels=rlet), key="x") test(1483.51, x[y, on="x"], x) test(1483.52, y[x, on="x"], x) -test(1483.53, merge(x, y, by="x"), data.table(x=c("a", "b", "c"), key="x")) -test(1483.54, merge(y, x, by="x"), data.table(x=c("a", "b", "c"), key="x")) +test(1483.53, merge(x, y, by="x"), data.table(x=let, key="x")) +test(1483.54, merge(y, x, by="x"), data.table(x=let, key="x")) -x = data.table(x=c("c", "b", "a"), key="x") -y = data.table(x=factor(c("a", "b", "c"), levels=c("c", "b", "a"))) +x = data.table(x=rlet, key="x") +y = data.table(x=factor(let, levels=rlet)) test(1483.61, x[y, on="x"], x) -test(1483.62, y[x, on="x"], data.table(x=c("a", "b", "c"))) -test(1483.63, merge(x, y, by="x"), data.table(x=c("a", "b", "c"), key="x")) -test(1483.64, merge(y, x, by="x"), data.table(x=c("a", "b", "c"), key="x")) +test(1483.62, y[x, on="x"], data.table(x=let)) +test(1483.63, merge(x, y, by="x"), data.table(x=let, key="x")) +test(1483.64, merge(y, x, by="x"), data.table(x=let, key="x")) + +x = data.table(x=rlet, a=rlet) +y = data.table(x=factor(let, levels=rlet), b=let, key="x") +test(1483.71, x[y, on="x"], data.table(x=rlet, a=rlet, b=rlet)) +test(1483.72, y[x, on="x"], data.table(x=rlet, b=rlet, a=rlet)) +test(1483.73, merge(x, y, by="x"), data.table(x=let, a=let, b=let, key="x")) +test(1483.74, merge(y, x, by="x"), data.table(x=let, b=let, a=let, key="x")) # NULL items should be removed when making data.table from list, #842 # Original fix for #842 added a branch in as.data.table.list() using point() From 96437fb97c82e7fc34fefc489bad4dec1fe3e023 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 7 Apr 2022 18:04:22 +0200 Subject: [PATCH 10/24] extend NEWS for mirror casse --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a27ba522b0..77a1bad2cd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,7 +550,7 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. -54. `X[Y, on="k"]` and `merge(X, Y, by="k")` returned a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is a `factor`, whose levels are not sorted, in `X` and `character` in `Y`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +54. `X[Y, on="k"]` and `merge(Y, X, by="k")` as well as `merge(X, Y, by="k")` returned a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is a `factor`, whose levels are not sorted, in `X` and `character` in `Y`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ## NOTES From f45564a31396787cc756e1b354a9ed1d14705ae8 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sun, 26 Jun 2022 22:02:05 +0200 Subject: [PATCH 11/24] update NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 77a1bad2cd..039fa3db72 100644 --- a/NEWS.md +++ b/NEWS.md @@ -550,7 +550,7 @@ 53. `as.data.frame(DT, row.names=)` no longer silently ignores `row.names`, [#5319](https://github.com/Rdatatable/data.table/issues/5319). Thanks to @dereckdemezquita for the fix and PR, and @ben-schwen for guidance. -54. `X[Y, on="k"]` and `merge(Y, X, by="k")` as well as `merge(X, Y, by="k")` returned a wrongly keyed `data.table` if `X` is keyed by `k` and `k` is a `factor`, whose levels are not sorted, in `X` and `character` in `Y`, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +54. If the first column of a key is a factor column containing unsorted levels (`setkey()` does not order the levels) and it is joined to a character column, then an invalidly keyed result would be produced. This could lead to a subsequent error/warning or silent incorrect results. Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ## NOTES From 9ce6737d92d23190e972b753a7c4cfc6aee30db1 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 9 Sep 2024 22:23:36 +0200 Subject: [PATCH 12/24] vapply on single columns instead of whole subset --- R/data.table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 683a70a2fb..6b30ae3f00 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1324,6 +1324,7 @@ replace_dot_alias = function(e) { ## check key on i as well! ichk = is.data.table(i) && haskey(i) && identical(head(key(i), length(leftcols)), names_i[leftcols]) # i has the correct key, #3061 + dt_types = function(x, cols = seq_along(x)) vapply_1c(cols, function(j) typeof(x[[j]])) if ( keylen && ( @@ -1333,7 +1334,7 @@ replace_dot_alias = function(e) { .Call(CisOrderedSubset, irows, nrow(x)) && (!roll || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE && ( #5361 merging on keyed factor with character, check if resulting character is really sorted - identical(vapply_1c(i[,leftcols,with=FALSE], typeof), vapply_1c(x[,rightcols,with=FALSE], typeof)) # can only be not identical + identical(dt_types(i, leftcols), dt_types(x, rightcols)) # can only be not identical || is.sorted(ans, by=head(key(x), keylen)) ) ) From 608599c725de95b339de16b06ffc37fe7f2f55da Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Mon, 9 Sep 2024 23:04:58 +0200 Subject: [PATCH 13/24] use .shallow --- R/data.table.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 7d740b204f..aa6e70a0ed 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1337,7 +1337,6 @@ replace_dot_alias = function(e) { ## check key on i as well! ichk = is.data.table(i) && haskey(i) && identical(head(key(i), length(leftcols)), names_i[leftcols]) # i has the correct key, #3061 - dt_types = function(x, cols = seq_along(x)) vapply_1c(cols, function(j) typeof(x[[j]])) if ( keylen && ( @@ -1347,7 +1346,7 @@ replace_dot_alias = function(e) { .Call(CisOrderedSubset, irows, nrow(x)) && (!roll || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE && ( #5361 merging on keyed factor with character, check if resulting character is really sorted - identical(dt_types(i, leftcols), dt_types(x, rightcols)) # can only be not identical + identical(vapply_1c(.shallow(i, leftcols), typeof), vapply_1c(.shallow(x, rightcols), typeof)) # can only be not identical || is.sorted(ans, by=head(key(x), keylen)) ) ) From 0d5f5a5be27318b1dbaa145a02dcbf452356f630 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 16:24:51 +0000 Subject: [PATCH 14/24] suggested NEWS wording --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e7592d086f..cc76f9d235 100644 --- a/NEWS.md +++ b/NEWS.md @@ -70,7 +70,7 @@ 14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report and @jangorecki for the PR. -15. If the first column of a key is a factor column containing unsorted levels (`setkey()` does not order the levels) and it is joined to a character column, then an invalidly keyed result would be produced. This could lead to a subsequent error/warning or silent incorrect results. Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. +15. Fixed incorrect sorting of merges where the first column of a key is a factor with non-`sort()`-ed levels (e.g. `factor(1:2, 2:1)` and it is joined to a character column, [#5361](https://github.com/Rdatatable/data.table/issues/5361). Thanks to @gbrunick for the report and Benjamin Schwendinger for the fix. ### NOTES From c204850434d4cd4aa5e87077817830ec21880c59 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 16:26:43 +0000 Subject: [PATCH 15/24] add OPs original example more exactly to the regression test --- inst/tests/tests.Rraw | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8e96728fc8..b594ec4101 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7101,6 +7101,14 @@ test(1483.72, y[x, on="x"], data.table(x=rlet, b=rlet, a=rlet)) test(1483.73, merge(x, y, by="x"), data.table(x=let, a=let, b=let, key="x")) test(1483.74, merge(y, x, by="x"), data.table(x=let, b=let, a=let, key="x")) +some_letters <- c("c", "b", "a") +some_more_letters <- rep(c("a", "b", "c"), 2L) +dt1 <- data.table(x = some_letters, y=1:3) +dt2 <- data.table(x = factor(some_more_letters, levels=some_letters), z=1:6, key=c("x", "z")) +dt3 <- merge(dt1, dt2, by="x") +test(1483.81, key(dt3), NULL) +test(1483.82, nrow(dt3[x %in% "c", ]), 2L) + # NULL items should be removed when making data.table from list, #842 # Original fix for #842 added a branch in as.data.table.list() using point() # Then PR#3471 moved logic from data.table() into as.data.table.list() and now removes NULL items up front, so longer need for the branch From 6750e27307cab2173e0ef1c772179491a50f506d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 16:31:46 +0000 Subject: [PATCH 16/24] add some tests of multiple join columns --- inst/tests/tests.Rraw | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b594ec4101..8ff0a8178e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7086,6 +7086,12 @@ test(1483.51, x[y, on="x"], x) test(1483.52, y[x, on="x"], x) test(1483.53, merge(x, y, by="x"), data.table(x=let, key="x")) test(1483.54, merge(y, x, by="x"), data.table(x=let, key="x")) +x = data.table(i1=1:3, i2=rlet) +y = data.table(i1=1:3, i2=factor(let, levels=rlet), key="x") +test(1483.55, x[y, on=c("i1", "i2")], x) +test(1483.56, y[x, on=c("i1", "i2")], x) +test(1483.57, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) +test(1483.58, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i2", "i2"))) x = data.table(x=rlet, key="x") y = data.table(x=factor(let, levels=rlet)) @@ -7093,6 +7099,12 @@ test(1483.61, x[y, on="x"], x) test(1483.62, y[x, on="x"], data.table(x=let)) test(1483.63, merge(x, y, by="x"), data.table(x=let, key="x")) test(1483.64, merge(y, x, by="x"), data.table(x=let, key="x")) +x = data.table(i1=1:3, i2=rlet, key=c("i1", "i2")) +y = data.table(i1=1:3, i2=factor(let, levels=rlet)) +test(1483.65, x[y, on=c("i1", "i2")], x) +test(1483.66, y[x, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) +test(1483.67, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) +test(1483.68, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) x = data.table(x=rlet, a=rlet) y = data.table(x=factor(let, levels=rlet), b=let, key="x") From 49a8f1aa5507989af3014d90a46f385929f986ea Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 16:33:52 +0000 Subject: [PATCH 17/24] avoid using column x in table x --- inst/tests/tests.Rraw | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8ff0a8178e..2601b26c08 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7080,25 +7080,25 @@ test(1483.4, y[x], data.table(country="US", key="country")) # 5361 merge on character and factor should only have key(x) if result is really sorted let = c("a", "b", "c") rlet = c("c", "b", "a") -x = data.table(x=rlet) -y = data.table(x=factor(let, levels=rlet), key="x") -test(1483.51, x[y, on="x"], x) -test(1483.52, y[x, on="x"], x) -test(1483.53, merge(x, y, by="x"), data.table(x=let, key="x")) -test(1483.54, merge(y, x, by="x"), data.table(x=let, key="x")) +x = data.table(i=rlet) +y = data.table(i=factor(let, levels=rlet), key="i") +test(1483.51, x[y, on="i"], x) +test(1483.52, y[x, on="i"], x) +test(1483.53, merge(x, y, by="i"), data.table(i=let, key="i")) +test(1483.54, merge(y, x, by="i"), data.table(i=let, key="i")) x = data.table(i1=1:3, i2=rlet) -y = data.table(i1=1:3, i2=factor(let, levels=rlet), key="x") +y = data.table(i1=1:3, i2=factor(let, levels=rlet), key=c("i1", "i2")) test(1483.55, x[y, on=c("i1", "i2")], x) test(1483.56, y[x, on=c("i1", "i2")], x) test(1483.57, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) test(1483.58, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i2", "i2"))) -x = data.table(x=rlet, key="x") -y = data.table(x=factor(let, levels=rlet)) -test(1483.61, x[y, on="x"], x) -test(1483.62, y[x, on="x"], data.table(x=let)) -test(1483.63, merge(x, y, by="x"), data.table(x=let, key="x")) -test(1483.64, merge(y, x, by="x"), data.table(x=let, key="x")) +x = data.table(i=rlet, key="i") +y = data.table(i=factor(let, levels=rlet)) +test(1483.61, x[y, on="i"], x) +test(1483.62, y[x, on="i"], data.table(i=let)) +test(1483.63, merge(x, y, by="i"), data.table(i=let, key="i")) +test(1483.64, merge(y, x, by="i"), data.table(i=let, key="i")) x = data.table(i1=1:3, i2=rlet, key=c("i1", "i2")) y = data.table(i1=1:3, i2=factor(let, levels=rlet)) test(1483.65, x[y, on=c("i1", "i2")], x) @@ -7106,12 +7106,12 @@ test(1483.66, y[x, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) test(1483.67, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) test(1483.68, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) -x = data.table(x=rlet, a=rlet) -y = data.table(x=factor(let, levels=rlet), b=let, key="x") -test(1483.71, x[y, on="x"], data.table(x=rlet, a=rlet, b=rlet)) -test(1483.72, y[x, on="x"], data.table(x=rlet, b=rlet, a=rlet)) -test(1483.73, merge(x, y, by="x"), data.table(x=let, a=let, b=let, key="x")) -test(1483.74, merge(y, x, by="x"), data.table(x=let, b=let, a=let, key="x")) +x = data.table(i=rlet, a=rlet) +y = data.table(i=factor(let, levels=rlet), b=let, key="i") +test(1483.71, x[y, on="i"], data.table(i=rlet, a=rlet, b=rlet)) +test(1483.72, y[x, on="i"], data.table(i=rlet, b=rlet, a=rlet)) +test(1483.73, merge(x, y, by="i"), data.table(i=let, a=let, b=let, key="i")) +test(1483.74, merge(y, x, by="i"), data.table(i=let, b=let, a=let, key="i")) some_letters <- c("c", "b", "a") some_more_letters <- rep(c("a", "b", "c"), 2L) From c9a33af8e3c59d1482ecaa5643f5f5a0e19deba7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 10:00:31 -0700 Subject: [PATCH 18/24] =?UTF-8?q?attempt=20to=20refactor=20into=20huge=20h?= =?UTF-8?q?elper=20(=F0=9F=A4=9E)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/data.table.R | 76 +++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9e82b2c5f0..8b2d3618de 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1335,34 +1335,8 @@ replace_dot_alias = function(e) { ans[icolsAns] = .Call(CsubsetDT, i, ii, icols) ans[xcolsAns] = .Call(CsubsetDT, x, irows, xcols) setattr(ans, "names", ansvars) - if (haskey(x)) { - keylen = which.first(!key(x) %chin% ansvars)-1L - if (is.na(keylen)) keylen = length(key(x)) - len = length(rightcols) - # fix for #1268, #1704, #1766 and #1823 - chk = if (len && !missing(on)) !identical(head(key(x), len), names(on)) else FALSE - if ( (keylen>len || chk) && !.Call(CisOrderedSubset, irows, nrow(x))) { - keylen = if (!chk) len else 0L # fix for #1268 - } - ## check key on i as well! - ichk = is.data.table(i) && haskey(i) && - identical(head(key(i), length(leftcols)), names_i[leftcols]) # i has the correct key, #3061 - if ( - keylen - && ( - ichk - || is.logical(i) - || ( - .Call(CisOrderedSubset, irows, nrow(x)) - && (!roll || length(irows) == 1L) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE - && ( #5361 merging on keyed factor with character, check if resulting character is really sorted - identical(vapply_1c(.shallow(i, leftcols), typeof), vapply_1c(.shallow(x, rightcols), typeof)) # can only be not identical - || is.sorted(ans, by=head(key(x), keylen)) - ) - ) - ) - ) setattr(ans,"sorted",head(key(x),keylen)) - } + # NB: could be NULL + setattr(ans, "sorted", .join_result_key(x, i, if (!missing(on)) names(on), ansvars, leftcols, rightcols, names_i, irows, roll)) setattr(ans, "class", class(x)) # retain class that inherits from data.table, #64 setattr(ans, "row.names", .set_row_names(length(ans[[1L]]))) setalloccol(ans) @@ -2034,6 +2008,52 @@ replace_dot_alias = function(e) { setalloccol(ans) # TODO: overallocate in dogroups in the first place and remove this line } +# can the specified merge of x and i be marked as sorted? return the columns for which this is true, otherwise NULL +.join_result_key <- function(x, i, on_lhs, ansvars, leftcols, rightcols, names_i, irows, roll) { + x_key <- key(x) + if (is.null(x_key)) + return(NULL) + + key_length = which.first(!x_key %chin% ansvars) - 1L + if (is.na(key_length)) + key_length = length(x_key) + + rhs_length = length(rightcols) + # fix for #1268, #1704, #1766 and #1823 + chk = rhs_length && !is.null(on_lhs) && !identical(head(x_key, rhs_length), on_lhs) + if ( (key_length > rhs_length || chk) && !.Call(CisOrderedSubset, irows, nrow(x))) { + key_length = if (chk) 0L else rhs_length # fix for #1268 + } + + if (!key_length) + return(NULL) + + ## check key on i as well! + if (is.logical(i)) + return(head(x_key, key_length)) + + # i has the correct key, #3061 + if (identical(head(key(i), length(leftcols)), names_i[leftcols])) + return(head(x_key, key_length)) + + if (!.Call(CisOrderedSubset, irows, nrow(x))) + return(NULL) + + # see #1010. don't set key when i has no key, but irows is ordered and !roll + if (roll && length(irows) != 1L) + return(NULL) + + new_key <- head(x_key, key_length) + + #5361 merging on keyed factor with character, check if resulting character is really sorted + if (identical(vapply_1c(.shallow(i, leftcols), typeof), vapply_1c(.shallow(x, rightcols), typeof))) + return(new_key) + + if (!is.sorted(ans, by=new_key)) + return(NULL) + new_key +} + # What's the name of the top-level call in 'j'? # NB: earlier, we used 'as.character()' but that fails for closures/builtins (#6026). root_name = function(jsub) if (is.call(jsub)) paste(deparse(jsub[[1L]]), collapse = " ") else "" From 07e3acc4f77c27db81e71ca747e44adf59e9cd9f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 10:03:30 -0700 Subject: [PATCH 19/24] trailing ws --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 8b2d3618de..3b951d5975 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2048,7 +2048,7 @@ replace_dot_alias = function(e) { #5361 merging on keyed factor with character, check if resulting character is really sorted if (identical(vapply_1c(.shallow(i, leftcols), typeof), vapply_1c(.shallow(x, rightcols), typeof))) return(new_key) - + if (!is.sorted(ans, by=new_key)) return(NULL) new_key From fa3be6479996bb767716383854c106b26400ec57 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 10:08:13 -0700 Subject: [PATCH 20/24] fix (?) tests --- inst/tests/tests.Rraw | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2601b26c08..8158b75e43 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7088,10 +7088,10 @@ test(1483.53, merge(x, y, by="i"), data.table(i=let, key="i")) test(1483.54, merge(y, x, by="i"), data.table(i=let, key="i")) x = data.table(i1=1:3, i2=rlet) y = data.table(i1=1:3, i2=factor(let, levels=rlet), key=c("i1", "i2")) -test(1483.55, x[y, on=c("i1", "i2")], x) +test(1483.55, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) test(1483.56, y[x, on=c("i1", "i2")], x) -test(1483.57, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) -test(1483.58, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i2", "i2"))) +test(1483.57, merge(x, y, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) +test(1483.58, merge(y, x, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) x = data.table(i=rlet, key="i") y = data.table(i=factor(let, levels=rlet)) @@ -7101,10 +7101,10 @@ test(1483.63, merge(x, y, by="i"), data.table(i=let, key="i")) test(1483.64, merge(y, x, by="i"), data.table(i=let, key="i")) x = data.table(i1=1:3, i2=rlet, key=c("i1", "i2")) y = data.table(i1=1:3, i2=factor(let, levels=rlet)) -test(1483.65, x[y, on=c("i1", "i2")], x) -test(1483.66, y[x, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) -test(1483.67, merge(x, y, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) -test(1483.68, merge(y, x, by=c("i1", "i2")), data.table(i1=1:3, i2=let, key=c("i1", "i2"))) +test(1483.65, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) +test(1483.66, y[x, on=c("i1", "i2")], data.table(i1=1:3, i2=rlet)) +test(1483.67, merge(x, y, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) +test(1483.68, merge(y, x, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) x = data.table(i=rlet, a=rlet) y = data.table(i=factor(let, levels=rlet), b=let, key="i") @@ -7118,7 +7118,7 @@ some_more_letters <- rep(c("a", "b", "c"), 2L) dt1 <- data.table(x = some_letters, y=1:3) dt2 <- data.table(x = factor(some_more_letters, levels=some_letters), z=1:6, key=c("x", "z")) dt3 <- merge(dt1, dt2, by="x") -test(1483.81, key(dt3), NULL) +test(1483.81, key(dt3), "x") test(1483.82, nrow(dt3[x %in% "c", ]), 2L) # NULL items should be removed when making data.table from list, #842 From dc95644fb873f622f32417ad2aebb354cdd7bf3c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 17:31:24 +0000 Subject: [PATCH 21/24] need to pass 'ans' too --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 3b951d5975..a10b8e0831 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1336,7 +1336,7 @@ replace_dot_alias = function(e) { ans[xcolsAns] = .Call(CsubsetDT, x, irows, xcols) setattr(ans, "names", ansvars) # NB: could be NULL - setattr(ans, "sorted", .join_result_key(x, i, if (!missing(on)) names(on), ansvars, leftcols, rightcols, names_i, irows, roll)) + setattr(ans, "sorted", .join_result_key(x, i, ans, if (!missing(on)) names(on), ansvars, leftcols, rightcols, names_i, irows, roll)) setattr(ans, "class", class(x)) # retain class that inherits from data.table, #64 setattr(ans, "row.names", .set_row_names(length(ans[[1L]]))) setalloccol(ans) @@ -2009,7 +2009,7 @@ replace_dot_alias = function(e) { } # can the specified merge of x and i be marked as sorted? return the columns for which this is true, otherwise NULL -.join_result_key <- function(x, i, on_lhs, ansvars, leftcols, rightcols, names_i, irows, roll) { +.join_result_key <- function(x, i, ans, on_lhs, ansvars, leftcols, rightcols, names_i, irows, roll) { x_key <- key(x) if (is.null(x_key)) return(NULL) From 5ad17ade2b9e83d90cdec4a741f95f106914aef8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 17:31:37 +0000 Subject: [PATCH 22/24] don't reuse overloaded name 'let' --- inst/tests/tests.Rraw | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8158b75e43..6212ee477a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7078,40 +7078,40 @@ setkey(y) test(1483.4, y[x], data.table(country="US", key="country")) # 5361 merge on character and factor should only have key(x) if result is really sorted -let = c("a", "b", "c") +lett = c("a", "b", "c") rlet = c("c", "b", "a") x = data.table(i=rlet) -y = data.table(i=factor(let, levels=rlet), key="i") +y = data.table(i=factor(lett, levels=rlet), key="i") test(1483.51, x[y, on="i"], x) test(1483.52, y[x, on="i"], x) -test(1483.53, merge(x, y, by="i"), data.table(i=let, key="i")) -test(1483.54, merge(y, x, by="i"), data.table(i=let, key="i")) +test(1483.53, merge(x, y, by="i"), data.table(i=lett, key="i")) +test(1483.54, merge(y, x, by="i"), data.table(i=lett, key="i")) x = data.table(i1=1:3, i2=rlet) -y = data.table(i1=1:3, i2=factor(let, levels=rlet), key=c("i1", "i2")) -test(1483.55, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) +y = data.table(i1=1:3, i2=factor(lett, levels=rlet), key=c("i1", "i2")) +test(1483.55, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=lett)) test(1483.56, y[x, on=c("i1", "i2")], x) test(1483.57, merge(x, y, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) test(1483.58, merge(y, x, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) x = data.table(i=rlet, key="i") -y = data.table(i=factor(let, levels=rlet)) +y = data.table(i=factor(lett, levels=rlet)) test(1483.61, x[y, on="i"], x) -test(1483.62, y[x, on="i"], data.table(i=let)) -test(1483.63, merge(x, y, by="i"), data.table(i=let, key="i")) -test(1483.64, merge(y, x, by="i"), data.table(i=let, key="i")) +test(1483.62, y[x, on="i"], data.table(i=lett)) +test(1483.63, merge(x, y, by="i"), data.table(i=lett, key="i")) +test(1483.64, merge(y, x, by="i"), data.table(i=lett, key="i")) x = data.table(i1=1:3, i2=rlet, key=c("i1", "i2")) -y = data.table(i1=1:3, i2=factor(let, levels=rlet)) -test(1483.65, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=let)) +y = data.table(i1=1:3, i2=factor(lett, levels=rlet)) +test(1483.65, x[y, on=c("i1", "i2")], data.table(i1=1:3, i2=lett)) test(1483.66, y[x, on=c("i1", "i2")], data.table(i1=1:3, i2=rlet)) test(1483.67, merge(x, y, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) test(1483.68, merge(y, x, by=c("i1", "i2")), data.table(i1=2L, i2="b", key=c("i1", "i2"))) x = data.table(i=rlet, a=rlet) -y = data.table(i=factor(let, levels=rlet), b=let, key="i") +y = data.table(i=factor(lett, levels=rlet), b=lett, key="i") test(1483.71, x[y, on="i"], data.table(i=rlet, a=rlet, b=rlet)) test(1483.72, y[x, on="i"], data.table(i=rlet, b=rlet, a=rlet)) -test(1483.73, merge(x, y, by="i"), data.table(i=let, a=let, b=let, key="i")) -test(1483.74, merge(y, x, by="i"), data.table(i=let, b=let, a=let, key="i")) +test(1483.73, merge(x, y, by="i"), data.table(i=lett, a=lett, b=lett, key="i")) +test(1483.74, merge(y, x, by="i"), data.table(i=lett, b=lett, a=lett, key="i")) some_letters <- c("c", "b", "a") some_more_letters <- rep(c("a", "b", "c"), 2L) @@ -18578,8 +18578,8 @@ test(2251.15, dim(fread(text, fill=Inf)), c(9L, 9L)) .datatable.aware = FALSE dt = data.table(a = 1L) -test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.table-aware.*':='") -test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'") +test(2252.1, dt[, b:=2L], error = "\\[ was called on a dtest(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'ata.table.*not data.table-aware.*':='") +") rm(.datatable.aware) # tests for trunc.char handling wide characters # 5096 From 7c8d2f7108d05ccf0af0f156c1a6f7fb9f1751fd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 17:32:50 +0000 Subject: [PATCH 23/24] typo --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6212ee477a..68933abc2b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18578,8 +18578,8 @@ test(2251.15, dim(fread(text, fill=Inf)), c(9L, 9L)) .datatable.aware = FALSE dt = data.table(a = 1L) -test(2252.1, dt[, b:=2L], error = "\\[ was called on a dtest(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'ata.table.*not data.table-aware.*':='") -") +test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.table-aware.*':='") +test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'") rm(.datatable.aware) # tests for trunc.char handling wide characters # 5096 From 088f8766750991235854c3f11a226c57fdc0827d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 30 Jun 2025 18:30:07 +0000 Subject: [PATCH 24/24] remove apparently vestigial check --- R/data.table.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index a10b8e0831..73224742ea 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2028,10 +2028,6 @@ replace_dot_alias = function(e) { if (!key_length) return(NULL) - ## check key on i as well! - if (is.logical(i)) - return(head(x_key, key_length)) - # i has the correct key, #3061 if (identical(head(key(i), length(leftcols)), names_i[leftcols])) return(head(x_key, key_length))