diff --git a/.github/.gitignore b/.github/.gitignore index f920f889..2d19fc76 100644 --- a/.github/.gitignore +++ b/.github/.gitignore @@ -1,2 +1 @@ - *.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml deleted file mode 100644 index 38a50449..00000000 --- a/.github/workflows/R-CMD-check.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: [main, DEV] - pull_request: - branches: [main, DEV] - -name: R-CMD-check - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-pandoc@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::rcmdcheck - needs: check - - - uses: r-lib/actions/check-r-package@v2 - with: - upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index a7d8ffd2..bac8513c 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [DEV] + branches: [wbpage_dev] pull_request: - branches: [DEV] + branches: [wbpage_dev] release: types: [published] workflow_dispatch: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml deleted file mode 100644 index ddf8b7bd..00000000 --- a/.github/workflows/test-coverage.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: [main, DEV] - pull_request: - branches: [main, DEV] - -name: test-coverage - -jobs: - test-coverage: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::covr - needs: coverage - - - name: Test coverage - run: | - covr::codecov( - quiet = FALSE, - clean = FALSE, - install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") - ) - shell: Rscript {0} - - - name: Show testthat output - if: always() - run: | - ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true - shell: bash - - - name: Upload test results - if: failure() - uses: actions/upload-artifact@v3 - with: - name: coverage-test-failures - path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 061b0855..6247c85b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,10 +3,4 @@ .RData .Ruserdata -inst/doc -doc -Meta - docs -/doc/ -/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index ea03dbe2..0211bd6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and 
Complementary Join Features -Version: 0.2.3 +Version: 0.2.3.9000 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", diff --git a/_pkgdown.yml b/_pkgdown.yml index 04769651..6997f49a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: https://randrescastaneda.github.io/joyn/ +url: https://randrescastaneda.github.io/joyn/dev/ template: bootstrap: 5 bottswatch: cosmo diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index 4c817195..00000000 --- a/docs/404.html +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - -Page not found (404) • joyn - - - - - - - - - Skip to contents - - -
-
-
- -Content not found. Please use links in the navbar. - -
-
- - - -
- - - - - - - diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html deleted file mode 100644 index 21c01725..00000000 --- a/docs/LICENSE-text.html +++ /dev/null @@ -1,80 +0,0 @@ - -License • joyn - Skip to contents - - -
-
-
- -
YEAR: 2021
-COPYRIGHT HOLDER: joyn authors
-
- -
- - -
- - - - - - - diff --git a/docs/LICENSE.html b/docs/LICENSE.html deleted file mode 100644 index f9c90b4b..00000000 --- a/docs/LICENSE.html +++ /dev/null @@ -1,84 +0,0 @@ - -MIT License • joyn - Skip to contents - - -
-
-
- -
- -

Copyright (c) 2021 joyn authors

-

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

-

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-
- -
- - -
- - - - - - - diff --git a/docs/articles/adv-functionalities.html b/docs/articles/adv-functionalities.html deleted file mode 100644 index 6e363b4d..00000000 --- a/docs/articles/adv-functionalities.html +++ /dev/null @@ -1,408 +0,0 @@ - - - - - - - - -Advanced functionalities • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -
-
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-
-x <- data.table(id = c(1, 4, 2, 3, NA),
-                t  = c(1L, 2L, 1L, 2L, NA),
-                country = c(16, 12, 3, NA, 15))
-  
-y <- data.table(id  = c(1, 2, 5, 6, 3),
-                gdp = c(11L, 15L, 20L, 13L, 10L),
-                country = 16:20)
-
-

Advanced use -

-

This vignette will let you explore some additional features available -in joyn, through an example use case.

-

Suppose you want to join tables x and y, -where the variable country is available in both. You could do -one of five things:

-
-

1. Use variable country as one of the key variables -

-

If you don’t use the argument by, joyn will -consider country and id as key variables by default -given that they are common between x and -y.

-
-
-# The variables with the same name, `id` and `country`, are used as key
-# variables.
-
-joyn(x = x, 
-     y = y)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 4   44.4%
-#> 2     y 4   44.4%
-#> 3 x & y 1   11.1%
-#> 4 total 9    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id and country from id, gdp, and country
-#>       id     t country   gdp  .joyn
-#>    <num> <int>   <num> <int> <fctr>
-#> 1:     1     1      16    11  x & y
-#> 2:     4     2      12    NA      x
-#> 3:     2     1       3    NA      x
-#> 4:     3     2      NA    NA      x
-#> 5:    NA    NA      15    NA      x
-#> 6:     2    NA      17    15      y
-#> 7:     5    NA      18    20      y
-#> 8:     6    NA      19    13      y
-#> 9:     3    NA      20    10      y
-

Alternatively, you can specify to join by country

-
-
-# Joining by country
-
-joyn(x = x, 
-     y = y, 
-     by = "country")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 4   44.4%
-#> 2     y 4   44.4%
-#> 3 x & y 1   11.1%
-#> 4 total 9    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables country from id, gdp, and country
-#>       id     t country   gdp  .joyn
-#>    <num> <int>   <num> <int> <fctr>
-#> 1:     1     1      16    11  x & y
-#> 2:     4     2      12    NA      x
-#> 3:     2     1       3    NA      x
-#> 4:     3     2      NA    NA      x
-#> 5:    NA    NA      15    NA      x
-#> 6:    NA    NA      17    15      y
-#> 7:    NA    NA      18    20      y
-#> 8:    NA    NA      19    13      y
-#> 9:    NA    NA      20    10      y
-
-
-

2. Ignore the values of country from y and -don’t bring it into the resulting table -

-

This is the default if you do not include country as part of -the key variables in argument by.

-
-
-joyn(x = x, 
-     y = y, 
-     by = "id")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, gdp, and country
-#>       id     t country   gdp  .joyn
-#>    <num> <int>   <num> <int> <fctr>
-#> 1:     1     1      16    11  x & y
-#> 2:     4     2      12    NA      x
-#> 3:     2     1       3    15  x & y
-#> 4:     3     2      NA    10  x & y
-#> 5:    NA    NA      15    NA      x
-#> 6:     5    NA      NA    20      y
-#> 7:     6    NA      NA    13      y
-
-
-

3. Update only NAs in table x -

-

Another possibility is to make use of the update_NAs -argument of joyn(). This allows you to update the NA -values in variable country in table x with the -actual values of the matching observations in country from -table y. In this case, actual values in country from table x -will remain unchanged.

-
-
-joyn(x = x,
-     y = y, 
-     by = "id", 
-     update_NAs = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>        .joyn n percent
-#> 1          x 2   28.6%
-#> 2      x & y 2   28.6%
-#> 3 NA updated 3   42.9%
-#> 4      total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, gdp, and country
-#>       id     t country   gdp      .joyn
-#>    <num> <int>   <num> <int>     <fctr>
-#> 1:     1     1      16    11      x & y
-#> 2:     4     2      12    NA          x
-#> 3:     2     1       3    15      x & y
-#> 4:     3     2      20    10 NA updated
-#> 5:    NA    NA      15    NA          x
-#> 6:     5    NA      18    20 NA updated
-#> 7:     6    NA      19    13 NA updated
-
-
-

4. Update actual values in table x -

-

You can also update all the values - both NAs and actual - in -variable country of table x with the actual values -of the matching observations in country from y. -This is done by setting update_values = TRUE.

-

Notice that the reportvar allows you to keep track of how -the update worked. In this case, value updated means that only -the values that are different between country from -x and country from y are updated.

-

However, let’s consider other possible cases:

-
    -
  • If, for the same matching observations, the values between the -two country variables were the same, the reporting variable -would report x & y instead (so you know that there is no -update to make).

  • -
  • If there are NAs in country from y, the -actual values in x will be unchanged, and you would see a -not updated status in the reporting variable. Nevertheless, -notice there is another way for you to bring country from -y to x. This is done through the argument -keep_common_vars (see 5. below ⬇️)

  • -
-
-
-# Notice that only the values that are different between x and y are updated
-
-joyn(x = x, 
-     y = y, 
-     by = "id", 
-     update_values = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>           .joyn n percent
-#> 1    NA updated 3   42.9%
-#> 2 value updated 2   28.6%
-#> 3   not updated 2   28.6%
-#> 4         total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, gdp, and country
-#>       id     t country   gdp         .joyn
-#>    <num> <int>   <num> <int>        <fctr>
-#> 1:     1     1      16    11 value updated
-#> 2:     4     2      12    NA   not updated
-#> 3:     2     1      17    15 value updated
-#> 4:     3     2      20    10    NA updated
-#> 5:    NA    NA      15    NA   not updated
-#> 6:     5    NA      18    20    NA updated
-#> 7:     6    NA      19    13    NA updated
-
-
-

5. Keep original country variable from y into returning -table -

-
-

(Keep matching-names variable from y into x -not updating values in -x) -

-

Another available option is that of bringing the original variable -country from y into the resulting table, without -using it to update the values in x. In order to distinguish -country from x and country from -y, joyn will assign a suffix to the variable’s -name: so that you will get country.y and country.x. -All of this can be done specifying -keep_common_vars = TRUE.

-
-
-joyn(x = x, 
-     y = y, 
-     by = "id", 
-     keep_common_vars = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, gdp, and country
-#>       id     t country.x   gdp country.y  .joyn
-#>    <num> <int>     <num> <int>     <int> <fctr>
-#> 1:     1     1        16    11        16  x & y
-#> 2:     4     2        12    NA        NA      x
-#> 3:     2     1         3    15        17  x & y
-#> 4:     3     2        NA    10        20  x & y
-#> 5:    NA    NA        15    NA        NA      x
-#> 6:     5    NA        NA    20        18      y
-#> 7:     6    NA        NA    13        19      y
-
-
-
-

Bring other variables from y into returning table -

-

In joyn, you can also bring non-common variables from -y into the resulting table. You can specify them in -y_vars_to_keep, as shown in the example below:

-
-
-# Keeping variable gdp 
-
-joyn(x = x, 
-     y = y, 
-     by = "id", 
-     y_vars_to_keep = "gdp")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#>       id     t country   gdp  .joyn
-#>    <num> <int>   <num> <int> <fctr>
-#> 1:     1     1      16    11  x & y
-#> 2:     4     2      12    NA      x
-#> 3:     2     1       3    15  x & y
-#> 4:     3     2      NA    10  x & y
-#> 5:    NA    NA      15    NA      x
-#> 6:     5    NA      NA    20      y
-#> 7:     6    NA      NA    13      y
-

Notice that if you set y_vars_to_keep = FALSE or -y_vars_to_keep = NULL, then joyn won’t bring -any variable from y into the returning table.

-
-
-
-
- - - - -
- - - - - - - diff --git a/docs/articles/aux-functions.html b/docs/articles/aux-functions.html deleted file mode 100644 index 3bfac999..00000000 --- a/docs/articles/aux-functions.html +++ /dev/null @@ -1,269 +0,0 @@ - - - - - - - - -Additional functions • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -
-
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-

This vignette will give you a brief overview of how you can use some -auxiliary functions that joyn makes available to the -user.

-
-

Verifying if dt is uniquely identified -

-

One of the advantages of joyn is that you can perform -one-to-one (1:1), one-to-many (1:m), many-to-one (m:1), and many-to-many -(m:m) joins. is_id() is a function that might come in handy -when you want to check whether your data table is uniquely identified by -the variables you want to merge by. In fact this is what -is_id() checks by default, returning either TRUE or FALSE -depending on whether the data table is uniquely identified or not. -Alternatively, you can set return_report = TRUE to get a -summary of the duplicates.

-
-
-x1 <- data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
-                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x  = 11:15,
-                 c  = c("a", "b", "a", "t", "d"),
-                 c1 = c("h", "j", "k", "l", "y"))
-
-y1 <- data.table(id = c(1,2, 4),
-                 y  = c(11L, 15L, 16))
-
-# Checking if x1 is uniquely identified by "id" (default: return_report = FALSE)
-
-is_id(dt = x1, 
-      by = "id")
-#> 
-#> ── Duplicates in terms of `id`
-#>   copies n percent
-#> 1      1 3     75%
-#> 2      2 1     25%
-#> 3  total 4    100%
-#> ─────────────────────────────────────────────────────── End of is_id() report ──
-#> [1] FALSE
-
-# Checking duplicates in x1 with return_report = FALSE
-
-is_id(dt = x1, 
-      by = "id", 
-      return_report = FALSE)
-#> 
-#> ── Duplicates in terms of `id`
-#>   copies n percent
-#> 1      1 3     75%
-#> 2      2 1     25%
-#> 3  total 4    100%
-#> ─────────────────────────────────────────────────────── End of is_id() report ──
-#> [1] FALSE
-
-
-

Possible unique identifiers -

-

In joyn, you can also search for variables which -possibly uniquely identify your data table x using the -possible_ids() function. For example,

-
-
-# Identify possible unique identifier excluding variable t
-possible_ids(dt      = x1, 
-             exclude = "t")
-#>  There are no duplicates in data frame
-#> → we found 2 possible ids
-#> $V1
-#> [1] "x"
-#> 
-#> $V2
-#> [1] "c1"
-
-# Identify possible unique identifier excluding character variables
-possible_ids(dt      = x1, 
-             exclude = "_character")
-#>  There are no duplicates in data frame
-#> → we found 1 possible id
-#> $V1
-#> [1] "x"
-
-# Identify possible unique identifiers, excluding character variables but considering variable z
-possible_ids(dt      = x1, 
-             exclude = "_character",
-             include = "z")
-#>  There are no duplicates in data frame
-#> → we found 1 possible id
-#> $V1
-#> [1] "x"
-
-
-

Verifying if data table is balanced -

-

Additionally, joyn makes available to the user the -is_balanced() function. This is instrumental in assessing -the completeness of the data table within a specified group, i.e., if -the table contains all the combinations of observations in the group. By -default, is_balanced() will tell you whether or not the table is -balanced. However, if you set return = "table", you will -get a summary of the unbalanced observations. In other words, those -combinations of elements between the specified variables that are not -contained in the input table.

-
-
-# Example with return = "logic", the default
-
-is_balanced(df = x1,
-            by = c("id", "t"))
-#> [1] FALSE
-
-# Example with return = "table"
-is_balanced(df = x1,
-            by = c("id", "t"), 
-            return = "table")
-#>   id t
-#> 1  3 1
-#> 2  2 2
-
-
-

Tabulating simple frequencies -

-

Furthermore, joyn provides a function that generates -simple frequency tables, so that you can easily have an overview of the -distribution of values within your data tables.

-
-
-# Tabulating frequencies of var `id`
-
-freq_table(x     = x1, 
-           byvar = "id")[]
-#>      id n percent
-#> 1     1 2     40%
-#> 2     2 1     20%
-#> 3     3 1     20%
-#> 4  <NA> 1     20%
-#> 5 total 5    100%
-
-# Removing NAs from the calculation
-
-freq_table(x     = x1, 
-           byvar = "id", 
-           na.rm = TRUE)[]
-#>      id n percent
-#> 1     1 2     50%
-#> 2     2 1     25%
-#> 3     3 1     25%
-#> 4 total 4    100%
-
-
-
- - - - -
- - - - - - - diff --git a/docs/articles/dplyr-joins.html b/docs/articles/dplyr-joins.html deleted file mode 100644 index 94b24148..00000000 --- a/docs/articles/dplyr-joins.html +++ /dev/null @@ -1,454 +0,0 @@ - - - - - - - - -dplyr joins wrappers • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -
-

Overview -

-

Joining data tables with joyn is particularly convenient -as it allows you to analyze/be aware of the quality of the merging.

-

This vignette explores dplyr-like join functions available in -joyn. Their major objective is to let you employ a syntax -you are presumably already familiar with - the dplyr one - -while at the same time benefiting from the additional tools that -joyn offers. That is, obtaining additional information and -verification of the joining.

-

There are four types of dplyr-like join functions in -joyn:

- -

Each of them is a wrapper that works in a similar way as the -corresponding dplyr function.

-
-
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-
-
-

Rationale -

-
-
-x1 <- data.table(id  = c(1L, 1L, 2L, 3L, NA_integer_),
-                 t   = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x   = 11:15)
-
-y1 <- data.table(id  = c(1,2, 4),
-                 y   = c(11L, 15L, 16))
-

Suppose you want to perform a simple left join -between tables x1 and y1.

-

With joyn you have two possibilities:

- -

In addition, you could use dplyr::left_join() or base R -merging functions.

-

Consider these three options:

-
-
-# Option 1
-
-joyn(x          = x1, 
-     y          = y1, 
-     keep       = "left",
-     match_type = "m:1")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2     40%
-#> 2     y 1     20%
-#> 3 x & y 2     40%
-#> 4 total 5    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-
-# Option 2 
-
-joyn::left_join(x            = x1, 
-                y            = y1, 
-                relationship = "many-to-one") 
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2     40%
-#> 2     y 1     20%
-#> 3 x & y 2     40%
-#> 4 total 5    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ⚠ Warning: joyn does not currently allow inequality joins, so keep = NULL will
-#> retain only keys in x
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-
-# Option 3
-
-dplyr::left_join(x            = x1, 
-                 y            = y1, 
-                 relationship = "many-to-one") 
-#> Joining with `by = join_by(id)`
-#>       id     t     x     y
-#>    <num> <int> <int> <num>
-#> 1:     1     1    11    11
-#> 2:     1     2    12    11
-#> 3:     2     1    13    15
-#> 4:     3     2    14    NA
-#> 5:    NA    NA    15    NA
-

Comparing the results, the same returning data table is produced.

-

However, joyn::left_join() allows you to enjoy both the -intuitive syntax from dplyr and the additional tools from -joyn. These include additional options to customize how the -join is performed, the availability of the joyn report, messages -informing you on time of execution and the status of the join as well as -the execution of various checks during the merging. (For additional -information on each of these joyn’s features, please take a -look at all the other articles in this website.)

-
-
-

Some examples -

-
-

1. Left join -

-

ℹ️ Left joins return in the output table all rows from -x, i.e., the left table, and only matching rows from -y, i.e., the right table.

-
-
-# Data tables to be joined 
-
-df1 <- data.frame(id = c(1L, 1L, 2L, 3L, NA_integer_, NA_integer_),
-                  t  = c(1L, 2L, 1L, 2L, NA_integer_, 4L),
-                  x  = 11:16)
-
-df2 <- data.frame(id = c(1,2, 4, NA_integer_, 8),
-                  y  = c(11L, 15L, 16, 17L, 18L),
-                  t  = c(13:17))
-

Example usage of some of the joyn’s additional -options:

-

Updating NAs in left table

-

Using the update_NAs argument from joyn you -can update the values that are NA in the t variable in the left -table with the actual values from the matching column t in the -right one

-
-
-left_join(x            = df1, 
-          y            = df2,
-          relationship = "many-to-one", 
-          by           = "id",
-          update_NAs   = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>        .joyn n percent
-#> 1          x 1   16.7%
-#> 2      x & y 4   66.7%
-#> 3 NA updated 1   16.7%
-#> 4      total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, y, and t
-#>   id t.x  x  y t.y      .joyn
-#> 1  1   1 11 11  13      x & y
-#> 2  1   2 12 11  13      x & y
-#> 3  2   1 13 15  14      x & y
-#> 4  3   2 14 NA  NA          x
-#> 5 NA  16 15 17  16 NA updated
-#> 6 NA   4 16 17  16      x & y
-

Specifying which variables to keep from the right table -after the join

-
-
-left_join(x              = df1, 
-          y              = df2,
-          relationship   = "many-to-one", 
-          by             = "id", 
-          y_vars_to_keep = "y")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 1   16.7%
-#> 2     y 2   33.3%
-#> 3 x & y 3     50%
-#> 4 total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#>   id  t  x  y .joyn
-#> 1  1  1 11 11 x & y
-#> 2  1  2 12 11 x & y
-#> 3  2  1 13 15 x & y
-#> 4  3  2 14 NA     x
-#> 5 NA NA 15 17 x & y
-#> 6 NA  4 16 17 x & y
-
-
-

2. Right join -

-

ℹ️ Right joins return in the output table matching rows from -x, i.e., the left table, and all rows from y, -i.e., the right table.

-

Example usage of some of the joyn’s additional -options:

-

Specifying a name for the reporting -variable

-
-
-right_join(x            = df1, 
-          y            = df2,
-          relationship = "many-to-one", 
-          by           = "id",
-          reportvar    = "right.joyn")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   right.joyn n percent
-#> 1          x 1   14.3%
-#> 2          y 2   28.6%
-#> 3      x & y 4   57.1%
-#> 4      total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable right.joyn
-#> ℹ Note: Removing key variables id from id, y, and t
-#>   id t.x  x  y t.y right.joyn
-#> 1  1   1 11 11  13      x & y
-#> 2  1   2 12 11  13      x & y
-#> 3  2   1 13 15  14      x & y
-#> 4  4  NA NA 16  15          y
-#> 5  8  NA NA 18  17          y
-#> 6 NA  NA 15 17  16      x & y
-#> 7 NA   4 16 17  16      x & y
-

Updating values in common variables

-

By setting update_values = TRUE, all values in x (both -NAs and not) will be updated with the actual values of variables in y -with the same name as the ones in x. You can then see the status of the -update in the reporting variable.

-
-
-right_join(x            = df1, 
-           y            = df2,
-           relationship = "many-to-one", 
-           by           = "id",
-           reportvar    = "right.joyn")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   right.joyn n percent
-#> 1          x 1   14.3%
-#> 2          y 2   28.6%
-#> 3      x & y 4   57.1%
-#> 4      total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable right.joyn
-#> ℹ Note: Removing key variables id from id, y, and t
-#>   id t.x  x  y t.y right.joyn
-#> 1  1   1 11 11  13      x & y
-#> 2  1   2 12 11  13      x & y
-#> 3  2   1 13 15  14      x & y
-#> 4  4  NA NA 16  15          y
-#> 5  8  NA NA 18  17          y
-#> 6 NA  NA 15 17  16      x & y
-#> 7 NA   4 16 17  16      x & y
-
-
-

3. Full join -

-

ℹ️ Full joins return in the output table all rows, both matching and -non matching rows from x, i.e., the left table, and -y, i.e., the right table.

-
-
-full_join(x = x1, 
-          y = y1, 
-          relationship = "many-to-one", 
-          keep = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   33.3%
-#> 2     y 1   16.7%
-#> 3 x & y 3     50%
-#> 4 total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id.y, id, and y
-#>       id     t     x  id.y     y  .joyn
-#>    <num> <int> <int> <num> <num> <fctr>
-#> 1:     1     1    11     1    11  x & y
-#> 2:     1     2    12     1    11  x & y
-#> 3:     2     1    13     2    15  x & y
-#> 4:     3     2    14    NA    NA      x
-#> 5:     4    NA    NA     4    16      y
-#> 6:    NA    NA    15    NA    NA      x
-
-
-

4. Inner join -

-

ℹ️ Inner joins return in the output table only rows that match -between x, i.e., the left table, and y, i.e., -the right table.

-

Simple inner join

-
-
-inner_join(x            = df1, 
-           y             = df2,
-           relationship  = "many-to-one", 
-           by            = "id")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 1     20%
-#> 2     y 2     40%
-#> 3 x & y 2     40%
-#> 4 total 5    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, y, and t
-#>   id t.x  x  y t.y .joyn
-#> 1  1   1 11 11  13 x & y
-#> 2  1   2 12 11  13 x & y
-#> 3  2   1 13 15  14 x & y
-#> 4 NA  NA 15 17  16 x & y
-#> 5 NA   4 16 17  16 x & y
-
-
-
-
- - - - -
- - - - - - - diff --git a/docs/articles/index.html b/docs/articles/index.html deleted file mode 100644 index 30e5a8d5..00000000 --- a/docs/articles/index.html +++ /dev/null @@ -1,92 +0,0 @@ - -Articles • joyn - Skip to contents - - -
-
-
- - -
- - -
- - - - - - - diff --git a/docs/articles/main-functionalities.html b/docs/articles/main-functionalities.html deleted file mode 100644 index 9f89b9c3..00000000 --- a/docs/articles/main-functionalities.html +++ /dev/null @@ -1,599 +0,0 @@ - - - - - - - - -Main functionalities • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-
-

Overview -

-

📌 In joyn, there are two major sets of tools to join -data tables:

-
    -
  1. The primary function joyn()

  2. -
  3. Dplyr-like join functions: left_join(), -right_join(), full_join(), -inner_join()

  4. -
-

This vignette will explore the main function joyn(). You -can read about dplyr-joins in the “dplyr-joins” article -instead.

-
-
-

General use -

-
-
-library(joyn)
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-
-x1 <- data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
-                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x  = 11:15)
-
-y1 <- data.table(id = c(1,2, 4),
-                 y  = c(11L, 15L, 16))
-
-
-x2 <- data.table(id = c(1, 4, 2, 3, NA),
-                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x  = c(16, 12, NA, NA, 15))
-
-
-y2 <- data.table(id = c(1, 2, 5, 6, 3),
-                 yd = c(1, 2, 5, 6, 3),
-                 y  = c(11L, 15L, 20L, 13L, 10L),
-                 x  = c(16:20))
-
-x3 <- data.table(id  = c("c","b", "d", "d"),
-                 v   = 8:11,
-                 foo = c(4,2, 7, 3))
-
-y3 <- data.table(id = c("c","b", "c", "a"),
-                 y  = c(11L, 15L, 18L, 20L))
-
-
-x4 <- data.table(id1 = c(1, 1, 2, 3, 3),
-                 id2 = c(1, 1, 2, 3, 4),
-                 t   = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x   = c(16, 12, NA, NA, 15))
-
-y4 <- data.table(id  = c(1, 2, 5, 6, 3),
-                 id2 = c(1, 1, 2, 3, 4),
-                 y   = c(11L, 15L, 20L, 13L, 10L),
-                 x   = c(16:20))
-
-
-x5 <- data.table(id      = c(1, 4, 2, 3, NA),
-                 t       = c(1L, 2L, 1L, 2L, NA),
-                 country = c(16, 12, 3, NA, 15))
-  
-y5 <- data.table(id      = c(1, 2, 2, 6, 3),
-                 gdp     = c(11L, 15L, 20L, 13L, 10L),
-                 country = 16:20)
-
-

The basics -

-

Let’s suppose that you want to join the two tables x1 -and y1.

-
-
-# Calling joyn() to join x1 and y1
-
-joyn(x = x1,
-     y = y1, 
-     match_type = "m:1" ) #Note RT: remove this argument once fixing the default value
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-#> 6:     4    NA    NA    16      y
-

The output table is the result of a full join -which is what -joyn always executes by default. This means that the -returned table will retain both matching and non-matching rows from -both x1 and y1. Notice that the resulting -table also contains an additional variable called .joyn, -which is the reporting variable. (Read below ⬇️)

-
-

Reporting variable -

-

A particular feature of joyn is that it includes the -reportvar in the returning table, which -informs you about the status of the join. You can modify both the name -and the format of the reporting variable as follows:

-
    -
  • Name: by default reportvar = ".joyn", but you can -modify it with reportvar = "myname" specifying the name you -want to assign

  • -
  • Format: by default reporttype = "character" , but -you can also set it to numeric using -reporttype = "numeric"

  • -
-

You can see the difference between the two types in the table below1:

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
numericcharactermeaning
1xObs only available in x table
2yObs only available in y table
3x & yMatching obs available in both tables
4NA updatedNAs in x updated with actual values in variables with -same names in y
5value updatedActual values and NAs in x updated with actual values -in variables with same names in y
6not updatedActual values and NAs in x are NOT updated with actual -values in y
-
-
-

Key variables -

-

When performing a join, you might want to specify which variable(s) -joyn should join by.

-

While by default joyn will consider the variable(s) in -common between x and y as key(s) for the join, -our suggestion is to make the keys explicit - i.e., specifying it/them -in the by argument

-
-
-# Join with one variable in common
-
-joyn(x = x1,
-     y = y1, 
-     by = "id", 
-     match_type = "m:1")
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-#> 6:     4    NA    NA    16      y
-

If you don’t want to join by all variables in common between -x and y, you can alternatively use an equivalence -as an element of the by vector. This specification allows you -to join on different variables between x and -y.

-
-
-joyn(x = x4, 
-     y = y4, 
-     by = c("id1 = id", "id2"), 
-     match_type = "m:m")
-#>      id1   id2     t     x     y  .joyn
-#>    <num> <num> <int> <num> <int> <fctr>
-#> 1:     1     1     1    16    11  x & y
-#> 2:     1     1     2    12    11  x & y
-#> 3:     2     2     1    NA    NA      x
-#> 4:     3     3     2    NA    NA      x
-#> 5:     3     4    NA    15    10  x & y
-#> 6:     2     1    NA    NA    15      y
-#> 7:     5     2    NA    NA    20      y
-#> 8:     6     3    NA    NA    13      y
-

Also, notice that joyn will sort the -resulting table by key variables in by. This is because -sort = TRUE by default.

-
-
-
-

Match type -

-

💡Match type refers to the relationship that exists between the -observations of the joining tables. The ability to perform joins -based on the match type is one of the added values of using -joyn.

-

Following Stata’s convention, we can have four different match -types:

-
    -
  1. 1:1 (one to one): the -default2, the variables specified in by -variables uniquely identify single observations in both tables –> each -observation in the left table has a unique match in the right table and -vice versa

  2. -
  3. 1:m (one to many): only left table is uniquely -identified by byvariables –> each observation in -by var of the left table can have multiple matches in -by var of the right table

  4. -
  5. m:1 (many to one): only right table is uniquely -identified by byvar -> each observation in left table -can have only one match in the right table but observations in the right -table might have multiple matches in the left table

  6. -
  7. m:m (many to many): variables in by -do not uniquely identify the observations in either table –> both -tables can have multiple matches for each observation

  8. -
-

We recommend you always specify the match type when joining tables to -ensure the output is correct.

-
-
-# Many to one match type
-joyn(x = x1,
-     y = y1,
-     by = "id",
-     match_type = "m:1")
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-#> 6:     4    NA    NA    16      y
-
-# Many to many match type
-joyn(x = x3,
-     y = y3,
-     by = "id",
-     match_type = "m:m")
-#>        id     v   foo     y  .joyn
-#>    <char> <int> <num> <int> <fctr>
-#> 1:      c     8     4    11  x & y
-#> 2:      c     8     4    18  x & y
-#> 3:      b     9     2    15  x & y
-#> 4:      d    10     7    NA      x
-#> 5:      d    11     3    NA      x
-#> 6:      a    NA    NA    20      y
-
-# One to one match type - the default
-joyn(x = x2,
-     y = y2,
-     by = "id",
-     match_type = "1:1")
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-
-# Same join as:
-
-joyn(x = x2,
-     y = y2,
-     by = "id")
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-
-
-# One to many match type 
-joyn(x = x5,
-     y = y5,
-     by = "id",
-     match_type = "1:m")
-#>       id     t country   gdp  .joyn
-#>    <num> <int>   <num> <int> <fctr>
-#> 1:     1     1      16    11  x & y
-#> 2:     4     2      12    NA      x
-#> 3:     2     1       3    15  x & y
-#> 4:     2     1       3    20  x & y
-#> 5:     3     2      NA    10  x & y
-#> 6:    NA    NA      15    NA      x
-#> 7:     6    NA      NA    13      y
-

However, if you are unsure/wrong about the relationships between the -observations in your tables, joyn will let you know that -something is not right. Suppose you think your data is uniquely -identified by variable id, while it is not. By setting -match_type = "1:1" you will get an error, informing you -that the match type is not as expected.

-
-
-# Merging correctly but getting error because something is not right in the data
-joyn(x3, y3, by = "id", match_type = "1:1")
-#> ✖ Error: table x is not uniquely identified by id
-#> ✖ Error: table y is not uniquely identified by id
-#> Error in `check_match_type()`:
-#> ! match type inconsistency
-#>  set verbose to TRUE to see where the issue is
-
-# Omitting match_type does not bypass the check: the default is "1:1", so the same error is raised
-joyn(x3, y3, by = "id")
-#> ✖ Error: table x is not uniquely identified by id
-#> ✖ Error: table y is not uniquely identified by id
-#> Error in `check_match_type()`:
-#> ! match type inconsistency
-#>  set verbose to TRUE to see where the issue is
-

If instead you don’t care about match types or you don’t think it is -necessary to use them for your particular needs, you might be fine -without joyn.

-
-
-

Type of join -

-

Join type determines which observations will be kept after the join. -joyn() allows you to choose which type of join to execute -via the keep argument.

-

This argument is called keep rather than -join_type to avoid confusion with the argument -match_type, and in order to reflect that what you are -specifying in the end is which observations you want to keep. This -argument plays the role of allowing joyn() to mimic the -behavior of dplyr’s functions left_join, -right_join, inner_join, and -full_join, the default.

-

keep can be of four types:

-
    -
  • -

    keep = "full": the default, which -keeps all the observations in x and y, regardless of whether -they match or not.

    -
    -
    -# Full join 
    -
    -joyn(x          = x1, 
    -     y          = y1, 
    -     match_type = "m:m")
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:    NA    NA    15    NA      x
    -#> 6:     4    NA    NA    16      y
    -
  • -
  • -

    keep = "left" or -keep = "master" : keeps all observations -in x, both matching and non, and only those observations in -y that match in x

    -
    -
    -# keep obs in x
    -
    -joyn(x          = x1, 
    -     y          = y1, 
    -     keep       = "left", 
    -     match_type = "m:m")
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:    NA    NA    15    NA      x
    -
  • -
  • -

    keep = "right" or -keep = "using" keeps all observations in -y, both matching and non, and only those observations in -x that match in y

    -
    -
    -# keep obs in y
    -
    -joyn(x          = x1, 
    -     y          = y1,
    -     keep       = "right", 
    -     match_type = "m:m")
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     4    NA    NA    16      y
    -
  • -
  • -

    keep = "inner" keeps only those -observations that match in both tables.

    -
    -
    -# keep matching obs in both tables
    -
    -joyn(x1, y1, keep = "inner", match_type = "m:m")
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -
  • -
-
-
-
-

An important feature: JOYn report and info display -

-

Recall that joyn is intended to be informative about the -status and quality of the merging.

-

📊 JOYn report

-

By default, joyn returns the JOYn report , -i.e., a summary table of the merging. This includes the reporting -variable, the number of rows that come from x, the number -of rows that come from y and those that are common to both -x and y. This info is also shown in percentage -form in the percent column.

-
-
-joyn(x = x3, 
-     y = y3, 
-     by = "id", 
-     match_type = "m:m",
-     verbose = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   33.3%
-#> 2     y 1   16.7%
-#> 3 x & y 3     50%
-#> 4 total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#>        id     v   foo     y  .joyn
-#>    <char> <int> <num> <int> <fctr>
-#> 1:      c     8     4    11  x & y
-#> 2:      c     8     4    18  x & y
-#> 3:      b     9     2    15  x & y
-#> 4:      d    10     7    NA      x
-#> 5:      d    11     3    NA      x
-#> 6:      a    NA    NA    20      y
-

📝 Displaying messages

-

One of the added values of joyn is that it produces a -number of messages that are intended to inform you about the status of -the join. The display of such messages is controlled by the argument -verbose, which allows you to show -(verbose = TRUE) or silence (verbose = FALSE) -any messages.

-

To further explore messages in joyn, please refer to the -“Messages” article.

-
- -
-
- - - - -
- - - - - - - diff --git a/docs/articles/merge-wrapper.html b/docs/articles/merge-wrapper.html deleted file mode 100644 index 87671781..00000000 --- a/docs/articles/merge-wrapper.html +++ /dev/null @@ -1,311 +0,0 @@ - - - - - - - - -`data.table::merge()` wrapper • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -
-
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-
- x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
-                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x  = 11:15)
- y1 = data.table(id = c(1,2, 4),
-                 y  = c(11L, 15L, 16))
- 
- x2 = data.table(id1 = c(1, 1, 2, 3, 3),
-                 id2 = c(1, 1, 2, 3, 4),
-                 t   = c(1L, 2L, 1L, 2L, NA_integer_),
-                 x   = c(16, 12, NA, NA, 15))
- 
- y2 = data.table(id  = c(1, 2, 5, 6, 3),
-                 id2 = c(1, 1, 2, 3, 4),
-                 y   = c(11L, 15L, 20L, 13L, 10L),
-                 x   = c(16:20))
- 
-

This vignette describes the use of the joyn -merge() function.

-

🔀 joyn::merge resembles the usability of -base::merge and data.table::merge, while also -incorporating the additional features that characterize -joyn. In fact, joyn::merge masks the other -two.

-
-

Examples -

-
-

Simple merge -

-

Suppose you want to merge x1 and y1. First -notice that while base::merge is principally for data -frames, joyn::merge coerces x and -y to data tables if they are not already.

-

By default, merge will join by the shared column name(s) -in x and y.

-
-
-# Example not specifying the key
-merge(x = x1, 
-      y = y1)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   66.7%
-#> 2     y 1   33.3%
-#> 3 total 3    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ⚠ Warning: The keys supplied uniquely identify y, therefore a m:1 join is
-#> executed
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-
-# Example specifying the key
-merge(x = x1, 
-      y = y1,
-      by = "id")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   66.7%
-#> 2     y 1   33.3%
-#> 3 total 3    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ⚠ Warning: The keys supplied uniquely identify y, therefore a m:1 join is
-#> executed
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-

As usual, if the columns you want to join by don’t have the same -name, you need to tell merge which columns you want to join -by: by.x for the x data frame column name, -and by.y for the y one. For example,

-
-
-df1 <- data.frame(id = c(1L, 1L, 2L, 3L, NA_integer_, NA_integer_),
-                  t  = c(1L, 2L, 1L, 2L, NA_integer_, 4L),
-                  x  = 11:16)
-
-df2 <- data.frame(id = c(1,2, 4, NA_integer_, 8),
-                  y  = c(11L, 15L, 16, 17L, 18L),
-                  t  = c(13:17))
-
-merge(x    = df1,
-      y    = df2,
-      by.x = "x",
-      by.y = "y")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 3    100%
-#> 2     y 2   66.7%
-#> 3 total 3    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables keyby1 from id, keyby1, and t
-#> ⚠ Warning: The keys supplied uniquely identify both x and y, therefore a 1:1
-#> join is executed
-#>   id.x t.x  x id.y t.y .joyn
-#> 1    1   1 11    1  13 x & y
-#> 2   NA  NA 15    2  14 x & y
-#> 3   NA   4 16    4  15 x & y
-

By default, sort is TRUE, so that the -merged table will be sorted by the by.x column. Notice that -the output table distinguishes non-by column t coming from -x from the one coming from y by adding the -.x and .y suffixes -which occurs because the -no.dups argument is set to TRUE by -default.

-
-
-

Going further -

-

In a similar fashion as the joyn() primary function -does, merge() offers a number of arguments to -verify/control the merge1.

-

For example, joyn::joyn allows you to execute one-to-one, -one-to-many, many-to-one and many-to-many joins. Similarly, -merge accepts the match_type argument:

-
-
-# Example with many to many merge
-joyn::merge(x          = x2,
-            y          = y2,
-            by.x       = "id1",
-            by.y       = "id2",
-            match_type = "m:m")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     y 1   14.3%
-#> 2 x & y 6   85.7%
-#> 3 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables keyby1 from id, keyby1, y, and x
-#> ⚠ Warning:  Supplied both by and by.x/by.y. by argument will be ignored.
-#>      id1   id2     t   x.x    id     y   x.y  .joyn
-#>    <num> <num> <int> <num> <num> <int> <int> <fctr>
-#> 1:     1     1     1    16     1    11    16  x & y
-#> 2:     1     1     1    16     2    15    17  x & y
-#> 3:     1     1     2    12     1    11    16  x & y
-#> 4:     1     1     2    12     2    15    17  x & y
-#> 5:     2     2     1    NA     5    20    18  x & y
-#> 6:     3     3     2    NA     6    13    19  x & y
-#> 7:     3     4    NA    15     6    13    19  x & y
-
-# Example with many to one merge
-joyn::merge(x          = x1,
-            y          = y1,
-            by         = "id",
-            match_type = "m:1")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   66.7%
-#> 2     y 1   33.3%
-#> 3 total 3    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ⚠ Warning:  Supplied both by and by.x/by.y. by argument will be ignored.
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-

In a similar way, you can exploit all the other additional options -available in joyn(), e.g., for keeping common variables, -updating NAs and values, displaying messages etc…, which you can explore -in the “Advanced functionalities” article.

-
-
- -
-
- - - - -
- - - - - - - diff --git a/docs/articles/messages.html b/docs/articles/messages.html deleted file mode 100644 index cc19ea5b..00000000 --- a/docs/articles/messages.html +++ /dev/null @@ -1,681 +0,0 @@ - - - - - - - - -Messages • joyn - - - - - - - - - - Skip to contents - - -
- - - - -
-
- - - -

✅ This vignette is dedicated to one specific feature of -joyn: displaying information through -messages.

-

We’ll start with a rough overview of the different kinds of messages -that might be generated when merging two data tables, then discuss each -of them in detail with representative examples.

-
-

Overview -

-

Joyn messages can be of 4 different types:

-
    -
  1. Info

  2. -
  3. Timing

  4. -
  5. Warning

  6. -
  7. Error

  8. -
-
-
-# Setup 
-library(joyn)
-#> 
-#> Attaching package: 'joyn'
-#> The following object is masked from 'package:base':
-#> 
-#>     merge
-library(data.table)
-#> Warning: package 'data.table' was built under R version 4.3.3
-
-
-# Checking available types of messages
-msgs_types = joyn:::type_choices()
-print(msgs_types)
-#> [1] "info"   "note"   "warn"   "timing" "err"
-
-

Information messages ℹ -

-

Info messages are intended to inform you about various aspects of the -join and the data tables involved, as you can see in the examples -below.

-

Recall that one of the additional features of joyn is -that it returns a reporting variable with the status of the join. -Examples in this regard include info messages that tell you in which -variable the joyn report is available, or if the -reporting variable is not returned instead.

-

Recall that one of the additional features of joyn is that it returns -a reporting variable with the status of the join. -Examples in this regard include info messages that tell you in which -variable the joyn report is available, or if the -reporting variable is not returned instead. Also, an info message might -let you know that the name you want to assign to the reporting variable -is already present in the returned table, so that it will be changed to -another one.

-
-
-# Example dataframes
-
-x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
-                t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                x  = 11:15)
-
-y1 = data.table(id = c(1,2, 4),
-                y  = c(11L, 15L, 16))
-
-
-x2 = data.table(id = c(1, 4, 2, 3, NA),
-                t  = c(1L, 2L, 1L, 2L, NA_integer_),
-                x  = c(16, 12, NA, NA, 15))
-
-
-y2 = data.table(id = c(1, 2, 5, 6, 3),
-                yd = c(1, 2, 5, 6, 3),
-                y  = c(11L, 15L, 20L, 13L, 10L),
-                x  = c(16:20))
-
-x3 = data.table(id1 = c(1, 1, 2, 3, 3),
-                id2 = c(1, 1, 2, 3, 4),
-                t   = c(1L, 2L, 1L, 2L, NA_integer_),
-                x   = c(16, 12, NA, NA, 15))
-
-
-y3 = data.table(id3  = c(1, 2, 5, 6, 3),
-                id4 = c(1, 1, 2, 3, 4),
-                y   = c(11L, 15L, 20L, 13L, 10L),
-                z   = c(16:20))
-
-
-
-# ------------------- Showing which var contains joyn report -------------------
-
-# Joining x2 and y2
-joyn(x              = x2,
-     y              = y2,
-     by             = "id", 
-     y_vars_to_keep = FALSE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#>       id     t     x  .joyn
-#>    <num> <int> <num> <fctr>
-#> 1:     1     1    16  x & y
-#> 2:     4     2    12      x
-#> 3:     2     1    NA  x & y
-#> 4:     3     2    NA  x & y
-#> 5:    NA    NA    15      x
-#> 6:     5    NA    NA      y
-#> 7:     6    NA    NA      y
-
-# Printing the info message
-joyn_msg(msg_type = "info")
-#> ℹ Note: Joyn's report available in variable .joyn
-
-# ---------------- Info about change in reporting variable name ----------------
-joyn(x              = x2,
-     y              = y2,
-     by             = "id", 
-     reportvar      = "x",
-     y_vars_to_keep = FALSE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>     x.1 n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable x
-#> ℹ Note: reportvar x is already part of the resulting table. It will be changed
-#> to x.1
-#>       id     t     x    x.1
-#>    <num> <int> <num> <fctr>
-#> 1:     1     1    16  x & y
-#> 2:     4     2    12      x
-#> 3:     2     1    NA  x & y
-#> 4:     3     2    NA  x & y
-#> 5:    NA    NA    15      x
-#> 6:     5    NA    NA      y
-#> 7:     6    NA    NA      y
-
-joyn_msg(msg_type = "info")
-#> ℹ Note: Joyn's report available in variable x
-#> ℹ Note: reportvar x is already part of the resulting table. It will be changed
-#> to x.1
-
-# ------------- Informing that reporting variable is not returned -------------
-joyn(x              = x2,
-     y              = y2,
-     by             = "id", 
-     reportvar      = FALSE,
-     y_vars_to_keep = FALSE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note:  Reporting variable is NOT returned
-#>       id     t     x
-#>    <num> <int> <num>
-#> 1:     1     1    16
-#> 2:     4     2    12
-#> 3:     2     1    NA
-#> 4:     3     2    NA
-#> 5:    NA    NA    15
-#> 6:     5    NA    NA
-#> 7:     6    NA    NA
-
-joyn_msg(msg_type = "info")
-#> ℹ Note:  Reporting variable is NOT returned
-

Furthermore, info messages will help you keep track of which -variables in y will be kept after -the merging, for example notifying you if any of the y -variables you have specified to keep will be removed because they are -part of the by variables.

-
-
-joyn(x              = x2,
-     y              = y2,
-     by             = "id", 
-     y_vars_to_keep = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, yd, y, and x
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-
-joyn_msg(msg_type = "info")
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, yd, y, and x
-
-
-

Timing messages 🔵 -

-

Timing messages report in how many seconds the join is executed, -including the time spent to perform all checks.

-

While performing the join, joyn keeps track of the -time spent for the execution. This is then displayed in -timing messages, which report the elapsed time measured in seconds.

-

Before looking at some examples, it is important to recall a feature -of how joyn executes any join between two data tables.

-

Specifically, joyn always first executes a full join -between the data tables - which includes all matching and non matching -rows in the resulting table. Then, it filters the rows depending on the -specific type of join that user wants to execute. For example, if the -user sets keep = "right", joyn will filter the -table resulting from the full join and return to the user the data table -retaining all rows from the right table and only -matching rows from the left table. In addition, note that since -joyn performs a number of checks throughout the execution -(e.g., checking that the specified key for the merge is valid, or the -match type consistency), the time spent on checks will also be included -in reported time.

-

As a result, timing messages enable you to be aware of both:

-
    -
  1. Time spent to execute the full join -
  2. -
  3. Time spent to execute the entire joyn function, including -checks -
  4. -
-
-
-# --------------------------- Example with full join ---------------------------
-
-joyn(x          = x1, 
-     y          = y1, 
-     match_type = "m:1")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   33.3%
-#> 2     y 1   16.7%
-#> 3 x & y 3     50%
-#> 4 total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-#> 6:     4    NA    NA    16      y
-
-joyn_msg("timing")
-#> ● Timing:The full joyn is executed in 0.000251 seconds.
-#> ● Timing: The entire joyn function, including checks, is executed in 0.020076
-#> seconds.
-
-
-# --------------------------- Example with left join ---------------------------
-left_join(x            = x1, 
-          y            = y1, 
-          relationship = "many-to-one")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2     40%
-#> 2     y 1     20%
-#> 3 x & y 2     40%
-#> 4 total 5    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-
-joyn_msg("timing")
-#> ● Timing:The full joyn is executed in 0.000499 seconds.
-#> ● Timing: The entire joyn function, including checks, is executed in 0.01727
-#> seconds.
-
-
-

Warning messages ⚠️ -

-

joyn generates warning messages to alert you about -possibly problematic situations which, however, do not warrant terminating -execution of the merge.

-

For example, if you provide a match type that is inconsistent with -the data, joyn will generate a warning to inform you about -the actual relationship and to alert that the join will be executed -accordingly.

-

In the example below, both x2 and y2 are -uniquely identified by the key id, but the user is choosing -a “one to many” relationship. The user will be alerted and a -“one to one” join will be executed instead.

-
-
-# Warning that "id" uniquely identifies y2 
-
-joyn(x2, y2, by = "id", match_type = "1:m", verbose = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, yd, y, and x
-#> ⚠ Warning: The keys supplied uniquely identify y, therefore a 1:1 join is
-#> executed
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-joyn_msg("warn")
-#> ⚠ Warning: The keys supplied uniquely identify y, therefore a 1:1 join is
-#> executed
-

In a similar way, warning messages are generated when choosing -match_type = "m:m" or "m:1".

-
-
-# ------------ Warning that "id" uniquely identifies both x2 and y2 ------------
-
-joyn(x2, y2, by = "id", match_type = "m:m", verbose = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, yd, y, and x
-#> ⚠ Warning: The keys supplied uniquely identify both x and y, therefore a 1:1
-#> join is executed
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-joyn_msg("warn")
-#> ⚠ Warning: The keys supplied uniquely identify both x and y, therefore a 1:1
-#> join is executed
-
-# ------------------ Warning that "id" uniquely identifies x2 ------------------
-
-joyn(x2, y2, by = "id", match_type = "m:1", verbose = TRUE)
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   28.6%
-#> 2     y 2   28.6%
-#> 3 x & y 3   42.9%
-#> 4 total 7    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id, yd, y, and x
-#> ⚠ Warning: The keys supplied uniquely identify x, therefore a 1:1 join is
-#> executed
-#>       id     t     x    yd     y  .joyn
-#>    <num> <int> <num> <num> <int> <fctr>
-#> 1:     1     1    16     1    11  x & y
-#> 2:     4     2    12    NA    NA      x
-#> 3:     2     1    NA     2    15  x & y
-#> 4:     3     2    NA     3    10  x & y
-#> 5:    NA    NA    15    NA    NA      x
-#> 6:     5    NA    NA     5    20      y
-#> 7:     6    NA    NA     6    13      y
-joyn_msg("warn")
-#> ⚠ Warning: The keys supplied uniquely identify x, therefore a 1:1 join is
-#> executed
-

Other examples of warnings are those that arise when you are trying -to supply certain arguments to the merging functions that are not yet -supported by the current version of joyn.

-

Suppose you are executing a left-join and you try to set the -na_matches argument to ‘never’. joyn will warn -you that it currently allows only na_matches = 'na'. A -similar message is displayed when keep = NULL. Given that -the current version of joyn does not support inequality -joins, joyn will warn you that keep = NULL -will make the join retain only keys in x.

-
-
-joyn::left_join(x            = x1, 
-                y            = y1, 
-                relationship = "many-to-one", 
-                keep         = NULL,
-                na_matches   = "never")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2     40%
-#> 2     y 1     20%
-#> 3 x & y 2     40%
-#> 4 total 5    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ⚠ Warning: joyn does not currently allow inequality joins, so keep = NULL will
-#> retain only keys in x
-#> ⚠ Warning: Currently, joyn allows only na_matches = 'na'
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-
-joyn_msg("warn")
-#> ⚠ Warning: joyn does not currently allow inequality joins, so keep = NULL will
-#> retain only keys in x
-#> ⚠ Warning: Currently, joyn allows only na_matches = 'na'
-
-
-

Error messages ❌ -

-

Error messages act as helpful notifications about the reasons why the -join you are trying to perform can’t be executed. Error messages -highlight where you went off course and provide clues to fix the issue -so that the merging can be successfully executed.

-

Sometimes error messages are due to wrong or missing -inputs, for example if you do not supply variables to be used as keys for -the merge, and x and y do not have any common -variable names. Error messages will also pop up if you provide an input -data table that has no variables, or that has duplicate variable -names.

-

Representative messages in this regard can be visualized below:

-
-
-# ----------------- Error due to input table x with no columns -----------------
-
-x_empty = data.table()
-  
-joyn(x = x_empty,
-     y = y1)
-#> ✖ Error:  Input table x has no columns.
-#> Error in `check_xy()`:
-#> ! wrong input specification
-
-joyn_msg("err")
-#> ✖ Error:  Input table x has no columns.
-
-# ----------------------- Error due to duplicate names  ------------------------
-
-x_duplicates = data.table(id          = c(1L, 1L, 2L, 3L, NA_integer_),
-                          x           = c(1L, 2L, 1L, 2L, NA_integer_),
-                          x           = 11:15,
-                          check.names = FALSE)
-joyn(x = x_duplicates,
-     y = y1)
-#> ✖ Error:  Table x has the following column duplicated: x.  Please rename or
-#> remove and try again.
-#> Error in `check_xy()`:
-#> ! wrong input specification
-
-joyn_msg("err")
-#> ✖ Error:  Table x has the following column duplicated: x.  Please rename or
-#> remove and try again.
-

Furthermore, error messages are generated when choosing a wrong -match_type, i.e., one that is not consistent with the actual -relationship between the variables being used for merging. -joyn will therefore display the following message:

-
-
-joyn(x = x1, y=y1, by="id", match_type = "1:1")
-#> ✖ Error: table x is not uniquely identified by id
-#> Duplicate counts in x:
-#>       id copies
-#>    <int>  <int>
-#> 1:     1      2
-#> Error in `check_match_type()`:
-#> ! match type inconsistency
-#>  refer to the duplicate counts in the table(s) above to identify where the
-#>   issue occurred
-joyn_msg("err")
-#> ✖ Error: table x is not uniquely identified by id
-
-
-
-

Additional: How to visualize joyn messages? -

-

joyn stores the messages in the joyn -environment.

-

In order to print them, you can use the joyn_msg() -function. The msg_type argument allows you to specify a -certain type of message you would like to visualize, or, if you want all -of them to be displayed, you can just set msg_type = 'all'.

-
-
-# Execute a join 
-
-joyn(x = x1, 
-     y = y1, 
-     match_type = "m:1")
-#> 
-#> ── JOYn Report ──
-#> 
-#>   .joyn n percent
-#> 1     x 2   33.3%
-#> 2     y 1   16.7%
-#> 3 x & y 3     50%
-#> 4 total 6    100%
-#> ────────────────────────────────────────────────────────── End of JOYn report ──
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#>       id     t     x     y  .joyn
-#>    <num> <int> <int> <num> <fctr>
-#> 1:     1     1    11    11  x & y
-#> 2:     1     2    12    11  x & y
-#> 3:     2     1    13    15  x & y
-#> 4:     3     2    14    NA      x
-#> 5:    NA    NA    15    NA      x
-#> 6:     4    NA    NA    16      y
-
-# Print all messages stored
-joyn_msg(msg_type = "all")
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-#> ● Timing:The full joyn is executed in 0.000206 seconds.
-#> ● Timing: The entire joyn function, including checks, is executed in 0.016425
-#> seconds.
-
-# Print info messages only 
-joyn_msg(msg_type = "info")
-#> ℹ Note: Joyn's report available in variable .joyn
-#> ℹ Note: Removing key variables id from id and y
-
-
-
- - - - -
- - - - - - - diff --git a/docs/authors.html b/docs/authors.html deleted file mode 100644 index 65e2412c..00000000 --- a/docs/authors.html +++ /dev/null @@ -1,109 +0,0 @@ - -Authors and Citation • joyn - Skip to contents - - -
-
-
- -
-

Authors

- -
  • -

    R.Andres Castaneda. Author, maintainer. -

    -
  • -
  • -

    Zander Prinsloo. Author. -

    -
  • -
  • -

    Rossana Tatulli. Author. -

    -
  • -
- -
-

Citation

-

Source: DESCRIPTION

- -

Castaneda R, Prinsloo Z, Tatulli R (2024). -joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features. -R package version 0.2.3, https://randrescastaneda.github.io/joyn/, https://github.com/randrescastaneda/joyn. -

-
@Manual{,
-  title = {joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features},
-  author = {R.Andres Castaneda and Zander Prinsloo and Rossana Tatulli},
-  year = {2024},
-  note = {R package version 0.2.3, https://randrescastaneda.github.io/joyn/},
-  url = {https://github.com/randrescastaneda/joyn},
-}
-
-
- - -
- - - - - - - diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css deleted file mode 100644 index 5a859415..00000000 --- a/docs/bootstrap-toc.css +++ /dev/null @@ -1,60 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ - -/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ - -/* All levels of nav */ -nav[data-toggle='toc'] .nav > li > a { - display: block; - padding: 4px 20px; - font-size: 13px; - font-weight: 500; - color: #767676; -} -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 19px; - color: #563d7c; - text-decoration: none; - background-color: transparent; - border-left: 1px solid #563d7c; -} -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 18px; - font-weight: bold; - color: #563d7c; - background-color: transparent; - border-left: 2px solid #563d7c; -} - -/* Nav: second level (shown on .active) */ -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} -nav[data-toggle='toc'] .nav .nav > li > a { - padding-top: 1px; - padding-bottom: 1px; - padding-left: 30px; - font-size: 12px; - font-weight: normal; -} -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 29px; -} -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 28px; - font-weight: 500; -} - -/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 
-nav[data-toggle='toc'] .nav > .active > ul { - display: block; -} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js deleted file mode 100644 index 1cdd573b..00000000 --- a/docs/bootstrap-toc.js +++ /dev/null @@ -1,159 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ -(function() { - 'use strict'; - - window.Toc = { - helpers: { - // return all matching elements in the set, or their descendants - findOrFilter: function($el, selector) { - // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ - // http://stackoverflow.com/a/12731439/358804 - var $descendants = $el.find(selector); - return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); - }, - - generateUniqueIdBase: function(el) { - var text = $(el).text(); - var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); - return anchor || el.tagName.toLowerCase(); - }, - - generateUniqueId: function(el) { - var anchorBase = this.generateUniqueIdBase(el); - for (var i = 0; ; i++) { - var anchor = anchorBase; - if (i > 0) { - // add suffix - anchor += '-' + i; - } - // check if ID already exists - if (!document.getElementById(anchor)) { - return anchor; - } - } - }, - - generateAnchor: function(el) { - if (el.id) { - return el.id; - } else { - var anchor = this.generateUniqueId(el); - el.id = anchor; - return anchor; - } - }, - - createNavList: function() { - return $(''); - }, - - createChildNavList: function($parent) { - var $childList = this.createNavList(); - $parent.append($childList); - return $childList; - }, - - generateNavEl: function(anchor, text) { - var $a = $(''); - $a.attr('href', '#' + anchor); - $a.text(text); - var $li = $('
  • '); - $li.append($a); - return $li; - }, - - generateNavItem: function(headingEl) { - var anchor = this.generateAnchor(headingEl); - var $heading = $(headingEl); - var text = $heading.data('toc-text') || $heading.text(); - return this.generateNavEl(anchor, text); - }, - - // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). - getTopLevel: function($scope) { - for (var i = 1; i <= 6; i++) { - var $headings = this.findOrFilter($scope, 'h' + i); - if ($headings.length > 1) { - return i; - } - } - - return 1; - }, - - // returns the elements for the top level, and the next below it - getHeadings: function($scope, topLevel) { - var topSelector = 'h' + topLevel; - - var secondaryLevel = topLevel + 1; - var secondarySelector = 'h' + secondaryLevel; - - return this.findOrFilter($scope, topSelector + ',' + secondarySelector); - }, - - getNavLevel: function(el) { - return parseInt(el.tagName.charAt(1), 10); - }, - - populateNav: function($topContext, topLevel, $headings) { - var $context = $topContext; - var $prevNav; - - var helpers = this; - $headings.each(function(i, el) { - var $newNav = helpers.generateNavItem(el); - var navLevel = helpers.getNavLevel(el); - - // determine the proper $context - if (navLevel === topLevel) { - // use top level - $context = $topContext; - } else if ($prevNav && $context === $topContext) { - // create a new level of the tree and switch to it - $context = helpers.createChildNavList($prevNav); - } // else use the current $context - - $context.append($newNav); - - $prevNav = $newNav; - }); - }, - - parseOps: function(arg) { - var opts; - if (arg.jquery) { - opts = { - $nav: arg - }; - } else { - opts = arg; - } - opts.$scope = opts.$scope || $(document.body); - return opts; - } - }, - - // accepts a jQuery object, or an options object - init: function(opts) { - opts = this.helpers.parseOps(opts); - - // ensure that the data attribute is in place for styling - opts.$nav.attr('data-toggle', 'toc'); - - var $topContext = this.helpers.createChildNavList(opts.$nav); - var topLevel = this.helpers.getTopLevel(opts.$scope); - var $headings = this.helpers.getHeadings(opts.$scope, topLevel); - this.helpers.populateNav($topContext, topLevel, $headings); - } - }; - - $(function() { - $('nav[data-toggle="toc"]').each(function(i, el) { - var $nav = $(el); - 
Toc.init($nav); - }); - }); -})(); diff --git a/docs/docsearch.css b/docs/docsearch.css deleted file mode 100644 index e5f1fe1d..00000000 --- a/docs/docsearch.css +++ /dev/null @@ -1,148 +0,0 @@ -/* Docsearch -------------------------------------------------------------- */ -/* - Source: https://github.com/algolia/docsearch/ - License: MIT -*/ - -.algolia-autocomplete { - display: block; - -webkit-box-flex: 1; - -ms-flex: 1; - flex: 1 -} - -.algolia-autocomplete .ds-dropdown-menu { - width: 100%; - min-width: none; - max-width: none; - padding: .75rem 0; - background-color: #fff; - background-clip: padding-box; - border: 1px solid rgba(0, 0, 0, .1); - box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); -} - -@media (min-width:768px) { - .algolia-autocomplete .ds-dropdown-menu { - width: 175% - } -} - -.algolia-autocomplete .ds-dropdown-menu::before { - display: none -} - -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { - padding: 0; - background-color: rgb(255,255,255); - border: 0; - max-height: 80vh; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - margin-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - padding: 0; - overflow: visible -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - padding: .125rem 1rem; - margin-top: 0; - font-size: 1.3em; - font-weight: 500; - color: #00008B; - border-bottom: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - float: none; - padding-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { - float: none; - width: auto; - padding: 0; - text-align: left -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none; - width: auto; - padding: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content::before { - display: none -} - -.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { - padding-top: .75rem; - margin-top: 
.75rem; - border-top: 1px solid rgba(0, 0, 0, .1) -} - -.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - padding: .1rem 1rem; - margin-bottom: 0.1; - font-size: 1.0em; - font-weight: 400 - /* display: none */ -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - display: block; - padding: .25rem 1rem; - margin-bottom: 0; - font-size: 0.9em; - font-weight: 400 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - padding: 0 1rem .5rem; - margin-top: -.25rem; - font-size: 0.8em; - font-weight: 400; - line-height: 1.25 -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 110px; - height: 20px; - z-index: 3; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml;utf8,"); - background-repeat: no-repeat; - background-position: 50%; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - width: 100%; - height: 100%; - display: block; - transform: translate(-8px); -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #FF8C00; - background: rgba(232, 189, 54, 0.1) -} - - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) -} - -.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { - background-color: rgba(192, 192, 192, .15) -} diff --git a/docs/docsearch.js b/docs/docsearch.js deleted file mode 100644 index b35504cd..00000000 --- a/docs/docsearch.js +++ /dev/null @@ -1,85 +0,0 @@ -$(function() { - - // register a handler to move the focus to the search bar - // upon pressing shift + "/" (i.e. 
"?") - $(document).on('keydown', function(e) { - if (e.shiftKey && e.keyCode == 191) { - e.preventDefault(); - $("#search-input").focus(); - } - }); - - $(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { - - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; - - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); - - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } - } - }; - - mark(); - }); -}); - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. - for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); - } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} diff --git a/docs/index.html b/docs/index.html deleted file mode 100644 index e60e5469..00000000 --- a/docs/index.html +++ /dev/null @@ -1,441 +0,0 @@ - - - - - - - - -An R package for joining tables • joyn - - - - - - - - - - Skip to contents - - -
    -
    -
    - - - - -

    joyn empowers you to assess the results of joining data frames, making it easier and more efficient to combine your tables. Similar in philosophy to the merge command in Stata, joyn offers matching key variables and detailed join reports to ensure accurate and insightful results.

    -
    -

    Motivation -

    -

    Merging tables in R can be tricky. Ensuring accuracy and understanding the joined data fully can be tedious tasks. That’s where joyn comes in. Inspired by Stata’s informative approach to merging, joyn makes the process smoother and more insightful.

    -

    While standard R merge functions are powerful, they often lack features like assessing join accuracy, detecting potential issues, and providing detailed reports. joyn fills this gap by offering:

    -
      -
    • -Intuitive join handling: Whether you’re dealing with one-to-one, one-to-many, or many-to-many relationships, joyn helps you navigate them confidently.
    • -
    • -Informative reports: Get clear insights into the join process with helpful reports that identify duplicate observations, missing values, and potential inconsistencies.
    • -
    -
    -
    -

    What makes joyn special? -

    -

    While standard R merge functions offer basic functionality, joyn goes above and beyond by providing comprehensive tools and features tailored to your data joining needs:

    -

    1. Flexibility in join types: Choose your ideal join type (“left”, “right”, or “inner”) with the keep argument. Unlike R’s default, joyn performs a full join by default, ensuring all observations are included, but you have full control to tailor the results.

    -

    2. Seamless variable handling: No more wrestling with duplicate variable names! joyn offers multiple options:

    -
      -
    • Update values: Use update_values or update_NAs to automatically update conflicting variables in the left table with values from the right table.

    • -
    • Keep both (with different names): Enable keep_common_vars = TRUE to retain both variables, each with a unique suffix.

    • -
    • Selective inclusion: Choose specific variables from the right table with y_vars_to_keep, ensuring you get only the data you need.

    • -
    -

    3. Relationship awareness: joyn recognizes one-to-one, one-to-many, many-to-one, and many-to-many relationships between tables. While it defaults to many-to-many for compatibility, remember this is often not ideal. Always specify the correct relationship using the match_type argument (together with the by argument for the keys) for accurate and meaningful results.

    -

    4. Join success at a glance: Get instant feedback on your join with the automatically generated reporting variable. Identify potential issues like unmatched observations or missing values to ensure data integrity and informed decision-making.

    -

    By addressing these common pain points and offering enhanced flexibility, joyn empowers you to confidently and effectively join your data frames, paving the way for deeper insights and data-driven success.

    -
    -
    -

    Performance and flexibility -

    -
    -

    The cost of Reliability -

    -

    While raw speed is essential, understanding your joins every step of the way is equally crucial. joyn prioritizes providing insightful information and preventing errors over solely focusing on speed. Unlike other functions, it adds:

    -
      -
    • -Meticulous checks: joyn performs comprehensive checks to ensure your join is accurate and avoids potential missteps, like unmatched observations or missing values.
    • -
    • -Detailed reporting: Get a clear picture of your join with a dedicated report, highlighting any issues you should be aware of.
    • -
    • -User-friendly summary: Quickly grasp the join’s outcome with a concise overview presented in a clear table.
    • -
    -

    These valuable features contribute to a slightly slower performance compared to functions like data.table::merge.data.table() or collapse::join(). However, the benefits of preventing errors and gaining invaluable insights far outweigh the minor speed difference.

    -
    -
    -

    Know your needs, choose your tool -

    -
      -
    • -Speed is your top priority for massive datasets? Consider using data.table or collapse directly.
    • -
    • -Seek clear understanding and error prevention for your joins? joyn is your trusted guide.
    • -
    -
    -
    -

    Protective by design -

    -

    joyn intentionally restricts certain actions and provides clear messages when encountering unexpected data configurations. This might seem opinionated, but it’s designed to protect you from accidentally creating inaccurate or misleading joins. This “safety net” empowers you to confidently merge your data, knowing joyn has your back.

    -
    -
    -

    Flexibility -

    -

    Currently, joyn focuses on the most common and valuable join types. Future development might explore expanding its flexibility based on user needs and feedback.

    -
    -
    -
    -

    -joyn as wrapper: Familiar Syntax, Familiar Power -

    -

    While joyn::joyn() offers the core functionality and Stata-inspired arguments, you might prefer a syntax more aligned with your existing workflow. joyn has you covered!

    -

    Embrace base R and data.table:

    -
      -
    • -joyn::merge(): Leverage familiar base R and data.table syntax for seamless integration with your existing code.
    • -
    -

    Join with flair using dplyr:

    -
      -
    • -joyn::{dplyr verbs}(): Enjoy the intuitive verb-based syntax of dplyr for a powerful and expressive way to perform joins.
    • -
    -

    Dive deeper: Explore the corresponding vignettes to unlock the full potential of these alternative interfaces and find the perfect fit for your data manipulation style.

    -
    -
    -

    Installation -

    -

    You can install the stable version of joyn from CRAN with:

    - -

    You can install the development version from GitHub with:

    -
    -# install.packages("devtools")
    -devtools::install_github("randrescastaneda/joyn")
    -
    -
    -

    Examples -

    -
    -
    -library(joyn)
    -#> 
    -#> Attaching package: 'joyn'
    -#> The following object is masked from 'package:base':
    -#> 
    -#>     merge
    -library(data.table)
    -
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -
    -
    -x2 = data.table(id = c(1, 4, 2, 3, NA),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = c(16, 12, NA, NA, 15))
    -
    -
    -y2 = data.table(id = c(1, 2, 5, 6, 3),
    -                yd = c(1, 2, 5, 6, 3),
    -                y  = c(11L, 15L, 20L, 13L, 10L),
    -                x  = c(16:20))
    -
    -# using common variable `id` as key.
    -joyn(x = x1, 
    -     y = y1,
    -     match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   33.3%
    -#> 2     y 1   16.7%
    -#> 3 x & y 3     50%
    -#> 4 total 6    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:    NA    NA    15    NA      x
    -#> 6:     4    NA    NA    16      y
    -
    -# keep just those observations that match
    -joyn(x = x1, 
    -     y = y1, 
    -     match_type = "m:1",
    -     keep = "inner")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   66.7%
    -#> 2     y 1   33.3%
    -#> 3 total 3    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -
    -# Bad merge for not specifying by argument
    -joyn(x = x2, 
    -     y = y2,
    -     match_type = "1:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 4   44.4%
    -#> 2     y 4   44.4%
    -#> 3 x & y 1   11.1%
    -#> 4 total 9    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id and x from id, yd, y, and x
    -#>       id     t     x    yd     y  .joyn
    -#>    <num> <int> <num> <num> <int> <fctr>
    -#> 1:     1     1    16     1    11  x & y
    -#> 2:     4     2    12    NA    NA      x
    -#> 3:     2     1    NA    NA    NA      x
    -#> 4:     3     2    NA    NA    NA      x
    -#> 5:    NA    NA    15    NA    NA      x
    -#> 6:     2    NA    17     2    15      y
    -#> 7:     5    NA    18     5    20      y
    -#> 8:     6    NA    19     6    13      y
    -#> 9:     3    NA    20     3    10      y
    -
    -# good merge, ignoring variable x from y
    -joyn(x = x2, 
    -     y = y2,
    -     by = "id",
    -     match_type = "1:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   28.6%
    -#> 2     y 2   28.6%
    -#> 3 x & y 3   42.9%
    -#> 4 total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y  .joyn
    -#>    <num> <int> <num> <num> <int> <fctr>
    -#> 1:     1     1    16     1    11  x & y
    -#> 2:     4     2    12    NA    NA      x
    -#> 3:     2     1    NA     2    15  x & y
    -#> 4:     3     2    NA     3    10  x & y
    -#> 5:    NA    NA    15    NA    NA      x
    -#> 6:     5    NA    NA     5    20      y
    -#> 7:     6    NA    NA     6    13      y
    -
    -# update NAs in var x in table x from var x in y
    -joyn(x = x2, 
    -     y = y2, 
    -     by = "id", 
    -     update_NAs = TRUE)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>        .joyn n percent
    -#> 1          x 2   28.6%
    -#> 2      x & y 1   14.3%
    -#> 3 NA updated 4   57.1%
    -#> 4      total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y      .joyn
    -#>    <num> <int> <num> <num> <int>     <fctr>
    -#> 1:     1     1    16     1    11      x & y
    -#> 2:     4     2    12    NA    NA          x
    -#> 3:     2     1    17     2    15 NA updated
    -#> 4:     3     2    20     3    10 NA updated
    -#> 5:    NA    NA    15    NA    NA          x
    -#> 6:     5    NA    18     5    20 NA updated
    -#> 7:     6    NA    19     6    13 NA updated
    -
    -# update values in var x in table x from var x in y
    -joyn(x = x2, 
    -     y = y2, 
    -     by = "id", 
    -     update_values = TRUE)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>           .joyn n percent
    -#> 1    NA updated 4   57.1%
    -#> 2 value updated 1   14.3%
    -#> 3   not updated 2   28.6%
    -#> 4         total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y         .joyn
    -#>    <num> <int> <num> <num> <int>        <fctr>
    -#> 1:     1     1    16     1    11 value updated
    -#> 2:     4     2    12    NA    NA   not updated
    -#> 3:     2     1    17     2    15    NA updated
    -#> 4:     3     2    20     3    10    NA updated
    -#> 5:    NA    NA    15    NA    NA   not updated
    -#> 6:     5    NA    18     5    20    NA updated
    -#> 7:     6    NA    19     6    13    NA updated
    -
    -
    -# do not bring any variable from y into x, just the report
    -joyn(x = x2, 
    -     y = y2, 
    -     by = "id", 
    -     y_vars_to_keep = NULL)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   28.6%
    -#> 2     y 2   28.6%
    -#> 3 x & y 3   42.9%
    -#> 4 total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#>       id     t     x  .joyn
    -#>    <num> <int> <num> <fctr>
    -#> 1:     1     1    16  x & y
    -#> 2:     4     2    12      x
    -#> 3:     2     1    NA  x & y
    -#> 4:     3     2    NA  x & y
    -#> 5:    NA    NA    15      x
    -#> 6:     5    NA    NA      y
    -#> 7:     6    NA    NA      y
    -
    -
    -
    -
    - - - -
    - - - - - - - diff --git a/docs/news/index.html b/docs/news/index.html deleted file mode 100644 index 840e5fa5..00000000 --- a/docs/news/index.html +++ /dev/null @@ -1,154 +0,0 @@ - -Changelog • joyn - Skip to contents - - -
    -
    -
    - -
    -

    joyn (development version)

    -
    • Improve displaying messages. Now, they are clickable.
    • -
    -
    -

    joyn 0.2.3

    CRAN release: 2024-08-21

    -
    • Fix bug where the left join did not work when updating values and a full join was maintained.
    • -
    -
    -

    joyn 0.2.2

    CRAN release: 2024-07-10

    -
    • Remove unmasking functions as we realized that they are not necessary.
    • -
    -
    -

    joyn 0.2.1 (Not in CRAN)

    -
    • Add anti_join() function.

    • -
    • Add unmask_joyn() function to unmask joyn functions that mask dplyr equivalents.

    • -
    • Add information about duplicated obs in by variable when match type is 1 rather than m.

    • -
    • improve inefficiencies in deep copies with m:m joins

    • -
    • Replace m:m joins from data.table::merge.data.table to collapse::join. Thanks to @SebKrantz for the suggestion (#58).

    • -
    • Add information about duplicated obs in by variable when match type is 1 rather than m.

    • -
    • Internal: improve storing of joyn messages.

    • -
    • Improve creation of reporting variable. Now, it is created in [collapse::join] rather than in joyn function. In addition, the reporting variable is created as factor to improve performance. Thanks to @SebKrantz for the suggestion (#58)

    • -
    -

    breaking changes

    -
    • Now, by default, joyn will not sort the data. This is to avoid unnecessary computational time that most of the time is not needed. If the user wants to sort the data, they can use the sort argument, which triggers the sorting mechanism of collapse package.

    • -
    • report variable (named “.joyn” by default) is now a factor instead of character. Yet, users can still use character if they want with reporttype = "character".

    • -
    -
    -
    -

    joyn 0.2.0

    CRAN release: 2024-03-29

    -
    • -joyn has gained two new authors: Zander Prinsloo and Rossana Tatulli.
    • -
    -

    Breaking changes

    -
    • Function joyn::merge() was replaced by joyn::joyn(). This is now the main function of the joyn package.

    • -
    • Arguments allow.cartesian, yvars, and keep_y_in_x have been deprecated. The latter two have been replaced by y_vars_to_keep and keep_common_vars, respectively. The new argument names bring more clarity about what the arguments do.

    • -
    -
    -

    New features

    -
    -
    -

    Minor improvements and fixes

    -
    • Minor inconsistency bugs were fixed.
    • -
    -
    -
    -

    joyn 0.1.4

    CRAN release: 2021-12-14

    -
    • update_NAs now could be FALSE even if update_values is TRUE

    • -
    • Select rows-to-keep before transformation of updated values and NAs to avoid keeping rows from y that did not match in x but whose values got updated because update_values = TRUE

    • -
    • Solve issues #1 and #19

    • -
    • Change to data.table::merge.data.table syntax in all joins. It makes it easier to work with and consistent across different join types.

    • -
    • Remove previous lazy-loaded data.

    • -
    -
    -

    joyn 0.1.3

    CRAN release: 2021-04-28

    -
    • Convert external data to external data.
    • -
    -
    -

    joyn 0.1.2

    -
    • Add function possible_ids() to identify what variables are suitable for uniquely identifying the database.
    • -
    -

    joyn 0.1.1

    -
    • Add function is_id() to check whether the table is uniquely identified by key variables

    • -
    • Add function freq_table() as a substitute for janitor::tabyl. This makes it more convenient for users who do not have janitor installed.

    • -
    -
    -

    joyn 0.1.0

    -

    Fix bug on by argument when using “=” or “==”.

    -
    -
    -

    joyn 0.0.1

    -

    First Public release

    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css deleted file mode 100644 index 80ea5b83..00000000 --- a/docs/pkgdown.css +++ /dev/null @@ -1,384 +0,0 @@ -/* Sticky footer */ - -/** - * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ - * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css - * - * .Site -> body > .container - * .Site-content -> body > .container .row - * .footer -> footer - * - * Key idea seems to be to ensure that .container and __all its parents__ - * have height set to 100% - * - */ - -html, body { - height: 100%; -} - -body { - position: relative; -} - -body > .container { - display: flex; - height: 100%; - flex-direction: column; -} - -body > .container .row { - flex: 1 0 auto; -} - -footer { - margin-top: 45px; - padding: 35px 0 36px; - border-top: 1px solid #e5e5e5; - color: #666; - display: flex; - flex-shrink: 0; -} -footer p { - margin-bottom: 0; -} -footer div { - flex: 1; -} -footer .pkgdown { - text-align: right; -} -footer p { - margin-bottom: 0; -} - -img.icon { - float: right; -} - -/* Ensure in-page images don't run outside their container */ -.contents img { - max-width: 100%; - height: auto; -} - -/* Fix bug in bootstrap (only seen in firefox) */ -summary { - display: list-item; -} - -/* Typographic tweaking ---------------------------------*/ - -.contents .page-header { - margin-top: calc(-60px + 1em); -} - -dd { - margin-left: 3em; -} - -/* Section anchors ---------------------------------*/ - -a.anchor { - display: none; - margin-left: 5px; - width: 20px; - height: 20px; - - background-image: url(./link.svg); - background-repeat: no-repeat; - background-size: 20px 20px; - background-position: center center; -} - -h1:hover .anchor, -h2:hover .anchor, -h3:hover .anchor, -h4:hover .anchor, -h5:hover .anchor, -h6:hover .anchor { - display: inline-block; -} - -/* Fixes for fixed navbar --------------------------*/ - -.contents h1, 
.contents h2, .contents h3, .contents h4 { - padding-top: 60px; - margin-top: -40px; -} - -/* Navbar submenu --------------------------*/ - -.dropdown-submenu { - position: relative; -} - -.dropdown-submenu>.dropdown-menu { - top: 0; - left: 100%; - margin-top: -6px; - margin-left: -1px; - border-radius: 0 6px 6px 6px; -} - -.dropdown-submenu:hover>.dropdown-menu { - display: block; -} - -.dropdown-submenu>a:after { - display: block; - content: " "; - float: right; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; - border-width: 5px 0 5px 5px; - border-left-color: #cccccc; - margin-top: 5px; - margin-right: -10px; -} - -.dropdown-submenu:hover>a:after { - border-left-color: #ffffff; -} - -.dropdown-submenu.pull-left { - float: none; -} - -.dropdown-submenu.pull-left>.dropdown-menu { - left: -100%; - margin-left: 10px; - border-radius: 6px 0 6px 6px; -} - -/* Sidebar --------------------------*/ - -#pkgdown-sidebar { - margin-top: 30px; - position: -webkit-sticky; - position: sticky; - top: 70px; -} - -#pkgdown-sidebar h2 { - font-size: 1.5em; - margin-top: 1em; -} - -#pkgdown-sidebar h2:first-child { - margin-top: 0; -} - -#pkgdown-sidebar .list-unstyled li { - margin-bottom: 0.5em; -} - -/* bootstrap-toc tweaks ------------------------------------------------------*/ - -/* All levels of nav */ - -nav[data-toggle='toc'] .nav > li > a { - padding: 4px 20px 4px 6px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; -} - -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 5px; - color: inherit; - border-left: 1px solid #878787; -} - -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 5px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; - border-left: 2px solid #878787; -} - -/* Nav: second level (shown on .active) */ - -nav[data-toggle='toc'] .nav .nav { - 
display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} - -nav[data-toggle='toc'] .nav .nav > li > a { - padding-left: 16px; - font-size: 1.35rem; -} - -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 15px; -} - -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 15px; - font-weight: 500; - font-size: 1.35rem; -} - -/* orcid ------------------------------------------------------------------- */ - -.orcid { - font-size: 16px; - color: #A6CE39; - /* margins are required by official ORCID trademark and display guidelines */ - margin-left:4px; - margin-right:4px; - vertical-align: middle; -} - -/* Reference index & topics ----------------------------------------------- */ - -.ref-index th {font-weight: normal;} - -.ref-index td {vertical-align: top; min-width: 100px} -.ref-index .icon {width: 40px;} -.ref-index .alias {width: 40%;} -.ref-index-icons .alias {width: calc(40% - 40px);} -.ref-index .title {width: 60%;} - -.ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} -.ref-arguments .name {width: 20%;} -.ref-arguments .desc {width: 80%;} - -/* Nice scrolling for wide elements --------------------------------------- */ - -table { - display: block; - overflow: auto; -} - -/* Syntax highlighting ---------------------------------------------------- */ - -pre, code, pre code { - background-color: #f8f8f8; - color: #333; -} -pre, pre code { - white-space: pre-wrap; - word-break: break-all; - overflow-wrap: break-word; -} - -pre { - border: 1px solid #eee; -} - -pre .img, pre .r-plt { - margin: 5px 0; -} - -pre .img img, pre .r-plt img { - background-color: #fff; -} - -code a, pre a { - color: #375f84; -} - -a.sourceLine:hover { - text-decoration: none; -} - -.fl 
{color: #1514b5;} -.fu {color: #000000;} /* function */ -.ch,.st {color: #036a07;} /* string */ -.kw {color: #264D66;} /* keyword */ -.co {color: #888888;} /* comment */ - -.error {font-weight: bolder;} -.warning {font-weight: bolder;} - -/* Clipboard --------------------------*/ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* headroom.js ------------------------ */ - -.headroom { - will-change: transform; - transition: transform 200ms linear; -} -.headroom--pinned { - transform: translateY(0%); -} -.headroom--unpinned { - transform: translateY(-100%); -} - -/* mark.js ----------------------------*/ - -mark { - background-color: rgba(255, 255, 51, 0.5); - border-bottom: 2px solid rgba(255, 153, 51, 0.3); - padding: 1px; -} - -/* vertical spacing after htmlwidgets */ -.html-widget { - margin-bottom: 10px; -} - -/* fontawesome ------------------------ */ - -.fab { - font-family: "Font Awesome 5 Brands" !important; -} - -/* don't display links in code chunks when printing */ -/* source: https://stackoverflow.com/a/10781533 */ -@media print { - code a:link:after, code a:visited:after { - content: ""; - } -} - -/* Section anchors --------------------------------- - Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 -*/ - -div.csl-bib-body { } -div.csl-entry { - clear: both; -} -.hanging-indent div.csl-entry { - margin-left:2em; - text-indent:-2em; -} -div.csl-left-margin { - min-width:2em; - float:left; -} -div.csl-right-inline { - margin-left:2em; - padding-left:1em; -} -div.csl-indent { - margin-left: 2em; -} diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml deleted file mode 100644 index 0c576f3c..00000000 --- a/docs/pkgdown.yml +++ /dev/null @@ -1,15 +0,0 @@ -pandoc: 3.1.1 -pkgdown: 2.0.7 -pkgdown_sha: ~ -articles: - adv-functionalities: adv-functionalities.html - aux-functions: 
aux-functions.html - dplyr-joins: dplyr-joins.html - main-functionalities: main-functionalities.html - merge-wrapper: merge-wrapper.html - messages: messages.html -last_built: 2024-10-09T20:39Z -urls: - reference: https://randrescastaneda.github.io/joyn/reference - article: https://randrescastaneda.github.io/joyn/articles - diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png deleted file mode 100644 index 17a35806..00000000 Binary files a/docs/reference/Rplot001.png and /dev/null differ diff --git a/docs/reference/arguments_checks.html b/docs/reference/arguments_checks.html deleted file mode 100644 index 22bb5f0d..00000000 --- a/docs/reference/arguments_checks.html +++ /dev/null @@ -1,195 +0,0 @@ - -Perform necessary preliminary checks on arguments that are passed to joyn — arguments_checks • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Perform necessary preliminary checks on arguments that are passed to joyn

    -
    - -
    -

    Usage

    -
    arguments_checks(
    -  x,
    -  y,
    -  by,
    -  copy,
    -  keep,
    -  suffix,
    -  na_matches,
    -  multiple,
    -  relationship,
    -  reportvar
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: left table

    - - -
    y
    -

    data frame: right table

    - - -
    by
    -

    character vector or variables to join by

    - - -
    copy
    -

    If x and y are not from the same data source, -and copy is TRUE, then y will be copied into the -same src as x. This allows you to join tables across srcs, but -it is a potentially expensive operation so you must opt into it.

    - - -
    keep
    -

    Should the join keys from both x and y be preserved in the -output?

    • If NULL, the default, joins on equality retain only the keys from x, -while joins on inequality retain the keys from both inputs.

    • -
    • If TRUE, all keys from both inputs are retained.

    • -
    • If FALSE, only keys from x are retained. For right and full joins, -the data in key columns corresponding to rows that only exist in y are -merged into the key columns from x. Can't be used when joining on -inequality conditions.

    • -
    - - -
    suffix
    -

    If there are non-joined duplicate variables in x and -y, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.

    - - -
    na_matches
    -

    Should two NA or two NaN values match?

    • "na", the default, treats two NA or two NaN values as equal, like -%in%, match(), and merge().

    • -
    • "never" treats two NA or two NaN values as different, and will -never match them together or to any other values. This is similar to joins -for database sources and to base::merge(incomparables = NA).

    • -
    - - -
    multiple
    -

    Handling of rows in x with multiple matches in y. -For each row of x:

    • "all", the default, returns every match detected in y. This is the -same behavior as SQL.

    • -
    • "any" returns one match detected in y, with no guarantees on which -match will be returned. It is often faster than "first" and "last" -if you just need to detect if there is at least one match.

    • -
    • "first" returns the first match detected in y.

    • -
    • "last" returns the last match detected in y.

    • -
    - - -
    relationship
    -

    Handling of the expected relationship between the keys of -x and y. If the expectations chosen from the list below are -invalidated, an error is thrown.

    • NULL, the default, doesn't expect there to be any relationship between -x and y. However, for equality joins it will check for a many-to-many -relationship (which is typically unexpected) and will warn if one occurs, -encouraging you to either take a closer look at your inputs or make this -relationship explicit by specifying "many-to-many".

      -

      See the Many-to-many relationships section for more details.

    • -
    • "one-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "one-to-many" expects:

      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "many-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
    • -
    • "many-to-many" doesn't perform any relationship checks, but is provided -to allow you to be explicit about this relationship if you know it -exists.

    • -

    relationship doesn't handle cases where there are zero matches. For that, -see unmatched.

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - -
    -
    -

    Value

    - - -

    list of checked arguments to pass on to the main joyn function

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/check_by_vars.html b/docs/reference/check_by_vars.html deleted file mode 100644 index b62d16cc..00000000 --- a/docs/reference/check_by_vars.html +++ /dev/null @@ -1,121 +0,0 @@ - -Check by input — check_by_vars • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This function checks the variable name(s) to be used as key(s) of the join

    -
    - -
    -

    Usage

    -
    check_by_vars(by, x, y)
    -
    - -
    -

    Arguments

    -
    by
    -

    A vector of shared column names in x and y to merge on. -This defaults to the shared key columns between the two tables. -If y has no key columns, this defaults to the key of x.

    - - -
    x, y
    -

    data tables. y is coerced to a data.table if -it isn't one already.

    - -
    -
    -

    Value

    - - -

    list with information about by variables

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -x1 = data.frame(
    -       id = c(1L, 1L, 2L, 3L, NA_integer_),
    -       t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -       x  = 11:15)
    -y1 = data.frame(id = 1:2,
    -                y  = c(11L, 15L))
    -# With var "id" shared in x and y
    -joyn:::check_by_vars(by = "id", x = x1, y = y1)
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_dt_by.html b/docs/reference/check_dt_by.html deleted file mode 100644 index 5d5c9a04..00000000 --- a/docs/reference/check_dt_by.html +++ /dev/null @@ -1,135 +0,0 @@ - -Check dt by vars — check_dt_by • joyn - Skip to contents - - -
    -
    -
    - -
    -

    check variable(s) by which data frames are joined: either a single by var, common to right and left dt, -or

    -
    - -
    -

    Usage

    -
    check_dt_by(x, y, by, by.x, by.y)
    -
    - -
    -

    Arguments

    -
    x
    -

    left table

    - - -
    y
    -

    right table

    - - -
    by
    -

    character: variable to join by (common variable to x and y)

    - - -
    by.x
    -

    character: specified var in x to join by

    - - -
    by.y
    -

    character: specified var in y to join by

    - -
    -
    -

    Value

    - - -

    character specifying checked variable(s) to join by

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -x = data.table(id1 = c(1, 1, 2, 3, 3),
    -               id2 = c(1, 1, 2, 3, 4),
    -               t   = c(1L, 2L, 1L, 2L, NA_integer_),
    -               x   = c(16, 12, NA, NA, 15))
    -y = data.table(id  = c(1, 2, 5, 6, 3),
    -               id2 = c(1, 1, 2, 3, 4),
    -               y   = c(11L, 15L, 20L, 13L, 10L),
    -               x   = c(16:20))
    -# example specifying by.x and by.y
    -joyn:::check_dt_by(x, y, by.x = "id1", by.y = "id2")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_duplicate_names.html b/docs/reference/check_duplicate_names.html deleted file mode 100644 index f0a7b511..00000000 --- a/docs/reference/check_duplicate_names.html +++ /dev/null @@ -1,122 +0,0 @@ - -Check if vars in dt have duplicate names — check_duplicate_names • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Check if vars in dt have duplicate names

    -
    - -
    -

    Usage

    -
    check_duplicate_names(dt, name)
    -
    - -
    -

    Arguments

    -
    dt
    -

    data.frame to check

    - - -
    name
    -

    var name to check if has duplicates in dt

    - -
    -
    -

    Value

    - - -

    logical either TRUE, if any duplicates are found, or FALSE otherwise

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# When no duplicates
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -joyn:::check_duplicate_names(x1, "x")
    -
    -# When duplicates
    -x1_duplicates = data.frame(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                           x  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                           x  = 11:15,
    -                           check.names = FALSE)
    -joyn:::check_duplicate_names(x1_duplicates, "x")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_match_type.html b/docs/reference/check_match_type.html deleted file mode 100644 index 7e26d854..00000000 --- a/docs/reference/check_match_type.html +++ /dev/null @@ -1,136 +0,0 @@ - -Check match type consistency — check_match_type • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This function checks if the match type chosen by the user is consistent with the data. -
    (Match type must be one of the valid types: "1:1", "1:m", "m:1", "m:m")

    -
    - -
    -

    Usage

    -
    check_match_type(x, y, by, match_type, verbose = getOption("joyn.verbose"))
    -
    - -
    -

    Arguments

    -
    x, y
    -

    data tables. y is coerced to a data.table if -it isn't one already.

    - - -
    by
    -

    A vector of shared column names in x and y to merge on. -This defaults to the shared key columns between the two tables. -If y has no key columns, this defaults to the key of x.

    - - -
    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    -
    -

    Value

    - - -

    character vector from split_match_type

    - - -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# Consistent match type
    -x1 = data.frame(
    -       id = c(1L, 1L, 2L, 3L, NA_integer_),
    -       t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -       x  = 11:15)
    -y1 = data.frame(id = 1:2,
    -                y  = c(11L, 15L))
    -joyn:::check_match_type(x = x1, y=y1, by="id", match_type = "m:1")
    -
    -# Inconsistent match type
    -joyn:::check_match_type(x = x1, y=y1, by="id", match_type = "1:1")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_new_y_vars.html b/docs/reference/check_new_y_vars.html deleted file mode 100644 index 27d7aa92..00000000 --- a/docs/reference/check_new_y_vars.html +++ /dev/null @@ -1,123 +0,0 @@ - -Rename vars in y so they are different to x's when joined — check_new_y_vars • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Check vars in y with same names as vars in x, and return new variables names for those y vars for the joined data frame

    -
    - -
    -

    Usage

    -
    check_new_y_vars(x, by, y_vars_to_keep)
    -
    - -
    -

    Arguments

    -
    x
    -

    master table

    - - -
    by
    -

    character: by vars

    - - -
    y_vars_to_keep
    -

    character vector of y variables to keep

    - -
    -
    -

    Value

    - - -

    vector with new variable names for y

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -y2 = data.frame(id = c(1, 2, 5, 6, 3),
    -                yd = c(1, 2, 5, 6, 3),
    -                y  = c(11L, 15L, 20L, 13L, 10L),
    -                x  = c(16:20))
    -joyn:::y_vars_to_keep <- check_y_vars_to_keep(TRUE, y2, by = "id")
    -x2 = data.frame(id = c(1, 1, 2, 3, NA),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = c(16, 12, NA, NA, 15))
    -joyn:::check_new_y_vars(x = x2, by="id", y_vars_to_keep)
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_reportvar.html b/docs/reference/check_reportvar.html deleted file mode 100644 index d5e642b2..00000000 --- a/docs/reference/check_reportvar.html +++ /dev/null @@ -1,106 +0,0 @@ - -Check reporting variable — check_reportvar • joyn - Skip to contents - - -
    -
    -
    - -
    -

    check reportvar input
    If resulting data frame has a reporting variable (storing joyn's report), check and return a valid name.

    -
    - -
    -

    Usage

    -
    check_reportvar(reportvar, verbose = getOption("joyn.verbose"))
    -
    - -
    -

    Value

    - - -

    if input reportvar is character, return valid name for the report var. If NULL or FALSE, return NULL.

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# When null - reporting variable not returned in merged dt
    -joyn:::check_reportvar(reportvar = NULL)
    -# When FALSE - reporting variable not returned in merged dt
    -joyn:::check_reportvar(reportvar = FALSE)
    -# When character
    -joyn:::check_reportvar(reportvar = ".joyn")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_unmatched_keys.html b/docs/reference/check_unmatched_keys.html deleted file mode 100644 index c932afdc..00000000 --- a/docs/reference/check_unmatched_keys.html +++ /dev/null @@ -1,116 +0,0 @@ - -Conduct all unmatched keys checks and return error if necessary — check_unmatched_keys • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Conduct all unmatched keys checks and return error if necessary

    -
    - -
    -

    Usage

    -
    check_unmatched_keys(x, y, out, by, jn_type)
    -
    - -
    -

    Arguments

    -
    x
    -

    left table

    - - -
    y
    -

    right table

    - - -
    out
    -

    output from join

    - - -
    by
    -

    character vector of keys that x and y are joined by

    - - -
    jn_type
    -

    character: "left", "right", or "inner"

    - -
    -
    -

    Value

    - - -

    error message

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/check_xy.html b/docs/reference/check_xy.html deleted file mode 100644 index 36b85c5b..00000000 --- a/docs/reference/check_xy.html +++ /dev/null @@ -1,129 +0,0 @@ - -Check tables X and Y — check_xy • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This function performs checks inspired by merge.data.table: it detects errors

    • if x and/or y have no columns

    • -
    • if x and/or y contain duplicate column names

    • -
    - -
    -

    Usage

    -
    check_xy(x, y)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - -
    -
    -

    Value

    - - -

    invisible TRUE

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# Check passing with no errors
    -library(data.table)
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -joyn:::check_xy(x = x1, y=y1)
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/check_y_vars_to_keep.html b/docs/reference/check_y_vars_to_keep.html deleted file mode 100644 index 3817b1cd..00000000 --- a/docs/reference/check_y_vars_to_keep.html +++ /dev/null @@ -1,125 +0,0 @@ - -Check variables in y that will be kept in returning table — check_y_vars_to_keep • joyn - Skip to contents - - -
    -
    -
    - -
    -

    check and return variable names in y to keep in returning table, excluding those that are keys of the merge

    -
    - -
    -

    Usage

    -
    check_y_vars_to_keep(y_vars_to_keep, y, by)
    -
    - -
    -

    Arguments

    -
    y_vars_to_keep
    -

    either TRUE, if keep all vars in y; -FALSE or NULL, if keep no vars; or character vector specifying which variables in y to keep

    - - -
    y
    -

    data frame

    - - -
    by
    -

    A vector of shared column names in x and y to merge on. -This defaults to the shared key columns between the two tables. -If y has no key columns, this defaults to the key of x.

    - -
    -
    -

    Value

    - - -

    character vector with variable names from y table

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -y1 = data.table(id = 1:2,
    -               y  = c(11L, 15L))
    -# With y_vars_to_keep TRUE
    -joyn:::check_y_vars_to_keep(TRUE, y1, by = "id")
    -# With y_vars_to_keep FALSE
    -joyn:::check_y_vars_to_keep(FALSE, y1, by = "id")
    -# Specifying which y vars to keep
    -joyn:::check_y_vars_to_keep("y", y1, by = "id")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/clear_joynenv.html b/docs/reference/clear_joynenv.html deleted file mode 100644 index bbda8d21..00000000 --- a/docs/reference/clear_joynenv.html +++ /dev/null @@ -1,113 +0,0 @@ - -Clearing joyn environment — clear_joynenv • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Clearing joyn environment

    -
    - -
    -

    Usage

    -
    clear_joynenv()
    -
    - -
    -

    See also

    -

    Messages functions -joyn_msg(), -joyn_msgs_exist(), -joyn_report(), -msg_type_dt(), -store_msg(), -style(), -type_choices()

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# Storing a message
    -joyn:::store_msg("info", "simple message")
    -
    -# Clearing the environment
    -joyn:::clear_joynenv()
    -
    -# Checking it does not exist in the environment
    -print(joyn:::joyn_msgs_exist())
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/figures/lifecycle-archived.svg b/docs/reference/figures/lifecycle-archived.svg deleted file mode 100644 index 745ab0c7..00000000 --- a/docs/reference/figures/lifecycle-archived.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: archived - - - - - - - - - - - - - - - lifecycle - - archived - - diff --git a/docs/reference/figures/lifecycle-defunct.svg b/docs/reference/figures/lifecycle-defunct.svg deleted file mode 100644 index d5c9559e..00000000 --- a/docs/reference/figures/lifecycle-defunct.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: defunct - - - - - - - - - - - - - - - lifecycle - - defunct - - diff --git a/docs/reference/figures/lifecycle-deprecated.svg b/docs/reference/figures/lifecycle-deprecated.svg deleted file mode 100644 index b61c57c3..00000000 --- a/docs/reference/figures/lifecycle-deprecated.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: deprecated - - - - - - - - - - - - - - - lifecycle - - deprecated - - diff --git a/docs/reference/figures/lifecycle-experimental.svg b/docs/reference/figures/lifecycle-experimental.svg deleted file mode 100644 index 5d88fc2c..00000000 --- a/docs/reference/figures/lifecycle-experimental.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: experimental - - - - - - - - - - - - - - - lifecycle - - experimental - - diff --git a/docs/reference/figures/lifecycle-maturing.svg b/docs/reference/figures/lifecycle-maturing.svg deleted file mode 100644 index 897370ec..00000000 --- a/docs/reference/figures/lifecycle-maturing.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: maturing - - - - - - - - - - - - - - - lifecycle - - maturing - - diff --git a/docs/reference/figures/lifecycle-questioning.svg b/docs/reference/figures/lifecycle-questioning.svg deleted file mode 100644 index 7c1721d0..00000000 --- a/docs/reference/figures/lifecycle-questioning.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: questioning - - - - - - - - - - - - - - - lifecycle - - questioning - - diff --git 
a/docs/reference/figures/lifecycle-soft-deprecated.svg b/docs/reference/figures/lifecycle-soft-deprecated.svg deleted file mode 100644 index 9c166ff3..00000000 --- a/docs/reference/figures/lifecycle-soft-deprecated.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: soft-deprecated - - - - - - - - - - - - - - - lifecycle - - soft-deprecated - - diff --git a/docs/reference/figures/lifecycle-stable.svg b/docs/reference/figures/lifecycle-stable.svg deleted file mode 100644 index 9bf21e76..00000000 --- a/docs/reference/figures/lifecycle-stable.svg +++ /dev/null @@ -1,29 +0,0 @@ - - lifecycle: stable - - - - - - - - - - - - - - - - lifecycle - - - - stable - - - diff --git a/docs/reference/figures/lifecycle-superseded.svg b/docs/reference/figures/lifecycle-superseded.svg deleted file mode 100644 index db8d757f..00000000 --- a/docs/reference/figures/lifecycle-superseded.svg +++ /dev/null @@ -1,21 +0,0 @@ - - lifecycle: superseded - - - - - - - - - - - - - - - lifecycle - - superseded - - diff --git a/docs/reference/freq_table.html b/docs/reference/freq_table.html deleted file mode 100644 index 4f6469aa..00000000 --- a/docs/reference/freq_table.html +++ /dev/null @@ -1,122 +0,0 @@ - -Tabulate simple frequencies — freq_table • joyn - Skip to contents - - -
    -
    -
    - -
    -

    tabulate one variable frequencies

    -
    - -
    -

    Usage

    -
    freq_table(x, byvar, digits = 1, na.rm = FALSE)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame

    - - -
    byvar
    -

    character: name of variable to tabulate. Use Standard evaluation.

    - - -
    digits
    -

    numeric: number of decimal places to display. Default is 1.

    - - -
    na.rm
    -

    logical: report NA values in frequencies. Default is FALSE.

    - -
    -
    -

    Value

    - - -

    data.table with frequencies.

    -
    - -
    -

    Examples

    -
    library(data.table)
    -x4 = data.table(id1 = c(1, 1, 2, 3, 3),
    -                id2 = c(1, 1, 2, 3, 4),
    -                t   = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x   = c(16, 12, NA, NA, 15))
    -freq_table(x4, "id1")
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/full_join.html b/docs/reference/full_join.html deleted file mode 100644 index dd941f6c..00000000 --- a/docs/reference/full_join.html +++ /dev/null @@ -1,359 +0,0 @@ - -Full join two data frames — full_join • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is a joyn wrapper that works in a similar -fashion to dplyr::full_join

    -
    - -
    -

    Usage

    -
    full_join(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  copy = FALSE,
    -  suffix = c(".x", ".y"),
    -  keep = NULL,
    -  na_matches = c("na", "never"),
    -  multiple = "all",
    -  unmatched = "drop",
    -  relationship = "one-to-one",
    -  y_vars_to_keep = TRUE,
    -  update_values = FALSE,
    -  update_NAs = update_values,
    -  reportvar = getOption("joyn.reportvar"),
    -  reporttype = c("factor", "character", "numeric"),
    -  roll = NULL,
    -  keep_common_vars = FALSE,
    -  sort = TRUE,
    -  verbose = getOption("joyn.verbose"),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - - -
    by
    -

    a character vector of variables to join by. If NULL, the default, -joyn will do a natural join, using all variables with common names across -the two tables. A message lists the variables so that you can check they're -correct (to suppress the message, simply explicitly list the variables that -you want to join). To join by different variables on x and y use a vector -of expressions. For example, by = c("a = b", "z") will use "a" in x, "b" -in y, and "z" in both tables.

    - - -
    copy
    -

    If x and y are not from the same data source, -and copy is TRUE, then y will be copied into the -same src as x. This allows you to join tables across srcs, but -it is a potentially expensive operation so you must opt into it.

    - - -
    suffix
    -

    If there are non-joined duplicate variables in x and -y, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.

    - - -
    keep
    -

    Should the join keys from both x and y be preserved in the -output?

    • If NULL, the default, joins on equality retain only the keys from x, -while joins on inequality retain the keys from both inputs.

    • -
    • If TRUE, all keys from both inputs are retained.

    • -
    • If FALSE, only keys from x are retained. For right and full joins, -the data in key columns corresponding to rows that only exist in y are -merged into the key columns from x. Can't be used when joining on -inequality conditions.

    • -
    - - -
    na_matches
    -

    Should two NA or two NaN values match?

    • "na", the default, treats two NA or two NaN values as equal, like -%in%, match(), and merge().

    • -
    • "never" treats two NA or two NaN values as different, and will -never match them together or to any other values. This is similar to joins -for database sources and to base::merge(incomparables = NA).

    • -
    - - -
    multiple
    -

    Handling of rows in x with multiple matches in y. -For each row of x:

    • "all", the default, returns every match detected in y. This is the -same behavior as SQL.

    • -
    • "any" returns one match detected in y, with no guarantees on which -match will be returned. It is often faster than "first" and "last" -if you just need to detect if there is at least one match.

    • -
    • "first" returns the first match detected in y.

    • -
    • "last" returns the last match detected in y.

    • -
    - - -
    unmatched
    -

    How should unmatched keys that would result in dropped rows -be handled?

    • "drop" drops unmatched keys from the result.

    • -
    • "error" throws an error if unmatched keys are detected.

    • -

    unmatched is intended to protect you from accidentally dropping rows -during a join. It only checks for unmatched keys in the input that could -potentially drop rows.

    • For left joins, it checks y.

    • -
    • For right joins, it checks x.

    • -
    • For inner joins, it checks both x and y. In this case, unmatched is -also allowed to be a character vector of length 2 to specify the behavior -for x and y independently.

    • -
    - - -
    relationship
    -

    Handling of the expected relationship between the keys of -x and y. If the expectations chosen from the list below are -invalidated, an error is thrown.

    • NULL, the default, doesn't expect there to be any relationship between -x and y. However, for equality joins it will check for a many-to-many -relationship (which is typically unexpected) and will warn if one occurs, -encouraging you to either take a closer look at your inputs or make this -relationship explicit by specifying "many-to-many".

      -

      See the Many-to-many relationships section for more details.

    • -
    • "one-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "one-to-many" expects:

      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "many-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
    • -
    • "many-to-many" doesn't perform any relationship checks, but is provided -to allow you to be explicit about this relationship if you know it -exists.

    • -

    relationship doesn't handle cases where there are zero matches. For that, -see unmatched.

    - - -
    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - - -
    reporttype
    -

    character: One of "factor", "character" or "numeric". Default is -"factor". If "numeric", the reporting variable will contain numeric -codes of the source and the contents of each observation in the joined -table. See below for more information.

    - - -
    roll
    -

    double: to be implemented

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    sort
    -

    logical: If TRUE, sort by key variables in by. Default is -FALSE.

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    ...
    -

    Arguments passed on to joyn

    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    allow.cartesian
    -

    logical: Check documentation in official web site. -Default is NULL, which implies that if the join is "1:1" it will be -FALSE, but if the join has any "m" on it, it will be converted to TRUE. -By specifying TRUE or FALSE you force the behavior of the join.

    - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - -
    yvars
    -

    [Superseded]: use now y_vars_to_keep

    - -
    keep_y_in_x
    -

    [Superseded]: use now -keep_common_vars

    - -
    msg_type
    -

    character: type of messages to display by default

    - -
    na.last
    -

    logical. If TRUE, missing values in the data are placed last; if FALSE, they are placed first; if NA they are removed. -na.last=NA is valid only for x[order(., na.last)] and its -default is TRUE. setorder and setorderv only accept -TRUE/FALSE with default FALSE.

    - - -
    - -
    -
    -

    Value

    - - -

    A data frame of the same class as x. The properties of the output -are as close as possible to the ones returned by the dplyr alternative.

    -
    -
    -

    See also

    -

    Other dplyr alternatives: -anti_join(), -inner_join(), -left_join(), -right_join()

    -
    - -
    -

    Examples

    -
    # Simple full join
    -library(data.table)
    -
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -full_join(x1, y1, relationship = "many-to-one")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   33.3%
    -#> 2     y 1   16.7%
    -#> 3 x & y 3     50%
    -#> 4 total 6    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:     4    NA    NA    16      y
    -#> 6:    NA    NA    15    NA      x
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/get_joyn_options.html b/docs/reference/get_joyn_options.html deleted file mode 100644 index f9c19007..00000000 --- a/docs/reference/get_joyn_options.html +++ /dev/null @@ -1,134 +0,0 @@ - -Get joyn options — get_joyn_options • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This function aims to display and store info on joyn options

    -
    - -
    -

    Usage

    -
    get_joyn_options(env = .joynenv, display = TRUE, option = NULL)
    -
    - -
    -

    Arguments

    -
    env
    -

    environment, which is joyn environment by default

    - - -
    display
    -

    logical, if TRUE displays (i.e., print) info on joyn options and -corresponding default and current values

    - - -
    option
    -

    character or NULL. If character, name of a specific joyn option. -If NULL, all joyn options

    - -
    -
    -

    Value

    - - -

    joyn options and values invisibly as a list

    -
    -
    -

    See also

    -

    JOYn options functions -set_joyn_options()

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -
    -# display all joyn options, their default and current values
    -joyn:::get_joyn_options()
    -
    -# store list of option = value pairs AND do not display info
    -joyn_options <- joyn:::get_joyn_options(display = FALSE)
    -
    -# get info on one specific option and store it
    -joyn.verbose <- joyn:::get_joyn_options(option = "joyn.verbose")
    -
    -# get info on two specific option
    -joyn:::get_joyn_options(option = c("joyn.verbose", "joyn.reportvar"))
    -
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/index.html b/docs/reference/index.html deleted file mode 100644 index 2df49c25..00000000 --- a/docs/reference/index.html +++ /dev/null @@ -1,234 +0,0 @@ - -Function reference • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Main function

    - -

    Since the objective of joyn is to join tables with joy, there is only one main function in this package

    - - -
    - - - - -
    - - joyn() -
    -
    Join two tables
    -
    -

    Dplyr-joins

    - - - - -
    - - - - -
    - - full_join() -
    -
    Full join two data frames
    -
    - - right_join() -
    -
    Right join two data frames
    -
    - - left_join() -
    -
    Left join two data frames
    -
    - - inner_join() -
    -
    Inner join two data frames
    -
    - - anti_join() -
    -
    Anti join on two data frames
    -
    -

    Merge data tables

    - - - - -
    - - - - -
    - - merge() -
    -
    Merge two data frames
    -
    -

    Auxiliary functions

    - - - - -
    - -

    Information

    - - - -
    - - - - -
    - - joyn_msg() -
    -
    display type of joyn message
    -
    - - joyn_report() -
    -
    Print JOYn report table
    -
    - -

    JOYn options

    - - - -
    - - - - -
    - - get_joyn_options() -
    -
    Get joyn options
    -
    - - set_joyn_options() -
    -
    Set joyn options
    -
    - -

    MISC

    - - - -
    - - - - -
    - - is_id() -
    -
    Check if dt is uniquely identified by by variable
    -
    - - freq_table() -
    -
    Tabulate simple frequencies
    -
    - - possible_ids() -
    -
    Find possible unique identifies of data frame
    -
    - - is_balanced() -
    -
    Is data frame balanced by group?
    -
    - - rename_to_valid() -
    -
    Rename to syntactically valid names
    -
    - - joyn_msg() -
    -
    display type of joyn message
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/inner_join.html b/docs/reference/inner_join.html deleted file mode 100644 index 813e5454..00000000 --- a/docs/reference/inner_join.html +++ /dev/null @@ -1,355 +0,0 @@ - -Inner join two data frames — inner_join • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is a joyn wrapper that works in a similar fashion to -dplyr::inner_join

    -
    - -
    -

    Usage

    -
    inner_join(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  copy = FALSE,
    -  suffix = c(".x", ".y"),
    -  keep = NULL,
    -  na_matches = c("na", "never"),
    -  multiple = "all",
    -  unmatched = "drop",
    -  relationship = "one-to-one",
    -  y_vars_to_keep = TRUE,
    -  update_values = FALSE,
    -  update_NAs = update_values,
    -  reportvar = getOption("joyn.reportvar"),
    -  reporttype = c("factor", "character", "numeric"),
    -  roll = NULL,
    -  keep_common_vars = FALSE,
    -  sort = TRUE,
    -  verbose = getOption("joyn.verbose"),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - - -
    by
    -

    a character vector of variables to join by. If NULL, the default, -joyn will do a natural join, using all variables with common names across -the two tables. A message lists the variables so that you can check they're -correct (to suppress the message, simply explicitly list the variables that -you want to join). To join by different variables on x and y use a vector -of expressions. For example, by = c("a = b", "z") will use "a" in x, -"b" in y, and "z" in both tables.

    - - -
    copy
    -

    If x and y are not from the same data source, -and copy is TRUE, then y will be copied into the -same src as x. This allows you to join tables across srcs, but -it is a potentially expensive operation so you must opt into it.

    - - -
    suffix
    -

    If there are non-joined duplicate variables in x and -y, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.

    - - -
    keep
    -

    Should the join keys from both x and y be preserved in the -output?

    • If NULL, the default, joins on equality retain only the keys from x, -while joins on inequality retain the keys from both inputs.

    • -
    • If TRUE, all keys from both inputs are retained.

    • -
    • If FALSE, only keys from x are retained. For right and full joins, -the data in key columns corresponding to rows that only exist in y are -merged into the key columns from x. Can't be used when joining on -inequality conditions.

    • -
    - - -
    na_matches
    -

    Should two NA or two NaN values match?

    • "na", the default, treats two NA or two NaN values as equal, like -%in%, match(), and merge().

    • -
    • "never" treats two NA or two NaN values as different, and will -never match them together or to any other values. This is similar to joins -for database sources and to base::merge(incomparables = NA).

    • -
    - - -
    multiple
    -

    Handling of rows in x with multiple matches in y. -For each row of x:

    • "all", the default, returns every match detected in y. This is the -same behavior as SQL.

    • -
    • "any" returns one match detected in y, with no guarantees on which -match will be returned. It is often faster than "first" and "last" -if you just need to detect if there is at least one match.

    • -
    • "first" returns the first match detected in y.

    • -
    • "last" returns the last match detected in y.

    • -
    - - -
    unmatched
    -

    How should unmatched keys that would result in dropped rows -be handled?

    • "drop" drops unmatched keys from the result.

    • -
    • "error" throws an error if unmatched keys are detected.

    • -

    unmatched is intended to protect you from accidentally dropping rows -during a join. It only checks for unmatched keys in the input that could -potentially drop rows.

    • For left joins, it checks y.

    • -
    • For right joins, it checks x.

    • -
    • For inner joins, it checks both x and y. In this case, unmatched is -also allowed to be a character vector of length 2 to specify the behavior -for x and y independently.

    • -
    - - -
    relationship
    -

    Handling of the expected relationship between the keys of -x and y. If the expectations chosen from the list below are -invalidated, an error is thrown.

    • NULL, the default, doesn't expect there to be any relationship between -x and y. However, for equality joins it will check for a many-to-many -relationship (which is typically unexpected) and will warn if one occurs, -encouraging you to either take a closer look at your inputs or make this -relationship explicit by specifying "many-to-many".

      -

      See the Many-to-many relationships section for more details.

    • -
    • "one-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "one-to-many" expects:

      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "many-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
    • -
    • "many-to-many" doesn't perform any relationship checks, but is provided -to allow you to be explicit about this relationship if you know it -exists.

    • -

    relationship doesn't handle cases where there are zero matches. For that, -see unmatched.

    - - -
    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - - -
    reporttype
    -

    character: One of "factor", "character" or "numeric". Default is -"factor". If "numeric", the reporting variable will contain numeric -codes of the source and the contents of each observation in the joined -table. See below for more information.

    - - -
    roll
    -

    double: to be implemented

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    sort
    -

    logical: If TRUE, sort by key variables in by. Default is -FALSE.

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    ...
    -

    Arguments passed on to joyn

    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    allow.cartesian
    -

    logical: Check documentation in official web site. -Default is NULL, which implies that if the join is "1:1" it will be -FALSE, but if the join has any "m" on it, it will be converted to TRUE. -By specifying TRUE or FALSE you force the behavior of the join.

    - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - -
    yvars
    -

    [Superseded]: use now y_vars_to_keep

    - -
    keep_y_in_x
    -

    [Superseded]: use now -keep_common_vars

    - -
    msg_type
    -

    character: type of messages to display by default

    - -
    na.last
    -

    logical. If TRUE, missing values in the data are placed last; if FALSE, they are placed first; if NA they are removed. -na.last=NA is valid only for x[order(., na.last)] and its -default is TRUE. setorder and setorderv only accept -TRUE/FALSE with default FALSE.

    - - -
    - -
    -
    -

    Value

    - - -

    A data frame of the same class as x. The properties of the output -are as close as possible to the ones returned by the dplyr alternative.

    -
    -
    -

    See also

    -

    Other dplyr alternatives: -anti_join(), -full_join(), -left_join(), -right_join()

    -
    - -
    -

    Examples

    -
    # Simple inner join
    -library(data.table)
    -
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -inner_join(x1, y1, relationship = "many-to-one")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   66.7%
    -#> 2     y 1   33.3%
    -#> 3 total 3    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/is_balanced.html b/docs/reference/is_balanced.html deleted file mode 100644 index 2b03d4b0..00000000 --- a/docs/reference/is_balanced.html +++ /dev/null @@ -1,127 +0,0 @@ - -Is data frame balanced by group? — is_balanced • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Check if the data frame is balanced by group of columns, i.e., if it contains every combination of the elements in the specified variables

    -
    - -
    -

    Usage

    -
    is_balanced(df, by, return = c("logic", "table"))
    -
    - -
    -

    Arguments

    -
    df
    -

    data frame

    - - -
    by
    -

    character: variables used to check if df is balanced

    - - -
    return
    -

    character: either "logic" or "table". If "logic", returns TRUE -or FALSE depending on whether data frame is balanced. If "table" returns the unbalanced -observations - i.e. the combinations of elements in specified variables not found in input df

    - -
    -
    -

    Value

    - - -

    logical, if return == "logic", else returns data frame of unbalanced observations

    -
    - -
    -

    Examples

    -
    x1 = data.frame(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -is_balanced(df = x1,
    -            by = c("id", "t"),
    -            return = "table") # returns combination of elements in "id" and "t" not present in df
    -#>   id t
    -#> 1  3 1
    -#> 2  2 2
    -is_balanced(df = x1,
    -            by = c("id", "t"),
    -            return = "logic") # FALSE
    -#> [1] FALSE
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/is_id.html b/docs/reference/is_id.html deleted file mode 100644 index ff8b539b..00000000 --- a/docs/reference/is_id.html +++ /dev/null @@ -1,161 +0,0 @@ - -Check if dt is uniquely identified by by variable — is_id • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Report whether dt is uniquely identified by the by variable or, if return_report = TRUE, the duplicates in the by variable

    -
    - -
    -

    Usage

    -
    is_id(dt, by, verbose = getOption("joyn.verbose"), return_report = FALSE)
    -
    - -
    -

    Arguments

    -
    dt
    -

    either right or left table

    - - -
    by
    -

    variable to merge by

    - - -
    verbose
    -

    logical: if TRUE messages will be displayed

    - - -
    return_report
    -

    logical: if TRUE, returns data with summary of duplicates. -If FALSE, returns logical value depending on whether dt is uniquely identified -by by

    - -
    -
    -

    Value

    - - -

    logical or data.frame, depending on the value of argument return_report

    - - -
    - -
    -

    Examples

    -
    library(data.table)
    -
    -# example with data frame not uniquely identified by `by` var
    -
    -y <- data.table(id = c("c","b", "c", "a"),
    -                 y  = c(11L, 15L, 18L, 20L))
    -is_id(y, by = "id")
    -#> 
    -#> ── Duplicates in terms of `id` 
    -#>   copies n percent
    -#> 1      1 2   66.7%
    -#> 2      2 1   33.3%
    -#> 3  total 3    100%
    -#> ─────────────────────────────────────────────────────── End of is_id() report ──
    -#> [1] FALSE
    -is_id(y, by = "id", return_report = TRUE)
    -#> 
    -#> ── Duplicates in terms of `id` 
    -#>   copies n percent
    -#> 1      1 2   66.7%
    -#> 2      2 1   33.3%
    -#> 3  total 3    100%
    -#> ─────────────────────────────────────────────────────── End of is_id() report ──
    -#>        id copies
    -#>    <char>  <int>
    -#> 1:      c      2
    -#> 2:      b      1
    -#> 3:      a      1
    -
    -# example with data frame uniquely identified by `by` var
    -
    -y1 <- data.table(id = c("1","3", "2", "9"),
    -                 y  = c(11L, 15L, 18L, 20L))
    -is_id(y1, by = "id")
    -#> 
    -#> ── Duplicates in terms of `id` 
    -#>   copies n percent
    -#> 1      1 4    100%
    -#> 2  total 4    100%
    -#> ─────────────────────────────────────────────────────── End of is_id() report ──
    -#> [1] TRUE
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/is_match_type_error.html b/docs/reference/is_match_type_error.html deleted file mode 100644 index fb7d86c0..00000000 --- a/docs/reference/is_match_type_error.html +++ /dev/null @@ -1,121 +0,0 @@ - -Confirm if match type error — is_match_type_error • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Confirm if match type error

    -
    - -
    -

    Usage

    -
    is_match_type_error(x, name, by, verbose, match_type_error)
    -
    - -
    -

    Arguments

    -
    name
    -

    name of data frame

    - - -
    by
    -

    A vector of shared column names in x and y to merge on. -This defaults to the shared key columns between the two tables. -If y has no key columns, this defaults to the key of x.

    - - -
    match_type_error
    -

    logical: from existing code

    - -
    -
    -

    Value

    - - -

    logical

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# example with dt not uniquely identified by "id"
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -joyn:::is_match_type_error(x1, name = "x1", by = "id")
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/is_valid_m_key.html b/docs/reference/is_valid_m_key.html deleted file mode 100644 index 1bea7711..00000000 --- a/docs/reference/is_valid_m_key.html +++ /dev/null @@ -1,121 +0,0 @@ - -Check whether specified "many" relationship is valid — is_valid_m_key • joyn - Skip to contents - - -
    -
    -
    - -
    -

    When "many" relationship is specified, check if it is valid.
    (Specified many relationship not valid if the dt is instead uniquely identified by specified keys)

    -
    - -
    -

    Usage

    -
    is_valid_m_key(dt, by)
    -
    - -
    -

    Arguments

    -
    dt
    -

    data object

    - - -
    by
    -

    character vector: specified keys, already fixed

    - -
    -
    -

    Value

    - - -

    logical: TRUE if valid, FALSE if uniquely identified

    -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# example with data frame uniquely identified by specified `by` vars
    -x1 = data.frame(id  = c(1L, 1L, 2L, 3L, NA_integer_),
    -                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                 x  = 11:15)
    -
    -joyn:::is_valid_m_key(x1, by = c("id", "t"))
    -# example with valid specified "many" relationship
    -x2 = data.frame(id  = c(1L, 1L, 1L, 3L, NA_integer_),
    -                 t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                 x  = 11:15)
    -joyn:::is_valid_m_key(x2, by = c("id", "t"))
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn-package.html b/docs/reference/joyn-package.html deleted file mode 100644 index dd7c0559..00000000 --- a/docs/reference/joyn-package.html +++ /dev/null @@ -1,97 +0,0 @@ - -joyn: Tool for Diagnosis of Tables Joins and Complementary Join Features — joyn-package • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Tool for diagnosing table joins. It combines the speed of `collapse` and `data.table`, the flexibility of `dplyr`, and the diagnosis and features of the `merge` command in `Stata`.

    -
    - - - -
    -

    Author

    -

    Maintainer: R.Andres Castaneda acastanedaa@worldbank.org

    -

    Authors:

    - -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn.html b/docs/reference/joyn.html deleted file mode 100644 index 918fa763..00000000 --- a/docs/reference/joyn.html +++ /dev/null @@ -1,427 +0,0 @@ - -Join two tables — joyn • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is the primary function in the joyn package. It executes a full join, -performs a number of checks, and filters to allow the user-specified join.

    -
    - -
    -

    Usage

    -
    joyn(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  match_type = c("1:1", "1:m", "m:1", "m:m"),
    -  keep = c("full", "left", "master", "right", "using", "inner", "anti"),
    -  y_vars_to_keep = ifelse(keep == "anti", FALSE, TRUE),
    -  update_values = FALSE,
    -  update_NAs = update_values,
    -  reportvar = getOption("joyn.reportvar"),
    -  reporttype = c("factor", "character", "numeric"),
    -  roll = NULL,
    -  keep_common_vars = FALSE,
    -  sort = FALSE,
    -  verbose = getOption("joyn.verbose"),
    -  suffixes = getOption("joyn.suffixes"),
    -  allow.cartesian = deprecated(),
    -  yvars = deprecated(),
    -  keep_y_in_x = deprecated(),
    -  na.last = getOption("joyn.na.last"),
    -  msg_type = getOption("joyn.msg_type")
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - - -
    by
    -

    a character vector of variables to join by. If NULL, the default, -joyn will do a natural join, using all variables with common names across -the two tables. A message lists the variables so that you can check they're -correct (to suppress the message, simply explicitly list the variables that -you want to join). To join by different variables on x and y use a vector -of expressions. For example, by = c("a = b", "z") will use "a" in x, -"b" in y, and "z" in both tables.

    - - -
    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - - -
    keep
    -

    atomic character vector of length 1: One of "full", "left", -"master", "right", -"using", "inner". Default is "full". Even though this is not the -regular behavior of joins in R, the objective of joyn is to present a -diagnosis of the join which requires a full join. That is why the default -is a full join. Yet, if "left" or "master", it keeps the observations -that matched in both tables and the ones that did not match in x. The ones -in y will be discarded. If "right" or "using", it keeps the -observations that matched in both tables and the ones that did not match in -y. The ones in x will be discarded. If "inner", it only keeps the -observations that matched both tables. Note that if, for example, keep = "left", the joyn() function still executes a full join under the hood and then filters the rows so that the output table is a left join. This behaviour, while inefficient, allows all the diagnostics and checks conducted by joyn.

    - - -
    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - - -
    reporttype
    -

    character: One of "factor", "character" or "numeric". Default is -"factor". If "numeric", the reporting variable will contain numeric -codes of the source and the contents of each observation in the joined -table. See below for more information.

    - - -
    roll
    -

    double: to be implemented

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    sort
    -

    logical: If TRUE, sort by key variables in by. Default is -FALSE.

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - - -
    allow.cartesian
    -

    logical: Check documentation in official web site. -Default is NULL, which implies that if the join is "1:1" it will be -FALSE, but if the join has any "m" on it, it will be converted to TRUE. -By specifying TRUE or FALSE you force the behavior of the join.

    - - -
    yvars
    -

    [Superseded]: use now y_vars_to_keep

    - - -
    keep_y_in_x
    -

    [Superseded]: use now -keep_common_vars

    - - -
    na.last
    -

    logical. If TRUE, missing values in the data are placed last; if FALSE, they are placed first; if NA they are removed. -na.last=NA is valid only for x[order(., na.last)] and its -default is TRUE. setorder and setorderv only accept -TRUE/FALSE with default FALSE.

    - - -
    msg_type
    -

    character: type of messages to display by default

    - -
    -
    -

    Value

    - - -

    a data.table joining x and y.

    -
    -
    -

    match types

    - - - -

    Using the same wording of the Stata manual

    -

    1:1: specifies a one-to-one match merge. The variables specified in -by uniquely identify single observations in both tables.

    -

    1:m and m:1: specify one-to-many and many-to-one match merges, -respectively. This means that in one of the tables the observations are -uniquely identified by the variables in by, while in the other table many -(two or more) of the observations are identified by the variables in by.

    -

    m:m refers to a many-to-many merge. The variables in by do not uniquely -identify the observations in either table. Matching is performed by -combining observations with equal values in by; within matching values, -the first observation in the master (i.e. left or x) table is matched with -the first matching observation in the using (i.e. right or y) table; the -second, with the second; and so on. If there is an unequal number of -observations within a group, then the last observation of the shorter group -is used repeatedly to match with subsequent observations of the longer -group.

    -
    -
    -

    reporttype

    - - - -

    If reporttype = "numeric", then the numeric values have the following -meaning:

    -

    1: row comes from x, i.e. "x" 2: row comes from y, i.e. "y" 3: row from -both x and y, i.e. "x & y" 4: row has NA in x that has been updated -with y, i.e. "NA updated" 5: row has a value in x that has been updated -with y, i.e. "value updated" 6: row from x that has not been updated, -i.e. "not updated"

    -
    -
    -

    NAs order

    -

    NAs are placed either first or last in the -resulting data.frame depending on the value of getOption("joyn.na.last"). -The default is FALSE as it is the default value of -data.table::setorderv.

    -
    - -
    -

    Examples

    -
    # Simple join
    -library(data.table)
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -x  = 11:15)
    -
    -y1 = data.table(id = 1:2,
    -                y  = c(11L, 15L))
    -
    -x2 = data.table(id = c(1, 1, 2, 3, NA),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = c(16, 12, NA, NA, 15))
    -
    -y2 = data.table(id = c(1, 2, 5, 6, 3),
    -              yd = c(1, 2, 5, 6, 3),
    -              y  = c(11L, 15L, 20L, 13L, 10L),
    -              x  = c(16:20))
    -joyn(x1, y1, match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     40%
    -#> 2 x & y 3     60%
    -#> 3 total 5    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <int> <int> <int> <int> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:    NA    NA    15    NA      x
    -
    -# Bad merge for not specifying by argument or match_type
    -joyn(x2, y2)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 4   44.4%
    -#> 2     y 4   44.4%
    -#> 3 x & y 1   11.1%
    -#> 4 total 9    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id and x from id, yd, y, and x
    -#>       id     t     x    yd     y  .joyn
    -#>    <num> <int> <num> <num> <int> <fctr>
    -#> 1:     1     1    16     1    11  x & y
    -#> 2:     1     2    12    NA    NA      x
    -#> 3:     2     1    NA    NA    NA      x
    -#> 4:     3     2    NA    NA    NA      x
    -#> 5:    NA    NA    15    NA    NA      x
    -#> 6:     2    NA    17     2    15      y
    -#> 7:     5    NA    18     5    20      y
    -#> 8:     6    NA    19     6    13      y
    -#> 9:     3    NA    20     3    10      y
    -
    -# good merge, ignoring variable x from y
    -joyn(x2, y2, by = "id", match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 1   14.3%
    -#> 2     y 2   28.6%
    -#> 3 x & y 4   57.1%
    -#> 4 total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y  .joyn
    -#>    <num> <int> <num> <num> <int> <fctr>
    -#> 1:     1     1    16     1    11  x & y
    -#> 2:     1     2    12     1    11  x & y
    -#> 3:     2     1    NA     2    15  x & y
    -#> 4:     3     2    NA     3    10  x & y
    -#> 5:    NA    NA    15    NA    NA      x
    -#> 6:     5    NA    NA     5    20      y
    -#> 7:     6    NA    NA     6    13      y
    -
    -# update NAs in x variable from y
    -joyn(x2, y2, by = "id", update_NAs = TRUE, match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>        .joyn n percent
    -#> 1          x 1   14.3%
    -#> 2      x & y 2   28.6%
    -#> 3 NA updated 4   57.1%
    -#> 4      total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y      .joyn
    -#>    <num> <int> <num> <num> <int>     <fctr>
    -#> 1:     1     1    16     1    11      x & y
    -#> 2:     1     2    12     1    11      x & y
    -#> 3:     2     1    17     2    15 NA updated
    -#> 4:     3     2    20     3    10 NA updated
    -#> 5:    NA    NA    15    NA    NA          x
    -#> 6:     5    NA    18     5    20 NA updated
    -#> 7:     6    NA    19     6    13 NA updated
    -
    -# Update values in x with variables from y
    -joyn(x2, y2, by = "id", update_values = TRUE, match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>           .joyn n percent
    -#> 1    NA updated 4   57.1%
    -#> 2 value updated 2   28.6%
    -#> 3   not updated 1   14.3%
    -#> 4         total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id, yd, y, and x
    -#>       id     t     x    yd     y         .joyn
    -#>    <num> <int> <num> <num> <int>        <fctr>
    -#> 1:     1     1    16     1    11 value updated
    -#> 2:     1     2    16     1    11 value updated
    -#> 3:     2     1    17     2    15    NA updated
    -#> 4:     3     2    20     3    10    NA updated
    -#> 5:    NA    NA    15    NA    NA   not updated
    -#> 6:     5    NA    18     5    20    NA updated
    -#> 7:     6    NA    19     6    13    NA updated
    -
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn_msg.html b/docs/reference/joyn_msg.html deleted file mode 100644 index c71b0176..00000000 --- a/docs/reference/joyn_msg.html +++ /dev/null @@ -1,148 +0,0 @@ - -display type of joyn message — joyn_msg • joyn - Skip to contents - - -
    -
    -
    - -
    -

    display type of joyn message

    -
    - -
    -

    Usage

    -
    joyn_msg(msg_type = getOption("joyn.msg_type"), msg = NULL)
    -
    - -
    -

    Arguments

    -
    msg_type
    -

    character: one or more of the following: -all, basic, info, note, warn, timing, or err

    - - -
    msg
    -

    character vector to be passed to cli::cli_abort(). Default is -NULL. It only works if "err" %in% msg_type. This is an internal argument.

    - -
    -
    -

    Value

    - - -

    returns data frame with message invisibly. print message in console

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msgs_exist(), -joyn_report(), -msg_type_dt(), -store_msg(), -style(), -type_choices()

    -
    - -
    -

    Examples

    -
    library(data.table)
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -x  = 11:15)
    -
    -y1 = data.table(id = 1:2,
    -                y  = c(11L, 15L))
    -df <- joyn(x1, y1, match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     40%
    -#> 2 x & y 3     60%
    -#> 3 total 5    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -joyn_msg("basic")
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -joyn_msg("all")
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#> ● Timing:The full joyn is executed in 0.000205 seconds.
    -#> ● Timing: The entire joyn function, including checks, is executed in 0.025927
    -#> seconds.
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn_msgs_exist.html b/docs/reference/joyn_msgs_exist.html deleted file mode 100644 index 17dcd011..00000000 --- a/docs/reference/joyn_msgs_exist.html +++ /dev/null @@ -1,109 +0,0 @@ - -Presence of joyn msgs in the environment — joyn_msgs_exist • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Checks the presence of joyn messages stored in joyn environment

    -
    - -
    -

    Usage

    -
    joyn_msgs_exist()
    -
    - -
    -

    Value

    - - -

    invisible TRUE

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msg(), -joyn_report(), -msg_type_dt(), -store_msg(), -style(), -type_choices()

    -
    - -
    -

    Examples

    - -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn_report.html b/docs/reference/joyn_report.html deleted file mode 100644 index e0742904..00000000 --- a/docs/reference/joyn_report.html +++ /dev/null @@ -1,144 +0,0 @@ - -Print JOYn report table — joyn_report • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Print JOYn report table

    -
    - -
    -

    Usage

    -
    joyn_report(verbose = getOption("joyn.verbose"))
    -
    - -
    -

    Arguments

    -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - -
    -
    -

    Value

    - - -

    invisible table of frequencies

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msg(), -joyn_msgs_exist(), -msg_type_dt(), -store_msg(), -style(), -type_choices()

    -
    - -
    -

    Examples

    -
    library(data.table)
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -x  = 11:15)
    -
    -y1 = data.table(id = 1:2,
    -                y  = c(11L, 15L))
    -
    -d <- joyn(x1, y1, match_type = "m:1")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     40%
    -#> 2 x & y 3     60%
    -#> 3 total 5    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -joyn_report(verbose = TRUE)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     40%
    -#> 2 x & y 3     60%
    -#> 3 total 5    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/joyn_workhorse.html b/docs/reference/joyn_workhorse.html deleted file mode 100644 index 0c448a1d..00000000 --- a/docs/reference/joyn_workhorse.html +++ /dev/null @@ -1,140 +0,0 @@ - -Internal workhorse join function, used in the back-end of joyn — joyn_workhorse • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Always executes a full join.

    -
    - -
    -

    Usage

    -
    joyn_workhorse(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  sort = FALSE,
    -  suffixes = getOption("joyn.suffixes"),
    -  reportvar = getOption("joyn.reportvar")
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data object, "left" or "master"

    - - -
    y
    -

    data object, "right" or "using"

    - - -
    by
    -

    atomic character vector: key specifying join

    - - -
    sort
    -

    logical: sort the result by the columns in by -x and y

    - - -
    suffixes
    -

    atomic character vector: give suffixes to columns common to both

    - -
    -
    -

    Value

    - - -

    data object of same class as x

    - - -
    - -
    -

    Examples

    -
    if (FALSE) {
    -# Full join
    -library(data.table)
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -joyn:::joyn_workhorse(x = x1, y=y1)
    -}
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/left_join.html b/docs/reference/left_join.html deleted file mode 100644 index 50de8b6e..00000000 --- a/docs/reference/left_join.html +++ /dev/null @@ -1,358 +0,0 @@ - -Left join two data frames — left_join • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is a joyn wrapper that works in a similar -fashion to dplyr::left_join

    -
    - -
    -

    Usage

    -
    left_join(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  copy = FALSE,
    -  suffix = c(".x", ".y"),
    -  keep = NULL,
    -  na_matches = c("na", "never"),
    -  multiple = "all",
    -  unmatched = "drop",
    -  relationship = NULL,
    -  y_vars_to_keep = TRUE,
    -  update_values = FALSE,
    -  update_NAs = update_values,
    -  reportvar = getOption("joyn.reportvar"),
    -  reporttype = c("factor", "character", "numeric"),
    -  roll = NULL,
    -  keep_common_vars = FALSE,
    -  sort = TRUE,
    -  verbose = getOption("joyn.verbose"),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - - -
    by
    -

    a character vector of variables to join by. If NULL, the default, -joyn will do a natural join, using all variables with common names across -the two tables. A message lists the variables so that you can check they're -correct (to suppress the message, simply explicitly list the variables that -you want to join). To join by different variables on x and y use a vector -of expressions. For example, by = c("a = b", "z") will use "a" in x, "b" -in y, and "z" in both tables.

    - - -
    copy
    -

    If x and y are not from the same data source, -and copy is TRUE, then y will be copied into the -same src as x. This allows you to join tables across srcs, but -it is a potentially expensive operation so you must opt into it.

    - - -
    suffix
    -

    If there are non-joined duplicate variables in x and -y, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.

    - - -
    keep
    -

    Should the join keys from both x and y be preserved in the -output?

    • If NULL, the default, joins on equality retain only the keys from x, -while joins on inequality retain the keys from both inputs.

    • -
    • If TRUE, all keys from both inputs are retained.

    • -
    • If FALSE, only keys from x are retained. For right and full joins, -the data in key columns corresponding to rows that only exist in y are -merged into the key columns from x. Can't be used when joining on -inequality conditions.

    • -
    - - -
    na_matches
    -

    Should two NA or two NaN values match?

    • "na", the default, treats two NA or two NaN values as equal, like -%in%, match(), and merge().

    • -
    • "never" treats two NA or two NaN values as different, and will -never match them together or to any other values. This is similar to joins -for database sources and to base::merge(incomparables = NA).

    • -
    - - -
    multiple
    -

    Handling of rows in x with multiple matches in y. -For each row of x:

    • "all", the default, returns every match detected in y. This is the -same behavior as SQL.

    • -
    • "any" returns one match detected in y, with no guarantees on which -match will be returned. It is often faster than "first" and "last" -if you just need to detect if there is at least one match.

    • -
    • "first" returns the first match detected in y.

    • -
    • "last" returns the last match detected in y.

    • -
    - - -
    unmatched
    -

    How should unmatched keys that would result in dropped rows -be handled?

    • "drop" drops unmatched keys from the result.

    • -
    • "error" throws an error if unmatched keys are detected.

    • -

    unmatched is intended to protect you from accidentally dropping rows -during a join. It only checks for unmatched keys in the input that could -potentially drop rows.

    • For left joins, it checks y.

    • -
    • For right joins, it checks x.

    • -
    • For inner joins, it checks both x and y. In this case, unmatched is -also allowed to be a character vector of length 2 to specify the behavior -for x and y independently.

    • -
    - - -
    relationship
    -

    Handling of the expected relationship between the keys of -x and y. If the expectations chosen from the list below are -invalidated, an error is thrown.

    • NULL, the default, doesn't expect there to be any relationship between -x and y. However, for equality joins it will check for a many-to-many -relationship (which is typically unexpected) and will warn if one occurs, -encouraging you to either take a closer look at your inputs or make this -relationship explicit by specifying "many-to-many".

      -

      See the Many-to-many relationships section for more details.

    • -
    • "one-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "one-to-many" expects:

      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "many-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
    • -
    • "many-to-many" doesn't perform any relationship checks, but is provided -to allow you to be explicit about this relationship if you know it -exists.

    • -

    relationship doesn't handle cases where there are zero matches. For that, -see unmatched.

    - - -
    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - - -
    reporttype
    -

    character: One of "character" or "numeric". Default is -"character". If "numeric", the reporting variable will contain numeric -codes of the source and the contents of each observation in the joined -table. See below for more information.

    - - -
    roll
    -

    double: to be implemented

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    sort
    -

    logical: If TRUE, sort by key variables in by. Default is -FALSE.

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    ...
    -

    Arguments passed on to joyn

    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    allow.cartesian
    -

    logical: Check documentation in official web site. -Default is NULL, which implies that if the join is "1:1" it will be -FALSE, but if the join has any "m" on it, it will be converted to TRUE. -By specifying TRUE or FALSE you force the behavior of the join.

    - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - -
    yvars
    -

    [Superseded]: use now y_vars_to_keep

    - -
    keep_y_in_x
    -

    [Superseded]: use now -keep_common_vars

    - -
    msg_type
    -

    character: type of messages to display by default

    - -
    na.last
    -

    logical. If TRUE, missing values in the data are placed last; if FALSE, they are placed first; if NA they are removed. -na.last=NA is valid only for x[order(., na.last)] and its -default is TRUE. setorder and setorderv only accept -TRUE/FALSE with default FALSE.

    - - -
    - -
    -
    -

    Value

    - - -

    A data frame of the same class as x. The properties of the output -are as close as possible to the ones returned by the dplyr alternative.

    -
    -
    -

    See also

    -

    Other dplyr alternatives: -anti_join(), -full_join(), -inner_join(), -right_join()

    -
    - -
    -

    Examples

    -
    # Simple left join
    -library(data.table)
    -
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -left_join(x1, y1, relationship = "many-to-one")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     40%
    -#> 2     y 1     20%
    -#> 3 x & y 2     40%
    -#> 4 total 5    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     3     2    14    NA      x
    -#> 5:    NA    NA    15    NA      x
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/merge.html b/docs/reference/merge.html deleted file mode 100644 index 6c8b7820..00000000 --- a/docs/reference/merge.html +++ /dev/null @@ -1,290 +0,0 @@ - -Merge two data frames — merge • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is a joyn wrapper that works in a similar fashion to base::merge and -data.table::merge, which is why merge masks the other two.

    -
    - -
    -

    Usage

    -
    merge(
    -  x,
    -  y,
    -  by = NULL,
    -  by.x = NULL,
    -  by.y = NULL,
    -  all = FALSE,
    -  all.x = all,
    -  all.y = all,
    -  sort = TRUE,
    -  suffixes = c(".x", ".y"),
    -  no.dups = TRUE,
    -  allow.cartesian = getOption("datatable.allow.cartesian"),
    -  match_type = c("m:m", "m:1", "1:m", "1:1"),
    -  keep_common_vars = TRUE,
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    x, y
    -

    data tables. y is coerced to a data.table if -it isn't one already.

    - - -
    by
    -

    A vector of shared column names in x and y to merge on. -This defaults to the shared key columns between the two tables. -If y has no key columns, this defaults to the key of x.

    - - -
    by.x, by.y
    -

    Vectors of column names in x and y to merge on.

    - - -
    all
    -

    logical; all = TRUE is shorthand to save setting both -all.x = TRUE and all.y = TRUE.

    - - -
    all.x
    -

    logical; if TRUE, rows from x which have no matching row -in y are included. These rows will have 'NA's in the columns that are usually -filled with values from y. The default is FALSE so that only rows with -data from both x and y are included in the output.

    - - -
    all.y
    -

    logical; analogous to all.x above.

    - - -
    sort
    -

    logical. If TRUE (default), the rows of the merged -data.table are sorted by setting the key to the by / by.x columns. If -FALSE, unlike base R's merge for which row order is unspecified, the -row order in x is retained (including retaining the position of missing entries when -all.x=TRUE), followed by y rows that don't match x (when all.y=TRUE) -retaining the order those appear in y.

    - - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for -making non-by column names unique. The suffix behaviour works in a similar -fashion as the merge.data.frame method does.

    - - -
    no.dups
    -

    logical indicating that suffixes are also appended to -non-by.y column names in y when they have the same column name -as any by.x.

    - - -
    allow.cartesian
    -

    See allow.cartesian in [.data.table.

    - - -
    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    ...
    -

    Arguments passed on to joyn

    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    - -
    -
    -

    Value

    - - -

    data.table merging x and y

    -
    - -
    -

    Examples

    -
    x1 = data.frame(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.frame(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -joyn::merge(x1, y1, by = "id")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2   66.7%
    -#> 2     y 1   33.3%
    -#> 3 total 3    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#> ⚠ Warning:  Supplied both by and by.x/by.y. by argument will be ignored.
    -#> ⚠ Warning: The keys supplied uniquely identify y, therefore a m:1 join is
    -#> executed
    -#>   id t  x  y .joyn
    -#> 1  1 1 11 11 x & y
    -#> 2  1 2 12 11 x & y
    -#> 3  2 1 13 15 x & y
    -# example of using by.x and by.y
    -x2 = data.frame(id1 = c(1, 1, 2, 3, 3),
    -                id2 = c(1, 1, 2, 3, 4),
    -                t   = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x   = c(16, 12, NA, NA, 15))
    -y2 = data.frame(id  = c(1, 2, 5, 6, 3),
    -                id2 = c(1, 1, 2, 3, 4),
    -                y   = c(11L, 15L, 20L, 13L, 10L),
    -                x   = c(16:20))
    -jn <- joyn::merge(x2,
    -            y2,
    -            match_type = "m:m",
    -            all.x = TRUE,
    -            by.x = "id1",
    -            by.y = "id2")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     y 1   14.3%
    -#> 2 x & y 6   85.7%
    -#> 3 total 7    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables keyby1 from id, keyby1, y, and x
    -#> ⚠ Warning:  Supplied both by and by.x/by.y. by argument will be ignored.
    -# example with all = TRUE
    -jn <- joyn::merge(x2,
    -            y2,
    -            match_type = "m:m",
    -            by.x = "id1",
    -            by.y = "id2",
    -            all = TRUE)
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     y 1   12.5%
    -#> 2 x & y 7   87.5%
    -#> 3 total 8    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables keyby1 from id, keyby1, y, and x
    -#> ⚠ Warning:  Supplied both by and by.x/by.y. by argument will be ignored.
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/msg_type_dt.html b/docs/reference/msg_type_dt.html deleted file mode 100644 index 585aa698..00000000 --- a/docs/reference/msg_type_dt.html +++ /dev/null @@ -1,105 +0,0 @@ - -convert style of joyn message to data frame containing type and message — msg_type_dt • joyn - Skip to contents - - -
    -
    -
    - -
    -

    convert style of joyn message to data frame containing type and message

    -
    - -
    -

    Usage

    -
    msg_type_dt(type, ...)
    -
    - -
    -

    Value

    - - -

    data frame with two variables, type and msg

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msg(), -joyn_msgs_exist(), -joyn_report(), -store_msg(), -style(), -type_choices()

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/possible_ids.html b/docs/reference/possible_ids.html deleted file mode 100644 index 83597aa4..00000000 --- a/docs/reference/possible_ids.html +++ /dev/null @@ -1,148 +0,0 @@ - -Find possible unique identifies of data frame — possible_ids • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Identify possible variables uniquely identifying x

    -
    - -
    -

    Usage

    -
    possible_ids(
    -  dt,
    -  exclude = NULL,
    -  include = NULL,
    -  verbose = getOption("possible_ids.verbose")
    -)
    -
    - -
    -

    Arguments

    -
    dt
    -

    data frame

    - - -
    exclude
    -

    character: Exclude variables to be selected as identifiers. It -could be either the name of the variables or one type of the variable -prefixed by "_". For instance, "_numeric" or "_character".

    - - -
    include
    -

    character: Name of variable to be included, that might belong -to the group excluded in the exclude argument

    - - -
    verbose
    -

    logical: If FALSE no message will be displayed. Default is -TRUE

    - -
    -
    -

    Value

    - - -

    list with possible identifiers

    -
    - -
    -

    Examples

    -
    library(data.table)
    -x4 = data.table(id1 = c(1, 1, 2, 3, 3),
    -                id2 = c(1, 1, 2, 3, 4),
    -                t   = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x   = c(16, 12, NA, NA, 15))
    -possible_ids(x4)
    -#>  There are no duplicates in data frame
    -#> → we found 5 possible ids
    -#> $V1
    -#> [1] "id1" "t"  
    -#> 
    -#> $V2
    -#> [1] "id1" "x"  
    -#> 
    -#> $V3
    -#> [1] "id2" "t"  
    -#> 
    -#> $V4
    -#> [1] "id2" "x"  
    -#> 
    -#> $V5
    -#> [1] "t" "x"
    -#> 
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/process_by_vector.html b/docs/reference/process_by_vector.html deleted file mode 100644 index 4569f154..00000000 --- a/docs/reference/process_by_vector.html +++ /dev/null @@ -1,115 +0,0 @@ - -Process the by vector — process_by_vector • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Gives as output a vector of names to be used for the specified -table that correspond to the by argument for that table

    -
    - -
    -

    Usage

    -
    process_by_vector(by, input = c("left", "right"))
    -
    - -
    -

    Arguments

    -
    by
    -

    character vector: by argument for join

    - - -
    input
    -

    character: either "left" or "right", indicating -whether to give the left or right side of the equals ("=") if -the equals is part of the by vector

    - -
    -
    -

    Value

    - - -

    character vector

    -
    - -
    -

    Examples

    -
    joyn:::process_by_vector(by = c("An = foo", "example"), input = "left")
    -#> [1] "An"      "example"
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/rename_to_valid.html b/docs/reference/rename_to_valid.html deleted file mode 100644 index cb8a1b01..00000000 --- a/docs/reference/rename_to_valid.html +++ /dev/null @@ -1,112 +0,0 @@ - -Rename to syntactically valid names — rename_to_valid • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Rename to syntactically valid names

    -
    - -
    -

    Usage

    -
    rename_to_valid(name, verbose = getOption("joyn.verbose"))
    -
    - -
    -

    Arguments

    -
    name
    -

    character: name to be coerced to syntactically valid name

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - -
    -
    -

    Value

    - - -

    valid character name

    -
    - -
    -

    Examples

    -
    joyn:::rename_to_valid("x y")
    -#>  name x y is an invalid variable name. It will be changed to x.y
    -#> [1] "x.y"
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/right_join.html b/docs/reference/right_join.html deleted file mode 100644 index 8984fbd7..00000000 --- a/docs/reference/right_join.html +++ /dev/null @@ -1,357 +0,0 @@ - -Right join two data frames — right_join • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is a joyn wrapper that works in a similar -fashion to dplyr::right_join

    -
    - -
    -

    Usage

    -
    right_join(
    -  x,
    -  y,
    -  by = intersect(names(x), names(y)),
    -  copy = FALSE,
    -  suffix = c(".x", ".y"),
    -  keep = NULL,
    -  na_matches = c("na", "never"),
    -  multiple = "all",
    -  unmatched = "drop",
    -  relationship = "one-to-one",
    -  y_vars_to_keep = TRUE,
    -  update_values = FALSE,
    -  update_NAs = update_values,
    -  reportvar = getOption("joyn.reportvar"),
    -  reporttype = c("factor", "character", "numeric"),
    -  roll = NULL,
    -  keep_common_vars = FALSE,
    -  sort = TRUE,
    -  verbose = getOption("joyn.verbose"),
    -  ...
    -)
    -
    - -
    -

    Arguments

    -
    x
    -

    data frame: referred to as left in R terminology, or master in -Stata terminology.

    - - -
    y
    -

    data frame: referred to as right in R terminology, or using in -Stata terminology.

    - - -
    by
    -

    a character vector of variables to join by. If NULL, the default, -joyn will do a natural join, using all variables with common names across -the two tables. A message lists the variables so that you can check they're -correct (to suppress the message, simply explicitly list the variables that -you want to join). To join by different variables on x and y use a vector -of expressions. For example, by = c("a = b", "z") will use "a" in x, "b" -in y, and "z" in both tables.

    - - -
    copy
    -

    If x and y are not from the same data source, -and copy is TRUE, then y will be copied into the -same src as x. This allows you to join tables across srcs, but -it is a potentially expensive operation so you must opt into it.

    - - -
    suffix
    -

    If there are non-joined duplicate variables in x and -y, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.

    - - -
    keep
    -

    Should the join keys from both x and y be preserved in the -output?

    • If NULL, the default, joins on equality retain only the keys from x, -while joins on inequality retain the keys from both inputs.

    • -
    • If TRUE, all keys from both inputs are retained.

    • -
    • If FALSE, only keys from x are retained. For right and full joins, -the data in key columns corresponding to rows that only exist in y are -merged into the key columns from x. Can't be used when joining on -inequality conditions.

    • -
    - - -
    na_matches
    -

    Should two NA or two NaN values match?

    • "na", the default, treats two NA or two NaN values as equal, like -%in%, match(), and merge().

    • -
    • "never" treats two NA or two NaN values as different, and will -never match them together or to any other values. This is similar to joins -for database sources and to base::merge(incomparables = NA).

    • -
    - - -
    multiple
    -

    Handling of rows in x with multiple matches in y. -For each row of x:

    • "all", the default, returns every match detected in y. This is the -same behavior as SQL.

    • -
    • "any" returns one match detected in y, with no guarantees on which -match will be returned. It is often faster than "first" and "last" -if you just need to detect if there is at least one match.

    • -
    • "first" returns the first match detected in y.

    • -
    • "last" returns the last match detected in y.

    • -
    - - -
    unmatched
    -

    How should unmatched keys that would result in dropped rows -be handled?

    • "drop" drops unmatched keys from the result.

    • -
    • "error" throws an error if unmatched keys are detected.

    • -

    unmatched is intended to protect you from accidentally dropping rows -during a join. It only checks for unmatched keys in the input that could -potentially drop rows.

    • For left joins, it checks y.

    • -
    • For right joins, it checks x.

    • -
    • For inner joins, it checks both x and y. In this case, unmatched is -also allowed to be a character vector of length 2 to specify the behavior -for x and y independently.

    • -
    - - -
    relationship
    -

    Handling of the expected relationship between the keys of -x and y. If the expectations chosen from the list below are -invalidated, an error is thrown.

    • NULL, the default, doesn't expect there to be any relationship between -x and y. However, for equality joins it will check for a many-to-many -relationship (which is typically unexpected) and will warn if one occurs, -encouraging you to either take a closer look at your inputs or make this -relationship explicit by specifying "many-to-many".

      -

      See the Many-to-many relationships section for more details.

    • -
    • "one-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "one-to-many" expects:

      • Each row in y matches at most 1 row in x.

      • -
    • -
    • "many-to-one" expects:

      • Each row in x matches at most 1 row in y.

      • -
    • -
    • "many-to-many" doesn't perform any relationship checks, but is provided -to allow you to be explicit about this relationship if you know it -exists.

    • -

    relationship doesn't handle cases where there are zero matches. For that, -see unmatched.

    - - -
    y_vars_to_keep
    -

    character: Vector of variable names in y that will be -kept after the merge. If TRUE (the default), it brings all -the variables in y into x. If FALSE or NULL, it does not bring any variable -into x, but a report will be generated.

    - - -
    update_values
    -

    logical: If TRUE, it will update all values of variables -in x with the actual values of variables in y with the same name as the ones in x. -NAs from y won't be used to update actual values in x. Yet, by default, -NAs in x will be updated with values in y. To avoid this, make sure to set -update_NAs = FALSE

    - - -
    update_NAs
    -

    logical: If TRUE, it will update NA values of all variables -in x with actual values of variables in y that have the same name as the -ones in x. If FALSE, NA values won't be updated, even if update_values is -TRUE

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be displayed after concluding.

    - - -
    reporttype
    -

    character: One of "character" or "numeric". Default is -"character". If "numeric", the reporting variable will contain numeric -codes of the source and the contents of each observation in the joined -table. See below for more information.

    - - -
    roll
    -

    double: to be implemented

    - - -
    keep_common_vars
    -

    logical: If TRUE, it will keep the original variable -from y when both tables have common variable names. Thus, the prefix "y." -will be added to the original name to distinguish from the resulting -variable in the joined table.

    - - -
    sort
    -

    logical: If TRUE, sort by key variables in by. Default is -FALSE.

    - - -
    verbose
    -

    logical: if FALSE, it won't display any message (programmer's -option). Default is TRUE.

    - - -
    ...
    -

    Arguments passed on to joyn

    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    allow.cartesian
    -

    logical: Check documentation in official web site. -Default is NULL, which implies that if the join is "1:1" it will be -FALSE, but if the join has any "m" on it, it will be converted to TRUE. -By specifying TRUE or FALSE you force the behavior of the join.

    - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - -
    yvars
    -

    [Superseded]: use now y_vars_to_keep

    - -
    keep_y_in_x
    -

    [Superseded]: use now -keep_common_vars

    - -
    msg_type
    -

    character: type of messages to display by default

    - -
    na.last
    -

    logical. If TRUE, missing values in the data are placed last; if FALSE, they are placed first; if NA they are removed. -na.last=NA is valid only for x[order(., na.last)] and its -default is TRUE. setorder and setorderv only accept -TRUE/FALSE with default FALSE.

    - - -
    - -
    -
    -

    Value

    - - -

    A data frame of the same class as x. The properties of the output -are as close as possible to the ones returned by the dplyr alternative.

    -
    -
    -

    See also

    -

    Other dplyr alternatives: -anti_join(), -full_join(), -inner_join(), -left_join()

    -
    - -
    -

    Examples

    -
    # Simple right join
    -library(data.table)
    -
    -x1 = data.table(id = c(1L, 1L, 2L, 3L, NA_integer_),
    -                t  = c(1L, 2L, 1L, 2L, NA_integer_),
    -                x  = 11:15)
    -y1 = data.table(id = c(1,2, 4),
    -                y  = c(11L, 15L, 16))
    -right_join(x1, y1, relationship = "many-to-one")
    -#> 
    -#> ── JOYn Report ──
    -#> 
    -#>   .joyn n percent
    -#> 1     x 2     50%
    -#> 2     y 1     25%
    -#> 3 x & y 1     25%
    -#> 4 total 4    100%
    -#> ────────────────────────────────────────────────────────── End of JOYn report ──
    -#> ℹ Note: Joyn's report available in variable .joyn
    -#> ℹ Note: Removing key variables id from id and y
    -#>       id     t     x     y  .joyn
    -#>    <num> <int> <int> <num> <fctr>
    -#> 1:     1     1    11    11  x & y
    -#> 2:     1     2    12    11  x & y
    -#> 3:     2     1    13    15  x & y
    -#> 4:     4    NA    NA    16      y
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/set_col_names.html b/docs/reference/set_col_names.html deleted file mode 100644 index bfd28896..00000000 --- a/docs/reference/set_col_names.html +++ /dev/null @@ -1,122 +0,0 @@ - -Add x key var and y key var (with suffixes) to x and y --when joining by different variables and keep is true — set_col_names • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Add x key var and y key var (with suffixes) to x and y --when joining by different variables and keep is true

    -
    - -
    -

    Usage

    -
    set_col_names(x, y, by, suffix, jn_type)
    -
    - -
    -

    Arguments

    -
    x
    -

    data table: left table

    - - -
    y
    -

    data table: right table

    - - -
    by
    -

    character vector of variables to join by

    - - -
    suffix
    -

    character(2) specifying the suffixes to be used for making non-by column names unique

    - - -
    jn_type
    -

    character specifying type of join

    - -
    -
    -

    Value

    - - -

    list containing x and y

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/set_joyn_options.html b/docs/reference/set_joyn_options.html deleted file mode 100644 index 2c27ed8a..00000000 --- a/docs/reference/set_joyn_options.html +++ /dev/null @@ -1,115 +0,0 @@ - -Set joyn options — set_joyn_options • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This function is used to change the value of one or more joyn options

    -
    - -
    -

    Usage

    -
    set_joyn_options(..., env = .joynenv)
    -
    - -
    -

    Arguments

    -
    ...
    -

    pairs of option = value

    - - -
    env
    -

    environment, which is joyn environment by default

    - -
    -
    -

    Value

    - - -

    joyn new options and values invisibly as a list

    -
    -
    -

    See also

    -

    JOYn options functions -get_joyn_options()

    -
    - -
    -

    Examples

    -
    joyn:::set_joyn_options(joyn.verbose = FALSE, joyn.reportvar = "joyn_status")
    -joyn:::set_joyn_options() # return to default options
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/split_match_type.html b/docs/reference/split_match_type.html deleted file mode 100644 index c95c558e..00000000 --- a/docs/reference/split_match_type.html +++ /dev/null @@ -1,103 +0,0 @@ - -Split matching type — split_match_type • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Split matching type (one of "1:1", "m:1", "1:m", "m:m") into its two components

    -
    - -
    -

    Usage

    -
    split_match_type(match_type)
    -
    - -
    -

    Arguments

    -
    match_type
    -

    character: one of "m:m", "m:1", "1:m", "1:1". -Default is "1:1" since this is the most restrictive. However, following -Stata's recommendation, it is better to be explicit and use any of the -other three match types (See details in match types sections).

    - -
    -
    -

    Value

    - - -

    character vector

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/store_msg.html b/docs/reference/store_msg.html deleted file mode 100644 index a1c65dd5..00000000 --- a/docs/reference/store_msg.html +++ /dev/null @@ -1,124 +0,0 @@ - -Store joyn message to .joynenv environment — store_msg • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Store joyn message to .joynenv environment

    -
    - -
    -

    Usage

    -
    store_msg(type, ...)
    -
    - -
    -

    Arguments

    -
    ...
    -

    combination of type and text in the form style1 = text1, style2 = text2, etc.

    - -
    -
    -

    Value

    - - -

    current message data frame invisibly

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msg(), -joyn_msgs_exist(), -joyn_report(), -msg_type_dt(), -style(), -type_choices()

    -
    - -
    -

    Examples

    -
    # Storing msg with msg_type "info"
    -joyn:::store_msg("info",
    -  ok = cli::symbol$tick, "  ",
    -  pale = "This is an info message")
    -
    -# Storing msg with msg_type "warn"
    -joyn:::store_msg("warn",
    -  err = cli::symbol$cross, "  ",
    -  note = "This is a warning message")
    -
    -
    -
    - - -
    - - - - - - - diff --git a/docs/reference/style.html b/docs/reference/style.html deleted file mode 100644 index 6e9a7f15..00000000 --- a/docs/reference/style.html +++ /dev/null @@ -1,119 +0,0 @@ - -style of text displayed — style • joyn - Skip to contents - - -
    -
    -
    - -
    -

    This is an adaptation from -https://github.com/r-lib/pkgbuild/blob/3ba537ab8a6ac07d3fe11c17543677d2a0786be6/R/styles.R

    -
    - -
    -

    Usage

    -
    style(..., sep = "")
    -
    - -
    -

    Arguments

    -
    ...
    -

    combination of type and text in the form -type1 = text1, type2 = text2

    - - -
    sep
    -

    a character string to separate the terms to paste

    - -
    -
    -

    Value

    - - -

    formatted text

    -
    -
    -

    See also

    - -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/type_choices.html b/docs/reference/type_choices.html deleted file mode 100644 index d43a7b7f..00000000 --- a/docs/reference/type_choices.html +++ /dev/null @@ -1,105 +0,0 @@ - -Choice of messages — type_choices • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Choice of messages

    -
    - -
    -

    Usage

    -
    type_choices()
    -
    - -
    -

    Value

    - - -

    character vector with choices of types

    -
    -
    -

    See also

    -

    Messages functions -clear_joynenv(), -joyn_msg(), -joyn_msgs_exist(), -joyn_report(), -msg_type_dt(), -store_msg(), -style()

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/unmatched_keys.html b/docs/reference/unmatched_keys.html deleted file mode 100644 index 3a990b99..00000000 --- a/docs/reference/unmatched_keys.html +++ /dev/null @@ -1,108 +0,0 @@ - -Check for unmatched keys — unmatched_keys • joyn - Skip to contents - - -
    -
    -
    - -
    -

    Gives TRUE if unmatched keys, FALSE if not.

    -
    - -
    -

    Usage

    -
    unmatched_keys(x, out, by)
    -
    - -
    -

    Arguments

    -
    x
    -

    input table to join

    - - -
    out
    -

    output of join

    - - -
    by
    -

    by argument, giving keys for join

    - -
    -
    -

    Value

    - - -

    logical

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/reference/update_na_values.html b/docs/reference/update_na_values.html deleted file mode 100644 index 7b4afdde..00000000 --- a/docs/reference/update_na_values.html +++ /dev/null @@ -1,150 +0,0 @@ - -Update NA and/or values — update_na_values • joyn - Skip to contents - - -
    -
    -
    - -
    -

    The function updates NAs and/or values in the following way:

    • If only update_NAs is TRUE: update NAs of var in x with values of var y of the same name

    • -
    • If only update_values = TRUE: update all values, but NOT NAs, of var in x with values of var y of the same name. -NAs from y are not used to update values in x . (e.g., if x.var = 10 and y.var = NA, x.var remains 10)

    • -
    • If both update_NAs and update_values are TRUE, both NAs and values in x are updated as described above

    • -
    • If both update_NAs and update_values are FALSE, no update

    • -
    - -
    -

    Usage

    -
    update_na_values(
    -  dt,
    -  var,
    -  reportvar = getOption("joyn.reportvar"),
    -  suffixes = getOption("joyn.suffixes"),
    -  rep_NAs = FALSE,
    -  rep_values = FALSE
    -)
    -
    - -
    -

    Arguments

    -
    dt
    -

    joined data.table

    - - -
    var
    -

    variable(s) to be updated

    - - -
    reportvar
    -

    character: Name of reporting variable. Default is ".joyn". -This is the same as variable "_merge" in Stata after performing a merge. If -FALSE or NULL, the reporting variable will be excluded from the final -table, though a summary of the join will be display after concluding.

    - - -
    suffixes
    -

    A character(2) specifying the suffixes to be used for making -non-by column names unique. The suffix behaviour works in a similar fashion -as the base::merge method does.

    - - -
    rep_NAs
    -

    inherited from joyn update_NAs

    - - -
    rep_values
    -

    inherited from joyn update_values

    - -
    -
    -

    Value

    - - -

    data.table

    -
    - -
    - - -
    - - - - - - - diff --git a/docs/sitemap.xml b/docs/sitemap.xml deleted file mode 100644 index f9f93f78..00000000 --- a/docs/sitemap.xml +++ /dev/null @@ -1,177 +0,0 @@ - - - - https://randrescastaneda.github.io/joyn/404.html - - - https://randrescastaneda.github.io/joyn/articles/adv-functionalities.html - - - https://randrescastaneda.github.io/joyn/articles/aux-functions.html - - - https://randrescastaneda.github.io/joyn/articles/dplyr-joins.html - - - https://randrescastaneda.github.io/joyn/articles/index.html - - - https://randrescastaneda.github.io/joyn/articles/main-functionalities.html - - - https://randrescastaneda.github.io/joyn/articles/merge-wrapper.html - - - https://randrescastaneda.github.io/joyn/articles/messages.html - - - https://randrescastaneda.github.io/joyn/authors.html - - - https://randrescastaneda.github.io/joyn/index.html - - - https://randrescastaneda.github.io/joyn/LICENSE-text.html - - - https://randrescastaneda.github.io/joyn/LICENSE.html - - - https://randrescastaneda.github.io/joyn/news/index.html - - - https://randrescastaneda.github.io/joyn/reference/anti_join.html - - - https://randrescastaneda.github.io/joyn/reference/arguments_checks.html - - - https://randrescastaneda.github.io/joyn/reference/check_by_vars.html - - - https://randrescastaneda.github.io/joyn/reference/check_dt_by.html - - - https://randrescastaneda.github.io/joyn/reference/check_duplicate_names.html - - - https://randrescastaneda.github.io/joyn/reference/check_match_type.html - - - https://randrescastaneda.github.io/joyn/reference/check_new_y_vars.html - - - https://randrescastaneda.github.io/joyn/reference/check_reportvar.html - - - https://randrescastaneda.github.io/joyn/reference/check_unmatched_keys.html - - - https://randrescastaneda.github.io/joyn/reference/check_xy.html - - - https://randrescastaneda.github.io/joyn/reference/check_y_vars_to_keep.html - - - https://randrescastaneda.github.io/joyn/reference/clear_joynenv.html - - - 
https://randrescastaneda.github.io/joyn/reference/correct_names.html - - - https://randrescastaneda.github.io/joyn/reference/freq_table.html - - - https://randrescastaneda.github.io/joyn/reference/full_join.html - - - https://randrescastaneda.github.io/joyn/reference/get_joyn_options.html - - - https://randrescastaneda.github.io/joyn/reference/index.html - - - https://randrescastaneda.github.io/joyn/reference/inner_join.html - - - https://randrescastaneda.github.io/joyn/reference/is_balanced.html - - - https://randrescastaneda.github.io/joyn/reference/is_id.html - - - https://randrescastaneda.github.io/joyn/reference/is_match_type_error.html - - - https://randrescastaneda.github.io/joyn/reference/is_valid_m_key.html - - - https://randrescastaneda.github.io/joyn/reference/joyn-package.html - - - https://randrescastaneda.github.io/joyn/reference/joyn.html - - - https://randrescastaneda.github.io/joyn/reference/joyn_msg.html - - - https://randrescastaneda.github.io/joyn/reference/joyn_msgs_exist.html - - - https://randrescastaneda.github.io/joyn/reference/joyn_report.html - - - https://randrescastaneda.github.io/joyn/reference/joyn_workhorse.html - - - https://randrescastaneda.github.io/joyn/reference/left_join.html - - - https://randrescastaneda.github.io/joyn/reference/merge.html - - - https://randrescastaneda.github.io/joyn/reference/msg_type_dt.html - - - https://randrescastaneda.github.io/joyn/reference/possible_ids.html - - - https://randrescastaneda.github.io/joyn/reference/process_by_vector.html - - - https://randrescastaneda.github.io/joyn/reference/rename_to_valid.html - - - https://randrescastaneda.github.io/joyn/reference/report_from_attr.html - - - https://randrescastaneda.github.io/joyn/reference/right_join.html - - - https://randrescastaneda.github.io/joyn/reference/set_col_names.html - - - https://randrescastaneda.github.io/joyn/reference/set_joyn_options.html - - - https://randrescastaneda.github.io/joyn/reference/split_match_type.html - - - 
https://randrescastaneda.github.io/joyn/reference/store_joyn_msg.html - - - https://randrescastaneda.github.io/joyn/reference/store_msg.html - - - https://randrescastaneda.github.io/joyn/reference/style.html - - - https://randrescastaneda.github.io/joyn/reference/type_choices.html - - - https://randrescastaneda.github.io/joyn/reference/unmatched_keys.html - - - https://randrescastaneda.github.io/joyn/reference/update_na_values.html - -