diff --git a/.Rbuildignore b/.Rbuildignore index 30a4f786..714c1746 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,9 @@ ^\.Rproj\.user$ ^isc-proposal.pdf$ ^LICENSE\.md$ +^README\.Rmd$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^codecov\.yml$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..2d19fc76 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..ebcf2a91 --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,126 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity and +orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, +and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall +community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or +advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email +address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards +of acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies +when an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at codeofconduct@rstudio.com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, +available at . + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +. Translations are available at . diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..4efedd96 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,61 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow is overkill for most R packages and +# check-standard.yaml is likely a better choice. +# usethis::use_github_action("check-standard") will install it. +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macOS-latest, r: 'release'} + + - {os: windows-latest, r: 'release'} + # Use 3.6 to trigger usage of RTools35 + - {os: windows-latest, r: '3.6'} + + # Use older ubuntu to maximise backward compatibility + - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-18.04, r: 'release'} + - {os: ubuntu-18.04, r: 'oldrel-1'} + - {os: ubuntu-18.04, r: 'oldrel-2'} + - {os: ubuntu-18.04, r: 'oldrel-3'} + - {os: ubuntu-18.04, r: 'oldrel-4'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..0b260216 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,46 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@4.1.4 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 00000000..97271eb2 --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,79 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + issue_comment: + types: [created] + +name: Commands + +jobs: + document: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} + name: document + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::roxygen2 + needs: pr-document + + - name: Document + run: roxygen2::roxygenise() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add man/\* NAMESPACE + git commit -m 'Document' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + style: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} + name: style + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: install.packages("styler") + shell: Rscript {0} + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add \*.R + git commit -m 'Style' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..4b654182 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,31 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: covr::codecov(quiet = FALSE) + shell: Rscript {0} diff --git a/.gitignore b/.gitignore index 565f2b6a..9168bf8f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ .Rdata .httr-oauth .DS_Store +docs diff --git a/DESCRIPTION b/DESCRIPTION index fd79c0b0..6dd9b6dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,8 @@ Version: 0.0.0.9000 Authors@R: c( person("Emil", "Hvitfeldt", , "emilhhvitfeldt@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")), - person("Kelly", "Bodwin", , "kelly@bodwin.us", role = "aut") + person("Kelly", "Bodwin", , "kelly@bodwin.us", role = "aut"), + person("RStudio", role = c("cph", "fnd")) ) Description: What the package does (one paragraph). License: MIT + file LICENSE @@ -24,7 +25,10 @@ Imports: utils, vctrs Suggests: - modeldata + testthat (>= 3.0.0), + modeldata, + covr Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.1.2.9000 +Config/testthat/edition: 3 diff --git a/R/aaa.R b/R/aaa.R index 94489682..e0c57fe9 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -2,8 +2,10 @@ # nocov utils::globalVariables( - c("engine", "predictor_indicators", "compute_intercept", "remove_intercept", - "value", "x", "y", "engine2", "lab", "original", "type") + c( + "engine", "predictor_indicators", "compute_intercept", "remove_intercept", + "value", "x", "y", "engine2", "lab", "original", "type" + ) ) # nocov end diff --git a/R/aaa_models.R b/R/aaa_models.R index 338d579a..b1a8a2fe 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -31,7 +31,6 @@ celery$modes <- c(all_modes, "unknown") # check if class and mode and engine are compatible check_spec_mode_engine_val <- function(cls, eng, mode) { - all_modes <- get_from_env_celery(paste0(cls, "_modes")) if (!(mode %in% all_modes)) { rlang::abort(paste0("'", mode, "' is not a known mode for model `", cls, "()`.")) @@ -104,7 +103,6 @@ check_spec_mode_engine_val <- function(cls, eng, mode) { #' # Access the model data: #' current_code <- get_model_env_celery() #' ls(envir = current_code) -#' #' @keywords internal #' @export get_model_env_celery <- function() { @@ -372,8 +370,9 @@ set_model_engine_celery <- function(model, mode, eng) { } check_eng_val <- function(eng) { - if (rlang::is_missing(eng) || length(eng) != 1 || !is.character(eng)) + if (rlang::is_missing(eng) || length(eng) != 1 || !is.character(eng)) { rlang::abort("Please supply a character string for an engine name (e.g. `'lm'`)") + } invisible(NULL) } @@ -479,8 +478,10 @@ set_fit_celery <- function(model, mode, eng, value) { dplyr::filter(engine == eng & mode == !!mode) %>% nrow() if (has_engine != 1) { - rlang::abort(glue::glue("The combination of '{eng}' and mode '{mode}' has not ", - "been registered for model '{model}'.")) + rlang::abort(glue::glue( + "The combination of '{eng}' and mode '{mode}' has not ", + "been registered for model '{model}'." + )) } has_fit <- @@ -489,8 +490,10 @@ set_fit_celery <- function(model, mode, eng, value) { nrow() if (has_fit > 0) { - rlang::abort(glue::glue("The combination of '{eng}' and mode '{mode}' ", - "already has a fit component for model '{model}'.")) + rlang::abort(glue::glue( + "The combination of '{eng}' and mode '{mode}' ", + "already has a fit component for model '{model}'." + )) } new_fit <- @@ -524,8 +527,10 @@ check_fit_info <- function(fit_obj) { if (!all(has_req_nms)) { rlang::abort( - glue::glue("The `fit` module should have elements: ", - glue::glue_collapse(glue::glue("`{exp_nms}`"), sep = ", ")) + glue::glue( + "The `fit` module should have elements: ", + glue::glue_collapse(glue::glue("`{exp_nms}`"), sep = ", ") + ) ) } @@ -534,8 +539,10 @@ check_fit_info <- function(fit_obj) { other_nms <- setdiff(exp_nms, names(fit_obj)) has_opt_nms <- other_nms %in% opt_nms if (any(!has_opt_nms)) { - msg <- glue::glue("The `fit` module can only have optional elements: ", - glue::glue_collapse(glue::glue("`{exp_nms}`"), sep = ", ")) + msg <- glue::glue( + "The `fit` module can only have optional elements: ", + glue::glue_collapse(glue::glue("`{exp_nms}`"), sep = ", ") + ) rlang::abort(msg) } @@ -560,8 +567,10 @@ check_interface_val <- function(x) { exp_interf <- c("data.frame", "formula", "matrix") if (length(x) != 1 || !(x %in% exp_interf)) { rlang::abort( - glue::glue("The `interface` element should have a single value of: ", - glue::glue_collapse(glue::glue("`{exp_interf}`"), sep = ", ")) + glue::glue( + "The `interface` element should have a single value of: ", + glue::glue_collapse(glue::glue("`{exp_interf}`"), sep = ", ") + ) ) } invisible(NULL) @@ -575,12 +584,13 @@ check_func_val <- function(func) { "`func` and 'pkg' should both be single character strings." ) - if (rlang::is_missing(func) || !is.vector(func)) + if (rlang::is_missing(func) || !is.vector(func)) { rlang::abort(msg) + } nms <- sort(names(func)) - if (all(is.null(nms))) { + if (all(is.null(nms))) { rlang::abort(msg) } @@ -626,8 +636,10 @@ get_encoding_celery <- function(model) { remove_intercept = TRUE, allow_sparse_x = FALSE ) %>% - dplyr::select(model, engine, mode, predictor_indicators, - compute_intercept, remove_intercept) + dplyr::select( + model, engine, mode, predictor_indicators, + compute_intercept, remove_intercept + ) } res } @@ -641,7 +653,7 @@ set_encoding_celery <- function(model, mode, eng, options) { check_mode_val(mode) check_encodings(options) - keys <- tibble::tibble(model = model, engine = eng, mode = mode) + keys <- tibble::tibble(model = model, engine = eng, mode = mode) options <- tibble::as_tibble(options) new_values <- dplyr::bind_cols(keys, options) @@ -659,7 +671,6 @@ set_encoding_celery <- function(model, mode, eng, options) { if (nrow(dup_check)) { rlang::abort(glue::glue("Engine '{eng}' and mode '{mode}' already have defined encodings for model '{model}'.")) } - } else { current <- NULL } @@ -674,10 +685,12 @@ check_encodings <- function(x) { if (!is.list(x)) { rlang::abort("`values` should be a list.") } - req_args <- list(predictor_indicators = rlang::na_chr, - compute_intercept = rlang::na_lgl, - remove_intercept = rlang::na_lgl, - allow_sparse_x = rlang::na_lgl) + req_args <- list( + predictor_indicators = rlang::na_chr, + compute_intercept = rlang::na_lgl, + remove_intercept = rlang::na_lgl, + allow_sparse_x = rlang::na_lgl + ) missing_args <- setdiff(names(req_args), names(x)) if (length(missing_args) > 0) { @@ -736,8 +749,9 @@ set_model_arg_celery <- function(model, eng, celery, original, func, has_submode } check_arg_val <- function(arg) { - if (rlang::is_missing(arg) || length(arg) != 1 || !is.character(arg)) + if (rlang::is_missing(arg) || length(arg) != 1 || !is.character(arg)) { rlang::abort("Please supply a character string for the argument.") + } invisible(NULL) } diff --git a/R/arguments.R b/R/arguments.R index fcbd1d2e..afcd4f5f 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -7,8 +7,10 @@ check_eng_args <- function(args, obj, core_args) { if (length(common_args) > 0) { args <- args[!(names(args) %in% common_args)] common_args <- paste0(common_args, collapse = ", ") - rlang::warn(glue::glue("The following arguments cannot be manually modified ", - "and were removed: {common_args}.")) + rlang::warn(glue::glue( + "The following arguments cannot be manually modified ", + "and were removed: {common_args}." + )) } args } @@ -23,9 +25,8 @@ make_x_call <- function(object, target) { data_args <- object$method$fit$data } - object$method$fit$args[[ unname(data_args["x"]) ]] <- - switch( - target, + object$method$fit$args[[unname(data_args["x"])]] <- + switch(target, none = rlang::expr(x), data.frame = rlang::expr(maybe_data_frame(x)), matrix = rlang::expr(maybe_matrix(x)), @@ -54,11 +55,11 @@ make_form_call <- function(object, env = NULL) { # add data arguments for (i in seq_along(data_args)) { - fit_args[[ unname(data_args[i]) ]] <- sym(names(data_args)[i]) + fit_args[[unname(data_args[i])]] <- sym(names(data_args)[i]) } # sub in actual formula - fit_args[[ unname(data_args["formula"]) ]] <- env$formula + fit_args[[unname(data_args["formula"])]] <- env$formula fit_call <- make_call( fun = object$method$fit$func["fun"], diff --git a/R/cluster_spec.R b/R/cluster_spec.R index 2c6ab088..62eac80a 100644 --- a/R/cluster_spec.R +++ b/R/cluster_spec.R @@ -7,11 +7,12 @@ #' @keywords internal #' @rdname add_on_exports new_cluster_spec <- function(cls, args, eng_args, mode, method, engine) { - check_spec_mode_engine_val(cls, engine, mode) - out <- list(args = args, eng_args = eng_args, - mode = mode, method = method, engine = engine) + out <- list( + args = args, eng_args = eng_args, + mode = mode, method = method, engine = engine + ) class(out) <- make_classes_celery(cls) out } diff --git a/R/control.R b/R/control.R index 7449a22f..0737f36a 100644 --- a/R/control.R +++ b/R/control.R @@ -1,14 +1,18 @@ check_control <- function(x) { - if (!is.list(x)) + if (!is.list(x)) { rlang::abort("control should be a named list.") - if (!isTRUE(all.equal(sort(names(x)), c("catch", "verbosity")))) + } + if (!isTRUE(all.equal(sort(names(x)), c("catch", "verbosity")))) { rlang::abort("control should be a named list with elements 'verbosity' and 'catch'.") + } # based on ?is.integer - int_check <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol - if (!int_check(x$verbosity)) + int_check <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol + if (!int_check(x$verbosity)) { rlang::abort("verbosity should be an integer.") - if (!is.logical(x$catch)) + } + if (!is.logical(x$catch)) { rlang::abort("catch should be a logical.") + } x } @@ -41,9 +45,11 @@ control_celery <- function(verbosity = 1L, catch = FALSE) { #' @export print.control_celery <- function(x, ...) { cat("celery control object\n") - if (x$verbosity > 1) + if (x$verbosity > 1) { cat(" - verbose level", x$verbosity, "\n") - if (x$catch) + } + if (x$catch) { cat(" - fit errors will be caught\n") + } invisible(x) } diff --git a/R/convert_data.R b/R/convert_data.R index 08f7ba37..6ceab870 100644 --- a/R/convert_data.R +++ b/R/convert_data.R @@ -33,12 +33,12 @@ #' @export #' .convert_form_to_x_fit <- function(formula, - data, - ..., - na.action = na.omit, - indicators = "traditional", - composition = "data.frame", - remove_intercept = TRUE) { + data, + ..., + na.action = na.omit, + indicators = "traditional", + composition = "data.frame", + remove_intercept = TRUE) { if (!(composition %in% c("data.frame", "matrix"))) { rlang::abort("`composition` should be either 'data.frame' or 'matrix'.") } @@ -198,10 +198,10 @@ local_one_hot_contrasts <- function(frame = rlang::caller_env()) { make_formula <- function(x, short = TRUE) { y_part <- "~" - if(short) + if (short) { form_text <- paste0(y_part, ".") - else + } else { form_text <- paste0(y_part, paste0(x, collapse = "+")) + } as.formula(form_text) } - diff --git a/R/engines.R b/R/engines.R index c48f3ea6..c56abf65 100644 --- a/R/engines.R +++ b/R/engines.R @@ -9,8 +9,9 @@ possible_engines <- function(object, ...) { unique(engs$engine) } -shhhh <- function(x) +shhhh <- function(x) { suppressPackageStartupMessages(requireNamespace(x, quietly = TRUE)) +} is_installed <- function(pkg) { res <- try(shhhh(pkg), silent = TRUE) @@ -85,10 +86,14 @@ stop_missing_engine <- function(cls) { info <- get_from_env_celery(cls) %>% dplyr::group_by(mode) %>% - dplyr::summarize(msg = paste0(unique(mode), " {", - paste0(unique(engine), collapse = ", "), - "}"), - .groups = "drop") + dplyr::summarize( + msg = paste0( + unique(mode), " {", + paste0(unique(engine), collapse = ", "), + "}" + ), + .groups = "drop" + ) if (nrow(info) == 0) { rlang::abort(paste0("No known engines for `", cls, "()`.")) } diff --git a/R/fit.R b/R/fit.R index 6a10b5b3..50c7e83c 100644 --- a/R/fit.R +++ b/R/fit.R @@ -56,11 +56,11 @@ #' using_formula <- #' kmeans_mod %>% #' set_engine_celery("stats") %>% -#' fit( ~ ., data = mtcars) +#' fit(~., data = mtcars) #' #' using_x <- #' kmeans_mod %>% -#' set_engine_celery("stats") %>% +#' set_engine_celery("stats") %>% #' fit_x(x = mtcars) #' #' using_formula @@ -108,8 +108,9 @@ fit.cluster_spec <- function(object, } } - if (all(c("x", "y") %in% names(dots))) + if (all(c("x", "y") %in% names(dots))) { rlang::abort("`fit.cluster_spec()` is for the formula methods. Use `fit_x()` instead.") + } cl <- match.call(expand.dots = TRUE) # Create an environment with the evaluated argument objects. This will be # used when a model call is made later. @@ -133,8 +134,7 @@ fit.cluster_spec <- function(object, # used here, `fit_interface_formula` will determine if a # translation has to be made if the model interface is x/y/ res <- - switch( - interfaces, + switch(interfaces, # homogeneous combinations: formula_formula = form_form( @@ -160,7 +160,6 @@ fit.cluster_spec <- function(object, target = object$method$fit$interface, ... ), - rlang::abort(glue::glue("{interfaces} is unknown.")) ) model_classes <- class(res$fit) @@ -174,8 +173,9 @@ check_interface <- function(formula, data, cl, model) { # Determine the `fit()` interface form_interface <- !is.null(formula) & !is.null(data) - if (form_interface) + if (form_interface) { return("formula") + } rlang::abort("Error when checking the interface.") } @@ -183,17 +183,18 @@ inher <- function(x, cls, cl) { if (!is.null(x) && !inherits(x, cls)) { call <- match.call() obj <- deparse(call[["x"]]) - if (length(cls) > 1) + if (length(cls) > 1) { rlang::abort( glue::glue( "`{obj}` should be one of the following classes: ", glue::glue_collapse(glue::glue("'{cls}'"), sep = ", ") ) ) - else + } else { rlang::abort( glue::glue("`{obj}` should be a {cls} object") ) + } } invisible(x) } @@ -243,79 +244,78 @@ fit_x <- function(object, ...) { #' @export fit_x.cluster_spec fit_x.cluster_spec <- function(object, x, control = control_celery(), ...) { - - if (!identical(class(control), class(control_celery()))) { - rlang::abort("The 'control' argument should have class 'control_celery'.") - } - if (is.null(colnames(x))) { - rlang::abort("'x' should have column names.") - } - - dots <- quos(...) - if (is.null(object$engine)) { - eng_vals <- possible_engines(object) - object$engine <- eng_vals[1] - if (control$verbosity > 0) { - rlang::warn(glue::glue("Engine set to `{object$engine}`.")) + if (!identical(class(control), class(control_celery()))) { + rlang::abort("The 'control' argument should have class 'control_celery'.") + } + if (is.null(colnames(x))) { + rlang::abort("'x' should have column names.") } - } - - cl <- match.call(expand.dots = TRUE) - eval_env <- rlang::env() - eval_env$x <- x - fit_interface <- check_x_interface(eval_env$x, cl, object) - - # populate `method` with the details for this model type - object <- add_methods(object, engine = object$engine) - - check_installs(object) - - interfaces <- paste(fit_interface, object$method$fit$interface, sep = "_") - - # Now call the wrappers that transition between the interface - # called here ("fit" interface) that will direct traffic to - # what the underlying model uses. For example, if a formula is - # used here, `fit_interface_formula` will determine if a - # translation has to be made if the model interface is x/y/ - res <- - switch( - interfaces, - # homogeneous combinations: - matrix_matrix = , data.frame_matrix = - x_x( - object = object, - env = eval_env, - control = control, - target = "matrix", - ... - ), - data.frame_data.frame = , matrix_data.frame = - x_x( - object = object, - env = eval_env, - control = control, - target = "data.frame", - ... - ), + dots <- quos(...) + if (is.null(object$engine)) { + eng_vals <- possible_engines(object) + object$engine <- eng_vals[1] + if (control$verbosity > 0) { + rlang::warn(glue::glue("Engine set to `{object$engine}`.")) + } + } - # heterogenous combinations - matrix_formula = , data.frame_formula = - x_form( - object = object, - env = eval_env, - control = control, - ... - ), - rlang::abort(glue::glue("{interfaces} is unknown.")) - ) - model_classes <- class(res$fit) - class(res) <- c(paste0("_", model_classes[1]), "model_fit") - res -} + cl <- match.call(expand.dots = TRUE) + eval_env <- rlang::env() + eval_env$x <- x + fit_interface <- check_x_interface(eval_env$x, cl, object) + + # populate `method` with the details for this model type + object <- add_methods(object, engine = object$engine) + + check_installs(object) + + interfaces <- paste(fit_interface, object$method$fit$interface, sep = "_") + + # Now call the wrappers that transition between the interface + # called here ("fit" interface) that will direct traffic to + # what the underlying model uses. For example, if a formula is + # used here, `fit_interface_formula` will determine if a + # translation has to be made if the model interface is x/y/ + res <- + switch(interfaces, + # homogeneous combinations: + matrix_matrix = , + data.frame_matrix = + x_x( + object = object, + env = eval_env, + control = control, + target = "matrix", + ... + ), + data.frame_data.frame = , + matrix_data.frame = + x_x( + object = object, + env = eval_env, + control = control, + target = "data.frame", + ... + ), + + # heterogenous combinations + matrix_formula = , + data.frame_formula = + x_form( + object = object, + env = eval_env, + control = control, + ... + ), + rlang::abort(glue::glue("{interfaces} is unknown.")) + ) + model_classes <- class(res$fit) + class(res) <- c(paste0("_", model_classes[1]), "model_fit") + res + } check_x_interface <- function(x, cl, model) { - sparse_ok <- allow_sparse(model) sparse_x <- inherits(x, "dgCMatrix") if (!sparse_ok & sparse_x) { diff --git a/R/fit_helpers.R b/R/fit_helpers.R index c7f52c06..b38490d3 100644 --- a/R/fit_helpers.R +++ b/R/fit_helpers.R @@ -1,44 +1,43 @@ form_form <- function(object, control, env, ...) { - # evaluate quoted args once here to check them - object <- check_args(object) + # evaluate quoted args once here to check them + object <- check_args(object) - # sub in arguments to actual syntax for corresponding engine - object <- translate_celery(object, engine = object$engine) + # sub in arguments to actual syntax for corresponding engine + object <- translate_celery(object, engine = object$engine) - fit_call <- make_form_call(object, env = env) + fit_call <- make_form_call(object, env = env) - res <- list( - spec = object - ) + res <- list( + spec = object + ) - if (control$verbosity > 1L) { - elapsed <- system.time( - res$fit <- eval_mod( - fit_call, - capture = control$verbosity == 0, - catch = control$catch, - env = env, - ... - ), - gcFirst = FALSE - ) - } else { + if (control$verbosity > 1L) { + elapsed <- system.time( res$fit <- eval_mod( fit_call, capture = control$verbosity == 0, catch = control$catch, env = env, ... - ) - elapsed <- list(elapsed = NA_real_) - } - res$elapsed <- elapsed - res + ), + gcFirst = FALSE + ) + } else { + res$fit <- eval_mod( + fit_call, + capture = control$verbosity == 0, + catch = control$catch, + env = env, + ... + ) + elapsed <- list(elapsed = NA_real_) + } + res$elapsed <- elapsed + res } form_x <- function(object, control, env, target = "none", ...) { - encoding_info <- get_encoding_celery(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) @@ -58,7 +57,7 @@ form_x <- function(object, control, env, target = "none", ...) { res <- x_x( object = object, - env = env, #weights! + env = env, # weights! control = control, target = target ) @@ -69,7 +68,6 @@ form_x <- function(object, control, env, target = "none", ...) { } x_x <- function(object, env, control, target = "none", ...) { - encoding_info <- get_encoding_celery(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) @@ -116,7 +114,6 @@ x_x <- function(object, env, control, target = "none", ...) { } x_form <- function(object, env, control, ...) { - encoding_info <- get_encoding_celery(class(object)[1]) %>% dplyr::filter(mode == object$mode, engine == object$engine) @@ -142,4 +139,3 @@ x_form <- function(object, env, control, ...) { res$preproc <- data_obj[c("x_var")] res } - diff --git a/R/k_means.R b/R/k_means.R index e8df4c79..d185c0cc 100644 --- a/R/k_means.R +++ b/R/k_means.R @@ -21,7 +21,6 @@ k_means <- function(mode = "partition", engine = "stats", k = NULL) { - args <- list( k = enquo(k) ) diff --git a/R/k_means_data.R b/R/k_means_data.R index 93176a0d..f33f5213 100644 --- a/R/k_means_data.R +++ b/R/k_means_data.R @@ -78,4 +78,3 @@ set_model_arg_celery( func = list(pkg = "dials", fun = "k"), has_submodel = TRUE ) - diff --git a/R/translate.R b/R/translate.R index 751ff613..05a49a22 100644 --- a/R/translate.R +++ b/R/translate.R @@ -28,16 +28,18 @@ #' to modify the model specification. #' #' @export -translate_celery <- function(x, ...) +translate_celery <- function(x, ...) { UseMethod("translate_celery") +} #' @rdname translate_celery #' @export #' @export translate_celery.default translate_celery.default <- function(x, engine = x$engine, ...) { check_empty_ellipse_celery(...) - if (is.null(engine)) + if (is.null(engine)) { rlang::abort("Please set an engine.") + } mod_name <- specific_model(x) @@ -122,8 +124,9 @@ get_args <- function(model, engine) { # to replace harmonize deharmonize <- function(args, key) { - if (length(args) == 0) + if (length(args) == 0) { return(args) + } parsn <- tibble::tibble(celery = names(args), order = seq_along(args)) merged <- dplyr::left_join(parsn, key, by = "celery") %>% @@ -139,9 +142,10 @@ deharmonize <- function(args, key) { #' @return If an error is not thrown (from non-empty ellipses), a NULL list. #' @keywords internal #' @export -check_empty_ellipse_celery <- function(...) { +check_empty_ellipse_celery <- function(...) { terms <- quos(...) - if (!rlang::is_empty(terms)) + if (!rlang::is_empty(terms)) { rlang::abort("Please pass other arguments to the model function via `set_engine_celery()`.") + } terms } diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 00000000..35bc36dd --- /dev/null +++ b/README.Rmd @@ -0,0 +1,52 @@ +--- +output: github_document +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# celery + + +[![Codecov test coverage](https://codecov.io/gh/EmilHvitfeldt/celery/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/celery?branch=main) +[![R-CMD-check](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml) + + +The goal of celery is to provide a tidy, unified interface to clustering models. The packages is closely modeled after the [parsnip](https://parsnip.tidymodels.org/) package. + +## Installation + +You can install the development version of celery from [GitHub](https://github.com/) with: + +``` r +# install.packages("devtools") +devtools::install_github("EmilHvitfeldt/celery") +``` + +## Example + +The first thing you do is to create a `cluster specification`. For this example we are creating a K-means model, using the `stats` engine. + +```{r} +library(celery) + +k_means_spec <- k_means(k = 3) %>% + set_engine_celery("stats") + +k_means_spec +``` + +This specification can then be fit using data. + +```{r} +k_means_spec %>% + fit(~., data = mtcars) +``` diff --git a/README.md b/README.md new file mode 100644 index 00000000..4edfbdb4 --- /dev/null +++ b/README.md @@ -0,0 +1,166 @@ + + + +# celery + + + +[![Codecov test +coverage](https://codecov.io/gh/EmilHvitfeldt/celery/branch/main/graph/badge.svg)](https://app.codecov.io/gh/EmilHvitfeldt/celery?branch=main) +[![R-CMD-check](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/EmilHvitfeldt/celery/actions/workflows/R-CMD-check.yaml) + + +The goal of celery is to provide a tidy, unified interface to clustering +models. The packages is closely modeled after the +[parsnip](https://parsnip.tidymodels.org/) package. + +## Installation + +You can install the development version of celery from +[GitHub](https://github.com/) with: + +``` r +# install.packages("devtools") +devtools::install_github("EmilHvitfeldt/celery") +``` + +## Example + +The first thing you do is to create a `cluster specification`. For this +example we are creating a K-means model, using the `stats` engine. + +``` r +library(celery) + +k_means_spec <- k_means(k = 3) %>% + set_engine_celery("stats") + +k_means_spec +#> K Means Cluster Specification (partition) +#> +#> Main Arguments: +#> k = 3 +#> +#> Computational engine: stats +``` + +This specification can then be fit using data. + +``` r +k_means_spec %>% + fit(~., data = mtcars) +#> $spec +#> K Means Cluster Specification (partition) +#> +#> Main Arguments: +#> k = 3 +#> +#> Computational engine: stats +#> +#> Model fit template: +#> stats::kmeans(x = missing_arg(), centers = missing_arg(), centers = 3) +#> +#> $fit +#> K-means clustering with 3 clusters of sizes 4, 17, 11 +#> +#> Cluster means: +#> mpg cyl disp hp drat wt qsec vs +#> 1 13.67500 8.000000 443.0000 206.25000 3.060000 4.966000 17.56750 0.00000000 +#> 2 24.12353 4.705882 128.3353 97.35294 3.929412 2.573412 18.64176 0.76470588 +#> 3 16.19091 7.818182 311.7636 201.27273 3.277273 3.576364 16.72545 0.09090909 +#> am gear carb +#> 1 0.0000000 3.000000 3.500000 +#> 2 0.6470588 4.058824 2.352941 +#> 3 0.1818182 3.363636 3.272727 +#> +#> Clustering vector: +#> Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive +#> 2 2 2 3 +#> Hornet Sportabout Valiant Duster 360 Merc 240D +#> 3 2 3 2 +#> Merc 230 Merc 280 Merc 280C Merc 450SE +#> 2 2 2 3 +#> Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental +#> 3 3 1 1 +#> Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla +#> 1 2 2 2 +#> Toyota Corona Dodge Challenger AMC Javelin Camaro Z28 +#> 2 3 3 3 +#> Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa +#> 1 2 2 2 +#> Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E +#> 3 2 3 2 +#> +#> Within cluster sum of squares by cluster: +#> [1] 4665.041 42877.103 56041.432 +#> (between_SS / total_SS = 83.4 %) +#> +#> Available components: +#> +#> [1] "cluster" "centers" "totss" "withinss" "tot.withinss" +#> [6] "betweenss" "size" "iter" "ifault" +#> +#> $elapsed +#> $elapsed$elapsed +#> [1] NA +#> +#> +#> $preproc +#> $preproc$offset +#> NULL +#> +#> $preproc$terms +#> ~mpg + cyl + disp + hp + drat + wt + qsec + vs + am + gear + +#> carb +#> attr(,"variables") +#> list(mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb) +#> attr(,"factors") +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> mpg 1 0 0 0 0 0 0 0 0 0 0 +#> cyl 0 1 0 0 0 0 0 0 0 0 0 +#> disp 0 0 1 0 0 0 0 0 0 0 0 +#> hp 0 0 0 1 0 0 0 0 0 0 0 +#> drat 0 0 0 0 1 0 0 0 0 0 0 +#> wt 0 0 0 0 0 1 0 0 0 0 0 +#> qsec 0 0 0 0 0 0 1 0 0 0 0 +#> vs 0 0 0 0 0 0 0 1 0 0 0 +#> am 0 0 0 0 0 0 0 0 1 0 0 +#> gear 0 0 0 0 0 0 0 0 0 1 0 +#> carb 0 0 0 0 0 0 0 0 0 0 1 +#> attr(,"term.labels") +#> [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear" +#> [11] "carb" +#> attr(,"order") +#> [1] 1 1 1 1 1 1 1 1 1 1 1 +#> attr(,"intercept") +#> [1] 1 +#> attr(,"response") +#> [1] 0 +#> attr(,".Environment") +#> +#> attr(,"predvars") +#> list(mpg, cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb) +#> attr(,"dataClasses") +#> mpg cyl disp hp drat wt qsec vs +#> "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" +#> am gear carb +#> "numeric" "numeric" "numeric" +#> +#> $preproc$xlevels +#> named list() +#> +#> $preproc$options +#> $preproc$options$indicators +#> [1] "traditional" +#> +#> $preproc$options$composition +#> [1] "matrix" +#> +#> $preproc$options$remove_intercept +#> [1] TRUE +#> +#> +#> +#> attr(,"class") +#> [1] "_kmeans" "cluster_fit" +``` diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 00000000..d71acfb9 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: ~ +template: + bootstrap: 5 + diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..04c55859 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/man/fit.Rd b/man/fit.Rd index 3bbb0085..5242be8a 100644 --- a/man/fit.Rd +++ b/man/fit.Rd @@ -95,11 +95,11 @@ kmeans_mod <- k_means(k = 5) using_formula <- kmeans_mod \%>\% set_engine_celery("stats") \%>\% - fit( ~ ., data = mtcars) + fit(~., data = mtcars) using_x <- kmeans_mod \%>\% - set_engine_celery("stats") \%>\% + set_engine_celery("stats") \%>\% fit_x(x = mtcars) using_formula diff --git a/man/get_model_env_celery.Rd b/man/get_model_env_celery.Rd index 6fa6c27a..69fb5f3c 100644 --- a/man/get_model_env_celery.Rd +++ b/man/get_model_env_celery.Rd @@ -32,6 +32,5 @@ information about model specifications. # Access the model data: current_code <- get_model_env_celery() ls(envir = current_code) - } \keyword{internal} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 00000000..726dc8b1 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(celery) + +test_check("celery") diff --git a/tests/testthat/_snaps/k_means.md b/tests/testthat/_snaps/k_means.md new file mode 100644 index 00000000..fddfcde8 --- /dev/null +++ b/tests/testthat/_snaps/k_means.md @@ -0,0 +1,23 @@ +# printing + + Code + k_means() + Output + K Means Cluster Specification (partition) + + Computational engine: stats + + +--- + + Code + k_means(k = 10) + Output + K Means Cluster Specification (partition) + + Main Arguments: + k = 10 + + Computational engine: stats + + diff --git a/tests/testthat/test-k_means.R b/tests/testthat/test-k_means.R new file mode 100644 index 00000000..d7e55569 --- /dev/null +++ b/tests/testthat/test-k_means.R @@ -0,0 +1,13 @@ +test_that("Right classes", { + expect_equal(class(k_means()), c("k_means", "cluster_spec")) +}) + +test_that("printing", { + expect_snapshot( + k_means() + ) + expect_snapshot( + k_means(k = 10) + ) +}) +