Skip to content

Tuning k-means fails when running in parallel  #182

Open
@KazBarker

Description

@KazBarker

The problem

I'm having trouble with tuning k-means clustering in parallel. Everything works as expected when running sequentially, but in parallel the following warning is returned:

Warning: All models failed. See the `.notes` column.

No error is thrown, so the code continues running, but the returned data frame is empty.

Reproducible example

library(doParallel)
#> Loading required package: foreach
#> Loading required package: iterators
#> Loading required package: parallel
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
#> Warning: package 'tidyr' was built under R version 4.3.3
#> Warning: package 'readr' was built under R version 4.3.3
#> Warning: package 'dplyr' was built under R version 4.3.3
#> Warning: package 'stringr' was built under R version 4.3.3
#> Warning: package 'lubridate' was built under R version 4.3.3
library(tidymodels)
#> Warning: package 'dials' was built under R version 4.3.3
#> Warning: package 'scales' was built under R version 4.3.3
#> Warning: package 'parsnip' was built under R version 4.3.3
#> Warning: package 'recipes' was built under R version 4.3.3
#> Warning: package 'workflows' was built under R version 4.3.3
library(tidyclust)
#> Warning: package 'tidyclust' was built under R version 4.3.3
#> 
#> Attaching package: 'tidyclust'
#> The following objects are masked from 'package:parsnip':
#> 
#>     knit_engine_docs, list_md_problems
library(tune)

# Reprex: tune a k-means workflow with a doParallel backend registered.
# The steps below are kept exactly as reported; only comments were added.

# Start a PSOCK cluster with one worker per core reported by
# detectCores(logical = FALSE), then register it with doParallel.
# NOTE(review): the cluster is never stopped in this snippet; presumably
# parallel::stopCluster(my_parallel) should follow the run -- confirm.
my_parallel <- parallel::makePSOCKcluster(parallel::detectCores(logical = FALSE))
doParallel::registerDoParallel(my_parallel)

# Seed the RNG in this (main) session before resampling.
set.seed(123)

# Penguins data with incomplete rows dropped; one-sided formula (no outcome)
# naming the two bill measurements used for clustering.
penguin_data <- modeldata::penguins %>% drop_na()
data_form <- ~bill_length_mm + bill_depth_mm

# Preprocessing: normalize every predictor in the formula.
penguin_recipe <- recipe(data_form, data = penguin_data) %>% 
  step_normalize(all_predictors())

# k-means spec with the number of clusters marked for tuning.
kmeans_spec <- k_means(num_clusters = tune(),
                       engine = 'stats',
                       mode = 'partition')

# Bundle the recipe and the model spec into a single workflow.
kmeans_wkfl <- workflow() %>% 
  add_recipe(penguin_recipe) %>% 
  add_model(kmeans_spec)

# 2-fold cross-validation resamples and a grid of candidate cluster counts
# from 1 through 10.
cl_resamples <- vfold_cv(penguin_data, v = 2)
cl_grid <- tibble(num_clusters = 1:10)

# Tune over the grid, scoring with sse_within_total. With the backend
# registered above, this call emits the "All models failed" warning below.
# NOTE(review): the report says the same code works sequentially; presumably
# the workers lack something the main session has -- cause not shown here.
tuning_result <- tune_cluster(kmeans_wkfl,
                              resamples = cl_resamples,
                              grid = cl_grid,
                              metrics = cluster_metric_set(sse_within_total))
#> Warning: All models failed. See the `.notes` column.
# Inspect the per-resample notes; the output shows NULL for both resamples.
tuning_result$.notes
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL

Created on 2024-04-26 with reprex v2.1.0

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.2 (2023-10-31 ucrt)
#>  os       Windows 11 x64 (build 22631)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  English_United States.utf8
#>  ctype    English_United States.utf8
#>  tz       America/New_York
#>  date     2024-04-26
#>  pandoc   3.1.7 @ C:/Users/Kbark/AppData/Local/Pandoc/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package      * version    date (UTC) lib source
#>  backports      1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
#>  broom        * 1.0.5      2023-06-09 [1] CRAN (R 4.3.1)
#>  class          7.3-22     2023-05-03 [2] CRAN (R 4.3.2)
#>  cli            3.6.2      2023-12-11 [1] CRAN (R 4.3.3)
#>  codetools      0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
#>  colorspace     2.1-0      2023-01-23 [1] CRAN (R 4.3.1)
#>  data.table     1.15.2     2024-02-29 [1] CRAN (R 4.3.3)
#>  dials        * 1.2.1      2024-02-22 [1] CRAN (R 4.3.3)
#>  DiceDesign     1.10       2023-12-07 [1] CRAN (R 4.3.2)
#>  digest         0.6.35     2024-03-11 [1] CRAN (R 4.3.3)
#>  doParallel   * 1.0.17     2022-02-07 [1] CRAN (R 4.3.2)
#>  dplyr        * 1.1.4      2023-11-17 [1] CRAN (R 4.3.3)
#>  evaluate       0.23       2023-11-01 [1] CRAN (R 4.3.3)
#>  fansi          1.0.6      2023-12-08 [1] CRAN (R 4.3.3)
#>  fastmap        1.1.1      2023-02-24 [1] CRAN (R 4.3.1)
#>  forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.3.1)
#>  foreach      * 1.5.2      2022-02-02 [1] CRAN (R 4.3.2)
#>  fs             1.6.3      2023-07-20 [1] CRAN (R 4.3.1)
#>  furrr          0.3.1      2022-08-15 [1] CRAN (R 4.3.2)
#>  future         1.33.1     2023-12-22 [1] CRAN (R 4.3.2)
#>  future.apply   1.11.1     2023-12-21 [1] CRAN (R 4.3.2)
#>  generics       0.1.3      2022-07-05 [1] CRAN (R 4.3.1)
#>  ggplot2      * 3.5.0      2024-02-23 [1] CRAN (R 4.3.3)
#>  globals        0.16.3     2024-03-08 [1] CRAN (R 4.3.3)
#>  glue           1.7.0      2024-01-09 [1] CRAN (R 4.3.3)
#>  gower          1.0.1      2022-12-22 [1] CRAN (R 4.3.1)
#>  GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.3.2)
#>  gtable         0.3.4      2023-08-21 [1] CRAN (R 4.3.1)
#>  hardhat        1.3.1      2024-02-02 [1] CRAN (R 4.3.2)
#>  hms            1.1.3      2023-03-21 [1] CRAN (R 4.3.1)
#>  htmltools      0.5.7      2023-11-03 [1] CRAN (R 4.3.3)
#>  infer        * 1.0.6      2024-01-31 [1] CRAN (R 4.3.2)
#>  ipred          0.9-14     2023-03-09 [1] CRAN (R 4.3.2)
#>  iterators    * 1.0.14     2022-02-05 [1] CRAN (R 4.3.2)
#>  knitr          1.45       2023-10-30 [1] CRAN (R 4.3.3)
#>  lattice        0.21-9     2023-10-01 [2] CRAN (R 4.3.2)
#>  lava           1.8.0      2024-03-05 [1] CRAN (R 4.3.3)
#>  lhs            1.1.6      2022-12-17 [1] CRAN (R 4.3.2)
#>  lifecycle      1.0.4      2023-11-07 [1] CRAN (R 4.3.3)
#>  listenv        0.9.1      2024-01-29 [1] CRAN (R 4.3.2)
#>  lubridate    * 1.9.3      2023-09-27 [1] CRAN (R 4.3.3)
#>  magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.3.1)
#>  MASS           7.3-60     2023-05-04 [2] CRAN (R 4.3.2)
#>  Matrix         1.6-5      2024-01-11 [1] CRAN (R 4.3.3)
#>  modeldata    * 1.3.0      2024-01-21 [1] CRAN (R 4.3.2)
#>  modelenv       0.1.1      2023-03-08 [1] CRAN (R 4.3.2)
#>  munsell        0.5.0      2018-06-12 [1] CRAN (R 4.3.1)
#>  nnet           7.3-19     2023-05-03 [2] CRAN (R 4.3.2)
#>  parallelly     1.37.1     2024-02-29 [1] CRAN (R 4.3.3)
#>  parsnip      * 1.2.0      2024-02-16 [1] CRAN (R 4.3.3)
#>  pillar         1.9.0      2023-03-22 [1] CRAN (R 4.3.1)
#>  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.3.1)
#>  prodlim        2023.08.28 2023-08-28 [1] CRAN (R 4.3.2)
#>  purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.3.1)
#>  R.cache        0.16.0     2022-07-21 [1] CRAN (R 4.3.3)
#>  R.methodsS3    1.8.2      2022-06-13 [1] CRAN (R 4.3.1)
#>  R.oo           1.26.0     2024-01-24 [1] CRAN (R 4.3.2)
#>  R.utils        2.12.3     2023-11-18 [1] CRAN (R 4.3.3)
#>  R6             2.5.1      2021-08-19 [1] CRAN (R 4.3.1)
#>  Rcpp           1.0.12     2024-01-09 [1] CRAN (R 4.3.3)
#>  readr        * 2.1.5      2024-01-10 [1] CRAN (R 4.3.3)
#>  recipes      * 1.0.10     2024-02-18 [1] CRAN (R 4.3.3)
#>  reprex         2.1.0      2024-01-11 [1] CRAN (R 4.3.3)
#>  rlang          1.1.3      2024-01-10 [1] CRAN (R 4.3.3)
#>  rmarkdown      2.26       2024-03-05 [1] CRAN (R 4.3.3)
#>  rpart          4.1.21     2023-10-09 [2] CRAN (R 4.3.2)
#>  rsample      * 1.2.0      2023-08-23 [1] CRAN (R 4.3.2)
#>  rstudioapi     0.15.0     2023-07-07 [1] CRAN (R 4.3.1)
#>  scales       * 1.3.0      2023-11-28 [1] CRAN (R 4.3.3)
#>  sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.3.3)
#>  stringi        1.8.3      2023-12-11 [1] CRAN (R 4.3.2)
#>  stringr      * 1.5.1      2023-11-14 [1] CRAN (R 4.3.3)
#>  styler         1.10.2     2023-08-29 [1] CRAN (R 4.3.3)
#>  survival       3.5-8      2024-02-14 [1] CRAN (R 4.3.3)
#>  tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.3.1)
#>  tidyclust    * 0.2.1      2024-02-29 [1] CRAN (R 4.3.3)
#>  tidymodels   * 1.1.1      2023-08-24 [1] CRAN (R 4.3.2)
#>  tidyr        * 1.3.1      2024-01-24 [1] CRAN (R 4.3.3)
#>  tidyselect     1.2.1      2024-03-11 [1] CRAN (R 4.3.3)
#>  tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.3.3)
#>  timechange     0.3.0      2024-01-18 [1] CRAN (R 4.3.3)
#>  timeDate       4032.109   2023-12-14 [1] CRAN (R 4.3.2)
#>  tune         * 1.2.1      2024-04-18 [1] CRAN (R 4.3.2)
#>  tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.3.1)
#>  utf8           1.2.4      2023-10-22 [1] CRAN (R 4.3.3)
#>  vctrs          0.6.5      2023-12-01 [1] CRAN (R 4.3.3)
#>  withr          3.0.0      2024-01-16 [1] CRAN (R 4.3.3)
#>  workflows    * 1.1.4      2024-02-19 [1] CRAN (R 4.3.3)
#>  workflowsets * 1.0.1      2023-04-06 [1] CRAN (R 4.3.2)
#>  xfun           0.42       2024-02-08 [1] CRAN (R 4.3.3)
#>  yaml           2.3.8      2023-12-11 [1] CRAN (R 4.3.2)
#>  yardstick    * 1.3.0      2024-01-19 [1] CRAN (R 4.3.2)
#> 
#>  [1] C:/Users/Kbark/AppData/Local/R/win-library/4.3
#>  [2] C:/Program Files/R/R-4.3.2/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions