diff --git a/R/msigdbr-check-data.R b/R/msigdbr-check-data.R index e48c697..815899b 100644 --- a/R/msigdbr-check-data.R +++ b/R/msigdbr-check-data.R @@ -7,7 +7,7 @@ #' @importFrom utils install.packages menu msigdbr_check_data <- function() { if (!requireNamespace("msigdbdf", quietly = TRUE)) { - message("The 'msigdbdf' package must be installed to access all the data.") + message("The 'msigdbdf' package must be installed to access the full dataset.") install_instructions <- paste0( "Please run the following command to install the 'msigdbdf' package:\n", diff --git a/R/msigdbr.R b/R/msigdbr.R index 14a7eb8..bfe4ca0 100644 --- a/R/msigdbr.R +++ b/R/msigdbr.R @@ -60,12 +60,10 @@ msigdbr <- function(species = "Homo sapiens", db_species = "HS", collection = NU # Get the gene sets table mdb <- msigdbdf::msigdbdf(target_species = db_species) } else { - if (species == "Homo sapiens" && db_species == "HS" && collection == "H") { - # Use an internal human Hallmark dataset for minimal functionality without msigdbdf - # msigdb_h <- msigdbdf::msigdbdf(target_species = "HS") - # msigdb_h <- dplyr::filter(msigdb_h, gs_collection == "H") - # usethis::use_data(msigdb_h, internal = TRUE, overwrite = TRUE, compress = "xz") - mdb <- msigdb_h + if (species == "Homo sapiens" && db_species == "HS") { + # Fall back to the internal test dataset (testdb, built by data-raw/test-dataset.R) for minimal functionality without msigdbdf + mdb <- testdb + message("The 'msigdbdf' package must be installed to access the full dataset.") } else { # Check if msigdbdf is available and try to install otherwise msigdbr_check_data() diff --git a/R/sysdata.rda b/R/sysdata.rda index fffaf81..b28071c 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/test-dataset.R b/data-raw/test-dataset.R new file mode 100644 index 0000000..e330fec --- /dev/null +++ b/data-raw/test-dataset.R @@ -0,0 +1,31 @@ +# Prepare the internal test dataset (testdb): all Hallmark gene sets plus a small random sample from every collection + +library(dplyr) + +# Get the full human dataset +mdb <- msigdbdf::msigdbdf(target_species = "HS") + +# Get the 
Hallmark gene sets +hallmark_gs_ids <- filter(mdb, gs_collection == "H") |> pull(gs_id) + +# Subsample smaller gene sets (fewer than 100 rows each) from every collection and sub-collection, up to 5 per group; the seed is fixed so the sample is reproducible +set.seed(99) +random_gs_ids <- mdb |> + count(gs_id, gs_collection, gs_subcollection) |> + filter(n < 100) |> + group_by(gs_collection, gs_subcollection) |> + slice_sample(n = 5) |> + pull(gs_id) + +# Subset the full table to the selected gene sets (all Hallmark sets plus the random sample) +subset_gs_ids <- unique(sort(c(hallmark_gs_ids, random_gs_ids))) +testdb <- filter(mdb, gs_id %in% subset_gs_ids) + +# count(testdb, gs_collection, gs_subcollection) +# count(testdb, db_gene_symbol, sort = TRUE) + +# Modify the version to indicate that this is a test dataset +testdb$db_version <- paste0("TEST.", testdb$db_version) + +# Save as internal package data, overwriting any existing internal data file +usethis::use_data(testdb, internal = TRUE, overwrite = TRUE, compress = "xz")