Skip to content

Commit

Permalink
Update the internal dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
igordot committed Feb 20, 2025
1 parent 8a39cae commit d159c2d
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
2 changes: 1 addition & 1 deletion R/msigdbr-check-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom utils install.packages menu
msigdbr_check_data <- function() {
if (!requireNamespace("msigdbdf", quietly = TRUE)) {
message("The 'msigdbdf' package must be installed to access all the data.")
message("The 'msigdbdf' package must be installed to access the full dataset.")

install_instructions <- paste0(
"Please run the following command to install the 'msigdbdf' package:\n",
Expand Down
10 changes: 4 additions & 6 deletions R/msigdbr.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,10 @@ msigdbr <- function(species = "Homo sapiens", db_species = "HS", collection = NU
# Get the gene sets table
mdb <- msigdbdf::msigdbdf(target_species = db_species)
} else {
if (species == "Homo sapiens" && db_species == "HS" && collection == "H") {
# Use an internal human Hallmark dataset for minimal functionality without msigdbdf
# msigdb_h <- msigdbdf::msigdbdf(target_species = "HS")
# msigdb_h <- dplyr::filter(msigdb_h, gs_collection == "H")
# usethis::use_data(msigdb_h, internal = TRUE, overwrite = TRUE, compress = "xz")
mdb <- msigdb_h
if (species == "Homo sapiens" && db_species == "HS") {
# Use an internal dataset for minimal functionality without msigdbdf
mdb <- testdb
message("The 'msigdbdf' package must be installed to access the full dataset.")
} else {
# Check if msigdbdf is available and try to install otherwise
msigdbr_check_data()
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
31 changes: 31 additions & 0 deletions data-raw/test-dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Prepare the test dataset

library(dplyr)

# Load the complete human gene set table from the msigdbdf data package
full_db <- msigdbdf::msigdbdf(target_species = "HS")

# Collect the IDs of every gene set in the Hallmark ("H") collection
hallmark_rows <- filter(full_db, gs_collection == "H")
hallmark_ids <- pull(hallmark_rows, gs_id)

# Randomly pick up to 5 small gene sets (fewer than 100 rows each) from every
# collection/sub-collection pair; the fixed seed keeps the draw reproducible
set.seed(99)
gs_sizes <- count(full_db, gs_id, gs_collection, gs_subcollection)
small_gs <- filter(gs_sizes, n < 100)
small_gs_by_group <- group_by(small_gs, gs_collection, gs_subcollection)
sampled_ids <- pull(slice_sample(small_gs_by_group, n = 5), gs_id)

# Subset the full table to the selected (Hallmark + sampled) gene sets
keep_ids <- unique(sort(c(hallmark_ids, sampled_ids)))
testdb <- filter(full_db, gs_id %in% keep_ids)

# Optional sanity checks to run interactively:
# count(testdb, gs_collection, gs_subcollection)
# count(testdb, db_gene_symbol, sort = TRUE)

# Prefix the version string so the table is recognizable as a test dataset
testdb$db_version <- paste0("TEST.", testdb$db_version)

# Store the table as internal package data
usethis::use_data(testdb, internal = TRUE, overwrite = TRUE, compress = "xz")

0 comments on commit d159c2d

Please sign in to comment.