Skip to content

Commit

Permalink
Move data to a separate package
Browse files Browse the repository at this point in the history
  • Loading branch information
igordot committed Feb 14, 2025
1 parent 3bfe6ed commit 1cc6423
Show file tree
Hide file tree
Showing 24 changed files with 602 additions and 664 deletions.
21 changes: 12 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,37 +1,40 @@
Type: Package
Package: msigdbr
Title: MSigDB Gene Sets for Multiple Organisms in a Tidy Data Format
Version: 2023.1.1
Version: 9.0.0.9000
Authors@R:
person("Igor", "Dolgalev", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4451-126X"))
Description: Provides the 'Molecular Signatures Database' (MSigDB) gene
sets typically used with the 'Gene Set Enrichment Analysis' (GSEA)
software (Subramanian et al. 2005 <doi:10.1073/pnas.0506580102>,
Liberzon et al. 2015 <doi:10.1016/j.cels.2015.12.004>) in a standard R
data frame with key-value pairs. The package includes the human genes
as listed in MSigDB as well as the corresponding symbols and IDs for
frequently studied model organisms such as mouse, rat, pig, fly, and
yeast.
Liberzon et al. 2015 <doi:10.1016/j.cels.2015.12.004>, Castanza et al.
2023 <doi:10.1038/s41592-023-02014-7>) as an R data frame. The package
includes the human genes as listed in MSigDB as well as the
corresponding symbols and IDs for frequently studied model organisms
such as mouse, rat, pig, fly, and yeast.
License: MIT + file LICENSE
URL: https://igordot.github.io/msigdbr/
BugReports: https://github.com/igordot/msigdbr/issues
Depends:
R (>= 3.6)
R (>= 4.1)
Imports:
babelgene (>= 22.9),
dplyr (>= 1.1.1),
magrittr,
lifecycle,
methods,
rlang,
tibble,
tidyselect
tidyselect (>= 1.2.0)
Suggests:
knitr,
msigdbdf,
rmarkdown,
roxygen2,
testthat
VignetteBuilder:
knitr
Additional_repositories: https://igordot.r-universe.dev
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
13 changes: 7 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(msigdbr)
export(msigdbr_collections)
export(msigdbr_show_species)
export(msigdbr_species)
import(tibble)
import(tidyselect)
importFrom(babelgene,orthologs)
importFrom(babelgene,species)
importFrom(dplyr,arrange)
Expand All @@ -16,8 +15,10 @@ importFrom(dplyr,inner_join)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(magrittr,"%>%")
importFrom(lifecycle,deprecated)
importFrom(lifecycle,is_present)
importFrom(methods,is)
importFrom(rlang,.data)
importFrom(tibble,as_tibble)
importFrom(tidyselect,any_of)
importFrom(tidyselect,everything)
importFrom(rlang,check_installed)
importFrom(utils,install.packages)
importFrom(utils,menu)
50 changes: 50 additions & 0 deletions R/msigdbr-check-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Check that the data package is installed
#'
#' Check that the 'msigdbdf' data package is installed.
#' If it is not, offer to install it (interactive sessions only) or stop with
#' installation instructions.
#' A dependency listed in DESCRIPTION Suggests is not guaranteed to be installed.
#'
#' @return `NULL`, invisibly. Called for its side effects. An error is signaled
#'   if 'msigdbdf' is still unavailable when the check finishes.
#'
#' @importFrom utils install.packages menu
msigdbr_check_data <- function() {
  # Nothing to do when the data package is already available
  if (requireNamespace("msigdbdf", quietly = TRUE)) {
    return(invisible(NULL))
  }

  message("The 'msigdbdf' package must be installed.")

  install_instructions <- paste0(
    "Please run the following command to install the 'msigdbdf' package:\n",
    "install.packages('msigdbdf', repos = 'https://igordot.r-universe.dev')"
  )

  # The user cannot be prompted outside of an interactive session
  if (!interactive()) {
    stop(install_instructions, call. = FALSE)
  }

  input <- utils::menu(c("Yes", "No"), title = "Would you like to install 'msigdbdf'?")
  if (input != 1) {
    # Answered "No" or left the menu without choosing
    stop(install_instructions, call. = FALSE)
  }

  message("Installing the 'msigdbdf' package.")

  # Report the text of the problem only. Passing the condition object itself to
  # message() would re-signal it and could trigger handlers set up by callers.
  report_failure <- function(cnd) {
    message(conditionMessage(cnd))
  }

  # install.packages() can report a failed installation as a warning rather
  # than an error, so both are handled the same way
  tryCatch(
    utils::install.packages(
      "msigdbdf",
      repos = c("https://igordot.r-universe.dev", getOption("repos"))
    ),
    error = report_failure,
    warning = report_failure
  )

  # Do not let callers continue without the data if the installation failed
  if (!requireNamespace("msigdbdf", quietly = TRUE)) {
    stop(
      "Failed to install the 'msigdbdf' package.\n",
      install_instructions,
      call. = FALSE
    )
  }

  invisible(NULL)
}

# Startup hook run when the package is attached: if the optional 'msigdbdf'
# data package cannot be loaded, show a startup message explaining how to
# install it.
.onAttach <- function(libname, pkgname) {
  data_available <- requireNamespace("msigdbdf", quietly = TRUE)
  if (data_available) {
    return(invisible(NULL))
  }

  packageStartupMessage(
    "To access all the data, please install the 'msigdbdf' package with:\n",
    "install.packages('msigdbdf', repos = 'https://igordot.r-universe.dev')"
  )
}
36 changes: 31 additions & 5 deletions R/msigdbr-collections.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,40 @@
#'
#' @return A data frame of the available collections.
#'
#' @param db_species Species abbreviation for the human or mouse databases (`"Hs"` or `"Mm"`).
#'
#' @importFrom dplyr arrange count distinct
#' @export
#'
#' @examples
#' msigdbr_collections()
msigdbr_collections <- function() {
msigdbr_genesets %>%
distinct(.data$gs_cat, .data$gs_subcat, .data$gs_id) %>%
count(.data$gs_cat, .data$gs_subcat, name = "num_genesets") %>%
arrange(.data$gs_cat, .data$gs_subcat)
msigdbr_collections <- function(db_species = "Hs") {
  # Stop with installation instructions if the data package is unavailable
  msigdbr_check_data()

  # Start from the full table of gene sets and their member genes, then:
  # 1. keep only the gene set information (one row per gene set, ignoring genes)
  # 2. count the number of gene sets per collection
  # 3. sort by collection and subcollection
  msigdbdf::msigdbdf(target_species = db_species) |>
    dplyr::distinct(
      .data$gs_collection,
      .data$gs_subcollection,
      .data$gs_collection_name,
      .data$gs_id
    ) |>
    dplyr::count(
      .data$gs_collection,
      .data$gs_subcollection,
      .data$gs_collection_name,
      name = "num_genesets"
    ) |>
    dplyr::arrange(.data$gs_collection, .data$gs_subcollection)
}
11 changes: 11 additions & 0 deletions R/msigdbr-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @import tibble
#' @import tidyselect
#' @importFrom lifecycle deprecated is_present
#' @importFrom methods is
#' @importFrom rlang .data check_installed
## usethis namespace: end
NULL
24 changes: 6 additions & 18 deletions R/msigdbr-species.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,19 @@
#'
#' @importFrom babelgene species
#' @importFrom dplyr arrange distinct select
#' @importFrom tibble as_tibble
#'
#' @export
#'
#' @examples
#' msigdbr_species()
msigdbr_species <- function() {
  # Start from the species table provided by babelgene
  babelgene::species() |>
    as_tibble() |>
    # Keep and rename the scientific and common name columns
    select(
      species_name = "scientific_name",
      species_common_name = "common_name"
    ) |>
    # Add human explicitly; distinct() drops the row if it is already present
    rbind(c("Homo sapiens", "human")) |>
    distinct() |>
    arrange(.data$species_name)
}

#' List the species available in the msigdbr package
#'
#' This function is deprecated; use `msigdbr_species()` instead.
#' Calling it signals a deprecation warning that points to the replacement.
#'
#' @return A sorted vector of the available species names.
#'
#' @export
msigdbr_show_species <- function() {
  .Deprecated("msigdbr_species")
  species_table <- msigdbr_species()
  sort(species_table[["species_name"]])
}
Loading

0 comments on commit 1cc6423

Please sign in to comment.