Skip to content

Commit 5365cac

Browse files
authored
Merge pull request #275 from stemangiola/dev
Dev
2 parents 963cb22 + 43aeb1e commit 5365cac

15 files changed

+1737
-276
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ egsea_report_*
1919
/Meta/
2020
/doc/
2121
_targets.R
22+
_targets*

DESCRIPTION

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: tidybulk
33
Title: Brings transcriptomics to the tidyverse
4-
Version: 1.11.4
4+
Version: 1.11.5
55
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
66
role = c("aut", "cre")),
77
person("Maria", "Doyle", email = "[email protected]",
@@ -17,7 +17,7 @@ Depends:
1717
Imports:
1818
tibble,
1919
readr,
20-
dplyr,
20+
dplyr (>= 1.1.0),
2121
magrittr,
2222
tidyr,
2323
stringi,
@@ -78,7 +78,12 @@ Suggests:
7878
igraph,
7979
EGSEA,
8080
IRanges,
81-
here
81+
here,
82+
glmmSeq,
83+
pbapply,
84+
pbmcapply,
85+
lme4,
86+
MASS
8287
VignetteBuilder:
8388
knitr
8489
RdMacros:

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export(mutate)
4949
export(nest)
5050
export(pivot_sample)
5151
export(pivot_transcript)
52+
export(quantile_normalise_abundance)
5253
export(reduce_dimensions)
5354
export(remove_redundancy)
5455
export(rename)
@@ -70,6 +71,7 @@ export(tidybulk)
7071
export(tidybulk_SAM_BAM)
7172
export(unnest)
7273
exportMethods(as_SummarizedExperiment)
74+
exportMethods(quantile_normalise_abundance)
7375
exportMethods(scale_abundance)
7476
exportMethods(tidybulk)
7577
exportMethods(tidybulk_SAM_BAM)
@@ -103,6 +105,7 @@ importFrom(dplyr,group_by_drop_default)
103105
importFrom(dplyr,group_cols)
104106
importFrom(dplyr,if_else)
105107
importFrom(dplyr,inner_join)
108+
importFrom(dplyr,join_by)
106109
importFrom(dplyr,left_join)
107110
importFrom(dplyr,mutate)
108111
importFrom(dplyr,mutate_if)
@@ -125,13 +128,15 @@ importFrom(magrittr,"%$%")
125128
importFrom(magrittr,"%>%")
126129
importFrom(magrittr,divide_by)
127130
importFrom(magrittr,equals)
131+
importFrom(magrittr,extract2)
128132
importFrom(magrittr,multiply_by)
129133
importFrom(magrittr,set_colnames)
130134
importFrom(magrittr,set_rownames)
131135
importFrom(parallel,mclapply)
132136
importFrom(purrr,as_mapper)
133137
importFrom(purrr,map)
134138
importFrom(purrr,map2)
139+
importFrom(purrr,map2_dfc)
135140
importFrom(purrr,map2_dfr)
136141
importFrom(purrr,map_chr)
137142
importFrom(purrr,map_dfr)

R/functions.R

Lines changed: 152 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,6 @@ get_scaled_counts_bulk <- function(.data,
355355

356356
}
357357

358-
359-
360358
#' Get differential transcription information to a tibble using edgeR.
361359
#'
362360
#' @keywords internal
@@ -643,6 +641,158 @@ get_differential_transcript_abundance_bulk <- function(.data,
643641
}
644642
}
645643

644+
#' Get differential transcription information to a tibble using glmmSeq
645+
#'
646+
#' @keywords internal
647+
#' @noRd
648+
#'
649+
#'
650+
#'
651+
#' @import tibble
652+
#' @importFrom magrittr set_colnames
653+
#' @importFrom stats model.matrix
654+
#' @importFrom utils install.packages
655+
#' @importFrom purrr when
656+
#' @importFrom rlang inform
657+
#' @importFrom tidyr spread
658+
#' @importFrom tidyr pivot_wider
659+
#' @importFrom dplyr slice
660+
#'
661+
#'
662+
#' @param .data A tibble
663+
#' @param .formula a formula with no response variable, referring only to numeric variables
664+
#' @param .sample The name of the sample column
665+
#' @param .transcript The name of the transcript/gene column
666+
#' @param .abundance The name of the transcript/gene abundance column
667+
#' @param .contrasts A character vector. Not used for this method
668+
#' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)
669+
#' @param test_above_log2_fold_change A positive real value. This works for edgeR and limma_voom methods. It uses the `treat` function, which tests that the difference in abundance is bigger than this threshold rather than zero \url{https://pubmed.ncbi.nlm.nih.gov/19176553}.
670+
#' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
671+
#' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames.
672+
#' @param .sample_total_read_count
673+
#'
674+
#' @return A tibble with glmmSeq results
675+
#'
676+
get_differential_transcript_abundance_glmmSeq <- function(.data,
677+
.formula,
678+
.sample = NULL,
679+
.transcript = NULL,
680+
.abundance = NULL,
681+
.contrasts = NULL,
682+
method ,
683+
test_above_log2_fold_change = NULL,
684+
scaling_method = NULL,
685+
omit_contrast_in_colnames = FALSE,
686+
prefix = "",
687+
.sample_total_read_count = NULL,
688+
...) {
689+
# Get column names
690+
.sample = enquo(.sample)
691+
.transcript = enquo(.transcript)
692+
.abundance = enquo(.abundance)
693+
.sample_total_read_count = enquo(.sample_total_read_count)
694+
695+
# Check if omit_contrast_in_colnames is correctly setup
696+
if(omit_contrast_in_colnames & length(.contrasts) > 1){
697+
warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
698+
omit_contrast_in_colnames = FALSE
699+
}
700+
701+
# # Specify the design column tested
702+
# if(is.null(.contrasts))
703+
# message(
704+
# sprintf(
705+
# "tidybulk says: The design column being tested is %s",
706+
# design %>% colnames %>% .[1]
707+
# )
708+
# )
709+
710+
# # If I don't have intercept or I have categorical factor of interest BUT I don't have contrasts
711+
# if(
712+
# is.null(.contrasts) &
713+
# (
714+
# (
715+
# ! .data |> pull(parse_formula(.formula)[1]) |> is("numeric") &
716+
# .data |> pull(parse_formula(.formula)[1]) |> unique() |> length() |> gt(2)
717+
# ) |
718+
# colnames(design)[1] != "(Intercept)"
719+
# )
720+
# )
721+
# warning("tidybulk says: If you have (i) an intercept-free design (i.e. ~ 0 + factor) or you have a categorical factor of interest with more than 2 values you should use the `contrasts` argument.")
722+
#
723+
# my_contrasts =
724+
# .contrasts %>%
725+
# ifelse_pipe(length(.) > 0,
726+
# ~ limma::makeContrasts(contrasts = .x, levels = design),
727+
# ~ NULL)
728+
729+
# Check if package is installed, otherwise install
730+
if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
731+
message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
732+
if (!requireNamespace("BiocManager", quietly = TRUE))
733+
install.packages("BiocManager", repos = "https://cloud.r-project.org")
734+
BiocManager::install("edgeR", ask = FALSE)
735+
}
736+
737+
# Check if package is installed, otherwise install
738+
if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
739+
message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
740+
if (!requireNamespace("BiocManager", quietly = TRUE))
741+
install.packages("BiocManager", repos = "https://cloud.r-project.org")
742+
BiocManager::install("glmmSeq", ask = FALSE)
743+
}
744+
745+
metadata =
746+
.data |>
747+
pivot_sample(!!.sample) |>
748+
as_data_frame(rownames = !!.sample)
749+
750+
counts =
751+
.data %>%
752+
select(!!.transcript,!!.sample,!!.abundance) %>%
753+
spread(!!.sample,!!.abundance) %>%
754+
as_matrix(rownames = !!.transcript)
755+
756+
# Reorder counts
757+
counts = counts[,rownames(metadata),drop=FALSE]
758+
759+
glmmSeq_object =
760+
glmmSeq( .formula,
761+
countdata = counts ,
762+
metadata = metadata,
763+
dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
764+
progress = TRUE,
765+
method = method |> str_remove("(?i)^glmmSeq_"),
766+
...
767+
)
768+
769+
glmmSeq_object |>
770+
summary() |>
771+
as_tibble(rownames = "gene") |>
772+
mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
773+
774+
# Attach attributes
775+
reattach_internals(.data) %>%
776+
777+
# select method
778+
memorise_methods_used("glmmSeq") |>
779+
780+
# Add raw object
781+
attach_to_internals(glmmSeq_object, "glmmSeq") %>%
782+
# Communicate the attribute added
783+
{
784+
785+
rlang::inform("\ntidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once")
786+
787+
(.)
788+
} %>%
789+
790+
# Attach prefix
791+
setNames(c(
792+
colnames(.)[1],
793+
sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
794+
))
795+
}
646796

647797
#' Get differential transcription information to a tibble using voom.
648798
#'

0 commit comments

Comments
 (0)