diff --git a/DESCRIPTION b/DESCRIPTION index ecb232e..b604414 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,14 @@ Package: hciR Title: RNA-seq workflows at HCI -Version: 1.7 +Version: 1.8 Author: Chris Stubben Maintainer: Chris Stubben +Depends: + R (>= 2.10) Imports: dplyr, readr, + readxl, tidyr, tibble, magrittr, @@ -30,5 +33,6 @@ License: GPL-3 Encoding: UTF-8 Description: This package simplifies the R code in a differential expression analysis for markdown reports. The package includes functions to run DESeq2 using sample and count tibbles as input, get annotated DESeq results for all pairwise comparisons and create interactive plots and other visualizations. LazyData: true +LazyDataCompression: xz biocViews: Software RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index 985bb87..bf7a8e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ export(as_tbl) export(check_contrasts) export(deseq_from_tibble) export(drop_empty_columns) -export(enricher_all) export(extract_samples) export(fgsea_all) export(filter_counts) @@ -20,11 +19,9 @@ export(palette255) export(plot_biotypes) export(plot_counts) export(plot_dist) -export(plot_enricher) export(plot_fgsea) export(plot_filter) export(plot_genes) -export(plot_gsea) export(plot_interactions) export(plot_ma) export(plot_pca) diff --git a/R/enricher_all.R b/R/enricher_all.R deleted file mode 100644 index 8af8706..0000000 --- a/R/enricher_all.R +++ /dev/null @@ -1,95 +0,0 @@ -#' Run enricher from clusterProfiler on all DESeq2 result tables -#' -#' @param res A list of DESeq results -#' @param gsets A list of gene sets (or two column dataframe with pathway and gene) -#' @param maxGSSize Maximum set size, default 2000 -#' @param expressed_universe Use all expressed genes as universe, default TRUE -#' @param deseq.padj Adjusted p-value cutoff for significant genes, default 0.05 -#' @param logFC absolute fold change cutoff -#' @param protein_coding compare protein coding genes only, default TRUE -#' @param quiet suppress messages except total significant -#' @param \dots Additional options like minGSSize and pvalueCutoff passed to \code{enricher} -#' -#' @return A list of tibbles -#' -#' @author Chris Stubben -#' -#' @examples -#' \dontrun{ -#' library(hciRdata) -#' enricher_all(res, msig_pathways$KEGG) -#' } -#' @export - -enricher_all <- function(res, gsets, maxGSSize = 2000, expressed_universe=TRUE, deseq.padj = 0.05, - logFC, protein_coding = TRUE, quiet=FALSE, ...){ - ## convert to 2 column data.frame - if(class(gsets)[1] == "list"){ - term2gene <- dplyr::bind_rows( - lapply(gsets, tibble::enframe, value="gene"), .id="pathway") %>% - dplyr::select(1,3) - }else{ - term2gene <- gsets - } - ## if results are a tibble (since simplify=TRUE by default) - if( class(res)[1] != "list"){ - n <- attr(res, "contrast") - if(is.null(n)) stop("Missing contrast name") - res <- list(res) - names(res) <- n - } - n <- length(res) - genes_res <- vector("list", n) - vs <- names(res) - ## padded for message - vs1 <- sprintf(paste0("%-", max(nchar(vs))+2, "s"), paste0(vs, ":") ) - names(genes_res) <- vs - pc <- " " - if(protein_coding) pc <- " protein coding " - if(!missing(logFC)){ - if(!quiet) message( "Using padj < ", deseq.padj, " AND abs(log2FoldChange) > ", logFC, " to find significant", pc, "genes") - }else{ - if(!quiet) message( "Using padj < ", deseq.padj, " to find significant", pc, "genes") - } - - for(i in 1:n){ - r1 <- res[[i]] - if(protein_coding){ - r1 <- dplyr::filter(r1, biotype == "protein_coding") - } - ## use first gene name in comma-separated lists of human homologs? - if( "human_homolog" %in% colnames(r1)){ - # drop genes without homolog??? - r1 <- dplyr::filter(r1, !is.na(human_homolog)) - r1$human_homolog <- gsub(",.*", "", r1$human_homolog) - } - sig <- dplyr::filter( r1, padj <= deseq.padj) - if( !missing(logFC)){ - sig <- dplyr::filter(sig, abs(log2FoldChange) >= logFC) - } - if(nrow(sig)==0){ - message( i, ". ", vs1[[i]], " No significant genes") - genes_res[[i]] <- tibble::tibble() - }else{ - # get unique genes?? - if( "human_homolog" %in% colnames(r1)){ - sig_genes <- unique(sig$human_homolog) - universe <- unique(r1$human_homolog) - }else{ - sig_genes <- unique(sig$gene_name) - universe <- unique(r1$gene_name) - } - # use NULL to swith universe to all genes in term2gene - if(!expressed_universe) universe <- NULL - em1 <- clusterProfiler::enricher(sig_genes, TERM2GENE=term2gene, universe = universe, maxGSSize=maxGSSize, ...) - # ID and Description are the same - z <- tibble::as_tibble(em1)[, -2] - names(z)[1] <- "pathway" - genes_res[[i]] <- z - message( i, ". ", vs1[[i]], " ", nrow(z), " enriched sets") - } - } - genes_res <- genes_res[sapply(genes_res, nrow) > 0] - if(length(genes_res) == 1) genes_res <- genes_res[[1]] - genes_res -} diff --git a/R/plot_counts.R b/R/plot_counts.R index 9486553..cccc34f 100644 --- a/R/plot_counts.R +++ b/R/plot_counts.R @@ -17,7 +17,8 @@ #' @author Chris Stubben #' #' @examples -#' plot_counts(pasilla$count, "FBgn0033635", c("type","condition"), title = "Prip") +#' x <- top_counts(pasilla$results, pasilla$rlog, top=12) +#' plot_counts(x, "condition") #' @export plot_counts <- function(x, intgroup, n=25, geom="jitter", ylab = "Log2 counts", reorder, ncol=NULL, scales="fixed", ...){ @@ -32,7 +33,7 @@ plot_counts <- function(x, intgroup, n=25, geom="jitter", ylab = "Log2 counts", x1 <- x1[, 1:n] } y <- data.frame(s1[, intgroup, drop=FALSE], x1) - z <- tidyr::gather(y, -all_of(intgroup), key="gene2", value = "count") + z <- tidyr::gather(y, -dplyr::all_of(intgroup), key="gene2", value = "count") ## reorder? if(!missing(reorder)) z[[1]] <- factor(z[[1]], levels = reorder) diff --git a/R/plot_enricher.R b/R/plot_enricher.R deleted file mode 100644 index 337695b..0000000 --- a/R/plot_enricher.R +++ /dev/null @@ -1,68 +0,0 @@ -#' Plot enricher output -#' -#' Plot heatmap with enrichment scores by contrast -#' -#' @param x list from \code{link{fgsea_all}} -#' @param trim trim long names, default more than 70 characters -#' @param sets display contrasts sharing n or more sets for n > 1. If n = 1, -#' then only plot unique sets. If missing, then plots all sets, default. -#' @param top_n Number of top sets to plot -#' @param min_p Minimum p-value on -log10 scale -#' @param max_rows Maximun number of rows to cluster -#' @param cluster_row Cluster dendrogram rows, default FALSE for an alphabetical list -#' @param cluster_col Cluster dendrogram columns, default FALSE -#' @param \dots other options passed to \code{pheatmap} -#' @author Chris Stubben -#' @examples -#' \dontrun{ -#' library(hciRdata) -#' fc <- write_gsea_rnk(res, write=FALSE) -#' x <- fgsea_all(fc, msig_pathways$KEGG, FDR= 0.25) -#' plot_fgsea(x) -#' } -#' @export - -plot_enricher <- function(x, trim=70, sets, top_n, min_p, max_rows, cluster_row=FALSE, cluster_col=FALSE, ...){ - # from enricher_markers - if(is.data.frame(x)){ - y <- x - names(y)[1]<-"contrast" - }else{ - y <- dplyr::bind_rows(x, .id = "contrast") - ## order columns by order in list (or alphabetical) - y$contrast <- factor(y$contrast, levels= names(x)) - } - if(!missing(top_n)){ - y <- dplyr::group_by(y, contrast) %>% top_n(top_n, -p.adjust) %>% dplyr::ungroup() - } - z <- dplyr::select(y, contrast, pathway, p.adjust) %>% - dplyr::mutate(p.adjust = -log10(p.adjust)) %>% - tidyr::spread(contrast, p.adjust) - if(!missing(sets)){ - n <- apply(z[, -1], 1, function(x) sum(!is.na(x))) - if(sets ==1){ - z <- dplyr::filter(z, n == 1) - }else{ - z <- dplyr::filter(z, n >= sets) - } - } - clrs <- grDevices::colorRampPalette( - c("white", RColorBrewer::brewer.pal(n = 9, name = "Reds")[-1]))(255) - ## too many NAs to cluster - z <- as_matrix(z) - z[is.na(z)] <- 0 - if(!missing(min_p)) z[z>min_p] <- min_p - message(nrow(z) , " total sets") - if(!missing(max_rows)){ - if(max_rows< nrow(z)){ - z <- z[order(rowSums(z), decreasing=TRUE), ] - z <- z[1:max_rows,] - z <- z[order(rownames(z)),] - message(" plotting top ", max_rows) - } - } - rownames(z) <- ifelse(nchar(rownames(z)) > trim, - paste0(substr(rownames(z), 1, trim-2), "..."), rownames(z)) - pheatmap::pheatmap(z, color = clrs, cluster_cols=cluster_col, - cluster_rows=cluster_row, ...) -} diff --git a/R/plot_gsea.R b/R/plot_gsea.R deleted file mode 100644 index 923a3dc..0000000 --- a/R/plot_gsea.R +++ /dev/null @@ -1,52 +0,0 @@ -#' Plot GSEA output -#' -#' Plot heatmap with enrichment scores by contrast -#' -#' @param x list with two or more \code{link{read_gsea}} tables -#' @param trim trim long set names, default more than 70 characters -#' @param n_sets display contrasts sharing n or more sets for n > 1. If n = 1, -#' then only plot unique sets. If missing, then plots all sets, default. -#' @param nes plot NES (or ES if FALSE) -#' @param \dots other options passed to \code{pheatmap} -#' @author Chris Stubben -#' @examples -#' \dontrun{ -#' data(msig) -#' x <- list( X1= read_gsea("gsea1.txt"), X2 = read_gsea("gsea2.txt") ) -#' plot_gsea(x) -#' } -#' @export - -plot_gsea <- function(x, trim=70, n_sets, nes=TRUE, ...){ - if(is.data.frame(x)) stop("A list of read_gsea tables is required") - y <- dplyr::bind_rows(x, .id = "contrast") - ## order columns by order in list (or alphabetical) - y$contrast <- factor(y$contrast, levels= names(x)) - y$name <- ifelse(nchar(y$name) > trim, - paste0(substr(y$name, 1, trim-2), "..."), y$name) - ## ES or NES ? - if(nes){ - z <- dplyr::select(y, contrast, name, nes) %>% - tidyr::spread(contrast, nes) - }else{ - z <- dplyr::select(y, contrast, name, es) %>% - tidyr::spread(contrast, es) - } - if(!missing(n_sets)){ - n <- apply(z[, -1], 1, function(x) sum(!is.na(x))) - if(n_sets ==1){ - z <- dplyr::filter(z, n == 1) - }else{ - z <- dplyr::filter(z, n >= n_sets) - } - } - clrs <- grDevices::colorRampPalette(rev( - RColorBrewer::brewer.pal(n = 11, name = "RdYlBu")))(255) - ## too many NAs to cluster - z <- as_matrix(z) - message(nrow(z) , " total sets") - n1 <- max(abs(z), na.rm=TRUE) - brks <- seq(-n1, n1, length = 255) - pheatmap::pheatmap(z, color = clrs, breaks = brks, cluster_cols=FALSE, - cluster_rows=FALSE, ...) -} diff --git a/R/plot_interactions.R b/R/plot_interactions.R index d276869..24eafa2 100644 --- a/R/plot_interactions.R +++ b/R/plot_interactions.R @@ -6,6 +6,7 @@ #' @param ylab y-axis label #' @param scaled scale counts #' @param n total number of genes to plot +#' @param max_scale maximum values for scaled values in case of outliers #' @param reorder level names to reorder factor levels on x-axis #' @param \dots additional options like ncol or nrow passed to facet_wrap #' @@ -14,7 +15,7 @@ #' @author Chris Stubben #' #' @examples -#' x <- top_counts( pasilla$res, pasilla$rlog, top=25) +#' x <- top_counts( pasilla$results, pasilla$rlog, top=25) #' # no interaction from single or paired end as expected #' plot_interactions(x, c("condition", "type")) #' @export diff --git a/R/read_deseq.R b/R/read_deseq.R index 9fe5233..6dac8f4 100644 --- a/R/read_deseq.R +++ b/R/read_deseq.R @@ -24,12 +24,12 @@ read_deseq <- function(file, object="results"){ names(res) <- gsub("_vs_", " vs. ", wk[n]) for(i in seq_along(n)) { message("Loading ", names(res)[i]) - res[[i]] <- read_excel(file, sheet= n[i]) + res[[i]] <- readxl::read_excel(file, sheet= n[i]) } } else{ s1 <- readxl::read_excel(file, sheet="samples") message("Loading ", object, " worksheet") - r1 <- read_excel(file, sheet= object) + r1 <- readxl::read_excel(file, sheet= object) ## delete gene_name and biotype n2 <- which(colnames(r1) %in% c("gene_name", "biotype")) if(length(n2) !=2) message("Note: Gene name and biotype columns are missing?") diff --git a/data/pasilla.rda b/data/pasilla.rda index c89dd89..365a683 100644 Binary files a/data/pasilla.rda and b/data/pasilla.rda differ diff --git a/man/enricher_all.Rd b/man/enricher_all.Rd deleted file mode 100644 index a12b07a..0000000 --- a/man/enricher_all.Rd +++ /dev/null @@ -1,52 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/enricher_all.R -\name{enricher_all} -\alias{enricher_all} -\title{Run enricher from clusterProfiler on all DESeq2 result tables} -\usage{ -enricher_all( - res, - gsets, - maxGSSize = 2000, - expressed_universe = TRUE, - deseq.padj = 0.05, - logFC, - protein_coding = TRUE, - quiet = FALSE, - ... -) -} -\arguments{ -\item{res}{A list of DESeq results} - -\item{gsets}{A list of gene sets (or two column dataframe with pathway and gene)} - -\item{maxGSSize}{Maximum set size, default 2000} - -\item{expressed_universe}{Use all expressed genes as universe, default TRUE} - -\item{deseq.padj}{Adjusted p-value cutoff for significant genes, default 0.05} - -\item{logFC}{absolute fold change cutoff} - -\item{protein_coding}{compare protein coding genes only, default TRUE} - -\item{quiet}{suppress messages except total significant} - -\item{\dots}{Additional options like minGSSize and pvalueCutoff passed to \code{enricher}} -} -\value{ -A list of tibbles -} -\description{ -Run enricher from clusterProfiler on all DESeq2 result tables -} -\examples{ -\dontrun{ -library(hciRdata) -enricher_all(res, msig_pathways$KEGG) -} -} -\author{ -Chris Stubben -} diff --git a/man/plot_counts.Rd b/man/plot_counts.Rd index 9ec8682..b542163 100644 --- a/man/plot_counts.Rd +++ b/man/plot_counts.Rd @@ -45,7 +45,8 @@ Plot counts for a single gene See \code{link{plot_interactions}} to plot many genes. } \examples{ - plot_counts(pasilla$count, "FBgn0033635", c("type","condition"), title = "Prip") + x <- top_counts(pasilla$results, pasilla$rlog, top=12) + plot_counts(x, "condition") } \author{ Chris Stubben diff --git a/man/plot_enricher.Rd b/man/plot_enricher.Rd deleted file mode 100644 index 09e5044..0000000 --- a/man/plot_enricher.Rd +++ /dev/null @@ -1,52 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_enricher.R -\name{plot_enricher} -\alias{plot_enricher} -\title{Plot enricher output} -\usage{ -plot_enricher( - x, - trim = 70, - sets, - top_n, - min_p, - max_rows, - cluster_row = FALSE, - cluster_col = FALSE, - ... -) -} -\arguments{ -\item{x}{list from \code{link{fgsea_all}}} - -\item{trim}{trim long names, default more than 70 characters} - -\item{sets}{display contrasts sharing n or more sets for n > 1. If n = 1, -then only plot unique sets. If missing, then plots all sets, default.} - -\item{top_n}{Number of top sets to plot} - -\item{min_p}{Minimum p-value on -log10 scale} - -\item{max_rows}{Maximun number of rows to cluster} - -\item{cluster_row}{Cluster dendrogram rows, default FALSE for an alphabetical list} - -\item{cluster_col}{Cluster dendrogram columns, default FALSE} - -\item{\dots}{other options passed to \code{pheatmap}} -} -\description{ -Plot heatmap with enrichment scores by contrast -} -\examples{ -\dontrun{ - library(hciRdata) - fc <- write_gsea_rnk(res, write=FALSE) - x <- fgsea_all(fc, msig_pathways$KEGG, FDR= 0.25) - plot_fgsea(x) - } -} -\author{ -Chris Stubben -} diff --git a/man/plot_gsea.Rd b/man/plot_gsea.Rd deleted file mode 100644 index 12fd55f..0000000 --- a/man/plot_gsea.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_gsea.R -\name{plot_gsea} -\alias{plot_gsea} -\title{Plot GSEA output} -\usage{ -plot_gsea(x, trim = 70, n_sets, nes = TRUE, ...) -} -\arguments{ -\item{x}{list with two or more \code{link{read_gsea}} tables} - -\item{trim}{trim long set names, default more than 70 characters} - -\item{n_sets}{display contrasts sharing n or more sets for n > 1. If n = 1, -then only plot unique sets. If missing, then plots all sets, default.} - -\item{nes}{plot NES (or ES if FALSE)} - -\item{\dots}{other options passed to \code{pheatmap}} -} -\description{ -Plot heatmap with enrichment scores by contrast -} -\examples{ -\dontrun{ - data(msig) - x <- list( X1= read_gsea("gsea1.txt"), X2 = read_gsea("gsea2.txt") ) - plot_gsea(x) - } -} -\author{ -Chris Stubben -} diff --git a/man/plot_interactions.Rd b/man/plot_interactions.Rd index 7540a4c..68950ec 100644 --- a/man/plot_interactions.Rd +++ b/man/plot_interactions.Rd @@ -27,6 +27,8 @@ grouping} \item{n}{total number of genes to plot} +\item{max_scale}{maximum values for scaled values in case of outliers} + \item{reorder}{level names to reorder factor levels on x-axis} \item{\dots}{additional options like ncol or nrow passed to facet_wrap} @@ -38,7 +40,7 @@ A ggplot Plot counts for many genes } \examples{ - x <- top_counts( pasilla$res, pasilla$rlog, top=25) + x <- top_counts( pasilla$results, pasilla$rlog, top=25) # no interaction from single or paired end as expected plot_interactions(x, c("condition", "type")) } diff --git a/slides/21223X2_V43A05-267_D1_CytA.tiff b/slides/21223X2_V43A05-267_D1_CytA.tiff deleted file mode 100755 index 9d39af3..0000000 Binary files a/slides/21223X2_V43A05-267_D1_CytA.tiff and /dev/null differ diff --git a/slides/21223X2_V43A05-267_D1_HE.tiff b/slides/21223X2_V43A05-267_D1_HE.tiff deleted file mode 100755 index 84100e7..0000000 Binary files a/slides/21223X2_V43A05-267_D1_HE.tiff and /dev/null differ diff --git a/slides/23837X1_V53B02-103_A_CytA.tiff b/slides/23837X1_V53B02-103_A_CytA.tiff deleted file mode 100755 index fae0445..0000000 Binary files a/slides/23837X1_V53B02-103_A_CytA.tiff and /dev/null differ diff --git a/slides/23837X1_V53B02-103_A_IF.tiff b/slides/23837X1_V53B02-103_A_IF.tiff deleted file mode 100755 index 9087145..0000000 Binary files a/slides/23837X1_V53B02-103_A_IF.tiff and /dev/null differ