diff --git a/NAMESPACE b/NAMESPACE
index 1fc9f8ae..63e6839b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,13 @@ export(MNNcorrect)
export(MultiOmicsSAE)
export(abbreviate_pheno)
export(add_opacity)
+export(ai.ask)
+export(ai.genesets_keywords)
+export(ai.genesets_summary)
+export(ai.get_models)
+export(ai.get_ollama_models)
+export(ai.get_remote_models)
+export(ai.model_is_available)
export(alias2hugo)
export(allSpecies)
export(allSpecies.ANNOTHUB)
@@ -41,6 +48,7 @@ export(clean_probe_names)
export(clean_symbols)
export(clustermap)
export(colSignedRanks)
+export(collapseTraitMatrix)
export(collapse_by_humansymbol)
export(color_from_middle)
export(colorscale)
@@ -95,6 +103,7 @@ export(extend_metabolite_sets)
export(extend_metabolite_sets2)
export(extremeCorrelation)
export(fastCor)
+export(fastTOMsimilarity)
export(filterProbes)
export(first_feature)
export(fixContrastMatrix)
@@ -193,7 +202,6 @@ export(is_logged)
export(isanumber)
export(itercluster_louvain)
export(justGSEA)
-export(labels2rainbow)
export(lasagna.create_from_pgx)
export(lasagna.create_model)
export(lasagna.plot3D)
@@ -243,6 +251,7 @@ export(mofa.get_prefix)
export(mofa.intNMF)
export(mofa.log1s)
export(mofa.merge_data)
+export(mofa.merge_data2)
export(mofa.plot_all_factortraits)
export(mofa.plot_biplot)
export(mofa.plot_centrality)
@@ -484,6 +493,7 @@ export(pos.compact)
export(probe2symbol)
export(psort)
export(pubmedlink)
+export(purpleGreyYellow)
export(rbind_sparse_matrix)
export(read.as_matrix)
export(read.gmt)
@@ -565,23 +575,50 @@ export(validate_samples)
export(visPrint)
export(visplot.PCSF)
export(wgcna.compute)
+export(wgcna.computeConsensusGeneStats)
+export(wgcna.computeConsensusMatrix)
+export(wgcna.computeDistinctMatrix)
+export(wgcna.computeModuleEnrichment)
+export(wgcna.compute_multiomics)
+export(wgcna.createConsensusLayers)
export(wgcna.filterColors)
+export(wgcna.getConsensusGeneStats)
+export(wgcna.getConsensusTopGenesAndSets)
export(wgcna.getGeneStats)
-export(wgcna.labels2colors)
+export(wgcna.getModuleCrossGenes)
+export(wgcna.getTopGenesAndSets)
+export(wgcna.matchColors)
+export(wgcna.pickSoftThreshold)
+export(wgcna.plotConsensusOverlapHeatmap)
+export(wgcna.plotConsensusSampleDendroAndColors)
export(wgcna.plotDendroAndColors)
+export(wgcna.plotDendroAndTraitCorrelation)
+export(wgcna.plotDendroAndTraitCorrelation_multi)
export(wgcna.plotEigenGeneAdjacencyHeatmap)
export(wgcna.plotEigenGeneClusterDendrogram)
export(wgcna.plotEigenGeneGraph)
export(wgcna.plotFeatureUMAP)
+export(wgcna.plotGeneNetwork)
export(wgcna.plotLabeledCorrelationHeatmap)
export(wgcna.plotMDS)
export(wgcna.plotMMvsGS)
+export(wgcna.plotModuleHeatmap)
export(wgcna.plotModuleHubGenes)
+export(wgcna.plotModuleScores)
export(wgcna.plotModuleSignificance)
export(wgcna.plotModuleTraitHeatmap)
+export(wgcna.plotMultiEigengeneCorrelation)
+export(wgcna.plotPowerAnalysis)
+export(wgcna.plotPowerAnalysis_multi)
+export(wgcna.plotPreservationModuleTraits)
+export(wgcna.plotPreservationSummaries)
export(wgcna.plotSampleDendroAndColors)
export(wgcna.plotTOM)
+export(wgcna.plotTopModules)
+export(wgcna.plotTopModules_multi)
+export(wgcna.plotTraitCorrelationBarPlots)
export(wgcna.runConsensusWGCNA)
+export(wgcna.runPreservationWGCNA)
export(wikipathview)
export(wrapHyperLink)
export(write.gmt)
diff --git a/R/ai-llm.R b/R/ai-llm.R
new file mode 100644
index 00000000..5a47da8b
--- /dev/null
+++ b/R/ai-llm.R
@@ -0,0 +1,130 @@
+#'
+#' @export
+ai.get_ollama_models <- function(models=NULL) {
+ available.models <- system("ollama list | tail -n +2 | cut -d' ' -f 1", intern=TRUE)
+ if(!is.null(models)) available.models <- intersect(models,available.models)
+ return(available.models)
+}
+
+OLLAMA_MODELS = ai.get_ollama_models()
+DEFAULT_LLM = "gpt-5-nano"
+
+if(0) {
+ model="gpt-5-nano";prompt=NULL
+ model="gemma3:1b";prompt=NULL
+ model="grok-4-fast-non-reasoning";prompt=NULL
+}
+
+#' @export
+ai.get_remote_models <- function(models=NULL) {
+ keys <- NULL
+
+ dbg("[ai.get_remote_models] models = ",models)
+ dbg("[ai.get_remote_models] len.models = ",length(models))
+ dbg("[ai.get_remote_models] OPENAI_API_KEY = ",Sys.getenv("OPENAI_API_KEY"))
+ dbg("[ai.get_remote_models] XAI_API_KEY = ",Sys.getenv("XAI_API_KEY"))
+ dbg("[ai.get_remote_models] GROQ_API_KEY = ",Sys.getenv("GROQ_API_KEY"))
+ dbg("[ai.get_remote_models] GEMINI_API_KEY = ",Sys.getenv("GEMINI_API_KEY"))
+
+ if (Sys.getenv("OPENAI_API_KEY")!="") keys <- c(keys,"gpt-.*")
+ if (Sys.getenv("XAI_API_KEY")!="") keys <- c(keys,"grok-.*")
+ if (Sys.getenv("GROQ_API_KEY")!="") keys <- c(keys,"groq:.*")
+ if (Sys.getenv("GEMINI_API_KEY")!="") keys <- c(keys,"gemini-.*")
+
+ if(is.null(models) || length(models)==0 || models[1]=="" ) {
+ models <- keys
+ } else if(!is.null(keys)) {
+ regex <- paste0("^",keys,collapse="|")
+ models <- grep(regex,models,value=TRUE)
+ } else {
+ models <- NULL
+ }
+ models
+}
+
+#' @export
+ai.get_models <- function(models=NULL) {
+ local.models <- ai.get_ollama_models(models)
+ remote.models <- ai.get_remote_models(models)
+ if(!is.null(models)) {
+ models <- models[ models %in% c(local.models, remote.models)]
+ } else {
+ models <- c(local.models, remote.models)
+ }
+ return(models)
+}
+
+#' @export
+ai.model_is_available <- function(model) {
+ model %in% ai.get_models(models=model)
+}
+
+#' @export
+ai.ask <- function(question, model=DEFAULT_LLM, prompt=NULL) {
+ chat <- NULL
+ if(inherits(model, "Chat")) {
+ chat <- model
+ } else if(is.character(model)) {
+ if (model %in% OLLAMA_MODELS) {
+ chat <- ellmer::chat_ollama(model = model, system_prompt = prompt)
+ } else if (grepl("^gpt",model) && Sys.getenv("OPENAI_API_KEY")!="") {
+ message("warning: using remote GPT model:", model)
+ chat <- ellmer::chat_openai(
+ model = model, system_prompt = prompt,
+ api_key = Sys.getenv("OPENAI_API_KEY") )
+ } else if (grepl("^grok",model) && Sys.getenv("XAI_API_KEY")!="") {
+ chat <- ellmer::chat_openai(
+ model = model, system_prompt = prompt,
+ api_key = Sys.getenv("XAI_API_KEY"),
+ base_url="https://api.x.ai/v1/")
+ } else if (grepl("^groq",model) && Sys.getenv("GROQ_API_KEY")!="") {
+ model <- sub("groq:","",model)
+ chat <- ellmer::chat_groq(
+ model = model, system_prompt = prompt,
+ api_key = Sys.getenv("GROQ_API_KEY")
+ )
+ } else if (grepl("^gemini",model) && Sys.getenv("GEMINI_API_KEY")!="") {
+ chat <- ellmer::chat_google_gemini(
+ model = model, system_prompt = prompt,
+ api_key = Sys.getenv("GEMINI_API_KEY")
+ )
+ }
+ }
+
+ if(is.null(chat)) {
+ message("ERROR. could not create model ", model)
+ return(NULL)
+ }
+ . <- chat$chat(question, echo=FALSE)
+ chat$last_turn()@text
+}
+
+#' @export
+ai.genesets_summary <- function(gsets, pheno=NULL, model=DEFAULT_LLM,
+ detail=1, html=FALSE, verbose=1) {
+ q <- "Extract the main biological function of this list of gene sets that were found by doing geneset enrichment. Just give the answer. Do not acknowledge."
+ if(!is.null(pheno)) q <- paste0(q, "Discuss in relation with the phenotype: '",pheno,"'.")
+ if(detail==0) q <- paste(q, "Be very very short.")
+ if(detail==1) q <- paste(q, "Describe in one short paragraph.")
+ if(detail>=2) q <- paste(q, "Describe in detail.")
+ if(html) q <- paste(q, "Use HTML formatting.")
+ if(verbose>0) cat("Question:",q,"... \n")
+ ss <- paste(gsets, collapse='; ')
+ q <- paste(q, "These are the genesets: ",ss,"
. ")
+ r <- ai.ask(q, model=model)
+ #r <- ai.ask(q, model="gemma3:270m")
+ #r <- ai.ask(q, model="gemma3:1b")
+ return(r)
+}
+
+num=3
+#' @export
+ai.genesets_keywords <- function(gsets, num=3, pheno=NULL, model=DEFAULT_LLM) {
+ ss <- paste(gsets, collapse='; ')
+ q <- paste0("Extract ",num," keywords describing the following collection of gene sets. ")
+ q <- paste0(q, "These are the genesets: ",ss,"
. ")
+ r <- ai.ask(q, model=model)
+ return(r)
+}
+
+
diff --git a/R/compute2-extra.R b/R/compute2-extra.R
index 40c17735..d852f8da 100644
--- a/R/compute2-extra.R
+++ b/R/compute2-extra.R
@@ -236,7 +236,10 @@ compute_extra <- function(pgx, extra = c(
tt <- system.time({
tryCatch(
{
- pgx$wgcna <- pgx.wgcna(pgx)
+ pgx$wgcna <- pgx.wgcna(
+ pgx,
+ ai_model = NULL ## no AI by default (yet)
+ )
},
error = function(e) {
message("[ERROR_WGCNA] FATAL: ", as.character(e))
diff --git a/R/gset-fisher.r b/R/gset-fisher.r
index dd0b35e1..3e2a81b5 100644
--- a/R/gset-fisher.r
+++ b/R/gset-fisher.r
@@ -104,7 +104,8 @@ gset.fisher2 <- function(genes.up, genes.dn, genesets, background = NULL,
gset.fisher <- function(genes, genesets, background = NULL,
fdr = 0.05, mc = TRUE, sort.by = "zratio", nmin = 3,
min.genes = 15, max.genes = 500, method = "fast.fisher",
- check.background = TRUE, common.genes = TRUE, verbose = 1) {
+ check.background = TRUE, common.genes = TRUE,
+ no.pass=NA, verbose = 1) {
if (is.null(background)) {
background <- unique(unlist(genesets))
if (verbose > 0) {
@@ -221,6 +222,11 @@ gset.fisher <- function(genes, genesets, background = NULL,
stop("unknown method")
}
+ ## replace NA values
+ if(any(is.na(pv))) {
+ pv[is.na(pv)] <- no.pass
+ }
+
## compute q-value
qv <- rep(NA, length(pv))
qv <- stats::p.adjust(pv, method = "fdr")
diff --git a/R/pgx-annot.R b/R/pgx-annot.R
index 1fc9efab..98074a21 100644
--- a/R/pgx-annot.R
+++ b/R/pgx-annot.R
@@ -1194,25 +1194,40 @@ getHumanOrtholog.biomart <- function(organism, symbols, verbose = 1) {
#' }
#' @import data.table
#' @export
-probe2symbol <- function(probes, annot_table, query = c("symbol", "gene_name"),
+probe2symbol <- function(probes, annot_table, query = "symbol",
key = NULL, fill_na = FALSE) {
- # Prepare inputs
+
+ # Prepare inputs. add extra matching columns.
annot_table <- cbind(rownames = rownames(annot_table), annot_table)
+ id.cols <- intersect(c("feature","gene_name","symbol"),colnames(annot_table))
+ if(length(id.cols)>0) {
+ stripped_annot <- apply(annot_table[,id.cols,drop=FALSE],2,function(a) sub("^[A-Za-z]+:","",a))
+ ##colnames(stripped_annot) <- paste0(colnames(stripped_annot),"_stripped")
+ annot_table <- cbind(annot_table, stripped_annot)
+ }
+
probes1 <- setdiff(probes, c(NA, ""))
if (is.null(key) || !key %in% colnames(annot_table)) {
key <- which.max(apply(annot_table, 2, function(a) sum(probes1 %in% a)))
}
if (is.null(key)) {
- stop("[probe2symbol] FATAL. could not get key column.")
+ message("[probe2symbol] FATAL. could not get key column.")
+ return(NULL)
}
- # match query
- ii <- match(probes, annot_table[, key])
- query <- intersect(query, colnames(annot_table))
+ query <- head(intersect(query, colnames(annot_table)),1)
if (length(query) == 0) {
- stop("ERROR. no symbol column.")
+ message("ERROR. no symbol column.")
+ return(NULL)
}
- query_col <- annot_table[ii, query[1]]
+
+ # fall back on old gene_name
+ if(query=="symbol" && !"symbol" %in% colnames(annot_table) &&
+ "gene_name" %in% colnames(annot_table)) query <- "gene_name"
+
+ # match query
+ ii <- match(probes, annot_table[, key])
+ query_col <- annot_table[ii, query]
# Deal with NA
if (fill_na) {
diff --git a/R/pgx-deconv.R b/R/pgx-deconv.R
index 5b778f2c..ca67239b 100644
--- a/R/pgx-deconv.R
+++ b/R/pgx-deconv.R
@@ -500,7 +500,6 @@ pgx.deconvolution <- function(X, ref,
dbg("WARNING:: pgx.deconvolution: is X really counts? (not logarithmic)\n")
}
-
## clean up matrix, remove duplicate names
mat <- as.matrix(X)
rownames(mat) <- gsub(".*:", "", rownames(mat)) ## strip prefix
@@ -565,12 +564,14 @@ pgx.deconvolution <- function(X, ref,
source(CIBERSORT.code)
stime <- system.time(
# The f CIBERSORT is likely from CIBERSORT package but better confirm
- ## try(ciber.out <- CIBERSORT(ref, mat, perm = 0, QN = FALSE))
+ ## try(ciber.out <- CIBERSORT(ref, mat, perm = 0, QN = FALSE))
+ NULL
)
}
if (has.ciber2) {
stime <- system.time(
- ## try(ciber.out <- CIBERSORT::cibersort(ref, mat, perm = 0, QN = FALSE))
+ ## try(ciber.out <- CIBERSORT::cibersort(ref, mat, perm = 0, QN = FALSE))
+ NULL
)
}
if (!is.null(ciber.out)) {
diff --git a/R/pgx-functions.R b/R/pgx-functions.R
index b327a66c..d4ef4cda 100644
--- a/R/pgx-functions.R
+++ b/R/pgx-functions.R
@@ -1507,6 +1507,9 @@ filterProbes <- function(annot, genes) {
#' @export
rename_by2 <- function(counts, annot_table, new_id = "symbol",
na.rm = TRUE, unique = TRUE, keep.prefix = FALSE) {
+
+ ##new_id="symbol";na.rm=TRUE;unique=TRUE;keep.prefix=FALSE
+
## add rownames
annot_table$rownames <- rownames(annot_table)
annot_table$rownames2 <- sub("^[A-Za-z]+:", "", rownames(annot_table)) ## strip prefix
@@ -1528,8 +1531,11 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol",
counts <- cbind(counts)
}
- ## dummy do-noting return
from_id <- names(which.max(probe_match))
+ if(new_id=="symbol" && !"symbol" %in% colnames(annot_table) &&
+ "gene_name" %in% colnames(annot_table)) new_id <- "gene_name"
+
+ ## dummy do-noting return
if (new_id == from_id) {
sel <- which(probes %in% annot_table[,from_id])
counts <- counts[sel, , drop=FALSE]
@@ -1544,39 +1550,31 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol",
keep.prefix <- (keep.prefix && all(grepl(":", probes)))
from <- annot_table[, from_id]
- if (!any(duplicated(from)) || unique) {
- ii <- match(probes, from)
- if (keep.prefix) {
- dt <- mofa.get_prefix(probes)
- new.name <- annot_table[ii, new_id]
- new.name <- paste0(dt, ":", new.name)
- } else {
- new.name <- annot_table[ii, new_id]
- }
+ ## if (!any(duplicated(from)) || unique) {
+ ii <- match(probes, from)
+ if (keep.prefix) {
+ dt <- mofa.get_prefix(probes)
+ new.name <- annot_table[ii, new_id]
+ new.name <- paste0(dt, ":", new.name)
} else {
- ## map probes to 'from' vector but retains duplicated entries in
- ## 'from'
- to <- lapply(probes, function(p) which(from == p))
- ii <- lapply(1:length(to), function(i) rep(i, length(to[[i]])))
- counts <- counts[unlist(ii), , drop = FALSE]
- if (keep.prefix) {
- dt <- mofa.get_prefix(unlist(to))
- new.name <- annot_table[unlist(to), new_id]
- new.name <- paste0(dt, ":", new.name)
- } else {
- new.name <- annot_table[unlist(to), new_id]
- }
+ new.name <- annot_table[ii, new_id]
}
rownames(counts) <- new.name
- # Take out rows without name
+ # Remove rows with missing name
if (na.rm) {
counts <- counts[!rownames(counts) %in% c("", "NA", NA), , drop = FALSE]
}
- # Sum columns of rows with the same gene symbol
- ## if (unique) rownames(counts) <- make_unique(rownames(counts))
- if (unique) {
- counts <- rowmean(counts, rownames(counts))
+
+ # Average columns of rows with the same gene symbol
+ ndup <- sum(duplicated(rownames(counts)))
+ if (unique && ndup>0) {
+ rowdup <- rownames(counts)[which(duplicated(rownames(counts)))]
+ ii <- which( rownames(counts) %in% rowdup )
+ nodup.counts <- rowmean(counts[ii,,drop = FALSE], rownames(counts)[ii])
+ rown <- unique(rownames(counts))
+ counts <- rbind( counts[-ii,,drop=FALSE], nodup.counts )
+ counts <- counts[rown,]
}
if (type == "vector") {
@@ -1595,6 +1593,9 @@ rename_by <- function(counts, annot_table, new_id = "symbol", unique = TRUE) {
if (is.vector(counts)) {
probes <- names(counts)
}
+
+ if(new_id=="symbol" && !"symbol" %in% colnames(annot_table) &&
+ "gene_name" %in% colnames(annot_table)) new_id <- "gene_name"
symbol <- annot_table[probes, new_id]
# Guard against NA
@@ -2349,6 +2350,29 @@ expandPhenoMatrix <- function(M, drop.ref = TRUE, keep.numeric = FALSE, check =
}
+#' Collapses an expanded (binarized) trait matrix to its original
+#' categorical phenotype with levels. Colnames must be "pheno1=A",
+#' "pheno1=B" etc.
+#'
+#' @export
+collapseTraitMatrix <- function(Y) {
+ if(sum(grepl("=",colnames(Y))) < 2) return(Y)
+ is.cat <- grepl("=",colnames(Y))
+ M <- Y[,which(!is.cat),drop=FALSE]
+ categories <- unique(sub("=.*","",colnames(Y)[which(is.cat)]))
+ y=categories[1]
+ for(y in categories) {
+ ii <- which(sub("=.*","",colnames(Y)) == y)
+ Y1 <- Y[,ii]
+ colnames(Y1) <- sub(".*=","",colnames(Y1))
+ m1 <- colnames(Y1)[max.col(Y1)]
+ M <- cbind(M, m1)
+ colnames(M)[ncol(M)] <- y
+ }
+ return(M)
+}
+
+
#' @title P-value for Pearson's Correlation Coefficient
#'
#' @description This function calculates the p-value for Pearson's correlation coefficient.
diff --git a/R/pgx-mofa.R b/R/pgx-mofa.R
index 76e009ca..9290fd51 100644
--- a/R/pgx-mofa.R
+++ b/R/pgx-mofa.R
@@ -87,8 +87,11 @@ pgx.compute_mofa <- function(pgx, kernel = "MOFA", numfactors = 8,
)
mofa$lasagna <- lasagna.create_model(
las.data,
- pheno = "contrasts", ntop = ntop, nc = 20,
- annot = pgx$genes, use.graphite = FALSE
+ pheno = "contrasts",
+ ntop = ntop,
+ nc = 20,
+ annot = pgx$genes,
+ use.graphite = FALSE
)
}
@@ -345,7 +348,7 @@ mofa.compute <- function(xdata,
X, samples,
minmodsize = 10,
power = 6,
- cutheight = 0.15,
+ mergeCutHeight = 0.15,
deepsplit = 2,
networktype = "signed",
tomtype = "signed",
@@ -839,24 +842,26 @@ mofa.plot_all_factortraits <- function(meta.res) {
#' @export
mofa.topSD <- function(xdata, ntop) {
- if (inherits(xdata, "list")) {
+ if(is.list(xdata)) {
res <- lapply(xdata, function(x) {
sdx <- matrixStats::rowSds(x, na.rm = TRUE)
- head(x[order(-sdx), ], ntop)
+ head(x[order(-sdx),,drop=FALSE], ntop)
})
- }
- if (!is.null(dim(xdata))) {
- if (all(grepl(":", rownames(xdata)))) {
+ } else if(is.matrix(xdata)) {
+ if(all(grepl(":",rownames(xdata)))) {
xdata <- mofa.split_data(xdata)
res <- lapply(xdata, function(x) {
sdx <- matrixStats::rowSds(x, na.rm = TRUE)
- head(x[order(-sdx), ], ntop)
+ head(x[order(-sdx),,drop=FALSE], ntop)
})
res <- mofa.merge_data(res)
} else {
sdx <- matrixStats::rowSds(xdata, na.rm = TRUE)
- res <- head(xdata[order(-sdx), ], ntop)
+ res <- head(xdata[order(-sdx),,drop=FALSE], ntop)
}
+ } else {
+ message("[mofa.topSD] WARNING: could not detect type")
+ res <- xdata
}
return(res)
}
@@ -894,6 +899,63 @@ mofa.merge_data <- function(xx) {
do.call(rbind, mofa.prefix(xx))
}
+
+#' This merges list of multi-omics data to a single matrix. Note that
+#' it can handle non-matched data by taking union of rownames or
+#' colnames and extending the final matrix. Be careful it can
+#' introduce NA in such non-matched cases.
+#'
+#' @export
+mofa.merge_data2 <- function(xdata, prefix.rows=NULL, prefix.cols=NULL) {
+ n1 <- length(Reduce(intersect,lapply(xdata,rownames)))
+ n2 <- length(Reduce(intersect,lapply(xdata,colnames)))
+ c(n1,n2)
+ if(n1 && n2) {
+ message("WARNING: matrices are overlapping both in rows and columns")
+ }
+ if(is.null(prefix.cols)) prefix.cols <- (n1 > 0 && n2 > 0)
+ if(is.null(prefix.rows)) prefix.rows <- (n1 > 0 && n2 > 0)
+ if(prefix.cols) {
+ ## if rows overlap (i.e. same genes), prefix the column names
+ ## (i.e. different datasets)
+ for(i in 1:length(xdata)) {
+ nn <- sub("[A-Za-z]+:","",colnames(xdata[[i]]))
+ colnames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn)
+ }
+ }
+ if(prefix.rows) {
+ ## if columns overlap (i.e. same samples), prefix the feature
+ ## names.
+ for(i in 1:length(xdata)) {
+ nn <- sub("[A-Za-z]+:","",rownames(xdata[[i]]))
+ rownames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn)
+ }
+ }
+ allfeatures <- unique(unlist(lapply(xdata, rownames)))
+ allsamples <- unique(unlist(lapply(xdata, colnames)))
+ D <- matrix(0, length(allfeatures), length(allsamples))
+ nn <- matrix(0, length(allfeatures), length(allsamples))
+ rownames(D) <- allfeatures
+ colnames(D) <- allsamples
+ i=1
+ for(i in 1:length(xdata)) {
+ A <- xdata[[i]]
+ ii <- match(rownames(D),rownames(A))
+ jj <- match(colnames(D),colnames(A))
+ A1 <- A[ii,jj]
+ nn <- nn + !is.na(A1)*1
+ A1[is.na(A1)] <- 0
+ D <- D + A1
+ }
+ D <- D / nn
+ D[which(nn==0)] <- NA
+ rownames(D) <- allfeatures
+ colnames(D) <- allsamples
+ return(D)
+}
+
+
+
#' @export
mofa.prefix <- function(xx) {
xx <- mofa.strip_prefix(xx)
@@ -1240,7 +1302,6 @@ mofa.plot_factor_boxplots <- function(mofa, k = 1, pheno = NULL,
}
}
-
mofa.combine_layers <- function(xx, weights = 1) {
k <- 1
if (length(weights) == 1) weights <- rep(weights, length(xx))
@@ -2545,8 +2606,8 @@ lasagna.create_from_pgx <- function(pgx, xdata = NULL, layers = NULL,
}
names(xdata) <- tolower(names(xdata))
- if (is.null(layers)) {
- layers <- c("hx", "mir", "gx", "px", "mx", "gset")
+ if(is.null(layers)) {
+ layers <- c("hx","mir","mi","gx","px","mx","gset")
layers <- unique(layers, names(xdata))
}
@@ -2554,37 +2615,48 @@ lasagna.create_from_pgx <- function(pgx, xdata = NULL, layers = NULL,
layers <- intersect(layers, names(xdata))
xdata <- xdata[layers]
}
-
- lasagna.data <- list(
+ model.data <- list(
X = xdata,
samples = pgx$samples,
contrasts = pgx$contrasts
)
model <- lasagna.create_model(
- lasagna.data,
- pheno = pheno, ntop = ntop, nc = nc,
- annot = annot, use.gmt = use.gmt, use.graphite = use.graphite,
- add.sink = add.sink, intra = intra, fully_connect = fully_connect,
+ model.data,
+ pheno = pheno,
+ ntop = ntop,
+ nc = nc,
+ annot = annot,
+ use.gmt = use.gmt,
+ use.graphite = use.graphite,
+ add.sink = add.sink,
+ intra = intra,
+ fully_connect = fully_connect,
add.revpheno = add.revpheno
)
return(model)
}
-
-
#'
#' @export
-lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
- annot = NULL, use.gmt = TRUE, use.graphite = TRUE,
- add.sink = FALSE, intra = TRUE, fully_connect = FALSE,
- add.revpheno = TRUE) {
+lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20,
+ annot=NULL, use.gmt=TRUE, use.graphite=TRUE,
+ add.sink=FALSE, intra=TRUE, fully_connect=FALSE,
+ add.revpheno = TRUE, condition.edges=TRUE
+ ) {
if (pheno == "pheno") {
- Y <- expandPhenoMatrix(data$samples, drop.ref = FALSE)
- } else {
+ Y <- expandPhenoMatrix(data$samples, drop.ref=FALSE)
+ } else if (pheno == "expanded") {
+ Y <- 1 * data$samples
+ } else if (pheno == "contrasts") {
+ if(!"contrasts" %in% names(data)) {
+ message("ERROR: contrasts missing in data")
+ return(NULL)
+ }
Y <- makeContrastsFromLabelMatrix(data$contrasts)
- if (any(grepl("^IA:", colnames(Y)))) {
+ Y <- sign(Y)
+ if(any(grepl("^IA:",colnames(Y)))) {
## drop interaction terms
Y <- Y[, grep("^IA:", colnames(Y), invert = TRUE), drop = FALSE]
}
@@ -2593,6 +2665,9 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
colnames(revY) <- reverse.AvsB(colnames(Y))
Y <- cbind(Y, revY)
}
+ } else {
+ message("[lasagna.create_model] ERROR invalid pheno type")
+ return(NULL)
}
data$X[["PHENO"]] <- t(Y)
@@ -2600,11 +2675,16 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
xx <- data$X
if (!is.null(ntop) && ntop > 0) {
xx <- lapply(xx, function(x) head(x[order(-apply(x, 1, sd)), , drop = FALSE], ntop))
+ xx <- mofa.topSD(xx, ntop)
}
- xx <- mofa.prefix(xx)
- X <- do.call(rbind, xx)
- remove(xx)
+ ## what about not overlapping samples??
+ X <- mofa.merge_data2(xx, prefix.rows=TRUE, prefix.cols=FALSE)
+ ##remove(xx)
+ kk <- intersect(colnames(X),rownames(Y))
+ X <- X[,kk]
+ Y <- Y[kk,]
+
## add SOURCE/SINK
if (add.sink) {
X <- rbind(X, "SOURCE" = 1, "SINK" = 1)
@@ -2612,13 +2692,40 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
## Compute BIG correlation matrix. WARNING can become huge! NOTE:
## Needs optimization using SPARSE matrix.
- suppressWarnings(R <- cor(t(X), use = "pairwise"))
- R[is.na(R)] <- 0.01
- ii <- grep("SINK|SOURCE", rownames(R))
- if (length(ii)) R[ii, ii] <- 1
+ suppressWarnings( R <- cor(t(X), use = "pairwise") )
+
+ ## Sink/source need to be connected allways
+ ii <- grep("SINK|SOURCE",rownames(R))
+ if(length(ii)) {
+ R[ii,] <- 1
+ R[,ii] <- 1
+ }
+
+ ## save 'pure' correlation
+ corrR <- R
+
+ ## missing correlation values will be replaced with some constant
+ ## value. This in particular can happen for constant features or
+ ## inter-correlation edges if samples are not matched. NOTE: we
+ ## should ideally place correct sign.
+ R[is.na(R)] <- 0.1234
+
+ ## Weigh with pheno correlation or foldchange for conditioning. This
+ ## uses phenotype matrix Y. This is particularly important when
+ ## inter-correlation is unavailable and edge prioritaztion/pruning
+ ## is done.
+ if(condition.edges) {
+ message("conditioning edges...")
+ rho <- cor(t(X), Y, use='pairwise.complete.obs')
+ maxrho <- apply(abs(rho), 1, max, na.rm=TRUE)
+ ii <- grep("SINK|SOURCE",names(maxrho))
+ if(length(ii)) maxrho[ii] <- 1
+ rho.wt <- outer( maxrho, maxrho )
+ R <- R * rho.wt
+ }
## mask for proteomic <> metabolics PPI
- if (use.graphite) {
+ if (FALSE && use.graphite) {
xtypes <- setdiff(names(data$X), "PHENO")
xtypes
has.mx <- ("mx" %in% xtypes)
@@ -2659,8 +2766,9 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
if (add.sink) layers <- c("SOURCE", layers, "SINK")
## mask for inter-layer connections
- if (!fully_connect) {
- layer_mask <- as.matrix(R) * 0
+ if(!fully_connect) {
+ layer_mask <- matrix(0, nrow(R), ncol(R))
+ dimnames(layer_mask) <- dimnames(R)
for (i in 1:(length(layers) - 1)) {
ii <- which(dt == layers[i])
jj <- which(dt == layers[i + 1])
@@ -2675,8 +2783,10 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
}
R <- R * layer_mask
}
-
- ## Reduce inter-connections
+
+ ## Reduce inter-connections to nc top most correlated edges per
+ ## node. This will avoid graph to be too large. NOTE: this will not
+ ## work well if inter-correlation is unavailable or disables!!!
if (!is.null(nc) && nc > 0) {
message(paste("reducing edges to maximum", nc, "connections"))
## NEED CHECK!!! SOMETHING WRONG.
@@ -2706,14 +2816,21 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
R <- R * reduce_mask
}
+ ##--------------------------------------
## create graph
+ ##--------------------------------------
+
+ ## transfer masked zeroes. create graph from 'unweighted'
+ ## correlation.
+ corrR[which(R==0)] <- 0
gr <- igraph::graph_from_adjacency_matrix(
- R,
- diag = FALSE, weighted = TRUE, mode = "undirected"
+ corrR, diag = FALSE, weighted = TRUE, mode = "undirected"
)
- igraph::E(gr)$rho <- igraph::E(gr)$weight ## copy
- gr$layers <- layers
+ ## copy correlation from weights, put in 'rho' slot
+ igraph::E(gr)$rho <- igraph::E(gr)$weight
+ gr$layers <- layers
+
## add edge connection type as attribute
igraph::V(gr)$layer <- sub(":.*", "", igraph::V(gr)$name)
ee <- igraph::as_edgelist(gr)
@@ -2737,23 +2854,23 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20,
)
}
-sp_edge_weight <- function(gr, layers) {
- layers <- unique(c("SOURCE", layers, "SINK"))
- wt <- abs(igraph::E(gr)$weight)
+sp_edge_weight <- function(graph, layers) {
+ layers <- unique(c("SOURCE",layers,"SINK"))
+ wt <- abs(igraph::E(graph)$weight)
wt[is.na(wt)] <- 0
- ee <- igraph::as_edgelist(gr)
- v1 <- ee[, 1]
- v2 <- ee[, 2]
- l1 <- match(igraph::V(gr)[v1]$layer, layers)
- l2 <- match(igraph::V(gr)[v2]$layer, layers)
+ ee <- igraph::as_edgelist(graph)
+ v1 <- ee[,1]
+ v2 <- ee[,2]
+ l1 <- match(igraph::V(graph)[v1]$layer, layers)
+ l2 <- match(igraph::V(graph)[v2]$layer, layers)
p1 <- ifelse(l1 < l2, v1, v2)
p2 <- ifelse(l1 < l2, v2, v1)
wt <- wt + 1e-8
- s1 <- igraph::shortest_paths(gr,
- from = "SOURCE", to = p1, weights = 1 / wt, output = "epath"
+ s1 <- igraph::shortest_paths(graph,
+ from = "SOURCE", to = p1, weights = 1/wt, output = "epath"
)
- s2 <- igraph::shortest_paths(gr,
- from = "SINK", to = p2, weights = 1 / wt, output = "epath"
+ s2 <- igraph::shortest_paths(graph,
+ from = "SINK", to = p2, weights = 1/wt, output = "epath"
)
sp <- mapply(c, s1$epath, s2$epath)
## sp.score <- sapply(sp, function(e) exp(mean(log(wt[e]))))
@@ -2761,13 +2878,13 @@ sp_edge_weight <- function(gr, layers) {
sp.score
}
+#'
+#'
#' @export
lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho",
min_rho = 0, prune = TRUE, fc.weights = TRUE,
sp.weight = FALSE) {
if (!pheno %in% colnames(obj$Y)) {
- dbg("[lasagna.solve] pheno = ", pheno)
- dbg("[lasagna.solve] colnames(obj$Y) = ", colnames(obj$Y))
stop("pheno not in Y")
}
if (!"rho" %in% names(igraph::edge_attr(obj$graph))) {
@@ -2776,14 +2893,23 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho",
graph <- obj$graph
X <- obj$X
- y <- obj$Y[, pheno]
- ii <- grep("PHENO", rownames(X))
- if (length(ii)) X[ii, ][which(X[ii, ] == 0)] <- NA
- y[y == 0] <- NA
- rho <- cor(t(X), y, use = "pairwise")[, 1]
- i0 <- which(sign(y) == -1)
- i1 <- which(sign(y) == +1)
- fc <- rowMeans(X[, i1], na.rm = TRUE) - rowMeans(X[, i0], na.rm = TRUE)
+ y <- obj$Y[,pheno]
+
+ ## check if phenotype was coded -1/0/1
+ ii <- grep("PHENO",rownames(X))
+ has.min1 <- (min(X[ii,], na.rm=TRUE) < 0)
+ has.min1
+ if(length(ii) && has.min1) {
+ X[ii,][which(X[ii,]==0)] <- NA
+ y[y==0] <- NA
+ }
+
+ rho <- cor(t(X), y, use="pairwise")[,1]
+ i0 <- which( y <= 0 )
+ i1 <- which( y > 0 )
+ m1 <- rowMeans(X[,i1,drop=FALSE],na.rm=TRUE)
+ m0 <- rowMeans(X[,i0,drop=FALSE],na.rm=TRUE)
+ fc <- m1 - m0
rho[is.na(rho)] <- 0
## for PHENO nodes 'foldchange' does not make sense. replace with rho.
@@ -2797,8 +2923,8 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho",
igraph::V(graph)$value <- fc
}
graph$value.type <- value.type
-
- ## set edge weights
+
+ ## set edge weights from node values
ww <- 1
weight.type <- "rho"
if (fc.weights) {
@@ -2808,8 +2934,12 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho",
ww <- abs(ff[ee[, 1]] * ff[ee[, 2]])^0.5
weight.type <- paste0(weight.type, "*vv")
}
- igraph::E(graph)$weight <- igraph::E(graph)$rho * ww
+ ## do the edge weighting. set NA correlation values to some constant
+ ee.rho <-igraph::E(graph)$rho
+ ee.rho[is.na(ee.rho)] <- 0.1234
+ igraph::E(graph)$weight <- ee.rho * ww
+
## set SINK/SOURCE edges to 1
if (any(grepl("SINK|SOURCE", igraph::V(graph)$name))) {
igraph::E(graph)[.to("SINK")]$weight <- 1
@@ -2818,18 +2948,20 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho",
if (sp.weight) {
sp.wt <- sp_edge_weight(graph, obj$layers)
- sp.wt <- (sp.wt / max(sp.wt))**2
+ sp.wt <- (sp.wt / max(sp.wt, na.rm=TRUE))**2 ## why quadratic?
igraph::E(graph)$weight <- igraph::E(graph)$weight * sp.wt
weight.type <- paste0(weight.type, "*sp")
}
graph$weight.type <- weight.type
## take subgraph
- if (min_rho > 0) {
- dsel <- which(abs(igraph::E(graph)$weight) < min_rho)
+ if(min_rho > 0) {
+ dsel <- which(abs(igraph::E(graph)$weight) < min_rho) ## rho or weight??
igraph::E(graph)$weight[dsel] <- 0
}
- if (max_edges > 0) {
+
+ ## Global limit number of edges per type
+ if(max_edges > 0) {
ewt <- igraph::E(graph)$weight
esel <- tapply(
1:length(igraph::E(graph)), igraph::E(graph)$connection_type,
@@ -2873,6 +3005,9 @@ lasagna.prune_graph <- function(graph, ntop = 100, layers = NULL,
edge.sign = "both", edge.type = "both",
filter = NULL,
prune = TRUE) {
+
+ if (is.null(layers))
+ layers <- graph$layers
if (is.null(layers)) layers <- unique(igraph::V(graph)$layer)
layers <- setdiff(layers, c("SOURCE", "SINK"))
graph <- igraph::subgraph(graph, igraph::V(graph)$layer %in% layers)
@@ -2918,18 +3053,36 @@ lasagna.prune_graph <- function(graph, ntop = 100, layers = NULL,
if (length(ii)) igraph::E(graph)$weight[ii] <- 0
}
- if (edge.sign != "both") {
- ewt <- igraph::E(graph)$weight
- if (grepl("pos", edge.sign)) igraph::E(graph)$weight[ewt < 0] <- 0
- if (grepl("neg", edge.sign)) igraph::E(graph)$weight[ewt > 0] <- 0
- }
- if (edge.type != "both") {
- ic <- grepl("->", igraph::E(graph)$connection_type)
- if (grepl("intra", edge.type)) igraph::E(graph)$weight[ic] <- 0
- if (grepl("inter", edge.type)) igraph::E(graph)$weight[!ic] <- 0
+ ewt <- igraph::E(graph)$weight
+ if (grepl("pos", edge.sign)) {
+ igraph::E(graph)$weight[ewt < 0] <- 0
+ } else if (grepl("neg", edge.sign)) {
+ igraph::E(graph)$weight[ewt > 0] <- 0
+ } else if(edge.sign == "consensus") {
+ layersign <- rep(1, length(layers))
+ names(layersign) <- layers
+ layersign[grep("^mi|^mir",layers)] <- -1
+ v1 <- igraph::as_edgelist(graph)[,1]
+ esign <- layersign[ igraph::V(graph)[v1]$layer ]
+ vsign <- sign(igraph::V(graph)[v1]$value)
+ igraph::E(graph)$weight <- ewt * (sign(ewt) == esign & vsign == esign)
+ }
+
+ ## delete intra or inter edges
+ ic <- grepl("->", igraph::E(graph)$connection_type)
+ if (edge.type == "inter") {
+ igraph::E(graph)$weight[!ic] <- 0
+ } else if (edge.type == "intra") {
+ igraph::E(graph)$weight[ic] <- 0
+ } else if (edge.type == "both2") {
+ sel <- (!ic & igraph::E(graph)$weight < 0)
+ igraph::E(graph)$weight[sel] <- 0
+ } else {
+ ## nop
}
graph <- igraph::delete_edges(graph, which(igraph::E(graph)$weight == 0))
+
if (prune) {
graph <- igraph::subgraph_from_edges(graph, igraph::E(graph))
}
@@ -3167,7 +3320,7 @@ plotly_lasagna <- function(pos, vars = NULL, edges = NULL, znames = NULL,
## ======================================================================
-## ======================================================================
+## ======================== MOFA FUNCTIONS ==============================
## ======================================================================
#'
@@ -3331,6 +3484,81 @@ mofa.intNMF <- function(datasets, k = NULL, method = "RcppML",
return(res)
}
+
+#' Impute missing values for a multiomics expression matrix
+#' X. Features must be prefixed with datatype.
+#'
+mofa.imputeMissing <- function(X, method="SVD2") {
+ xx <- mofa.split_data(X)
+ xx <- lapply(xx, function(x) imputeMissing(x, method=method))
+ impX <- mofa.merge_data(xx)
+ impX[rownames(X),]
+}
+
+#' Normalize matrix for a multiomics expression matrix X. Features
+#' must be prefixed with datatype.
+#'
+#' See also: normalizeMultiOmics()
+#'
+mofa.normalizeExpression <- function(X, method1="maxMedian", method2="none") {
+ ##method1="maxMedian";method2="none"
+
+ xx <- mofa.split_data(X)
+
+ ## First normalization normalizes samples within each datatype but
+ ## not (yet) between datatypes.
+ xx <- lapply(xx, function(x) normalizeExpression(x, method=method1))
+
+ ## Second normalization
+ normX <- NULL
+ if(method2 != "none") {
+ if(method2=='median') {
+ ## Median normalization on datatypes. This will effectively
+ ## equalize the median for each datatype.
+ xmedian <- sapply(xx, function(x) mean(matrixStats::colMedians(x,na.rm=TRUE)))
+ xx <- lapply(xx, function(x) x - median(x,na.rm=TRUE) + mean(xmedian))
+ }
+ if(method2=='combat') {
+ ## ComBat normalization on datatypes. This will effectively
+ ## equalize the mean and SD of each datatype in each sample.
+ normX <- mofa.merge_data(xx)
+ dtype <- mofa.get_prefix(rownames(normX))
+ normX <- t(sva::ComBat( t(normX), batch=dtype))
+ }
+ if(method2=='quantile') {
+ ## Quantile normalization on datatypes. We will need to cbind
+ ## and augment the datatypes so the number of features are
+ ## equal. We do this by repeating rows (so distribution not
+ ## affected). Then after quantile normalization we unpack again.
+ nr <- max(sapply(xx,nrow))
+ mx <- list()
+ i=1
+ for(i in 1:length(xx)) {
+ n <- ceiling(nr / nrow(xx[[i]]))
+ mx[[i]] <- do.call( rbind, rep( list(xx[[i]]), n))
+ mx[[i]] <- head(mx[[i]], nr)
+ dim(mx[[i]])
+ }
+ mxx <- do.call(cbind, mx)
+ mxx <- limma::normalizeQuantiles(mxx)
+ for(i in 1:length(xx)) {
+ kk <- ((i-1)*ncol(X)+1):(i*ncol(X))
+ jj <- 1:nrow(xx[[i]])
+ mx[[i]] <- mxx[jj,kk]
+ }
+ names(mx) <- names(xx)
+ normX <- mofa.merge_data(mx)
+ }
+ }
+ if(is.null(normX)) normX <- mofa.merge_data(xx)
+ normX <- normX[rownames(X),]
+ return(normX)
+}
+
+
+
+
+
## ======================================================================
## ======================================================================
## ======================================================================
diff --git a/R/pgx-plotting.R b/R/pgx-plotting.R
index 599b1586..1c8b8e1a 100644
--- a/R/pgx-plotting.R
+++ b/R/pgx-plotting.R
@@ -7132,72 +7132,49 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL,
edge.cex <- 1
edge.alpha <- 0.33
fc <- "value"
+ xdist = 1
+ normalize.edges = FALSE
+ yheight = 2
+ edge.sign = "both"
edge.type <- "both"
labpos <- NULL
layout <- c("parallel", "hive")[1]
normalize.edges <- FALSE
value.name <- "rho"
strip.prefix <- FALSE
- }
-
- if (is.null(layers)) layers <- unique(igraph::V(graph)$layer)
- layers <- setdiff(layers, c("SOURCE", "SINK"))
- graph <- igraph::subgraph(graph, igraph::V(graph)$layer %in% layers)
- if (!is.null(labpos) && length(labpos) < length(layers)) {
- labpos <- head(rep(labpos, 99), length(layers))
- }
+ prune = FALSE
+ }
+
+ vattr <- igraph::vertex_attr_names(graph)
+ edgeattr <- igraph::edge_attr_names(graph)
+ if(!"rho" %in% edgeattr) message("WARNING: no rho in edge attributes!")
+ if(!"weight" %in% edgeattr) message("WARNING: no weight in edge attributes!")
+ if(!"value" %in% vattr) stop("ERROR: no value in vertex attributes!")
+ if(!"layer" %in% vattr) stop("ERROR: no layer in vertex attributes!")
+
+ graph <- lasagna.prune_graph(
+ graph,
+ ntop = ntop,
+ layers = layers,
+ normalize.edges = normalize.edges,
+ min.rho = min.rho,
+ edge.sign = edge.sign,
+ edge.type = edge.type,
+ filter = NULL,
+ prune = prune)
+
+ layers <- graph$layers
+ layers <- setdiff(layers, c("SOURCE","SINK"))
- if (!"value" %in% names(igraph::vertex_attr(graph))) {
- stop("vertex must have 'value' attribute")
- }
fc <- igraph::V(graph)$value
names(fc) <- igraph::V(graph)$name
-
- ## select ntop features
- if (!is.null(ntop) && ntop > 0) {
- ii <- tapply(
- 1:length(fc), igraph::V(graph)$layer,
- function(i) head(i[order(-abs(fc[i]))], ntop)
- )
- ii <- unlist(ii[names(ii) %in% layers])
- fc <- fc[ii]
- graph <- igraph::subgraph(graph, igraph::V(graph)[ii])
- }
-
- if (normalize.edges) {
- for (e in unique(igraph::E(graph)$connection_type)) {
- ii <- which(igraph::E(graph)$connection_type == e)
- max.wt <- max(abs(igraph::E(graph)$weight[ii]), na.rm = TRUE) + 1e-3
- igraph::E(graph)$weight[ii] <- igraph::E(graph)$weight[ii] / max.wt
- }
- }
-
- if (min.rho > 0) {
- ii <- which(abs(igraph::E(graph)$weight) < min.rho)
- if (length(ii)) igraph::E(graph)$weight[ii] <- 0
- }
-
- if (edge.sign != "both") {
- ewt <- igraph::E(graph)$weight
- if (grepl("pos", edge.sign)) igraph::E(graph)$weight[ewt < 0] <- 0
- if (grepl("neg", edge.sign)) igraph::E(graph)$weight[ewt > 0] <- 0
- }
- if (edge.type != "both") {
- ic <- grepl("->", igraph::E(graph)$connection_type)
- if (grepl("intra", edge.type)) igraph::E(graph)$weight[ic] <- 0
- if (grepl("inter", edge.type)) igraph::E(graph)$weight[!ic] <- 0
- }
- graph <- igraph::delete_edges(graph, which(igraph::E(graph)$weight == 0))
-
- if (prune) {
- graph <- igraph::subgraph_from_edges(graph, igraph::E(graph))
- }
-
+
## layout
vlayer <- igraph::V(graph)$layer
if (layout == "parallel") {
if (is.null(xpos)) xpos <- c(0:(length(layers) - 1))
xpos <- xpos * xdist
+ xpos <- head(rep(xpos,10),length(layers))
x <- xpos[match(vlayer, layers)]
y <- fc[igraph::V(graph)$name]
layout.xy <- cbind(x = x, y = y)
@@ -7233,16 +7210,21 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL,
max.wt <- max(ewt, na.rm = TRUE) + 1e-3
ew <- (ewt / max.wt)**2
- vx <- log(1000 * igraph::page_rank(graph, weights = ewt)$vector)
- # vx <- abs(y)
- vx <- (0.1 + abs(vx) / max(abs(vx)))**1
- vcol <- c("blue2", "red2")[1 + 1 * (fc[vv] > 0)]
+ ## vertex size relative to centrality
+ vx <- log(1000*igraph::page_rank(graph, weights=ewt)$vector)
+ vx <- (0.1+abs(vx)/max(abs(vx)))**1
+ vcol <- c("blue2","red2")[ 1+1*(fc[vv] > 0)]
- ecol <- c("darkorange3", "magenta4")[1 + 1 * (igraph::E(graph)$weight >= 0)]
+ ## color edges by sign or correlation. set NA edges to grey
+ ecol <- c("darkorange3","magenta4")[ 1+1*(igraph::E(graph)$rho >= 0)]
+ ii <- which(is.na(igraph::E(graph)$rho))
+ if(length(ii)) ecol[ii] <- "grey70"
ecol <- adjustcolor(ecol, edge.alpha)
- ecurv <- c(-0.25, 0)[1 + 1 * grepl("->", igraph::E(graph)$connection_type)]
- # ecurv <- c(TRUE,FALSE)[1 + 1*grepl("->",igraph::E(graph)$connection_type)]
-
+
+ ## add curvature for intra-edges
+ ecurv <- c(-0.25,0)[1 + 1*grepl("->",igraph::E(graph)$connection_type)]
+ #ecurv <- c(TRUE,FALSE)[1 + 1*grepl("->",igraph::E(graph)$connection_type)]
+
igraph::V(graph)$label <- ""
if (is.null(xlim)) {
xlim <- range(layout.xy[, 1])
@@ -7314,7 +7296,7 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL,
labels <- mofa.strip_prefix(labels)
}
if (strip.prefix2) {
- labels <- sub(".*:", "", labels)
+ labels <- sub("^[a-zA-Z]+:", "", labels)
}
labels <- gsub("^NA \\(", "(", labels)
text(x, y, labels, cex = cex.label, pos = labposx, adj = 1, offset = 2.8)
@@ -7365,9 +7347,9 @@ plotHivePlot <- function(X, f, group, groups = NULL,
gr <- igraph::graph_from_edgelist(ee, directed = FALSE)
igraph::E(gr)$weight <- abs(R[idx])
igraph::V(gr)$group <- group[igraph::V(gr)$name]
-
- if (length(gr) == 0) {
- message("[plotMultiPartiteGraph] ERROR. empty graph.")
+
+ if(length(gr)==0) {
+ message("[plotHivePlot] ERROR. empty graph.")
return(NULL)
}
diff --git a/R/pgx-singlecell.R b/R/pgx-singlecell.R
index 160a391e..a30485cc 100644
--- a/R/pgx-singlecell.R
+++ b/R/pgx-singlecell.R
@@ -479,6 +479,7 @@ pgx.supercell <- function(counts,
meta,
group = NULL,
gamma = 20,
+ nvargenes = 1000,
log.transform = TRUE) {
if (log.transform) { ## supercell uses log2 matrix
X <- logCPM(counts, total = 1e4)
@@ -486,14 +487,21 @@ pgx.supercell <- function(counts,
X <- counts
}
- if (!is.null(group) && "group" %in% colnames(meta)) {
- message("using group detected in meta\n")
+ if (is.null(group) && "group" %in% colnames(meta)) {
+ message("using group column detected in meta\n")
group <- meta[, "group"]
}
+ if (!is.null(group) && any(group %in% colnames(meta))) {
+ group <- intersect(group, colnames(meta))
+ message("using groups: ", paste(group,collapse="."))
+ group <- meta[, group]
+ if(NCOL(group) > 1) group <- apply(group, 1, paste, collapse=".")
+ }
+
SC <- SuperCell::SCimplify(X,
gamma = gamma,
- n.var.genes = 1000,
+ n.var.genes = nvargenes,
cell.split.condition = group
)
message("[pgx.supercell] SuperCell::SCimplify completed")
@@ -514,7 +522,16 @@ pgx.supercell <- function(counts,
## Compute metacall expression as sum of counts
counts <- as.matrix(counts)
- sc.counts <- SuperCell::supercell_GE(counts, mode = "sum", groups = SC$membership)
+ if(log.transform) {
+ sc.counts <- SuperCell::supercell_GE(
+ counts, mode = "sum",
+ groups = SC$membership)
+ } else {
+ sc.counts <- SuperCell::supercell_GE(
+ counts, mode = "average",
+ groups = SC$membership)
+ }
+
message("[pgx.supercell] SuperCell::supercell_GE completed")
sc.membership <- paste0("mc", SC$membership)
colnames(sc.counts) <- paste0("mc", 1:ncol(sc.counts))
diff --git a/R/pgx-wgcna.R b/R/pgx-wgcna.R
index 2abbd6b1..714409cf 100644
--- a/R/pgx-wgcna.R
+++ b/R/pgx-wgcna.R
@@ -4,7 +4,6 @@
##
##
-
#' @title WGCNA network construction and module detection
#'
#' @param pgx PGX object containing gene expression data
@@ -40,10 +39,16 @@ pgx.wgcna <- function(
networktype = "signed",
tomtype = "signed",
numericlabels = FALSE,
- ngenes = 4000,
- maxBlockSize = 5000,
- gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]") {
- ## minmodsize=10;power=NULL;cutheight=0.15;deepsplit=2;ngenes=4000;networktype="signed";tomtype="signed";numericlabels=FALSE;ngenes=2000;gset.filter=NULL;minKME=0.8;maxBlockSize=5000
+ ngenes = 2000,
+ maxBlockSize = 9999,
+ gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]",
+ summary = TRUE,
+ ai_model = DEFAULT_LLM,
+ verbose = 1,
+ progress = NULL
+ ) {
+
+ ##minmodsize=10;power=NULL;cutheight=0.15;deepsplit=2;ngenes=4000;networktype="signed";tomtype="signed";numericlabels=FALSE;ngenes=2000;gset.filter=NULL;minKME=0.8;maxBlockSize=5000
samples <- pgx$samples
## no dot pheno
@@ -79,20 +84,22 @@ pgx.wgcna <- function(
X <- normalizeMultiOmics(X)
}
+ if(!is.null(progress)) progress$set(message = "Calculating WGCNA...", value=0.2)
message("[pgx.wgcna] start wgcna.compute...")
wgcna <- wgcna.compute(
X = X,
samples = samples,
minmodsize = minmodsize, # default: min(20,...)
power = power, # default: 12 (for signed)
- cutheight = cutheight, # default: 0.15
+ mergeCutHeight = cutheight, # default: 0.15
deepsplit = deepsplit, # default: 2
minKME = minKME, # default: 0.30
networktype = networktype, # default: unsigned (but signed is better...)
tomtype = tomtype, # default: signed
numericlabels = numericlabels,
maxBlockSize = maxBlockSize,
- ngenes = ngenes
+ ngenes = ngenes,
+ verbose = verbose
)
message("[pgx.wgcna] finished computing wgcna.compute!")
@@ -114,16 +121,33 @@ pgx.wgcna <- function(
## ----------------------------------------------------
## Do geneset analysis
## ----------------------------------------------------
- res.gse <- wgcna.compute_enrichment(
- wgcna, pgx,
- method = c("fisher", "gsetcor", "mmcor"),
+ if(!is.null(progress)) progress$set(message = "Computing enrichment...", value=0.4)
+ message("computing module enrichment...")
+ wgcna$gsea <- wgcna.computeModuleEnrichment(
+ wgcna,
+ annot = pgx$genes,
+ # GMT = pgx$GMT,
+ # gsetX = pgx$gsetX,
+ methods = c("fisher", "gsetcor", "xcor"),
ntop = 1000,
+ xtop = 100,
filter = gset.filter
)
+ if(summary) {
+ if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6)
+ message("Annotating modules using ", ai_model)
+ wgcna$summary <- wgcna.describeModules(
+ wgcna,
+ ntop = 25,
+ model = ai_model,
+ annot = pgx$genes,
+ experiment = pgx$description,
+ verbose = 0)
+ }
+
+
## add to results object
- wgcna$gse <- res.gse$gsets
- wgcna$enriched_genes <- res.gse$genes
wgcna$clust <- clust
wgcna$networktype <- networktype
wgcna$tomtype <- tomtype
@@ -131,44 +155,90 @@ pgx.wgcna <- function(
return(wgcna)
}
+
#' @export
wgcna.compute <- function(X,
samples,
+ ngenes = 2000,
minmodsize = 20,
power = 12,
- cutheight = 0.15,
+ mergeCutHeight = 0.15,
deepsplit = 2,
minKME = 0.3,
+ treeCut = 0.99,
+ treeCutCeiling = 1,
networktype = "signed",
tomtype = "signed",
- reassignThreshold = 1e-6,
- ngenes = 4000,
+ clustMethod = "average",
+ cutMethod = "hybrid",
+ calcMethod = "fast",
+ lowrank = 40,
numericlabels = FALSE,
- maxBlockSize = 8000,
+ maxBlockSize = 9999,
merge.dendro = TRUE,
+ compute.stats = TRUE,
prefix = "ME",
- sv.dim = 80,
- verbose = 0) {
- ## minmodsize=20;power=12;cutheight=0.15;deepsplit=2;ngenes=2000;networktype="signed";tomtype="signed";numericlabels=FALSE;prefix="ME";minKME=0.3;reassignThreshold=1e-6;maxBlockSize=5000
-
+ sv.tom = 40,
+ drop.ref = FALSE,
+ net = NULL,
+ is.multiomics = NULL,
+ verbose = 0
+ ) {
+
+ if(0) {
+ ngenes = 2000;
+ minmodsize = 20;
+ power = 12;
+ mergeCutHeight = 0.15;
+ deepsplit = 2;
+ minKME = 0.3;
+ treeCut = 0.99;
+ treeCutCeiling = 1;
+ networktype = "signed";
+ tomtype = "signed";
+ clustMethod = "average";
+ cutMethod = "hybrid";
+ calcMethod = "fast";
+ lowrank = 40;
+ numericlabels = FALSE;
+ maxBlockSize = 9999;
+ merge.dendro = TRUE;
+ compute.stats = TRUE;
+ prefix = "ME";
+ sv.tom = 40;
+ drop.ref = FALSE;
+ net = NULL;
+ is.multiomics = NULL;
+ verbose = 0
+ }
+
require(WGCNA)
+ if(nchar(prefix)!=2) {
+ stop("prefix must be two capital letters")
+ }
+
+ kk <- intersect(colnames(X),rownames(samples))
+ X <- as.matrix(X[,kk])
+ samples <- as.data.frame(samples, check.names=FALSE)
+ samples <- samples[kk, , drop=FALSE]
+
nmissing <- sum(is.na(X))
if (nmissing > 0) {
message("Found ", nmissing, " missing values in X. Imputing prior to WGCNA.")
X <- svdImpute2(X)
}
- X <- X[!duplicated(rownames(X)), ]
-
+ X <- X[!duplicated(rownames(X)), , drop = FALSE]
+
## restrict number of genes
if (ngenes > 0 && nrow(X) > ngenes) {
- is.multiomics <- all(grepl(":", rownames(X)))
- if (is.multiomics) {
- message("[wgcna.compute] multiomics. topSD = ", ngenes)
- X <- mofa.topSD(X, ngenes)
+ if(is.null(is.multiomics)) is.multiomics <- all(grepl(":",rownames(X))) ## not robust!!
+ if(is.multiomics) {
+ message("[wgcna.compute] topSD = ",ngenes, " (multi-omics)")
+ X <- mofa.topSD(X, ngenes)
} else {
- message("[wgcna.compute] topSD = ", ngenes)
+ message("[wgcna.compute] topSD = ", ngenes, " (single omics)")
sdx <- matrixStats::rowSds(X, na.rm = TRUE)
X <- X[sdx > 0.1 * mean(sdx, na.rm = TRUE), ] ## filter low SD??
X <- X[order(-matrixStats::rowSds(X, na.rm = TRUE)), ]
@@ -176,82 +246,71 @@ wgcna.compute <- function(X,
}
}
- message("[wgcna.compute] dim(X) = ", paste(dim(X), collapse = " x "))
- message("[wgcna.compute] dim(samples) = ", paste(dim(samples), collapse = " x "))
-
datExpr <- t(X)
- if (is.null(power)) {
- ## Estimate best power
- message("[wgcna.compute] estimating optimal power...")
- powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
- sft <- WGCNA::pickSoftThreshold(
- datExpr,
- powerVector = powers,
- networkType = networktype,
- verbose = verbose
- )
- power <- sft$powerEstimate
- if (is.na(power)) power <- 20
- }
-
## adapt for small datasets (also done in WGCNA package)
minmodsize <- min(minmodsize, ncol(datExpr) / 2)
-
+ if(!is.null(power)) power <- power[1]
+
message("[wgcna.compute] minmodsize = ", minmodsize)
message("[wgcna.compute] number of features = ", nrow(X))
message("[wgcna.compute] minKME = ", minKME)
message("[wgcna.compute] power = ", power)
- message("[wgcna.compute] mergeCutHeight = ", cutheight)
+ message("[wgcna.compute] mergeCutHeight = ", mergeCutHeight)
+ message("[wgcna.compute] calcMethod = ", calcMethod)
- message("[wgcna.compute] computing blockwise Modules...")
- ## WGCNA::enableWGCNAThreads()
- cor <- WGCNA::cor ## needed...
- net <- WGCNA::blockwiseModules(
- datExpr,
- power = power,
- networkType = networktype,
- TOMType = tomtype,
- minModuleSize = minmodsize,
- reassignThreshold = reassignThreshold,
- mergeCutHeight = cutheight,
- minKMEtoStay = minKME,
- numericLabels = numericlabels, ## numeric or 'color' labels
- deepSplit = deepsplit,
- maxBlockSize = maxBlockSize,
- verbose = verbose
- )
- cor <- stats::cor
- message("[wgcna.compute] finished blockwise Modules...")
+ ##WGCNA::enableWGCNAThreads()
+ if(is.null(net)) {
+ message("[wgcna.compute] wgcna.computeModules....")
+ net <- wgcna.computeModules(
+ datExpr,
+ power = power,
+ networkType = networktype,
+ TOMType = tomtype,
+ calcMethod = calcMethod,
+ lowrank = lowrank,
+ clustMethod = clustMethod,
+ cutMethod = cutMethod,
+ deepSplit = deepsplit,
+ minModuleSize = minmodsize,
+ mergeCutHeight = mergeCutHeight,
+ minKMEtoStay = minKME,
+ treeCut = treeCut,
+ treeCutCeiling = treeCutCeiling,
+ numericLabels = numericlabels,
+ maxBlockSize = maxBlockSize,
+ returnTOM = TRUE,
+ verbose = verbose
+ )
+ }
- ## prefix="ME"
- table(net$colors)
- names(net$MEs) <- sub("^ME", prefix, names(net$MEs))
- net$labels <- paste0(prefix, net$colors)
+ if(!"MEs" %in% names(net)) {
+ message("[wgcna.compute]: running WGCNA::moduleEigengenes")
+ net$MEs = WGCNA::moduleEigengenes(datExpr, colors = net$colors)$eigengenes
+ }
+ ## Substitue prefix="ME"
+ if(prefix!="ME") names(net$MEs) <- sub("^ME", prefix, names(net$MEs))
+ net$labels <- paste0(prefix, net$colors)
+ names(net$labels) <- colnames(datExpr)
+
## clean up traits matrix
datTraits <- data.frame(samples, check.names = FALSE)
isdate <- apply(datTraits, 2, is.Date)
datTraits <- datTraits[, !isdate, drop = FALSE]
- ## rm unvarying phenotypes (cause NAs and errors in OPG plots)
## Expand multi-class discrete phenotypes into binary vectors
datTraits <- utils::type.convert(datTraits, as.is = TRUE)
- tr.class <- sapply(datTraits, class)
- sel1 <- which(tr.class %in% c("factor", "character"))
- sel2 <- which(tr.class %in% c("integer", "numeric"))
- tr1 <- datTraits[, 0]
- tr2 <- datTraits[, 0]
- if (length(sel1)) {
- tr1 <- expandPhenoMatrix(datTraits[, sel1, drop = FALSE], drop.ref = FALSE)
- if (is.null(tr1)) tr1 <- datTraits[, 0]
- }
- if (length(sel2)) {
- ## keeping numeric phenotypes
- tr2 <- datTraits[, sel2, drop = FALSE]
+ datTraits <- expandPhenoMatrix(datTraits, keep.numeric=TRUE, drop.ref=drop.ref)
+
+ if(is.null(datTraits)) {
+ message("WARNING:: no valid traits. creating random traits.")
+ ##random.trait <- sample(c(0,1), nrow(samples), replace=TRUE)
+ random.trait <- head(rep(c(0,1), nrow(samples)), nrow(samples))
+ datTraits <- data.frame( random = random.trait )
+ rownames(datTraits) <- rownames(samples)
}
- datTraits <- cbind(tr1, tr2)
-
+
## list of genes in modules
me.genes <- tapply(names(net$colors), net$colors, list)
names(me.genes) <- paste0(prefix, names(me.genes))
@@ -264,43 +323,60 @@ wgcna.compute <- function(X,
names(me.colors) <- me.labels
me.colors <- me.colors[names(net$MEs)]
- ## compute clustering based on TOM matrix
- message("[wgcna.compute] computing TOM matrix...")
- TOM <- NULL
- TOM <- WGCNA::TOMsimilarityFromExpr(
- datExpr,
- power = power,
- TOMType = tomtype,
- networkType = networktype,
- verbose = verbose
- )
+ ## compute TOM matrix (we need for some plots)
+ if(!is.null(net$TOM)) {
+ TOM <- net$TOM
+ net$TOM <- NULL
+ } else {
+ message("[wgcna.compute] recomputing TOM matrix...")
+ TOM <- WGCNA::TOMsimilarityFromExpr(
+ datExpr,
+ power = net$power,
+ TOMType = tomtype,
+ networkType = networktype,
+ verbose = verbose
+ )
+ }
## instead of the huge TOM matrix we save a smaller SVD.
- message("[wgcna.compute] reducing TOM. sv.dim = ", sv.dim)
- rownames(TOM) <- colnames(TOM) <- colnames(datExpr)
- sv <- irlba::irlba(TOM, nv = min(sv.dim, dim(X)))
- svTOM <- sv$v %*% diag(sqrt(sv$d))
- rownames(svTOM) <- colnames(datExpr)
-
+ svTOM <- NULL
+ if(sv.tom>0) {
+ ## sv.tom <- ceiling(min(sv.tom,dim(datExpr)/2))
+ message("[wgcna.compute] reducing TOM. sv.tom = ", sv.tom)
+ rownames(TOM) <- colnames(TOM) <- colnames(datExpr)
+ sv.tom <- min(sv.tom, ncol(TOM)-1)
+ sv <- irlba::irlba(TOM, nv = sv.tom)
+ svTOM <- sv$v %*% diag(sqrt(sv$d))
+ rownames(svTOM) <- colnames(datExpr)
+ }
+
## compute module eigenvectors (loading matrix)
message("[wgcna.compute] computing module eigenvectors...")
MVs <- list()
for (clr in unique(net$colors)) {
ii <- which(net$colors == clr)
- mX <- datExpr[, ii]
+ mX <- datExpr[, ii, drop=FALSE]
mX <- scale(mX) ## NOTE: seems WGCNA is using full scaling
- sv1 <- irlba::irlba(mX, nv = 1, nu = 1)
+ if(ncol(mX)>1) {
+ res <- irlba::irlba(mX, nv = 1, nu = 1)
+ sv1 <- res$v[,1] * res$d[1]
+ } else {
+ sv1 <- 1
+ }
sv <- rep(0, ncol(datExpr))
- sv[ii] <- sv1$v[, 1] * sv1$d[1]
+ sv[ii] <- sv1
MVs[[paste0(prefix, clr)]] <- sv
}
MVs <- as.matrix(do.call(cbind, MVs[names(net$MEs)]))
rownames(MVs) <- colnames(datExpr)
## compute gene statistics
- message("[wgcna.compute] computing gene statistics...")
- stats <- wgcna.compute_geneStats(net, datExpr, datTraits, TOM = TOM)
-
+ stats <- NULL
+ if(compute.stats) {
+ message("[wgcna.compute] computing gene statistics...")
+ stats <- wgcna.computeGeneStats(net, datExpr, datTraits, TOM=TOM)
+ }
+
## module-traits matrix
message("[wgcna.compute] computing module-traits matrix...")
modTraits <- cor(net$MEs, datTraits, use = "pairwise")
@@ -315,16 +391,16 @@ wgcna.compute <- function(X,
net$merged_dendro <- NULL
}
}
- remove(TOM)
-
+ remove(TOM) ## big
+
## construct results object
results <- list(
datExpr = datExpr,
datTraits = datTraits,
- ## TOM = TOM, ## this can be BIG!!!
- svTOM = svTOM, ## smaller singular vectors
+ ## TOM = TOM, ## this can be BIG!!! generally no need, just for plotting
+ svTOM = svTOM, ## smaller singular vectors
net = net,
- power = power,
+ #power = net$power,
me.genes = me.genes,
me.colors = me.colors,
W = MVs,
@@ -332,10 +408,540 @@ wgcna.compute <- function(X,
stats = stats
)
+ message("[wgcna.compute] completed. \n\n")
+
return(results)
+
+}
+
+#' @export
+wgcna.compute_multiomics <- function(dataX,
+ samples,
+ power = 12,
+ ngenes = 2000,
+ clustMethod = "average",
+ cutMethod = "hybrid",
+ minmodsize = 10,
+ minKME = 0.3,
+ deepsplit = 2,
+ compute.enrichment = TRUE,
+ annot = NULL,
+ GMT = NULL,
+ gsetX = NULL,
+ drop.ref = FALSE,
+ add.pheno = FALSE,
+ add.gsets = FALSE,
+ do.consensus = FALSE,
+ gset.methods = c("fisher","gsetcor","xcor"),
+ gset.ntop = 1000,
+ gset.xtop = 100,
+ summary = TRUE,
+ ai_model = DEFAULT_LLM,
+ ai_experiment = "",
+ verbose = 1,
+ progress = NULL
+ ) {
+
+ if(0) {
+ do.consensus = 1
+ cutMethod = "hybrid"
+ deepsplit = 2
+ power = 12
+ ngenes = 2000
+ minmodsize = 10
+ minKME = 0.3
+ compute.enrichment = TRUE
+ gset.ntop = 1000
+ gset.xtop = 100
+ annot = pgx$genes
+ GMT = pgx$GMT ##??
+ gsetX = pgx$gsetX
+ progress = NULL
+ }
+
+ ## preprocessing
+ if(!is.null(annot)) {
+ dataX <- lapply(dataX, function(x) rename_by2(x, annot, "symbol"))
+ }
+
+ ## add pheno matrix??
+ if(add.gsets) {
+ if(is.null(GMT)) GMT <- Matrix::t(playdata::GSETxGENE)
+ if(is.null(gsetX)) {
+ X <- do.call(rbind, dataX)
+ if(!is.null(annot)) GMT <- rename_by2(GMT, annot, "symbol")
+ kk <- intersect(rownames(X), rownames(GMT))
+ if(length(kk)) gsetX <- plaid::plaid(X[kk,], GMT[kk,])
+ }
+ if(!is.null(gsetX)) dataX$gs <- gsetX
+ }
+
+ ## add phenomatrix??
+ if(add.pheno) {
+ phenoX <- expandPhenoMatrix(samples, keep.numeric=TRUE, drop.ref=drop.ref)
+ dataX$ph <- t(phenoX)
+ }
+
+ dt.na <- which(unlist(lapply(dataX, function(x) sum(is.na(x)))) > 0)
+ if (any(dt.na)) {
+ dataX[dt.na] <- lapply(dataX[dt.na], imputeMissing, method="SVD2")
+ }
+ names(dataX) <- substring(names(dataX),1,2)
+ #dataX <- mofa.topSD(dataX, ngenes)
+
+ if(!is.null(progress)) {
+ progress$set(message = paste("computing WGCNA modules..."), value = 0.33)
+ }
+
+ if(is.null(power) || is.na(power) ) power <- "sft"
+ if(as.character(power[1]) %in% c("sft","iqr")) {
+ message("[wgcna.compute_multiomics] estimating power with method = ", power[1])
+ est.power <- rep(NA, length(dataX))
+ i=1
+ for(i in 1:length(dataX)) {
+ p <- wgcna.pickSoftThreshold(
+ Matrix::t(dataX[[i]]), sft=NULL, rcut=0.85, powers = NULL,
+ method=power[1], nmax=1000, verbose=1)
+ if(length(p)==0 || is.null(p) ) p <- NA
+ est.power[i] <- p
+ }
+ est.power
+ power <- ifelse (is.na(est.power), 12, est.power)
+ message("[wgcna.compute_multiomics] estimated power = ", power)
+ } else {
+ power <- as.numeric(power)
+ }
+ nw <- length(dataX)
+ power <- head(rep(power, nw),nw)
+ names(power) <- names(dataX)
+
+ ## This runs WGCNA on an expression list.
+ wgcna <- list()
+ has.gxpx <- all(c("gx","px") %in% names(dataX))
+ if (do.consensus && has.gxpx) {
+ cat("[wgcna.compute_multiomics] computing consensus WGCNA for GX+PX --------\n")
+ nn <- mean(rownames(dataX[['gx']]) %in% rownames(dataX[['px']]))
+ if (nn < 0.10) {
+ message("[wgcna.compute_multiomics] ERROR: gx and px features do not overlap")
+ } else {
+ wgcna <- wgcna.createConsensusLayers(
+ dataX[c('gx','px')],
+ samples = samples,
+ prefix = c('GX','PX'),
+ ngenes = ngenes,
+ power = power[c('gx','px')],
+ minModuleSize = minmodsize,
+ deepSplit = deepsplit,
+ mergeCutHeight = 0.15,
+ minKME = minKME,
+ maxBlockSize = 9999,
+ verbose = 1
+ )
+ }
+ }
+
+ dtlist <- setdiff(names(dataX), names(wgcna))
+ for(dt in dtlist) {
+ cat("[wgcna.compute_multiomics] computing WGCNA for", dt, "-------------\n")
+ minKME <- ifelse(dt=='ph', 0, minKME)
+ minmodsize <- ifelse(dt=='ph', 1, minmodsize)
+ wgcna[[dt]] <- wgcna.compute(
+ X = dataX[[dt]],
+ samples = samples,
+ ngenes = ngenes,
+ calcMethod = "fast",
+ power = power[dt],
+ lowrank = 40,
+ clustMethod = clustMethod,
+ cutMethod = cutMethod,
+ deepsplit = deepsplit,
+ minKME = minKME,
+ minmodsize = minmodsize,
+ mergeCutHeight = 0.15,
+ compute.stats = TRUE,
+ sv.tom = 40,
+ prefix = toupper(dt),
+ drop.ref = drop.ref,
+ is.multiomics = FALSE,
+ verbose = verbose
+ )
+ }
+
+ wgcna <- wgcna[names(dataX)]
+
+ ## Compute enrichment
+ if(compute.enrichment) {
+
+ message("[wgcna.compute_multiomics] computing module enrichment...")
+
+ if(!is.null(progress)) {
+ progress$set(message = paste("computing module enrichment..."), value = 0.66)
+ }
+
+ gse <- wgcna.computeModuleEnrichment(
+ wgcna = wgcna,
+ multi = TRUE,
+ methods = gset.methods,
+ ntop = gset.ntop,
+ xtop = gset.xtop,
+ annot = annot,
+ GMT = GMT,
+ gsetX = gsetX,
+ filter = NULL
+ #filter = "^PATHWAY|^HALLMARK|^GO|^C[1-9]"
+ )
+
+ ## split up results
+ for(k in names(wgcna)) {
+ mm <- names(wgcna[[k]]$me.genes)
+ wgcna[[k]]$gsea <- gse[mm]
+ }
+
+ if(summary) {
+ if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6)
+ message("Annotating modules using ", ai_model)
+ for(k in names(wgcna)) {
+ wgcna[[k]]$summary <- wgcna.describeModules(
+ wgcna[[k]],
+ multi = FALSE,
+ ntop = 25,
+ model = ai_model,
+ annot = annot,
+ experiment = ai_experiment,
+ verbose = 0
+ )
+ }
+ }
+
+
+ }
+
+ return(wgcna)
+}
+
+
+#' Reimplementation for WGCNA::blockwiseModules(). This returns exact
+#' same object as WGCNA::blockwiseModules() but is faster and allows
+#' different clustering linkage methods (ward, complete).
+#'
+wgcna.computeModules <- function(
+ datExpr,
+ power = 6,
+ networkType = "signed",
+ TOM = NULL,
+ TOMType = "signed",
+ calcMethod = "fast",
+ lowrank = 20,
+ clustMethod = "average",
+ cutMethod = "hybrid", ## hybrid, tree, static
+ deepSplit = 2,
+ treeCut = 0.99,
+ treeCutCeiling = 1,
+ minModuleSize = 20,
+ minModuleSize2 = minModuleSize,
+ mergeCutHeight = 0.15,
+ minKMEtoStay = 0.3,
+ numericLabels = FALSE, ## numeric or 'color' labels
+ maxBlockSize = 9999,
+ returnTOM = FALSE,
+ verbose = 1 ) {
+
+ #power=6;networkType=TOMType="signed";minModuleSize=20;mergeCutHeight=0.15;minKMEtoStay=0.3;numericLabels=FALSE;clustMethod="average";deepSplit = 2;treeCut = 0.99;treeCutCeiling = 1;
+
+ cor <- WGCNA::cor ## needed...
+ deepSplit <- as.integer(deepSplit)
+ lowrank <- as.integer(lowrank)
+
+ if(is.null(power) || is.na(power) ) power <- "sft" ## use iqr?
+ auto.power <- power[1] %in% c("sft","iqr")
+ if (auto.power) {
+ message("[wgcna.computeModules] estimating power with method = ", power[1])
+ powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
+ powers <- c(powers, seq(from = 25, to = 50, by = 5))
+ power <- wgcna.pickSoftThreshold(datExpr, sft=NULL, rcut=0.85,
+ method=power[1], nmax=2000, verbose=0)
+ if (is.na(power)) power <- 6
+ message("[wgcna.compute_multiomics] estimated power = ", power)
+ }
+
+ if(calcMethod == "blockwise") {
+ message("[wgcna.computeModules] computing blockwiseModules...")
+ net <- WGCNA::blockwiseModules(
+ datExpr,
+ power = power,
+ networkType = networkType,
+ TOMType = TOMType,
+ minModuleSize = minModuleSize,
+ mergeCutHeight = mergeCutHeight,
+ minKMEtoStay = minKMEtoStay,
+ numericLabels = numericLabels, ## numeric or 'color' labels
+ deepSplit = deepSplit,
+ maxBlockSize = maxBlockSize,
+
+ verbose = verbose
+ )
+ return(net)
+ }
+
+ clustMethod <- sub("^ward$","ward.D",clustMethod)
+ ## define distance matrix
+ if(is.null(TOM)) {
+ adjacency <- WGCNA::adjacency(datExpr, power = power, type = networkType)
+ adjacency[is.na(adjacency)] <- 0
+ if (calcMethod == "fast") {
+ if(verbose>0)
+ message("[wgcna.computeModules] Computing TOM matrix using fast method...")
+ TOM <- fastTOMsimilarity(adjacency, tomtype=TOMType, lowrank=lowrank)
+ } else if (calcMethod == "adjacency") {
+ if(verbose>0)
+ message("[wgcna.computeModules] Computing using adjacency as TOM matrix...")
+ TOM <- adjacency
+ } else if (calcMethod == "full") {
+ if(verbose>0) message("[wgcna.computeModules] Computing full TOM matrix...")
+ ## SLOW!!!
+ TOM <- WGCNA::TOMsimilarity(adjacency, TOMType=TOMType, verbose=verbose)
+ } else {
+ stop("[wgcna.computeModules] ERROR: invalid calcMethod parameter:", calcMethod)
+ }
+ dimnames(TOM) <- dimnames(adjacency)
+ }
+
+ ## transform to dissTOM
+ dissTOM <- 1 - TOM
+
+ ## clustering
+ if(verbose>0) message("Clustering features using ", clustMethod, " linkage")
+ ##geneTree <- flashClust::flashClust(as.dist(dissTOM), method=clustMethod)
+ geneTree <- stats::hclust(as.dist(dissTOM), method=clustMethod)
+
+ ## sometimes there is a height error. following is a fix.
+ geneTree$height <- round(geneTree$height, 6)
+
+ ## exception1
+ if(minModuleSize <= 1 && cutMethod!="static") {
+ message("WARNING: minModuleSize==1. Changing to static cutting")
+ cutMethod <- "static"
+ }
+
+ if(treeCut > 1) cutMethod <- "static"
+ if(treeCut <= 1) {
+ ## transform from relative to actual
+ qq <- quantile( geneTree$height, probs = c(0.05, treeCutCeiling))
+ treeCut <- qq[1] + treeCut * diff(qq)
+ }
+
+ if(verbose>0) {
+ message("Tree cut method: ", cutMethod)
+ message("treeCut = ", treeCut)
+ message("deepSplit = ", deepSplit)
+ message("minModuleSize = ", minModuleSize)
+ }
+
+ if(cutMethod %in% c("hybrid","tree")) {
+
+ if(cutMethod=="tree") deepSplit <- (deepSplit > 0)
+ label <- dynamicTreeCut::cutreeDynamic(
+ geneTree,
+ method = cutMethod,
+ cutHeight = treeCut,
+ distM = dissTOM,
+ deepSplit = deepSplit,
+ minClusterSize = minModuleSize2
+ )
+ } else if( cutMethod == "static" && treeCut <= 1 ) {
+ if(verbose>0) message("Static cutting tree at fixed H = ", treeCut)
+ label <- cutree( geneTree, h = treeCut)
+ } else if(cutMethod == "static" && treeCut > 1 ) {
+ if(verbose>0) message("Static cutting tree with fixed K = ", treeCut)
+ label <- cutree( geneTree, k = treeCut)
+ } else {
+ stop("ERROR: could not determine cutMethod")
+ }
+ label <- as.integer(label)
+ table(label)
+ nmodules <- length(unique(label))
+ if(verbose>0) message("Found ", nmodules, " modules")
+
+ ## Eigengenes
+ if(verbose>0) message("Calculating eigengenes...")
+ colors <- WGCNA::labels2colors(label)
+ MEs <- WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes
+
+ ## prune using minKME
+ if(minKMEtoStay > 0) {
+ if(verbose>0) message("Pruning features using minKME = ",minKMEtoStay)
+ ngrey <- sum(colors == 'grey')
+ for(k in unique(colors)) {
+ if (! paste0("ME",k) %in% colnames(MEs)) next
+ ii <- which(colors == k)
+ if(length(ii)>1) {
+ eg <- MEs[,paste0("ME",k)]
+ kme <- cor(datExpr[,ii, drop=FALSE], eg, use="pairwise")[,1]
+ kme.sign <- as.numeric(names(which.max(table(sign(kme)))))
+ kme <- kme * kme.sign
+ ii <- ii[which(kme < minKMEtoStay)]
+ if(length(ii)) colors[ii] <- 'grey'
+ }
+ }
+ ngrey <- sum(colors == 'grey') - ngrey
+ if(verbose>0) message("Pruned ",ngrey, " low KME features")
+ }
+
+ ## Merge similar modules
+ unmergedColors <- colors
+ if(mergeCutHeight>0 && length(MEs)>1) {
+ if(verbose>0) message("Merging similar modules: mergeCutHeight = ",mergeCutHeight)
+ merge <- wgcna.mergeCloseModules(datExpr, colors, cutHeight=mergeCutHeight, MEs=MEs)
+ unmergedColors <- colors
+ colors <- merge$colors
+ MEs <- merge$MEs
+ if(verbose>0) {
+ n0 <- length(unique(unmergedColors))
+ n1 <- length(unique(colors))
+ message("Merged ", n0 - n1, " modules")
+ }
+ }
+
+ ## filter on minModuleSize
+ if(minModuleSize>1) {
+ too.small <- names(which(table(colors) < minModuleSize))
+ too.small
+ if(length(too.small)) {
+ if(verbose>0) message("Removing ",length(too.small)," too small modules")
+ colors[colors %in% too.small] <- "grey"
+ }
+ }
+
+ ## Update MEs
+ if("grey" %in% colors && !"MEgrey" %in% names(MEs)) {
+ MEs <- WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes
+ }
+ MEs <- MEs[sub("^ME","",names(MEs)) %in% colors]
+
+ # Rename to numeric
+ if(numericLabels) {
+ if(verbose>0) message("Renaming to numeric labels")
+ colorOrder <- names(sort(table(colors),decreasing=TRUE))
+ colorOrder <- unique(c("grey", colorOrder))
+ colors <- match(colors, colorOrder)-1
+ unmergedColors <- match(unmergedColors, colorOrder)-1
+ mecolor <- sub("^ME","",names(MEs))
+ names(MEs) <- paste0("ME",match(mecolor, colorOrder)-1)
+ } else {
+ # Rename to standard colors, most frequent first
+ if(verbose>0) message("Renaming to standard colors")
+ colorOrder <- names(sort(table(colors),decreasing=TRUE))
+ colorOrder <- unique(c("grey", colorOrder))
+ newcolor <- setdiff( WGCNA::standardColors(), "grey")
+ n0 <- length(colorOrder)
+ n1 <- length(newcolor)
+ if(n1 < n0) newcolor <- make.unique(rep(newcolor,ceiling(n0/n1)))
+ newcolor <- unique(c("grey", newcolor))
+ colors <- newcolor[match(colors, colorOrder)]
+ unmergedColors <- newcolor[match(unmergedColors, colorOrder)]
+ mecolor <- sub("^ME","",names(MEs))
+ names(MEs) <- paste0("ME",newcolor[match(mecolor, colorOrder)])
+ }
+
+ names(colors) <- colnames(datExpr)
+ names(unmergedColors) <- colnames(datExpr)
+
+ net <- list()
+ net$colors <- colors
+ net$unmergedColors <- unmergedColors
+ net$MEs <- MEs
+ net$goodSamples <- rep(TRUE, nrow(datExpr))
+ net$goodGenes <- rep(TRUE, ncol(datExpr))
+ net$dendrograms <- list( geneTree )
+ net['TOMFiles'] <- list(NULL)
+ net$blockGenes <- list(1:ncol(datExpr))
+ net$blocks <- rep(1, ncol(datExpr))
+ net$MEsOK <- TRUE
+ net$power <- power
+ if(returnTOM) net$TOM <- TOM
+ return(net)
+}
+
+#' Faster implementation of TOM computation using low-rank SVD
+#' approximation.
+#' @export
+fastTOMsimilarity <- function(A, tomtype="signed", lowrank=20) {
+ #https://stackoverflow.com/questions/56574729
+ #
+ #Given square symmetric adjacency matrix A, its possible to
+ #calculate the TOM matrix W without the use of for loops, which
+ #speeds up the process tremendously
+ if(!tomtype %in% c("signed","unsigned")) {
+ stop("only works for signed and unsigned tomtype")
+ }
+
+ ## Adjacency matrix A can be approximated with SVD. This can make
+ ## TOM calculation much faster.
+ diag(A) <- 0
+ if(lowrank > (ncol(A)/2) ) lowrank <- -1
+ if(lowrank>0) {
+ res <- try( irlba::irlba(A, nv=lowrank) )
+ if(!"try-error" %in% class(res)) {
+ U <- res$u %*% diag(sqrt(res$d))
+ L <- U %*% (Matrix::t(U) %*% U) %*% Matrix::t(U)
+ } else {
+ message("[fastTOMsimilarity] Warning: irlba error. nv = ", lowrank)
+ L <- A %*% A ## full computation
+ }
+ } else {
+ L <- A %*% A
+ }
+ k <- Matrix::colSums(A)
+ Kmat <- outer(k, k, FUN = function(x, y) pmin(x, y))
+ D <- (Kmat + 1 - A)
+ W <- (L + A) / D
+ diag(W) <- 1
+ W <- as.matrix(W)
+ dimnames(W) <- dimnames(A)
+ W <- pmax(W, 0) ## sometimes has negative values...
+ return(W)
}
-wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) {
+
+wgcna.mergeCloseModules <- function(datExpr, colors, cutHeight, MEs=NULL) {
+ if(is.null(MEs)) {
+ MEs = WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes
+ dim(MEs)
+ }
+ hc <- hclust(as.dist(1 - cor(MEs)), method="average")
+ idx <- cutree(hc, h=cutHeight)
+ names(idx) <- sub("^ME","",names(idx))
+ table(idx)
+ new.colors <- colors
+ m=2
+ for(m in unique(idx)) {
+ cc <- names(which(idx == m))
+ cc <- setdiff(cc, "grey") ## never merge grey
+ ii <- which(colors %in% cc)
+ new.colors[ii] <- cc[1]
+ }
+ new.MEs = WGCNA::moduleEigengenes(datExpr, colors = new.colors)$eigengenes
+ list(
+ colors = new.colors,
+ MEs = new.MEs
+ )
+}
+
+##---------------------------------------------------------------------
+## Gene statistics
+##---------------------------------------------------------------------
+
+#' Compute general feature statistics after WGCNA results.
+#'
+#'
+#'
+wgcna.computeGeneStats <- function(net, datExpr, datTraits, TOM) {
+
+ ## align
+ kk <- intersect(rownames(datExpr), rownames(datTraits))
+ datExpr <- datExpr[kk, , drop = FALSE]
+ datTraits <- datTraits[kk, , drop = FALSE]
+
## Define numbers of genes and samples
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)
@@ -345,79 +951,122 @@ wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) {
moduleTraitPvalue <- WGCNA::corPvalueStudent(moduleTraitCor, nSamples)
## Module membership correlation (with p-values)
- moduleMembership <- cor(datExpr, net$MEs, use = "p")
+ moduleMembership <- cor(datExpr, net$MEs, use = "pairwise.complete")
MMPvalue <- WGCNA::corPvalueStudent(as.matrix(moduleMembership), nSamples)
## Gene-trait significance (trait correlation) (with p-values)
- traitSignificance <- cor(datExpr, datTraits, use = "p")
- GSPvalue <- WGCNA::corPvalueStudent(as.matrix(traitSignificance), nSamples)
+ traitSignificance <- cor(datExpr, datTraits, use = "pairwise.complete")
+ TSPvalue <- WGCNA::corPvalueStudent(as.matrix(traitSignificance), nSamples)
## Fold-change
+ foldChange <- NULL
+ foldChangePvalue <- NULL
is.binary <- apply(datTraits, 2, function(a) length(unique(a[!is.na(a)])) == 2)
- binY <- datTraits[, which(is.binary), drop = FALSE]
- lm <- lapply(binY, function(y) {
- gx.limma(t(datExpr), y,
- lfc = 0, fdr = 1,
- sort.by = "none", verbose = 0
- )
- })
-
- foldChange <- sapply(lm, function(m) m$logFC)
- foldChangePvalue <- sapply(lm, function(m) m$P.Value)
- rownames(foldChange) <- rownames(lm[[1]])
- rownames(foldChangePvalue) <- rownames(lm[[1]])
-
+ is.binary
+ binY <- NULL
+ if(any(is.binary)) {
+ binY <- datTraits[, which(is.binary), drop = FALSE]
+ nmin <- apply(binY, 2, function(x) min(table(x)))
+ binY <- binY[,which(nmin>=2),drop=FALSE]
+ }
+ if(!is.null(binY) && NCOL(binY)>0) {
+ lm <- list()
+ i=1
+ for(i in 1:ncol(binY)) {
+ y <- 1*binY[,i]
+ X <- t(datExpr)
+ suppressWarnings(suppressMessages(
+ res <- try(gx.limma(X, y, lfc=0, fdr=1, sort.by="none", verbose=0, max.na=1))
+ ))
+ if(!"try-error" %in% class(res)) {
+ k <- colnames(binY)[i]
+ lm[[k]] <- res
+ }
+ }
+ lm <- lm[!sapply(lm,is.null)]
+ foldChange <- sapply(lm, function(m) m$logFC)
+ foldChangePvalue <- sapply(lm, function(m) m$P.Value)
+ if(length(lm)==1) {
+ foldChange <- cbind(foldChange)
+ foldChangePvalue <- cbind(foldChangePvalue)
+ }
+ rownames(foldChange) <- rownames(lm[[1]])
+ rownames(foldChangePvalue) <- rownames(lm[[1]])
+ }
+
# Continuous traits (not always present)
contY <- datTraits[, which(!is.binary), drop = FALSE]
- if (ncol(contY) > 0) {
- contlm <- lapply(contY, function(y) {
- rho <- cor(datExpr, y, use = "pairwise")[, 1]
- P.Value <- cor.pvalue(rho, length(y))
+ foldChange.cont <- NULL
+ foldChangePvalue.cont <- NULL
+ dim(contY)
+ if(NCOL(contY) > 0) {
+ contlm <- apply(contY, 2, function(y) {
+ rho <- cor(datExpr, y, use="pairwise")[,1]
+ P.Value <- WGCNA::corPvalueStudent(rho, n = length(y))
data.frame(rho, P.Value)
})
+ contlm <- contlm[!sapply(contlm, is.null)]
foldChange.cont <- sapply(contlm, function(m) m$rho)
foldChangePvalue.cont <- sapply(contlm, function(m) m$P.Value)
+ if(length(contlm)==1) {
+ foldChange.cont <- cbind(foldChange.cont)
+ foldChangePvalue.cont <- cbind(foldChangePvalue.cont)
+ }
rownames(foldChange.cont) <- rownames(contlm[[1]])
rownames(foldChangePvalue.cont) <- rownames(contlm[[1]])
-
- # Merge
- foldChange <- cbind(foldChange, foldChange.cont)
- foldChangePvalue <- cbind(foldChangePvalue, foldChangePvalue.cont)
+ colnames(foldChange.cont) <- colnames(contY)
+ colnames(foldChangePvalue.cont) <- colnames(contY)
}
+ # Merge
+ foldChange <- cbind(foldChange, foldChange.cont)
+ foldChangePvalue <- cbind(foldChangePvalue, foldChangePvalue.cont)
+
## Gene Centrality. Compute centrality of gene in Module subgraph
## using TOM matrix.
- adj <- TOM
-# diag(adj) <- NA
- adj[which(abs(adj) < 0.01)] <- 0
- gr <- igraph::graph_from_adjacency_matrix(
- adj,
- mode = "undirected", weighted = TRUE, diag = FALSE
- )
- geneCentrality <- rep(NA, nrow(adj))
- names(geneCentrality) <- rownames(adj)
- me.genes <- tapply(names(net$colors), net$colors, list)
- gg <- me.genes[[1]]
- for (gg in me.genes) {
- gr1 <- igraph::subgraph(gr, gg)
- ct <- igraph::page_rank(gr1, weights = NULL)$vector
- ct <- ct / mean(ct, na.rm = TRUE)
- geneCentrality[gg] <- ct
+ geneCentrality <- NULL
+ if(!is.null(TOM)) {
+ if(is.null(dimnames(TOM))) dimnames(TOM) <- list(colnames(datExpr),colnames(datExpr))
+ adj <- TOM
+ diag(adj) <- 0
+ adj[which(abs(adj) < 0.01)] <- 0
+ gr <- igraph::graph_from_adjacency_matrix(
+ adj, mode = "undirected", weighted = TRUE, diag = FALSE
+ )
+ geneCentrality <- rep(NA, nrow(adj))
+ names(geneCentrality) <- rownames(adj)
+ me.genes <- tapply(names(net$colors), net$colors, list)
+ for (gg in me.genes) {
+ gr1 <- igraph::subgraph(gr, gg)
+ ct <- igraph::page_rank(gr1, weights = NULL)$vector
+ ct <- ct / mean(ct, na.rm = TRUE)
+ geneCentrality[gg] <- ct
+ }
}
- ## propotion variance explained (PVE) per module
- propVarExplained <- WGCNA::propVarExplained(
- datExpr,
- colors = net$colors, MEs = net$MEs
- )
-
+ ## force align. Sometime they are shorter for some reason...
+ gg <- rownames(moduleMembership)
+ matMatch <- function(m, gg) {
+ if(is.null(m)) return(NULL)
+ m <- m[match(gg,rownames(m)),,drop=FALSE]
+ rownames(m) <- gg
+ return(m)
+ }
+
+ ##moduleMembership <- matMatch(moduleMembership)
+ MMPvalue <- matMatch(MMPvalue, gg)
+ traitSignificance = matMatch(traitSignificance, gg)
+ TSPvalue = matMatch(TSPvalue, gg)
+ foldChange = matMatch(foldChange, gg)
+ foldChangePvalue = matMatch(foldChangePvalue, gg)
+
stats <- list(
moduleTraitCor = moduleTraitCor,
moduleTraitPvalue = moduleTraitPvalue,
moduleMembership = moduleMembership,
MMPvalue = MMPvalue,
traitSignificance = traitSignificance,
- GSPvalue = GSPvalue,
+ TSPvalue = TSPvalue,
foldChange = foldChange,
foldChangePvalue = foldChangePvalue,
geneCentrality = geneCentrality
@@ -426,42 +1075,271 @@ wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) {
return(stats)
}
+#'
+#'
+#' @export
+wgcna.getGeneStats <- function(wgcna, trait, module=NULL, plot = TRUE,
+ stats = NULL, labels = NULL, showlogFC = TRUE,
+ col = NULL, main = NULL) {
+
+ if(!is.null(stats)) {
+ features <- rownames(stats[['moduleMembership']])
+ if(is.null(stats)) stop("must give stats")
+ if(is.null(labels)) stop("must give labels")
+ } else if(!is.null(wgcna)) {
+ labels <- wgcna$net$labels
+ features <- names(wgcna$net$colors)
+ stats <- wgcna$stats
+ } else {
+ stop("must supply wgcna or trait")
+ }
+
+ is.color <- mean(labels %in% c("grey",WGCNA::standardColors())) > 0.9
+ if(is.color) {
+ prefix <- substring(rownames(stats[['moduleTraitCor']]),1,2)[1]
+ labels <- paste0(prefix, labels)
+ }
+
+ p1 <- c("moduleMembership", "MMPvalue")
+ p2 <- c("traitSignificance", "TSPvalue", "foldChange", "foldChangePvalue")
+ p3 <- c("geneCentrality")
+
+ df <- data.frame(
+ feature = features,
+ module = labels
+ )
+ head(df)
+
+ ## get moduleMembership
+ mm.stats <- stats[p1]
+ mm.label <- colnames(mm.stats[[1]])
+ idx <- cbind(1:length(labels), match(labels, mm.label))
+ A1 <- sapply( mm.stats, function(x) x[idx])
+ rownames(A1) <- labels
+ df <- cbind(df, A1)
+
+ ## get traitSig columns for trait
+ tt.cols <- colnames(stats[[p2[1]]])
+ if(is.null(trait)) trait <- tt.cols
+ trait <- intersect(trait, tt.cols)
+
+ if (length(trait)>1) {
+ A2 <- lapply(stats[p2], function(x) x[,trait])
+ for(i in 1:length(A2)) colnames(A2[[i]]) <- paste0(names(A2)[i],".",colnames(A2[[i]]))
+ A2 <- do.call(cbind, A2)
+ df <- cbind(df, A2)
+ } else if(length(trait)==1) {
+ A2 <- lapply(stats[p2], function(x) x[,trait])
+ A2 <- do.call(cbind, A2)
+ df <- cbind(df, A2)
+ } else {
+ message("[wgcna.getGeneStats] ERROR: trait not in stats object")
+ return(NULL)
+ }
+
+ A3 <- stats[[p3]]
+ if(!is.null(A3)) {
+ df <- cbind(df, centrality = A3)
+ }
+
+ ## calculate score
+ sel <- c("moduleMembership", "traitSignificance", "foldChange", "centrality")
+ sel <- intersect(sel, colnames(df))
+ #df1 <- pmax(as.matrix(df[, sel]), 1e-8)
+ df1 <- as.matrix(abs(df[, sel]))
+ score <- exp(rowMeans(log(1e-8 + df1))) * sign(df[,"foldChange"])
+ df <- data.frame(df[,1:2], score = score, df[,-c(1,2)])
+
+ if (!is.null(module)) {
+ ##sel <- which(df$module == module)
+ sel <- grep( paste0(module,"$"), df$module)
+ df <- df[sel, , drop = FALSE]
+ }
+
+ ## reorder, sort on score
+ score.sign <- sign(median(df$score, na.rm=TRUE))
+ df <- df[order(-df$score * score.sign), ]
+
+ if (plot) {
+ cols <- c("moduleMembership", "traitSignificance")
+ if(showlogFC) {
+ cols <- c(cols, "foldChange", "centrality")
+ }
+ cols <- intersect(cols, colnames(df))
+ df1 <- df[, cols]
+ col1 <- wgcna.labels2colors(labels[rownames(df1)])
+ if (!is.null(col)) col1 <- col
+ pairs(df1, col = col1, oma=c(1,1,1,1)*1.8)
+ if (is.null(main)) {
+ main <- paste("Gene significance for module", module, "and trait", trait)
+ }
+ title(main, line = 3, cex.main = 1.15)
+ }
+
+ rownames(df) <- df$feature
+ df
+}
+
+#' Compute gene statistics with original datExpr but with consensus
+#' colors/labels for each layers. A separate function
+#' wgcna.getConsensusGeneStats() extracts clean tables from this
+#' results object.
+#'
+#' @export
+wgcna.computeConsensusGeneStats <- function(cons) {
+ k <- names(cons$layers)[1]
+ stats <- list()
+ for(k in names(cons$layers)) {
+ w <- cons$layers[[k]]
+ colors <- cons$net$colors
+ wMEs <- cons$net$multiMEs[[k]]$data
+ wnet <- list( MEs = wMEs, colors = colors)
+ stats[[k]] <- wgcna.computeGeneStats(
+ wnet, w$datExpr, w$datTraits, TOM=NULL)
+ }
+ return(stats)
+}
+
+#'
+#'
+#' @export
+wgcna.getConsensusGeneStats <- function(cons, stats, trait, module=NULL) {
+
+ ## create extended color vector
+ labels = paste0("ME",cons$net$colors)
+ gstats <- list()
+ for(k in names(stats)) {
+ gstats[[k]] <- wgcna.getGeneStats(
+ wgcna = NULL,
+ stats = stats[[k]],
+ labels = labels,
+ trait = trait,
+ plot = FALSE,
+ module = module,
+ col = NULL,
+ main = NULL
+ )
+ }
+
+ ## Align rows
+ ff <- gstats[[1]]$feature
+ for(k in names(gstats)) {
+ ii <- match(ff, gstats[[k]]$feature)
+ gstats[[k]] <- gstats[[k]][ii,]
+ }
+
+ ## Compute consensus statistics. Consensus statistics are computed
+ ## as geometric mean of score variables, and/or maximum pvalue for
+ ## p.value columns.
+ xcols <- c(3,4,6,8)
+ pcols <- c(10,5,7,9)
+ pcols1 <- c(5,7,9)
+ xcols <- c("score","moduleMembership","traitSignificance","foldChange")
+ pcols <- c("scorePvalue","MMPvalue","TSPvalue","foldChangePvalue")
+ pcols1 <- pcols[-1]
+ for(i in 1:length(gstats)) {
+ gstats[[i]][,'scorePvalue'] <- apply(gstats[[i]][,pcols1],1,max,na.rm=TRUE)
+ }
+ ##xc <- lapply(gstats, function(x) log(abs(x[,xcols])*(x[,pcols]<0.05)))
+ xc <- lapply(gstats, function(x) log(abs(x[,xcols])))
+ xc <- exp(Reduce('+', xc) / length(xc))
+ xp <- Reduce(pmax, lapply(gstats, function(x) x[,pcols]))
+ df3 <- data.frame( gstats[[1]][,1:2], xc, xp)
+ df3 <- df3[,colnames(gstats[[1]])]
+ head(df3)
+
+ ## Determine consensus status. Feature is 'C' (concordant) if sign
+ ## in all layers are equal and significant. 'D' (discordant) if sign
+ ## if not equal in all layers but significant. 'N' is any is
+ ## non-significant.
+ sign.pos <- Reduce('*',lapply(gstats,function(g) sign(g$score) == 1))
+ sign.neg <- Reduce('*',lapply(gstats,function(g) sign(g$score) == -1))
+ allsig <- Reduce('*',lapply(gstats,function(g) (g$scorePvalue) < 0.05))
+ table(allsig)
+ consensus <- c("D","C")[ 1 + 1*(sign.pos | sign.neg)]
+ consensus[which(allsig==0)] <- 'N'
+ cons.df <- data.frame(df3[,1:2], consensus, df3[,-c(1,2)])
+ head(cons.df)
+
+ ## This creates the full stats matrix (all subgroups)
+ df1 <- gstats[[1]][,c("feature","module")]
+ df2 <- gstats[[1]][,0]
+ cols <- colnames(gstats[[1]])[-c(1:2)]
+ for(k in cols ) {
+ xx <- sapply(gstats, function(g) g[,k])
+ df2[[k]] <- I(xx)
+ }
+ df2 <- do.call(cbind, lapply(df2,unclass))
+ newcols <- unlist(lapply(cols, function(k) paste0(k,'.',names(gstats))))
+ colnames(df2) <- newcols
+ full.df <- data.frame(df1, consensus=cons.df$consensus, df2)
+
+
+ ## sort??
+ ii <- order(-cons.df$score * sign(mean(cons.df$score,na.rm=TRUE)))
+ cons.df <- cons.df[ii,]
+ full.df <- full.df[ii,]
+
+ list(
+ consensus = cons.df,
+ full = full.df
+ )
+}
+
## ----------------------------------------------------
## Perform geneset analysis on modules
## ----------------------------------------------------
-wgcna.compute_enrichment <- function(wgcna, pgx,
- method = c("fisher", "gsetcor", "mmcor", "wcor"),
- ntop = 200,
- annot = NULL, GMT = NULL,
- filter = "^PATHWAY|^HALLMARK|^GO|^C[1-9]") {
- ## collapse features to symbol
- geneX <- t(wgcna$datExpr)
- symbol.col <- NULL
- ## annot=NULL;GMT=NULL
- if (!is.null(pgx)) {
- GMT <- pgx$GMT
- gsetX <- pgx$gsetX
- annot <- pgx$genes
- if (!is.null(annot)) {
- symbol.col <- intersect(c("symbol", "gene_name"), colnames(annot))[1]
- }
+
+#' @export
+wgcna.computeModuleEnrichment <- function(wgcna,
+ multi = FALSE,
+ methods = c("fisher","gsetcor","xcor"),
+ ntop = 200,
+ xtop = 100,
+ annot = NULL,
+ GMT = NULL,
+ gsetX = NULL,
+ filter = NULL
+ ) {
+
+ if(!multi) {
+ wgcna <- list(gx = wgcna)
}
- if (is.null(GMT)) {
- stop("FATAL. must supply GMT matrix")
+ if(!any( c("gx","px") %in% names(wgcna))) {
+ message("ERROR: datasets must have gx or px datatype!")
return(NULL)
}
- if (is.null(gsetX)) {
- stop("FATAL. must supply GMT matrix")
- return(NULL)
+
+ ## collapse features to symbol
+ selx <- intersect(c("gx","px"), names(wgcna))[1]
+ geneX <- t(as.matrix(wgcna[[selx]]$datExpr))
+ symbol.col <- NULL
+
+ if(is.null(annot)) {
+ gg <- lapply(wgcna, function(w) colnames(w$datExpr))
+ gg1 <- lapply(names(wgcna), function(a) paste0(a,":",gg[[a]]))
+ gg <- as.character(unlist(gg))
+ gg1 <- as.character(unlist(gg1))
+ annot <- data.frame(feature = gg1, symbol = gg)
}
- if (!is.null(annot)) {
- geneX <- rename_by(geneX, annot, symbol.col)
- } else {
- rownames(geneX) <- gsub(".*:|[_ -].*", "", rownames(geneX))
+
+ if(is.null(GMT)) {
+ message("Using playdata GSETxGENE genesets")
+ GMT <- Matrix::t(playdata::GSETxGENE)
}
+ if(!is.null(annot)) {
+ symbol.col <- intersect(c("symbol","gene_name"),colnames(annot))[1]
+ geneX <- rename_by2(geneX, annot, symbol.col)
+ GMT <- rename_by2( GMT, annot, symbol.col)
+ }
+ if(length(intersect(rownames(geneX),rownames(GMT)))==0) {
+ message("ERROR: no overlap for geneX and GMT features. Please add annotation.")
+ return(NULL)
+ }
+
bg <- rownames(geneX)
bg <- intersect(bg, rownames(GMT))
if (length(bg) == 0) {
@@ -474,99 +1352,356 @@ wgcna.compute_enrichment <- function(wgcna, pgx,
if (length(sel)) G1 <- G1[, sel, drop = FALSE]
}
G1 <- G1[, which(Matrix::colSums(G1 != 0) >= 4), drop = FALSE]
- gmt <- mat2gmt(G1)
- W <- as.matrix(wgcna$W)
- if (!is.null(annot)) {
- W <- rename_by2(W, annot, symbol.col)
+ if(is.null(gsetX)) {
+ gsetX <- plaid::plaid( geneX, G1)
}
- rho.list <- list()
- pval.list <- list()
-
- ## Perform fisher-test on ME genes
- if ("fisher" %in% method) {
- message("[wgcna.compute_enrichment] calculating Fisher tests...")
- i <- 1
- rho <- NULL
- pval <- NULL
- me.genes <- wgcna$me.genes
- for (i in 1:length(me.genes)) {
- gg <- me.genes[[i]]
- if (!is.null(annot)) {
- gg <- probe2symbol(me.genes[[i]], annot, query = symbol.col)
- }
- ## perform Fisher test
- rr <- try(gset.fisher(gg, gmt, background = bg, fdr = 1, min.genes = 4, verbose = 0))
- if (!"try-error" %in% class(rr)) {
- rr <- rr[match(names(gmt), rownames(rr)), ]
- if (is.null(rho)) {
- rho <- cbind(rr$odd.ratio)
- pval <- cbind(rr$p.value)
- colnames(rho) <- colnames(pval) <- names(me.genes)[i]
- } else {
- rho <- cbind(rho, rr$odd.ratio)
- pval <- cbind(pval, rr$p.value)
- me <- names(me.genes)[i]
- colnames(rho)[ncol(rho)] <- me
- colnames(pval)[ncol(pval)] <- me
- }
- rownames(rho) <- names(gmt)
- rownames(pval) <- names(gmt)
+ ## align dimensions
+ ss <- intersect(rownames(gsetX), colnames(G1))
+ G1 <- G1[,ss,drop=FALSE]
+ gsetX <- gsetX[ss,]
+
+ ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs))
+ me.genes <- lapply(wgcna, function(w) w$me.genes)
+ names(me.genes) <- NULL
+ me.genes <- do.call(c, me.genes)
+ me.genes <- lapply(me.genes, function(g) probe2symbol(g, annot, query = symbol.col))
+
+ ## compute most correlated gx/px genes. limit xtop if geneX is too
+ ## small
+ xtop <- min(xtop, round(nrow(geneX)/4))
+ nbx.genes <- list()
+ for(i in 1:length(wgcna)) {
+ nbx.cor <- cor(t(geneX), ME[[i]])
+ for(k in colnames(nbx.cor)) {
+ if(is.null(xtop)) {
+ n <- length(wgcna[[i]]$me.genes[[k]])
+ } else {
+ n <- xtop
}
+ nbx.genes[[k]] <- head(names(sort(-nbx.cor[,k])), n)
}
- ## handle infinite
- rho[is.infinite(rho)] <- 2 * max(rho, na.rm = TRUE)
- rho[is.na(rho)] <- 0
- pval[is.na(pval)] <- 1
- rho.list[["fisher"]] <- rho
- pval.list[["fisher"]] <- pval
}
- ## Here we correlate geneset score (averageCLR) with the module
+ ## add nearest expression neighbors to module genes
+ nbx.genes <- nbx.genes[names(me.genes)]
+ for(k in names(me.genes)) {
+ me.genes[[k]] <- unique(c(me.genes[[k]], nbx.genes[[k]]))
+ }
+
+ ## make single ME matrix
+ MEx <- do.call(cbind, ME)
+
+ gsea <- wgcna.run_enrichment_methods(
+ ME = MEx,
+ me.genes = me.genes,
+ G = G1,
+ geneX = geneX,
+ gsetX = gsetX,
+ methods = methods,
+ ntop = ntop
+ )
+
+ return(gsea)
+}
+
+#'
+#'
+#' @export
+wgcna.getModuleCrossGenes <- function(wgcna, ref=NULL, ngenes = 100,
+ multi=TRUE, modules=NULL)
+{
+
+ if(!multi) {
+ wgcna <- list(gx = wgcna)
+ ref <- 'gx'
+ }
+
+ if(is.null(ref)) ref <- head(intersect(names(wgcna),c("gx","px")),1)
+ if(is.null(ref) || !ref %in% names(wgcna)) ref <- names(wgcna)[1]
+
+ W <- wgcna[[ref]]
+ geneX <- W$datExpr
+
+ MEx <- sapply(wgcna, function(w) as.matrix(w$net$MEs))
+ MEx <- do.call(cbind, MEx)
+
+ if(!is.null(modules)) {
+ modules <- intersect(modules, colnames(MEx))
+ MEx <- MEx[,modules,drop=FALSE]
+ }
+
+ nbx.cor <- cor(geneX, MEx)
+
+ nbx.list <- list()
+ for(k in colnames(nbx.cor)) {
+ ii <- head(order(-nbx.cor[,k]), ngenes)
+ rho <- nbx.cor[ii,k]
+ gene <- rownames(nbx.cor)[ii]
+ me <- W$net$labels[gene]
+ nbx.list[[k]] <- data.frame( gene = gene, rho = rho, module = me)
+ }
+
+ ##if(length(nbx.list)==1) nbx.list <- nbx.list[[1]]
+ return(nbx.list)
+}
+
+
+#' Compute consensus enrichment by calculating overlapping enriched
+#' terms.
+#'
+wgcna.computeConsensusModuleEnrichment <- function(cons,
+ GMT,
+ annot,
+ methods = c("fisher","gsetcor","xcor"),
+ min.genes = 10,
+ ntop = 400 )
+{
+ if(0) {
+ methods = c("fisher","gsetcor","xcor")
+ min.genes = 10
+ ntop = 400
+ annot = NULL
+ GMT <- Matrix::t(playdata::GSETxGENE)
+ }
+
+ gseaX <- list()
+ i=1
+ for(i in 1:length(cons$datExpr)) {
+
+ geneX <- t(cons$datExpr[[i]])
+ dim(geneX)
+
+ ## Rename everything to symbols
+ if(!is.null(annot)) {
+ geneX <- rename_by2(geneX, annot, "symbol")
+ GMT <- rename_by2(GMT, annot, "symbol")
+ }
+ ng <- length(intersect(rownames(geneX), rownames(GMT)))
+ if(ng == 0) {
+ message("[wgcna.computeConsensusModuleEnrichment] ERROR. No symbol overlap.")
+ return(NULL)
+ }
+ symbols <- intersect(rownames(GMT),rownames(geneX))
+ message("[wgcna.computeConsensusModuleEnrichment] number of symbols: ", length(symbols))
+ geneX <- geneX[symbols,]
+ GMT <- GMT[symbols,]
+
+ ## select on minimum gene sets size
+ sel <- which( Matrix::colSums(GMT!=0) >= min.genes )
+ GMT <- GMT[,sel]
+
+ ## Compute single-samples gene set expression
+ gsetX <- plaid::plaid(geneX, matG=GMT)
+ dim(gsetX)
+
+ ## Create extended Eigengene matrix (ME). ME should be nicely
+ ## normalized/scaled so we just rbind across datasets
+ ME <- cons$net$multiMEs[[i]]$data
+ dim(ME)
+
+ ## get genes in modules
+ me.genes <- tapply(names(cons$net$colors), cons$net$colors, list)
+ names(me.genes) <- paste0("ME",names(me.genes))
+ if(!is.null(annot)) {
+ me.genes <- lapply(me.genes, function(gg) probe2symbol(gg, annot))
+ }
+ me.genes <- lapply(me.genes, function(g) intersect(g, symbols))
+ rownames(ME)
+ colnames(geneX) <- rownames(ME)
+ colnames(gsetX) <- rownames(ME)
+
+ k <- names(cons$datExpr)[i]
+ gseaX[[k]] <- wgcna.run_enrichment_methods(
+ ME,
+ me.genes = me.genes,
+ GMT= GMT,
+ geneX = geneX,
+ gsetX = gsetX,
+ methods = methods,
+ min.genes = min.genes,
+ ntop = ntop)
+ }
+
+ cons.gsea <- list()
+ m=1
+ for(m in names(gseaX[[1]])) {
+ xx <- lapply( gseaX, function(g) g[[m]] )
+ sel <- Reduce(intersect, lapply(xx, rownames))
+ if(length(sel) > 0) {
+ if(length(sel)==1) sel <- c(sel,sel) ## length==1 crashes...
+ xx <- lapply(xx, function(x) x[sel,,drop=FALSE] )
+ xx.score <- sapply(xx, function(x) x[,"score"])
+ colnames(xx.score) <- paste0("score.",colnames(xx.score))
+
+ xx.pvalue <- lapply(xx, function(x) x[,grep("^p",colnames(x))])
+ xx.pvalue <- do.call(cbind, xx.pvalue)
+
+ m.score <- rowMeans(xx.score,na.rm=TRUE)
+ m.pvalue <- apply(sapply(xx, function(x) x[,"p.value"]), 1, max, na.rm=TRUE)
+ m.qvalue <- p.adjust( m.pvalue )
+ df <- data.frame(
+ module = xx[[1]]$module,
+ geneset = xx[[1]]$geneset,
+ score = m.score,
+ xx.score,
+ p.value = m.pvalue,
+ q.value = m.qvalue,
+ overlap = xx[[1]]$overlap,
+ genes = xx[[1]]$genes,
+ xx.pvalue
+ )
+ df <- df[order(df$p.value),]
+ #df <- df[!duplicated(df$geneset),,drop=FALSE]
+ cons.gsea[[m]] <- df
+ }
+ }
+
+ return(cons.gsea)
+}
+
+
+wgcna.run_enrichment_methods <- function(ME, me.genes, GMT, geneX, gsetX,
+ methods = c("fisher","gsetcor","xcor"),
+ ntop = 400, min.genes = 3
+ )
+{
+
+ rho.list <- list()
+ pval.list <- list()
+
+ ## align matrices
+ bg <- intersect(rownames(GMT), rownames(geneX))
+ ss <- intersect(colnames(GMT), rownames(gsetX))
+ GMT <- GMT[bg,ss]
+ geneX <- geneX[bg,]
+ gsetX <- gsetX[ss,]
+
+ ## select on minimum genes
+ sel <- which( Matrix::colSums(GMT!=0) >= min.genes )
+ GMT <- GMT[,ss]
+ gsetX <- gsetX[ss,]
+
+ message("Computing enrichment for ", length(ss), " genesets")
+
+ ## Here we correlate geneset score (averageCLR) with the module
## eigengene (ME). This should select genesets correlated with the
## ME.
- if ("gsetcor" %in% method) {
- message("[wgcna.compute_enrichment] calculating geneset correlation...")
- gsetX <- gsetX[colnames(G1), ]
- ME <- as.matrix(wgcna$net$MEs)
- if (ncol(gsetX) > nrow(ME)) {
- kk <- sample(colnames(gsetX), nrow(ME))
- gsetX <- gsetX[, kk, drop = FALSE]
- }
- rc.rho <- cor(t(gsetX), ME, use = "pairwise")
- rc.pvalue <- cor.pvalue(rc.rho, n = nrow(ME))
+ if ("gsetcor" %in% methods && !is.null(gsetX)) {
+ message("[wgcna.run_enrichment_methods] calculating single-sample geneset correlation...")
+ rc.rho <- matrix(NA, ncol(GMT), ncol(ME))
+ rc.pvalue <- matrix(NA, ncol(GMT), ncol(ME))
+ dimnames(rc.rho) <- list( colnames(GMT), colnames(ME))
+ dimnames(rc.pvalue) <- list( colnames(GMT), colnames(ME))
+ jj <- which(rownames(gsetX) %in% colnames(GMT))
+ kk <- intersect(colnames(gsetX), rownames(ME)) ## common samples
+ rho.jj <- cor(t(gsetX[jj,kk]), ME[kk,], use = "pairwise")
+ #rc.pvalue <- cor.pvalue(rc.rho, n = length(kk))
+ pvalue.jj <- WGCNA::corPvalueStudent(rho.jj, n = length(kk))
+ ii <- match(rownames(gsetX)[jj], rownames(rc.rho))
+ rc.rho[ii,] <- rho.jj
+ rc.pvalue[ii,] <- pvalue.jj
rho.list[["gsetcor"]] <- rc.rho
pval.list[["gsetcor"]] <- rc.pvalue
}
- ## Here we correlate genes with the module eigengene (ME) then do
- ## a gset.rankcor() on the ME correlation.
- if ("mmcor" %in% method) {
- message("[wgcna.compute_enrichment] calculating MM correlation...")
- mm <- cor(t(geneX), wgcna$net$MEs, use = "pairwise")
- rc <- gset.rankcor(mm, G1, compute.p = TRUE) ## NEEDS CHECK!!!
- rho.list[["mmcor"]] <- rc$rho
- pval.list[["mmcor"]] <- rc$p.value
+ ## Here we correlate the module eigengene (ME) with genes and then
+ ## do a gset.rankcor() on the ME correlation.
+ if ("xcor" %in% methods) {
+ message("[wgcna.run_enrichment_methods] calculating eigengene GBA correlation...")
+ gba <- cor(t(geneX), ME, use = "pairwise")
+ rc <- gset.rankcor(gba, GMT, compute.p = TRUE) ## NEEDS CHECK!!!
+ rho.list[["xcor"]] <- rc$rho
+ pval.list[["xcor"]] <- rc$p.value
}
- ## Here we correlate genes with the module eigengene (ME) then do
- ## a gset.rankcor() on the ME correlation.
- if ("wcor" %in% method) {
- message("[wgcna.compute_enrichment] calculating W correlation...")
- rc <- gset.rankcor(W, G1, compute.p = TRUE) ## NEEDS CHECK!!!
- rho.list[["wcor"]] <- rc$rho
- pval.list[["wcor"]] <- rc$p.value
+ gmt <- mat2gmt(GMT)
+ if(1) {
+ ## we pre-select to make this faster
+ Pmin <- sapply(pval.list, function(P) apply(P,1,min))
+ sel <- head(order(rowMeans(apply(Pmin, 2, rank))), 5*ntop)
+ message("[wgcna.run_enrichment_methods] pre-selecting ",length(sel)," sets for fgsea/Fisher test...")
+ sel <- rownames(Pmin)[sel]
+ gmt <- gmt[sel]
+ }
+
+ ## fGSEA
+ if ("fgsea" %in% methods) {
+ message("[wgcna.run_enrichment_methods] calculating module fgsea...")
+ xrho <- cor(t(geneX), ME, use = "pairwise")
+ res <- list()
+ i=1
+ for(i in 1:ncol(xrho)) {
+ k <- colnames(xrho)[i]
+ res[[k]] <- fgsea::fgsea(gmt, xrho[,i]) ## NEEDS CHECK!!!
+ }
+ pw <- res[[1]]$pathway
+ res <- lapply(res, function(r) r[match(pw,r$pathway),])
+ nes <- sapply(res, function(r) r$NES)
+ pval <- sapply(res, function(r) r$pval)
+ rownames(nes) <- rownames(pval) <- pw
+ colnames(nes) <- colnames(pval) <- names(res)
+ rho.list[["fgsea"]] <- nes
+ pval.list[["fgsea"]] <- pval
}
+
+ ## Perform fisher-test on ME genes
+ if ("fisher" %in% methods) {
+
+ message("[wgcna.run_enrichment_methods] calculating Fisher tests...")
+ rho <- matrix(NA, length(gmt), ncol(ME))
+ pval <- matrix(NA, length(gmt), ncol(ME))
+ dimnames(rho) <- list( names(gmt), colnames(ME))
+ dimnames(pval) <- list( names(gmt), colnames(ME))
+
+ ## perform Fisher test for all modules using the module genes
+ i=1
+ for (i in 1:ncol(rho)) {
+ k <- colnames(rho)[i]
+ gg <- me.genes[[k]]
+ rr <- try(gset.fisher(gg, gmt, background = bg, fdr = 1,
+ min.genes = -1, verbose = 0, sort.by='none', no.pass=1))
+
+ if (!"try-error" %in% class(rr)) {
+ rr <- rr[match(rownames(rho), rownames(rr)), ]
+ rho[,i] <- rr$odd.ratio
+ pval[,i] <- rr$p.value
+ }
+ }
+
+ ## handle infinite or NA
+ rho[is.infinite(rho)] <- 2*max(rho, na.rm=TRUE) ## Inf odd.ratio
+ pval[is.na(pval)] <- 1
+ rho[is.na(rho)] <- 0
- ## Compute meta rank and pval
- meta.p <- Reduce(pmax, pval.list) ## NEED RETHINK!!!
+ rho.list[["fisher"]] <- rho
+ pval.list[["fisher"]] <- pval
+ }
+
+ lapply(rho.list, dim)
+
+ ## ensure dimensions
+ gsets <- Reduce(intersect, lapply( rho.list, rownames))
+ modules <- Reduce(intersect, lapply( rho.list, colnames))
+ rho.list <- lapply( rho.list, function(x) x[gsets,modules,drop=FALSE])
+ pval.list <- lapply( pval.list, function(x) x[gsets,modules,drop=FALSE])
+
+ ## Compute meta rank and pval. Handle NA for failing methods.
+ pvalNA <- lapply(pval.list, function(x) {x[is.na(x)]=0;x})
+ ##pvalNA <- lapply(pval.list, function(x) {x[is.na(x)]=1;x})
+ meta.p <- Reduce(pmax, pvalNA) ## NEED RETHINK!!!
meta.q <- apply(meta.p, 2, p.adjust, method = "fdr")
- rnk.list <- lapply(rho.list, function(x) apply(x, 2, rank) / nrow(x))
+
+ ## NEED RETHINK: how about negative FC???
+ rnk.list <- lapply(rho.list, function(x) apply(x, 2, rank, na.last='keep') / nrow(x))
meta.rnk <- Reduce("+", rnk.list) / length(rnk.list)
+ rnk.NAZERO <- lapply(rnk.list, function(x) {x[is.na(x)]=0;x})
+ rnk.NSUM <- Reduce('+', lapply(rnk.list, function(x) !is.na(x)))
+ meta.rnk <- Reduce('+', rnk.NAZERO) / rnk.NSUM
## create dataframe by module
- message("[wgcna.compute_enrichment] creating dataframes...")
+ message("[wgcna.run_enrichment_methods] creating dataframes...")
gse.list <- list()
i <- 1
for (i in 1:ncol(meta.p)) {
@@ -581,8 +1716,8 @@ wgcna.compute_enrichment <- function(wgcna, pgx,
q.value = meta.q[, i],
pv
)
- df <- df[order(-df$score), ]
- if (!is.null(ntop) && ntop > 0) df <- head(df, n = ntop)
+ df <- df[order(-abs(df$score)), ]
+ df <- head(df, ntop)
gse.list[[k]] <- df
}
@@ -591,272 +1726,1280 @@ wgcna.compute_enrichment <- function(wgcna, pgx,
k <- names(gse.list)[1]
for (k in names(gse.list)) {
gset <- rownames(gse.list[[k]])
- gg <- wgcna$me.genes[[k]]
- if (!is.null(annot)) {
- gg <- probe2symbol(gg, annot, query = symbol.col)
- } else {
- gg <- gsub(".*:", "", gg)
- }
+ gg <- me.genes[[k]]
set.genes <- lapply(gmt[gset], function(s) intersect(s, gg))
n0 <- sapply(gmt[gset], length)
n1 <- sapply(set.genes, length)
gse.genes[[k]] <- sort(table(unlist(set.genes)), decreasing = TRUE)
set.genes <- sapply(set.genes, function(g) paste(sort(g), collapse = "|"))
- gse.list[[k]]$genes <- set.genes
gse.list[[k]]$overlap <- paste0(n1, "/", n0)
+ gse.list[[k]]$genes <- set.genes
}
- ## compute gene enrichment
- gene.list <- list()
- ## message("[wgcna.compute_enrichment] computing gene enrichment...")
- ## k = names(gse.list)[1]
- ## for(k in names(gse.list)) {
- ## me <- gse.list[[k]]
- ## rnk <- me$score
- ## names(rnk) <- rownames(me)
- ## me.genes <- strsplit(me$genes,split='\\|')
- ## num.genes <- sapply(me.genes,length)
- ## if(any(num.genes>0)) {
- ## ii <- which(num.genes>0)
- ## me.names <- unlist(mapply(rep, rownames(me)[ii], num.genes[ii]))
- ## me.genes <- me.genes[ii]
- ## me.gmt <- tapply(me.names, unlist(me.genes), list)
- ## res <- fgsea::fgsea(me.gmt, rnk)
- ## res <- res[order(-res$NES),]
- ## gene.list[[k]] <- head(res,100)
- ## }
- ## }
+ return(gse.list)
+}
- res <- list(
- gsets = gse.list,
- genes = gene.list
- )
- return(res)
+wgcna.merge_block_dendrograms <- function(net, X, method = 1) {
+ ## This function is fragile: it can give a C stack limit error. In
+ ## case that happens you can increase the stack limit by running on
+ ## the cmd line: >>> ulimit -s unlimited
+ ##
+ hc <- net$dendrograms
+
+ ## merge block dendrogram
+ mx <- list()
+ for (b in 1:length(net$dendrograms)) {
+ ii <- which(net$goodGenes & net$blocks == b)
+ mx[[b]] <- colMeans(X[ii, ])
+ hc[[b]]$labels <- rownames(X)[ii]
+ }
+
+ if (length(hc) == 1) {
+ return(hc[[1]])
+ }
+
+ ## compute parent dendrogram
+ M <- do.call(rbind, mx)
+ hclust_p <- hclust(dist(M), method = "average")
+ dend_p <- as.dendrogram(hclust_p)
+ dend.list <- lapply(hc, as.dendrogram)
+
+ if (method == 1) {
+ merged <- ComplexHeatmap::merge_dendrogram(dend_p, dend.list)
+ } else {
+ mrg <- hclust_p$merge
+ merged_branch <- list()
+ k <- 1
+ for (k in 1:nrow(mrg)) {
+ i <- mrg[k, 1]
+ j <- mrg[k, 2]
+ if (i < 0) d1 <- dend.list[[-i]]
+ if (i > 0) d1 <- merged_branch[[i]]
+ if (j < 0) d2 <- dend.list[[-j]]
+ if (j > 0) d2 <- merged_branch[[j]]
+ merged_branch[[k]] <- merge(d1, d2) ## actual merge
+ }
+ merged <- merged_branch[[k]]
+ }
+
+ merged_hclust <- as.hclust(merged)
+ merged_hclust
}
+##=========================================================================
+## CONSENSUS WGCNA
+##=========================================================================
+
+#'
+#'
#' @export
wgcna.runConsensusWGCNA <- function(exprList,
- datTraits,
+ phenoData,
+ GMT = NULL,
+ annot = NULL,
+ ngenes = 2000,
power = 12,
- minKME = 0.8,
- cutheight = 0.15,
- deepsplit = 2,
- maxBlockSize = 5000) {
- ## exprList <- list(Set1 = V1, Set2 = V2)
- multiExpr <- WGCNA::list2multiData(lapply(exprList, Matrix::t))
-
- # The variable exprSize contains useful information about the sizes of all
- # of the data sets now we run automatic module detection procedure
- net.list <- list()
- for (k in names(multiExpr)) {
- message("[wgcna.runConsensusWGCNA] computing WGCNA for ", k)
- net.list[[k]] <- WGCNA::blockwiseModules(
- datExpr = multiExpr[[k]]$data,
- power = power,
- networkType = "signed",
- TOMType = "signed",
- minModuleSize = 20,
- deepSplit = deepsplit,
- mergeCutHeight = cutheight,
- numericLabels = FALSE,
- minKMEtoStay = minKME,
- saveTOMs = FALSE,
+ minModuleSize = 20,
+ minKME = 0.3,
+ mergeCutHeight = 0.15,
+ deepSplit = 2,
+ maxBlockSize = 9999,
+ addCombined = FALSE,
+ calcMethod = "fast",
+ drop.ref = FALSE,
+ compute.stats = TRUE,
+ compute.enrichment = TRUE,
+ summary = TRUE,
+ ai_model = DEFAULT_LLM,
+ ai_experiment = "",
+ gsea.mingenes = 10,
+ gsea.ntop = 1000,
+ gset.methods = c("fisher","gsetcor","xcor"),
+ verbose = 1,
+ progress = NULL
+ ) {
+
+ ## if(0) {
+ ## power=6;minKME=0.5;cutheight=0.15;deepSplit=2;maxBlockSize=5000;verbose=1;calcMethod="fast";addCombined=0;ngenes=2000;minModuleSize=20;mergeCutHeight=0.15
+ ## gsea.mingenes=20;gset.methods = c("fisher","gsetcor","xcor")
+ ## }
+
+ colors <- NULL
+
+ ## Align and reduce matrices if needed
+ gg <- Reduce(intersect, lapply(exprList,rownames))
+ exprList <- lapply(exprList, function(x) x[gg, , drop = FALSE])
+ if( length(gg) > ngenes ) {
+ sdx <- Reduce('*',lapply(exprList, function(x) matrixStats::rowSds(x)))
+ ii <- head(order(-sdx), ngenes)
+ exprList <- lapply(exprList, function(x) x[ii, , drop = FALSE])
+ }
+
+ if(addCombined) {
+ exprList[['Combined']] <- do.call(cbind, exprList)
+ }
+
+ exprsamples <- unlist(lapply(exprList, colnames))
+ if(!all(exprsamples %in% rownames(phenoData))) {
+ stop("samples mismatch for exprList and phenoData")
+ }
+
+ multiExpr = WGCNA::list2multiData(lapply(exprList, Matrix::t))
+ cor <- WGCNA::cor ## needed...
+
+ if(!is.null(power) && length(power)==1) {
+ power <- rep(power, length(multiExpr))
+ }
+
+ # module detection procedure
+ layers <- list()
+ if(!is.null(progress)) progress$inc(0.1, "Computing layers...")
+ for (i in 1:length(multiExpr)) {
+ k <- names(multiExpr)[i]
+ message("[wgcna.runConsensusWGCNA] >>> computing WGCNA for ", k)
+ X = Matrix::t(multiExpr[[i]]$data)
+ layers[[k]] <- wgcna.compute(
+ X = X,
+ samples = phenoData,
+ ngenes = ngenes,
+ power = power[i],
+ minmodsize = minModuleSize,
+ calcMethod = calcMethod,
+ deepsplit = deepSplit,
+ mergeCutHeight = mergeCutHeight,
+ numericlabels = FALSE,
+ minKME = minKME,
maxBlockSize = maxBlockSize,
- verbose = 0
+ compute.stats = compute.stats,
+ sv.tom = 40,
+ verbose = verbose
)
}
# now we run automatic consensus module detection
- message("[wgcna.runConsensusWGCNA] computing consensus modules...")
- net <- WGCNA::blockwiseConsensusModules(
- multiExpr[],
- power = power,
+ message("[wgcna.runConsensusWGCNA] >>> computing CONSENSUS modules...")
+ if(!is.null(progress)) progress$inc(0.1, "Computing consensus...")
+ consensusPower <- unlist(sapply(layers, function(w) w$net$power))
+ if(is.null(consensusPower) && !is.null(power)) {
+ consensusPower <- power
+ }
+ if(is.null(consensusPower)) {
+ consensusPower <- rep(12, length(layers))
+ }
+
+ sel <- setdiff(names(multiExpr), c("Combined"))
+ cons <- WGCNA::blockwiseConsensusModules(
+ multiExpr[sel],
+ power = as.numeric(consensusPower),
networkType = "signed",
TOMType = "signed",
- minModuleSize = 20,
- deepSplit = deepsplit,
- mergeCutHeight = cutheight,
+ minModuleSize = as.integer(minModuleSize),
+ deepSplit = as.integer(deepSplit),
+ mergeCutHeight = as.numeric(mergeCutHeight),
numericLabels = FALSE,
- minKMEtoStay = minKME,
- maxBlockSize = maxBlockSize,
+ minKMEtoStay = as.numeric(minKME),
+ maxBlockSize = as.integer(maxBlockSize),
saveTOMs = FALSE,
- verbose = 1
+ useDiskCache = FALSE,
+ verbose = verbose
)
-
+ cons$power = consensusPower
+
## create and match colors
- colors <- sapply(net.list, function(net) net$colors)
- c0 <- net$colors
- matched.colors <- apply(colors, 2, function(k) WGCNA::matchLabels(k, c0))
- colors <- cbind(Consensus = c0, matched.colors)
+ for(i in 1:length(layers)) {
+ layers[[i]] <- wgcna.matchColors(layers[[i]], cons$colors)
+ }
+ layers.colors <- sapply(layers, function(r) r$net$colors)
+ colors <- cbind(Consensus = cons$colors, layers.colors)
+
## add labels to dendrogram
- for (i in 1:length(net$dendrograms)) {
- ii <- which(net$goodGenes & net$blocks == i)
- xnames <- names(net$colors)
- net$dendrograms[[i]]$labels <- xnames[ii]
+ for(i in 1:length(cons$dendrograms)) {
+ ii <- which(cons$goodGenes & cons$blocks==i)
+ xnames <- names(cons$colors)
+ cons$dendrograms[[i]]$labels <- xnames[ii]
}
-
- ## merge dendrograms
- message("[wgcna.compute] merge_block_dendrograms...")
- multiX <- Matrix::t(do.call(rbind, lapply(exprList, function(x) scale(t(x)))))
- merged <- try(wgcna.merge_block_dendrograms(net, multiX))
- if (!inherits(merged, "try-error")) {
- net$merged_dendro <- merged
+
+ ## merge dendrograms ????
+ message("[wgcna.runConsensusWGCNA] merge_block_dendrograms...")
+ multiX <- Matrix::t(do.call(rbind,lapply(exprList,function(x)scale(t(x)))))
+ merged <- try(wgcna.merge_block_dendrograms(cons, multiX))
+ if(!inherits(merged,"try-error")) {
+ cons$merged_dendro <- merged
} else {
- net$merged_dendro <- NULL
+ cons$merged_dendro <- NULL
}
-
+
## create module-trait matrices for each set
- message("[wgcna.compute] computing module-traits matrices...")
- Z.list <- list()
- k <- 1
- for (k in names(net$multiME)) {
- M <- (net$multiME[[k]][[1]])
- Z.list[[k]] <- cor(M, datTraits[rownames(M), ], use = "pairwise")
- }
- z.check <- sapply(Z.list, function(z) colSums(z != 0, na.rm = TRUE) > 0)
- sel <- names(which(rowMeans(z.check) == 1))
- sel
- Z.list <- lapply(Z.list, function(z) z[, sel, drop = FALSE])
- Z.list
+ message("[wgcna.runConsensusWGCNA] >>> computing module-traits matrices...")
+ datTraits <- 1*expandPhenoMatrix(
+ phenoData,
+ drop.ref = drop.ref,
+ keep.numeric = TRUE
+ )
+ zlist <- list()
+ k=1
+ for(k in names(cons$multiME)) {
+ M <- (cons$multiME[[k]][[1]])
+ Z <- datTraits
+ kk <- intersect(rownames(M),rownames(Z))
+ zrho <- cor(M[kk,], Z[kk,], use="pairwise")
+ zrho[is.na(zrho)] <- 0 ## NEED RETHINK!!
+ zlist[[k]] <- zrho
+ }
## create consensus module-trait matrix
- message("[wgcna.compute] computing consensus module-traits matrix...")
- mdim <- sapply(exprList, ncol)
- pv <- mapply(function(z, n) corPvalueStudent(z, n), Z.list, mdim, SIMPLIFY = FALSE)
- all.sig <- Reduce("*", lapply(pv, function(p) 1 * (p < 0.05)))
- all.sig
- all.pos <- Reduce("*", lapply(Z.list, function(z) sign(z) == 1))
- all.neg <- Reduce("*", lapply(Z.list, function(z) sign(z) == -1))
- disconcordant <- !(all.pos | all.neg)
- disconcordant
- zsign <- sign(Reduce("+", lapply(Z.list, sign)))
- consZ <- Reduce(pmin, lapply(Z.list, abs)) * zsign
- consZ[disconcordant] <- NA
- ## consZ[!all.sig] <- NA
- ydim <- sapply(exprList, ncol)
-
+ ydim <- sapply(exprList, ncol)
+ consZ <- wgcna.computeConsensusMatrix(zlist, ydim=ydim, psig=0.05)
+
+ ## add slots
+ datExpr <- lapply(exprList, Matrix::t)
+
res <- list(
- net = net,
+ net = cons,
+ layers = layers,
+ datExpr = datExpr,
+ datTraits = datTraits,
modTraits = consZ,
- dendro = net$merged_dendro,
+ dendro = cons$merged_dendro,
colors = colors,
- zlist = Z.list,
- ydim = ydim
+ zlist = zlist,
+ ydim = ydim,
+ class = "consensus"
)
+ ## run stats
+ if(compute.stats) {
+ message("[wgcna.runConsensusWGCNA] >>> computing gene statistics...")
+ res$stats <- wgcna.computeConsensusGeneStats(res)
+ }
+
+ ## run enrichment
+ if(compute.enrichment) {
+ if(!is.null(progress)) progress$inc(0.2, "Computing enrichment...")
+ message("[wgcna.runConsensusWGCNA] >>> computing module enrichment...")
+ if(is.null(GMT)) GMT <- Matrix::t(playdata::GSETxGENE)
+ res$gsea <- wgcna.computeConsensusModuleEnrichment(
+ res,
+ GMT = GMT,
+ method = gset.methods,
+ annot = annot,
+ min.genes = gsea.mingenes,
+ ntop = gsea.ntop
+ )
+ if(summary) {
+ if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6)
+ message("Annotating modules using ", ai_model)
+ res$summary <- wgcna.describeModules(
+ res, multi = TRUE, ntop = 25, model = ai_model,
+ annot = annot, experiment = ai_experiment,
+ verbose = 0
+ )
+ }
+ }
+
res
}
+#' @export
+wgcna.matchColors <- function(wgcna, refcolors) {
+
+ oldcolors <- wgcna$net$colors
+ newcolors <- WGCNA::matchLabels(oldcolors, refcolors)
+ lut <- table(oldcolors, newcolors)
+ old2new <- colnames(lut)[max.col(lut)]
+ names(old2new) <- rownames(lut)
+ prefix <- substring(names(wgcna$me.colors),1,2)[1]
+ old2newME <- paste0(prefix,old2new)
+ names(old2newME) <- paste0(prefix,names(old2new))
+ old2new <- c(old2new, old2newME)
+
+ newcol <- function(x) {
+ array( old2new[x], dimnames = list(names(x)))
+ }
+
+ ## rename everything in net object
+ wgcna$net$colors <- newcol(wgcna$net$colors)
+ if("labels" %in% names(wgcna$net)) {
+ wgcna$net$labels <- newcol(wgcna$net$labels)
+ }
+
+ ## rename unmergedColors
+ if("unmergedColors" %in% names(wgcna$net)) {
+ wgcna$net$unmergedColors <- newcol(wgcna$net$unmergedColors)
+ }
+ names(wgcna$net$MEs) <- newcol(names(wgcna$net$MEs))
+
+ ## rename everything in wgcna object
+ names(wgcna$me.genes) <- newcol(names(wgcna$me.genes))
+ wgcna$me.colors <- newcol(wgcna$me.colors)
+ names(wgcna$me.colors) <- newcol(names(wgcna$me.colors))
+ colnames(wgcna$W) <- newcol(colnames(wgcna$W))
+ rownames(wgcna$modTraits) <- newcol(rownames(wgcna$modTraits))
+
+ ## rename everything in stats object
+ if("stats" %in% names(wgcna) && !is.null(wgcna$stats)) {
+ rownames(wgcna$stats[['moduleTraitCor']]) <- newcol(rownames(wgcna$stats[['moduleTraitCor']]))
+ rownames(wgcna$stats[['moduleTraitPvalue']]) <- newcol(rownames(wgcna$stats[['moduleTraitPvalue']]))
+ colnames(wgcna$stats[['moduleMembership']]) <- newcol(colnames(wgcna$stats[['moduleMembership']]))
+ colnames(wgcna$stats[['MMPvalue']]) <- newcol(colnames(wgcna$stats[['MMPvalue']]))
+ }
+ return(wgcna)
+}
+
+#' @export
+wgcna.createConsensusLayers <- function(exprList,
+ samples,
+ ngenes = 2000,
+ power = 12,
+ minModuleSize = 20,
+ deepSplit = 2,
+ mergeCutHeight = 0.15,
+ minKME = 0.3,
+ maxBlockSize = 9999,
+ prefix = NULL,
+ verbose = 1
+ ) {
+
+ if(0) {
+ ngenes = 2000
+ power = 12
+ minModuleSize = 5
+ deepSplit = 2
+ mergeCutHeight = 0.15
+ minKME = 0.3
+ maxBlockSize = 9999
+ verbose = 1
+ prefix=NULL
+ }
+
+ if(is.null(prefix)) prefix <- names(exprList)
+ nx <- length(exprList)
+ prefix <- head(rep(prefix, nx),nx)
+
+ ## reduce
+ message("[wgcna.computeConsensusLayers] Aligning matrices...")
+ gg <- Reduce(intersect, lapply(exprList,rownames))
+ exprList <- lapply(exprList, function(x) x[gg,])
+
+ if( length(gg) > ngenes ) {
+ message("[wgcna.computeConsensusLayers] Reducing to ",ngenes," genes")
+ sdx <- Reduce('*',lapply(exprList, function(x) matrixStats::rowSds(x)))
+ ii <- head(order(-sdx), ngenes)
+ exprList <- lapply(exprList, function(x) x[ii,])
+ }
+ multiExpr = WGCNA::list2multiData(lapply(exprList, Matrix::t))
+
+ ## determine power vector
+ if(is.null(power) || is.na(power) ) power <- "sft"
+ if(as.character(power[1]) %in% c("sft","iqr")) {
+ ## Estimate best power
+ power <- power[1]
+ message("[wgcna.createConsensusLayers] optimal power method = ", power)
+ est.power <- rep(NA, length(exprList))
+ i=1
+ for(i in 1:length(exprList)) {
+ p <- wgcna.pickSoftThreshold(
+ Matrix::t(exprList[[i]]), sft=NULL, rcut=0.85, powers = NULL,
+ method=power, nmax=1000, verbose=0)
+ if(length(p)==0 || is.null(p) ) p <- NA
+ est.power[i] <- p
+ }
+ est.power
+ power <- ifelse( is.na(est.power), 12, est.power)
+ } else {
+ power <- as.numeric(power)
+ }
+ nw <- length(exprList)
+ power <- head(rep(power, nw),nw)
+ names(power) <- names(exprList)
+
+ message("[wgcna.computeConsensusLayers] Computing consensus modules...")
+ cons = WGCNA::blockwiseConsensusModules(
+ multiExpr,
+ power = as.numeric(power),
+ networkType = "signed",
+ TOMType = "signed",
+ minModuleSize = as.integer(minModuleSize),
+ deepSplit = as.integer(deepSplit),
+ mergeCutHeight = as.numeric(mergeCutHeight),
+ numericLabels = FALSE,
+ minKMEtoStay = as.numeric(minKME),
+ maxBlockSize = as.integer(maxBlockSize),
+ saveTOMs = FALSE,
+ useDiskCache = FALSE,
+ verbose = verbose
+ )
+
+ ##
+ message("[wgcna.computeConsensusLayers] Creating consensus layers...")
+ aligned <- list()
+ i=1
+ for(i in 1:length(exprList)) {
+ k <- names(exprList)[i]
+ sel <- c("colors","unmergedColors","goodSamples","goodGenes",
+ "dendrograms","blockGenes","blocks")
+ net <- cons[sel]
+ net$power <- power[i]
+ X = exprList[[i]]
+ w <- wgcna.compute(
+ X = exprList[[i]],
+ samples,
+ prefix = prefix[i],
+ ngenes = -1,
+ net = net,
+ calcMethod = "fast",
+ sv.tom=0
+ )
+ aligned[[k]] <- w
+ }
+
+ return(aligned)
+}
+
+#' Compute consensus matrix from list of matrices. The consensus
+#' matrix checks for consistent sign and minimal threshold for each
+#' matrix. Optionally filters on consistent p-value.
#'
+#' @param ydim original dimension of data
#'
+#'
#' @export
-wgcna.getGeneStats <- function(wgcna, module, trait, plot = TRUE,
- showallmodules = TRUE, col = NULL,
- main = NULL) {
- p1 <- c("moduleMembership", "MMPvalue")
- p2 <- c("traitSignificance", "GSPvalue", "foldChange", "foldChangePvalue")
- p3 <- c("geneCentrality")
- nrow <- ncol(wgcna$datExpr)
- df <- wgcna$stats[[p1[1]]][, 0] ## empty data.frame
- rownames(df) <- colnames(wgcna$datExpr)
+wgcna.computeConsensusMatrix <- function(matlist, ydim, psig = 0.05, consfun="min") {
- mm.cols <- colnames(wgcna$stats[[p1[1]]])
- if (!is.null(module) && module %in% mm.cols) {
- A1 <- sapply(wgcna$stats[p1], function(x) x[, module])
- df <- cbind(df, A1)
+ ## create consensus module-trait matrix
+ matsign <- lapply(matlist, sign)
+ matsign <- lapply(matsign, function(x) {x[is.na(x)]=0; x})
+ all.pos <- Reduce("*", lapply(matsign, function(z) (z >= 0) ))
+ all.neg <- Reduce("*", lapply(matsign, function(z) (z <= 0) ))
+ concordant <- (all.pos | all.neg)
+
+ matlistN <- Reduce("+", lapply(matlist, function(x) !is.na(x)))
+ matlist0 <- lapply(matlist, function(x) {x[is.na(x)]=0; x})
+
+ zsign <- sign(Reduce("+", matsign)) ## mean sign??
+ if(consfun=="min") {
+ pminFUN <- function(...) pmin(..., na.rm=TRUE)
+ consZ <- Reduce(pminFUN, lapply(matlist, abs)) * zsign
+ } else if(consfun=="gmean") {
+ ## geometric mean
+ matlistG <- lapply(matlist, function(x) {x=log(abs(x));x[is.na(x)]=0;x})
+ consZ <- exp(Reduce('+', matlistG) / matlistN)
+ consZ <- consZ * zsign
+ } else {
+ ## mean
+ consZ <- Reduce('+', matlist0) / matlistN
}
+ consZ[!concordant] <- NA
+
+ if(psig < 1) {
+ if(length(ydim) == 1) ydim <- rep(ydim[1], length(matlist))
+ pv <- mapply(function(z, n)
+ WGCNA::corPvalueStudent(z, n), matlist, ydim, SIMPLIFY = FALSE)
+ for(i in 1:length(pv)) pv[[i]][is.na(pv[[i]])] <- 0 ## missing???
+ all.sig <- Reduce("*", lapply(pv, function(p) 1 * (p < psig)))
+ consZ[!all.sig] <- NA
+ }
+ return(consZ)
+}
- tt.cols <- colnames(wgcna$stats[[p2[1]]])
- if (!is.null(trait) && trait %in% tt.cols) {
- A2 <- sapply(wgcna$stats[p2], function(x) x[, trait])
- df <- cbind(df, A2)
+#' Compute consensus matrix from list of matrices. The consensus
+#' matrix checks for consistent sign and minimal threshold for each
+#' matrix. Optionally filters on consistent p-value.
+#'
+#' @export
+wgcna.computeDistinctMatrix <- function(matlist, ydim, psig = 0.05, min.diff=0.3,
+ consmax = 0) {
+ ## create difference module-trait matrix
+ pv <- mapply(function(z, n) corPvalueStudent(z, n),
+ matlist, ydim, SIMPLIFY = FALSE)
+ matsign <- lapply( matlist, sign )
+ Q <- matlist
+ i=1
+ for(i in 1:length(matlist)) {
+
+ ## Any entry not significant is anyway invalid
+ notsig <- (pv[[i]] > psig)
+ Q[[i]][notsig] <- NA
+
+ ## Any entry with too small difference with others is invalid
+ refmat <- Reduce("+", matlist[-i]) / (length(matlist)-1)
+ diff <- matlist[[i]] - refmat
+ notdiff <- ( abs(diff) < min.diff )
+ Q[[i]][notdiff] <- NA
+
+ ## any entry that has consensus is invalid
+ cons <- mapply(function(P, S) (P<0.05)*(S==matsign[[i]]),
+ pv[-i], matsign[-i], SIMPLIFY=FALSE)
+ cons <- (Reduce('+', cons) > consmax) ## or function
+ Q[[i]][cons] <- NA
}
+ return(Q)
+}
- A3 <- wgcna$stats[[p3]]
- df <- cbind(df, centrality = A3)
- sel <- c("moduleMembership", "traitSignificance", "foldChange", "centrality")
- sel <- intersect(sel, colnames(df))
- df1 <- pmax(as.matrix(df[, sel]), 1e-8)
- score <- exp(rowMeans(log(df1)))
-
- labels <- wgcna$net$labels
- df <- data.frame(module = labels, score = score, df)
- df <- df[order(-df$score), ]
- if (!is.null(module) && !showallmodules) {
- sel <- which(df$module == module)
- df <- df[sel, , drop = FALSE]
+#' @export
+wgcna.plotConsensusOverlapHeatmap <- function(net1, net2,
+ setLabels=NULL,
+ lab.line = c(8,8),
+ plotDendro = FALSE,
+ setpar=TRUE) {
+
+ if(is.null(setLabels))
+ setLabels <- c("Set1","Set2")
+ if(length(setLabels)==1) setLabels <- paste0(setLabels,1:2)
+
+ if(plotDendro) {
+
+ layout.matrix <- matrix( c(1,2,5,3,4,5), nrow = 3, ncol = 2)
+ layout(mat = layout.matrix, heights= c(0.8,0.2,2.5), widths = c(1,1))
+
+ WGCNA::plotDendroAndColors(
+ dendro = net1$dendrograms[[1]],
+ colors = net1$colors,
+ dendroLabels = FALSE,
+ hang = 0.03,
+ addGuide = FALSE,
+ guideHang = 0.05,
+ #marAll = marAll,
+ setLayout = FALSE,
+ main = setLabels[1]
+ )
+
+ WGCNA::plotDendroAndColors(
+ dendro = net2$dendrograms[[1]],
+ colors = net2$colors,
+ dendroLabels = FALSE,
+ hang = 0.03,
+ addGuide = FALSE,
+ guideHang = 0.05,
+ #marAll = marAll,
+ setLayout = FALSE,
+ main = setLabels[2]
+ )
}
- if (plot) {
- cols <- c("moduleMembership", "traitSignificance", "foldChange", "centrality")
- cols <- intersect(cols, colnames(df))
- df1 <- df[, cols]
- col1 <- wgcna.labels2colors(wgcna$net$colors[rownames(df1)])
- if (!is.null(col)) col1 <- col
- pairs(df1, col = col1)
- if (is.null(main)) {
- main <- paste("Gene significance for module", module, "and trait", trait)
- }
- title(main, line = 3, cex.main = 1.15)
+
+ firstColors <- wgcna.labels2colors(net1$colors)
+ secondColors <- wgcna.labels2colors(net2$colors)
+ overlap <- overlapTable(firstColors, secondColors)
+ names(overlap)
+
+ T1 = overlap$countTable
+ T2 = table(firstColors, secondColors)
+ T3 = table(net1$colors, net2$colors)
+
+ firstModTotals <- rowSums(overlap$countTable)
+ secondModTotals <- colSums(overlap$countTable)
+ firstModules <- rownames(overlap$countTable)
+ secondModules <- colnames(overlap$countTable)
+
+ # Truncate p values smaller than 10^{-50} to 10^{-50}
+ pTable <- -log10(overlap$pTable)
+ pTable[is.infinite(pTable)] = 1.3*max(pTable[is.finite(pTable)]);
+ pTable[pTable>50 ] = 50 ;
+
+ if(setpar) {
+ par(mfrow=c(1,1));
+ par(cex = 1.0);
+ par(mar=c(10, 12.4, 2.7, 1)+0.3);
}
- df
+ # Use function labeledHeatmap to produce the color-coded table with all the trimmings
+ WGCNA::labeledHeatmap(
+ Matrix = t(pTable),
+ xLabels = paste(" ", firstModules),
+ yLabels = paste(" ", secondModules),
+ colorLabels = TRUE,
+ #xSymbols = paste0(setLabels[1],":", firstModules, " (", firstModTotals,")"),
+ #ySymbols = paste0(setLabels[2],":", secondModules, " (", secondModTotals, ")"),
+ xSymbols = paste0(firstModules, " (", firstModTotals,")"),
+ ySymbols = paste0(secondModules, " (", secondModTotals, ")"),
+ textMatrix = t(overlap$countTable),
+ colors = WGCNA::blueWhiteRed(100)[50:100],
+ main = paste("Correspondence of",setLabels[1],"and ",
+ setLabels[2],"modules"),
+ cex.text = 1.0, cex.lab = 1.0, setStdMargins = FALSE);
+ mtext(toupper(setLabels[1]), side=1, line=lab.line[1], cex=1.1)
+ mtext(toupper(setLabels[2]), side=2, line=lab.line[2], cex=1.1)
+
}
-wgcna.merge_block_dendrograms <- function(net, X, method = 1) {
- ## This function is fragile: it can give a C stack limit error. In
- ## case that happens you can increase the stack limit by running on
- ## the cmd line: >>> ulimit -s unlimited
- ##
- hc <- net$dendrograms
+##=========================================================================
+## PRESERVATION WGCNA
+##=========================================================================
- ## merge block dendrogram
- mx <- list()
- for (b in 1:length(net$dendrograms)) {
- ii <- which(net$goodGenes & net$blocks == b)
- mx[[b]] <- colMeans(X[ii, ])
- hc[[b]]$labels <- rownames(X)[ii]
+#' @export
+wgcna.runPreservationWGCNA <- function(exprList,
+ phenoData,
+ power = 12,
+ reference = 1,
+ add.merged=FALSE,
+ ngenes = 2000,
+ minModuleSize = 20,
+ deepSplit = 2,
+ annot = NULL,
+ compute.stats = TRUE,
+ compute.enrichment = TRUE,
+ gset.methods = c("fisher","gsetcor","xcor")
+ ) {
+
+ if(is.character(reference)) {
+ reference <- match(reference, names(exprList))
+ }
+ if(reference > 0) {
+ reference.name <- names(exprList)[reference]
+ } else {
+ reference.name <- "Consensus"
+ }
+
+ ## multiset WGCNA
+ pres <- wgcna.runConsensusWGCNA(
+ exprList,
+ phenoData,
+ GMT = NULL,
+ annot = NULL,
+ ngenes = ngenes,
+ power = power,
+ minModuleSize = minModuleSize,
+ minKME = 0.3,
+ mergeCutHeight = 0.15,
+ deepSplit = deepSplit,
+ maxBlockSize = 9999,
+ addCombined = FALSE,
+ calcMethod = "fast",
+ drop.ref = FALSE,
+ compute.stats = FALSE,
+ compute.enrichment = FALSE,
+ gsea.mingenes = 10,
+ gset.methods = gset.methods
+ )
+
+ colorList <- lapply(pres$layers, function(w) w$net$colors)
+ names(colorList) <- names(pres$layers)
+ exprList <- lapply( pres$layers, function(w) t(w$datExpr))
+
+ if(add.merged || reference==0) {
+ message("[wgcna.runPreservationWGCNA] adding merged layer...")
+ cX <- lapply(exprList, function(x) x - rowMeans(x))
+ merged <- do.call( cbind, cX)
+ exprList$Merged <- NULL
+ exprList <- c( list( Merged = merged), exprList)
+ cons.colors <- pres$net$colors
+ colorList <- c( list( Consensus = cons.colors ), colorList)
+ reference <- reference + 1
}
+
+ message("[wgcna.runPreservationWGCNA] running WGCNA::modulePreservation...")
+ multiExpr = WGCNA::list2multiData(lapply(exprList,Matrix::t))
+ mp <- WGCNA::modulePreservation(
+ multiExpr,
+ colorList,
+ referenceNetworks = reference,
+ nPermutations = 10,
+ networkType = "signed",
+ quickCor = 0,
+ verbose = 2,
+ indent = 0
+ )
- if (length(hc) == 1) {
- return(hc[[1]])
+ ## Zsummary tables
+ mp.tables <- mp$preservation$Z[[1]][-reference]
+ Z <- sapply(mp.tables, function(mat) mat[,"Zsummary.pres"])
+ rownames(Z) <- rownames(mp.tables[[1]])
+ rownames(Z) <- paste0("ME",rownames(Z))
+ colnames(Z) <- names(multiExpr)[-reference]
+
+ ## median rank
+ mp.tables <- mp$preservation$observed[[1]][-reference]
+ M <- sapply(mp.tables, function(mat) mat[,"medianRank.pres"])
+ rownames(M) <- rownames(mp.tables[[1]])
+ rownames(M) <- paste0("ME",rownames(M))
+ colnames(M) <- names(multiExpr)[-reference]
+
+ ## module size
+ moduleSize <- mp.tables[[1]][,"moduleSize"]
+ names(moduleSize) <- rownames(Z)
+
+ ## module-traits. We need to recompute the MEs (module eigengenes)
+ ## using the color coding of the reference set.
+ refColors <- colorList[[1]]
+ MEx = lapply(exprList, function(x)
+ WGCNA::moduleEigengenes(t(x), colors = refColors)$eigengenes
+ )
+
+ ## Compute module-trait correlation matrices
+ Y <- lapply(pres$layers, function(w) w$datTraits)
+ names(Y)
+ if("Merged" %in% names(MEx) && !"Merged" %in% names(Y) ) {
+ kk <- rownames(MEx[['Merged']])
+ Y[['Merged']] <- pres$datTraits[kk,]
+ Y <-Y[names(MEx)]
+ }
+ kk <- Reduce( union, lapply(Y, colnames))
+ Y <- lapply(Y, function(y) y[,match(kk,colnames(y))])
+ for(i in 1:length(Y)) colnames(Y[[i]]) <- kk
+ R <- mapply(cor, MEx, Y, use="pairwise", SIMPLIFY=FALSE)
+ ##for(i in 1:length(R)) colnames(R[[i]]) <- paste0(names(R)[i],":",colnames(R[[i]]))
+
+ ## gene statistics of reference layer
+ if(compute.stats) {
+ message("[wgcna.runPreservationWGCNA] computing gene statistics...")
+ ref = reference.name
+ wnet <- list( MEs = MEx[[ref]], colors = pres$colors[,ref])
+ pres$stats <- wgcna.computeGeneStats(wnet, pres$datExpr[[ref]],
+ pres$datTraits, TOM=NULL)
}
+
+ ## geneset enrichment of reference layer
+ if(compute.enrichment) {
+ message("[wgcna.runPreservationWGCNA] computing geneset enrichment...")
+ pres$gsea <- wgcna.computeModuleEnrichment(
+ pres$layers[[ref]],
+ GMT = NULL,
+ gsetX = NULL,
+ annot = annot,
+ methods = gset.methods,
+ ntop = 1000,
+ xtop = 100,
+ filter = NULL
+ )
+ }
+
+ pres$modulePreservation <- mp
+ pres$Zsummary <- Z
+ pres$medianRank <- M
+ pres$moduleSize <- moduleSize
+ pres$modTraits <- R
+ pres$MEs <- MEx
+
+ return(pres)
+}
- M <- do.call(rbind, mx)
- hclust_p <- hclust(dist(M), method = "average")
- dend_p <- as.dendrogram(hclust_p)
- dend.list <- lapply(hc, as.dendrogram)
+#' @export
+wgcna.plotPreservationSummaries <- function(pres, setpar=TRUE) {
- if (method == 1) {
- merged <- ComplexHeatmap::merge_dendrogram(dend_p, dend.list)
- } else {
- mrg <- hclust_p$merge
- merged_branch <- list()
- k <- 1
- for (k in 1:nrow(mrg)) {
- i <- mrg[k, 1]
- j <- mrg[k, 2]
- if (i < 0) d1 <- dend.list[[-i]]
- if (i > 0) d1 <- merged_branch[[i]]
- if (j < 0) d2 <- dend.list[[-j]]
- if (j > 0) d2 <- merged_branch[[j]]
- merged_branch[[k]] <- merge(d1, d2) ## actual merge
+ # Create a simple bar plot of Zsummary:
+ Z <- pres$Zsummary
+ ntest <- ncol(Z)
+
+ if(setpar) {
+ par(mfrow=c(3, ntest), mar=c(5,5,4,1))
+ }
+ xylist <- list(
+ c("moduleSize", "Zsummary.pres"),
+ c("moduleSize", "medianRank.pres"),
+ c("Zsummary.pres", "medianRank.pres")
+ )
+
+ for(xy in xylist) {
+ for(k in colnames(Z)) {
+ X <- data.frame(
+ color = substring(names(pres$moduleSize),3,99),
+ moduleSize = pres$moduleSize,
+ Zsummary.pres = pres$Zsummary[,k],
+ medianRank.pres = pres$medianRank[,k]
+ )
+ xvar <- xy[1]
+ yvar <- xy[2]
+ ylim <- c(0,max(X[,yvar]))
+ if(yvar == "medianRank.pres") ylim <- rev(ylim)
+ plot(
+ X[,xvar],
+ X[,yvar],
+ pch = 21,
+ cex=2,
+ bg = X$color,
+ ylim = ylim,
+ xlab = xvar,
+ ylab = yvar
+ )
+ title(yvar, cex.main=1.4, line=2.2)
+ sub <- paste( k, "vs.", "reference")
+ title(sub, cex.main=1, line=0.9)
+ if(yvar=="Zsummary.pres") abline(h = c(2,10), lty = 2)
}
- merged <- merged_branch[[k]]
+ }
+}
+
+#' @export
+wgcna.plotPreservationModuleTraits <- function(pres,
+ subplots = c("zsummary","consmt","wt.consmt"),
+ order.by = "name",
+ setpar=TRUE, rm.na=FALSE) {
+
+
+ if(all(is.numeric(subplots))) {
+ subplots <- c("zsummary","consmt","wt.consmt")[subplots]
+ }
+
+ if(setpar) {
+ par(mfrow=c(2,2), mar=c(14,12,4,2))
+ }
+
+ ## compute consensus
+ Zsummary <- pres$Zsummary
+
+ cR <- pres$modTraits
+ ydim <- sapply(pres$layers, function(w) nrow(w$datTraits))
+ consZ <- wgcna.computeConsensusMatrix(cR, ydim, psig=1, consfun='gmean')
+ ##consZ <- consZ[rownames(cR[[1]]), colnames(cR[[1]])]
+
+ ## match
+ ii <- intersect(rownames(Zsummary),rownames(consZ))
+ Zsummary <- Zsummary[ii,,drop=FALSE]
+ consZ <- consZ[ii,,drop=FALSE]
+
+ ## order
+ order.method="clust"
+ if(order.by == "name") {
+ ii <- order(rownames(Zsummary))
+ Zsummary <- Zsummary[ii,,drop=FALSE]
+ consZ <- consZ[ii,,drop=FALSE]
+ }
+ if(order.by == "zsummary") {
+ ii <- order(-rowMeans(Zsummary**2))
+ Zsummary <- Zsummary[ii,,drop=FALSE]
+ consZ <- consZ[ii,,drop=FALSE]
+ }
+ if(order.by == "clust") {
+ consZ1 <- consZ
+ consZ1[is.na(consZ1)] <- 0
+ ii <- hclust(dist(consZ1))$order
+ jj <- hclust(dist(t(consZ1)))$order
+ Zsummary <- Zsummary[ii,,drop=FALSE]
+ consZ <- consZ[ii,jj,drop=FALSE]
+ }
+
+ ##--------------------------------------
+ ## Zsummary heatmap
+ ##--------------------------------------
+ if("zsummary" %in% subplots) {
+ WGCNA::labeledHeatmap(
+ Matrix = Zsummary,
+ xLabels = colnames(Zsummary),
+ yLabels = rownames(Zsummary),
+ ySymbols = rownames(Zsummary),
+ colors = tail(WGCNA::blueWhiteRed(100),50),
+ colorLabels = TRUE,
+ setStdMargins = FALSE
+ )
+ title("Module preservation (Zsummary)", line=1.2, cex.main=1.2)
+ }
+
+ ##--------------------------------------
+ ## Consensus Module-Trait
+ ##--------------------------------------
+ validcol <- function(R) which( colMeans(is.na(R)) < 1 &
+ matrixStats::colSds(R,na.rm=TRUE) > 0.01 )
+
+ if("consmt" %in% subplots) {
+ clim <- max(abs(consZ),na.rm=TRUE)
+ cval <- seq( -clim, clim, length.out=201)
+ ii <- which( cval >= min(consZ,na.rm=TRUE) & cval <= max(consZ,na.rm=TRUE) )
+ col2 <- WGCNA::blueWhiteRed(201)[ii]
+ jj <- 1:ncol(consZ)
+ if(rm.na) jj <- validcol(consZ)
+ WGCNA::labeledHeatmap(
+ Matrix = consZ[,jj,drop=FALSE],
+ xLabels = colnames(consZ)[jj],
+ yLabels = rownames(consZ),
+ ySymbols = rownames(consZ),
+ colors = col2,
+ colorLabels = TRUE,
+ setStdMargins = FALSE
+ )
+ title("Consensus Module-Traits", line=1.2, cex.main=1.2)
+ }
+
+ ##--------------------------------------
+ ## preservation-weighted Consensus Module-Trait
+ ##--------------------------------------
+ if("wt.consmt" %in% subplots) {
+ wz <- rowMeans(Zsummary**2, na.rm=TRUE)
+ wz <- wz / max(wz)
+ consW <- consZ * wz[rownames(consZ)]
+
+ clim <- max(abs(consW),na.rm=TRUE)
+ cval <- seq( -clim, clim, length.out=201)
+ ii <- which( cval >= min(consW,na.rm=TRUE) & cval <= max(consW,na.rm=TRUE) )
+ col2 <- WGCNA::blueWhiteRed(201)[ii]
+
+ jj <- 1:ncol(consW)
+ if(rm.na) jj <- validcol(consW)
+ WGCNA::labeledHeatmap(
+ Matrix = consW[,jj,drop=FALSE],
+ xLabels = colnames(consW)[jj],
+ yLabels = rownames(consW),
+ ySymbols = rownames(consW),
+ colors = col2,
+ colorLabels = TRUE,
+ setStdMargins = FALSE
+ )
+ title("Preservation-weighted Consensus\nModule-Traits", line=1, cex.main=1.2)
}
- merged_hclust <- as.hclust(merged)
- merged_hclust
}
-## =========================================================================
+
+
+##=========================================================================
## PLOTTING FUNCTIONS
-## =========================================================================
+##=========================================================================
+
+
+#' @export
+wgcna.plotTopModules <- function(wgcna, trait, nmax=16, setpar=TRUE) {
+
+ MEx <- wgcna$net$MEs
+ Y <- wgcna$datTrait
+ kk <- intersect(rownames(MEx), rownames(Y))
+ MEx <- MEx[kk,]
+ Y <- Y[kk,,drop=FALSE]
+
+ rho <- cor(MEx, Y, use="pairwise")
+ sel <- order(-abs(rho[,trait]))
+ sel <- head(sel, nmax)
+ n <- length(sel)
+ nr <- ceiling(sqrt(n))
+ nc <- ceiling(n / nr)
+
+ yclass <- sapply(as.data.frame(Y), class)
+ is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA)))
+ yclass[which(is.binary)] <- "factor"
+
+ if(setpar==1) par(mfrow=c(nr,nc), mgp=c(2.6,0.85,0), mar=c(4,4,2.5,1))
+ if(setpar==2) par(mfrow=c(nc,nr), mgp=c(2.6,0.85,0), mar=c(4,4,2.5,1))
+
+ yclass <- sapply(as.data.frame(Y), class)
+ is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA)))
+ yclass[which(is.binary)] <- "logical"
+ yclass
+
+ i=sel[1]
+ for(i in head(sel, nmax)) {
+
+ x <- Y[, trait]
+ y <- MEx[,i]
+ label <- colnames(MEx)[i]
+ col <- substring(label, 3, 99)
+ col1 <- adjustcolor(col, alpha.f=0.5)
+
+ if( yclass[trait] == "factor" ) {
+ boxplot(y ~ x, main = label, col = col1,
+ xlab = trait, ylab = "ME score")
+ points(1 + x + 0.04*rnorm(length(x)), y,
+ pch=21, bg=col1, lwd=0.5)
+ }
+
+ if( yclass[trait] == "numeric" ) {
+ plot( x, y, main = label,
+ pch=21, cex = 1.1, col=1, bg = col1, lwd=0.25,
+ xlab = trait, ylab = "ME score")
+ abline(h=0, lty=2, lwd=0.5)
+ r <- cor(x,y,use="pairwise")
+ if(abs(r) > 0.3) {
+ abline( lm(y ~ x), col=1, lwd=0.6 )
+ legend("bottomright", legend=paste("r=",round(r,3)))
+ }
+ }
+ }
+}
+
+
+#' Plot top modules most correlated with trait for multi expression
+#' data.
+#'
+#' @export
+wgcna.plotTopModules_multi <- function(multi, trait, nmax=16, collapse=FALSE,
+ plotlib = "base", setpar=TRUE)
+{
+
+ if(!"MEs" %in% names(multi)) {
+ multi$MEs <- lapply(multi$net$multiMEs, function(m) m$data)
+ }
+
+ ## we 'just' concatenate the ME matrices
+ multi$MEs <- lapply(multi$MEs, as.matrix)
+ MEx <- as.matrix(mofa.merge_data(multi$MEs))
+ Y <- lapply(multi$MEs, function(m) multi$datTraits[rownames(m),,drop=FALSE])
+ Y <- mofa.merge_data(Y)
+
+ batch <- sub(":.*","",rownames(Y))
+ names(batch) <- rownames(Y)
+ nbatch <- length(multi$MEs)
+
+ ## select top modules
+ rho <- cor(MEx, Y, use="pairwise")
+ sel <- names(sort(-abs(rho[,trait])))
+ sel <- head(sel, nmax)
+
+ Y <- type.convert(data.frame(Y,check.names=FALSE),as.is=FALSE)
+ if(collapse) {
+ Y <- collapseTraitMatrix(Y)
+ trait <- sub("=.*","",trait)
+ }
+
+ n <- length(sel)
+ nr <- ceiling(sqrt(n))
+ nc <- ceiling(n / nr)
+ if(setpar==1) par(mfrow=c(nr,nc), mgp=c(2.6,0.85,0), mar=c(2.5,4,2.5,1))
+ if(setpar==2) par(mfrow=c(nc,nr), mgp=c(2.6,0.85,0), mar=c(2.5,4,2.5,1))
+
+ yclass <- sapply(as.data.frame(Y), class)
+ is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA)))
+ yclass[which(is.binary)] <- "logical"
+ yclass
+
+ i=sel[1]
+ for(i in head(sel, nmax)) {
+
+ x <- Y[, trait]
+ y <- MEx[,i]
+ label <- i
+ col <- substring(label, 3, 99)
+ col1 <- adjustcolor(col, alpha.f=0.66)
+ col2 <- col1
+
+ yclass[trait]
+ if(yclass[trait] %in% c("factor","logical","binary")) {
+
+ if(yclass[trait] %in% c("logical","binary")) {
+ x <- (x == 1)
+ }
+ df <- data.frame(x=x, y=y, group=factor(batch))
+
+ par(mgp=c(2.4,0.9,0))
+ aa <- c(0.15, 0.55)
+ col1 <- sapply(aa, function(a) adjustcolor(col, alpha.f=a))
+
+ nb <- nbatch
+ atx <- c( seq(1,2, length.out=nb), seq(4,5, length.out=nb) )
+ atx <- unlist(sapply(1:nbatch, function(i) i + c(-0.15, 0.15),simplify=FALSE))
+ atmid <- 1:nbatch
+
+ boxplot(
+ df$y ~ df$x + df$group,
+ #df$y ~ df$group + df$x,
+ at = atx,
+ xlim = c(0.6, nbatch+0.4),
+ cols = col1,
+ main = label,
+ col = col1,
+ boxwex = 0.24,
+ xaxt = 'n',
+ xlab = '',
+ ylab = "ME score"
+ )
+
+ mtext( levels(df$group), side=1, line=0.6, cex=1.0, at=atmid)
+ bb <- c("FALSE","TRUE")
+ legend("topright", legend=bb, fill = col1,
+ cex=0.8, y.intersp=0.82, title=trait, title.cex=1.1)
+
+ } ## end of if factor
+
+ if(yclass[trait] %in% c("numeric","integer")) {
+
+ df <- data.frame(x=x, y=y, group=factor(batch))
+ par(mgp=c(2.4,0.9,0))
+
+ aa <- seq(0.55, 0.15, length.out=nbatch)
+ col1 <- sapply(aa, function(a) adjustcolor(col, alpha.f=a))
+ names(col1) <- sort(unique(batch))
+
+ nb <- nbatch
+ colx <- col1[as.integer(factor(batch))]
+
+ plot(
+ df$x , df$y,
+ pch = 21,
+ cex = 1,
+ lwd = 0.4,
+ bg = colx,
+ main = label,
+ xlab = trait,
+ ylab = "ME score"
+ )
+ bb <- names(multi$MEs)
+ legend("topright", legend=bb, fill=col1, cex=0.9,
+ y.intersp=0.85)
+
+ ## add regression lines
+ b=df$group[1]
+ col2 <- adjustcolor(col1, red.f=0.5, green.f=0.5, blue.f=0.5)
+ names(col2) <- names(col1)
+ for(b in unique(df$group)) {
+ ii <- which(df$group == b)
+ abline( lm(df$y[ii] ~ df$x[ii]), lwd=1, lty=1, col=col2[1] )
+ }
+ } ## end of if continuous
+
+ }
+
+}
+
+
+
+#' Plot top modules most correlated with trait for multi expression
+#' data.
+#'
+#' @export
+wgcna.plotModuleScores <- function(res, trait,
+ multi=FALSE, nmax=16,
+ collapse.trait = FALSE,
+ plotlib = "base", setpar=TRUE)
+{
+
+ Y <- res$datTrait
+
+ ## For multi we 'just' concatenate the ME matrices
+ if(multi) {
+ MEx <- do.call(rbind, lapply(res$MEs, as.matrix))
+ me.samples <- lapply(res$MEs, rownames)
+ batch <- max.col(sapply(me.samples, function(s) rownames(Y) %in% s))
+ batch <- names(res$MEs)[batch]
+ names(batch) <- rownames(Y)
+ nbatch <- length(res$MEs)
+ } else {
+ MEx <- res$net$MEs
+ batch <- ""
+ nbatch <- 1
+ }
+
+ ## align
+ kk <- intersect(rownames(MEx), rownames(Y))
+ MEx <- MEx[kk,]
+ Y <- Y[kk,]
+ if(!is.null(batch)) batch <- batch[kk]
+
+ sel.modules <- colnames(MEx)
+ if(nmax > 0) {
+ ## select top modules
+ rho <- cor(MEx, Y, use="pairwise")
+ sel.modules <- names(sort(-abs(rho[,trait])))
+ sel.modules <- head(sel.modules, nmax)
+ }
+ ncol <- ceiling(sqrt(length(sel.modules)))
+
+ if(collapse.trait) {
+ Y <- type.convert(data.frame(Y,check.names=FALSE),as.is=FALSE)
+ Y <- collapseTraitMatrix(Y)
+ trait <- sub("=.*","",trait)
+ }
+
+ module <- as.vector(sapply(sel.modules, rep, nrow(MEx)))
+ dfx <- data.frame(
+ sample = rownames(MEx),
+ trait = Y[,trait],
+ score = as.vector(unlist(MEx[,sel.modules])),
+ module = module,
+ group = batch
+ )
+
+ xtype <- class(type.convert(Y[,trait],as.is=TRUE))
+ xtype
+ if(xtype != "numeric") {
+
+ dfx$trait <- factor(dfx$trait)
+ if(nbatch== 1) {
+ ggplot2::ggplot(dfx,
+ ggplot2::aes(x = factor(trait), y = score, fill = trait)) +
+ #ggplot2::aes(y = score, x = trait)) +
+ ggplot2::geom_boxplot() +
+ ggplot2::xlab(trait) +
+ ggplot2::ylab("ME score") +
+ ggplot2::facet_wrap(~module, ncol=ncol) +
+ ggplot2::theme_bw(base_size = 18)
+ }
+
+ if(nbatch > 1) {
+ ggplot2::ggplot(dfx,
+ ggplot2::aes(x = group, y = score, fill = trait)) +
+ ggplot2::geom_boxplot() +
+ ggplot2::xlab(trait) +
+ ggplot2::ylab("ME score") +
+ ggplot2::facet_wrap(~module, ncol=ncol) +
+ ggplot2::theme_bw(base_size = 18)
+ }
+ } else {
+
+ dfx$trait <- as.numeric(dfx$trait)
+ ggplot2::ggplot(dfx,
+ #ggplot2::aes(x = trait, y = score, color = group)) +
+ ggplot2::aes(x = trait, y = score, color = group)) +
+ ggplot2::geom_point(size = 0.6) +
+ ggplot2::geom_smooth(method="lm", se = FALSE, linewidth=0.6) +
+ ggplot2::xlab(trait) +
+ ggplot2::ylab("ME score") +
+ ggplot2::facet_wrap(~module, ncol = ncol, scales = "free") +
+ ggplot2::theme_bw(base_size = 18)
+
+ }
+}
+
+#'
+#'
+#' @export
+wgcna.plotTraitCorrelationBarPlots <- function(res, trait, multi=FALSE,
+ colored = TRUE, beside = TRUE,
+ main = NULL, cex.main = 1.3,
+ setpar=TRUE) {
+ if(setpar) {
+ nr <- ceiling( sqrt(length(trait)))
+ nc <- ceiling( length(trait)/nr)
+ par(mfrow=c(nr,nc))
+ }
+ p=trait[1]
+ for(p in trait) {
+ groups <- NULL
+ if(multi) {
+ mt <- res$modTraits
+ groups <- names(mt)
+ m1 <- sapply(mt, function(x) x[,p])
+ } else {
+ m1 <- res$modTraits[,p]
+ }
+ colnames(m1) <- paste0(p," (",colnames(m1),")")
+ me.col <- grey.colors(2)
+ if(colored) {
+ me.col <- sub("ME","",rownames(m1))
+ me.col <- rbind(me.col, me.col)
+ aa <- seq(0.7,0.25,length.out=nrow(me.col))
+ for(i in 1:nrow(me.col)) {
+ me.col[i,] <- adjustcolor(me.col[i,], alpha.f=aa[i])
+ }
+ }
+
+ if(beside) {
+ barplot( t(m1), las=3, beside=TRUE, col=me.col,
+ ylab = "trait correlation (rho)")
+ tt <- p
+ if(!is.null(main)) tt <- main
+ title(tt, cex.main = cex.main)
+ if(length(groups)>1) {
+ legend("topright", legend=groups, fill=grey.colors(length(groups)))
+ }
+ } else {
+ me.col <- NULL
+ if(colored) me.col <- sub("ME","",rownames(m1))
+ for(i in 1:ncol(m1)) {
+ barplot( m1[,i], las=3, beside=TRUE, col=me.col,
+ ylab = "trait correlation (rho)")
+ tt <- colnames(m1)[i]
+ if(!is.null(main)) tt <- main
+ title(tt, cex.main = cex.main)
+ }
+ }
+
+
+
+
+ }
+}
#'
#'
@@ -953,125 +3096,374 @@ wgcna.plotTOM <- function(wgcna, justdata = FALSE, block = NULL,
#'
#'
#' @export
-wgcna.plotDendroAndColors <- function(wgcna, main = NULL, block = NULL, extra.colors = NULL) {
- if (length(wgcna$net$dendrograms) > 1) {
+wgcna.plotDendroAndColors <- function(wgcna, main=NULL, block=1,
+ extra.colors=NULL,
+ show.kme = FALSE,
+ show.traits = FALSE,
+ use.tree = 0,
+ rm.na = TRUE,
+ marAll = c(0.4, 5, 1, 0.2),
+ setLayout=TRUE, ... ) {
+
+ if("net" %in% names(wgcna)) {
+ net <- wgcna$net
+ if("colors" %in% names(wgcna)) {
+ net$netcolors <- net$colors
+ net$colors <- wgcna$colors
+ }
+ } else {
+ net <- wgcna
+ }
+ dendro <- net$dendrograms
+
+ if("layers" %in% names(wgcna) && use.tree>0) {
+ dendro <- wgcna$layers[[use.tree]]$net$dendrograms
+ }
+
+ if(length(dendro)>1) {
message("warning: this wgcna has multiple blocks")
}
+ geneTree <- dendro[[block]]
+
+ colors <- cbind(wgcna.labels2colors(net$colors))
+ if(NCOL(colors)==1) colnames(colors)[1] <- "Module colors"
- if (is.null(block) && "merged_dendro" %in% names(wgcna$net)) {
- geneTree <- wgcna$net$merged_dendro
- } else {
- if (is.null(block)) block <- 1
- geneTree <- wgcna$net$dendrograms[[block]]
- }
- colors <- wgcna.labels2colors(wgcna$net$colors)
gg <- geneTree$labels
- if (is.null(gg) && !is.null(block)) {
- ii <- which(wgcna$net$blocks == block & wgcna$net$goodGenes == TRUE)
- gg <- names(wgcna$net$color)[ii]
+ if(is.null(gg) && !is.null(block)) {
+ ii <- which(net$blocks == block & net$goodGenes==TRUE)
+ gg <- names(net$color)[ii]
}
- if (is.null(gg) && is.null(block)) {
- ii <- which(wgcna$net$goodGenes == TRUE)
- gg <- names(wgcna$net$color)[ii]
+ if(is.null(gg) && is.null(block)) {
+ ii <- which(net$goodGenes==TRUE)
+ gg <- names(net$color)[ii]
}
- colors <- colors[gg]
- groupLabels <- "Module colors"
- if (!is.null(extra.colors)) {
+ colors <- colors[gg,,drop=FALSE]
+ if(!is.null(extra.colors)) {
jj <- match(gg, rownames(extra.colors))
- colors <- cbind(colors, extra.colors[jj, ])
- groupLabels <- c("Module colors", colnames(extra.colors))
+ colors <- cbind(colors, 0, extra.colors[jj,])
+ }
+
+ is.multi <- is.list(wgcna$datExpr)
+ is.multi
+ if(!is.multi) {
+ if(show.kme) {
+ X <- wgcna$datExpr
+ Y <- net$MEs
+ kme <- cor(X, Y, use="pairwise")
+ kmeColors <- rho2bluered(kme)
+ kmeColors <- kmeColors[gg,]
+ colors <- cbind(colors, 0, kmeColors)
+ }
+ if(show.traits) {
+ X <- wgcna$datExpr
+ Y <- wgcna$datTraits
+ kme <- cor(X, Y, use="pairwise")
+ kmeColors <- rho2bluered(kme)
+ kmeColors <- kmeColors[gg,,drop=FALSE]
+ colors <- cbind(colors, 0, kmeColors)
+ }
+ }
+
+ if(is.multi) {
+ if(show.kme) {
+ X <- wgcna$datExpr
+ Y <- wgcna$net$multiMEs
+ for(i in 1:length(X)) {
+ kme <- cor(X[[i]], Y[[i]]$data, use="pairwise")
+ kmeColors <- rho2bluered(kme)
+ kmeColors <- kmeColors[gg,,drop=FALSE]
+ colnames(kmeColors) <- paste0(names(X)[i],":",colnames(kmeColors))
+ colors <- cbind(colors, 0, kmeColors)
+ }
+ }
+ if(show.traits) {
+ X <- wgcna$datExpr
+ Y <- wgcna$datTraits
+ for(i in 1:length(X)) {
+ Y1 <- Y[rownames(X[[i]]),]
+ kme <- cor(X[[i]], Y1, use="pairwise")
+ kmeColors <- rho2bluered(kme)
+ kmeColors <- kmeColors[gg,]
+ colnames(kmeColors) <- paste0(names(X)[i],":",colnames(kmeColors))
+ colors <- cbind(colors, 0, kmeColors)
+ }
+ }
}
- if (is.null(main)) main <- "Gene dendrogram and module colors"
+ if(rm.na) {
+ all.eq <- rowMeans(t(colors) == colors[1,]) == 1
+ sel <- colMeans(is.na(colors)) < 1 & !all.eq
+ sel <- sel | colnames(colors) %in% c("",NA)
+ colors <- colors[, sel, drop=FALSE]
+ }
+
+ if(is.null(main)) main <- "Gene dendrogram and module colors"
## Plot the dendrogram and the module colors underneath
WGCNA::plotDendroAndColors(
dendro = geneTree,
colors = colors,
- groupLabels = groupLabels,
+ groupLabels = colnames(colors),
dendroLabels = FALSE,
hang = 0.03,
- addGuide = FALSE, guideHang = 0.05,
- marAll = c(0.2, 5, 1, 0.2),
- main = main
+ addGuide = FALSE,
+ guideHang = 0.05,
+ marAll = marAll,
+ setLayout = setLayout,
+ main = main,
+ ...
)
}
-
+#'
+#'
#' @export
-wgcna.labels2colors <- function(colors, ...) {
- if (all(is.numeric(colors))) {
- colors <- WGCNA::labels2colors(colors, ...)
- return(colors)
+wgcna.plotDendroAndTraitCorrelation <- function(wgcna,
+ traits = NULL,
+ show.traits = TRUE,
+ show.kme = FALSE,
+ main=NULL,
+ block=NULL,
+ rm.na = TRUE,
+ use.tree = 0,
+ marAll = c(0.2, 8, 2, 0.2),
+ setLayout=TRUE,
+ ... )
+{
+
+ ## if consensus output do this
+ is.cons <- ("class" %in% names(wgcna) && wgcna$class == "cons")
+ is.cons2 <- (all(c("layers","zlist") %in% names(wgcna)))
+ if(is.cons || is.cons2) {
+ message("object is consensus result")
+ wgcna.plotDendroAndTraitCorrelation_cons(
+ cons = wgcna,
+ traits = traits,
+ main = main,
+ rm.na = rm.na,
+ show.traits = show.traits,
+ marAll = marAll,
+ use.tree = use.tree,
+ setLayout = setLayout,
+ ...
+ )
+ return()
}
- stdColors <- c("grey", WGCNA::standardColors())
- if (all(colors %in% stdColors)) {
- return(colors)
+
+ moduleColors <- cbind(wgcna$net$colors)
+ if(NCOL(moduleColors)==1) colnames(moduleColors) <- "Module"
+ colors <- moduleColors
+
+ if(show.traits) {
+ X <- wgcna$datExpr
+ Y <- wgcna$datTraits
+ traitSig <- cor(X, Y, use="pairwise")
+ if(rm.na) {
+ sel <- colMeans(is.na(traitSig)) < 1
+ traitSig <- traitSig[, sel, drop=FALSE]
+ }
+ traitColors <- rho2bluered(traitSig)
+ colors <- cbind(moduleColors, 0, traitColors)
}
- icolors <- as.integer(factor(as.character(colors)))
- colors <- WGCNA::standardColors()[icolors]
- return(colors)
-}
+
+ geneTree <- wgcna$net$dendrograms[[1]]
+ geneTree$labels <- names(wgcna$net$colors)
+ colors <- colors[geneTree$labels, ]
+
+ if(is.null(main)) main = "Gene Dendrogram, Modules and Trait Correlation"
+
+ WGCNA::plotDendroAndColors(
+ geneTree,
+ colors = colors,
+ colnames(colors),
+ dendroLabels = FALSE,
+ hang = 0.03,
+ addGuide = TRUE,
+ guideHang = 0.05,
+ marAll = marAll,
+ main = main,
+ setLayout = setLayout,
+ ...
+ )
+}
-#' @export
-wgcna.plotModuleTraitHeatmap <- function(wgcna, setpar = TRUE, cluster = FALSE,
- main = NULL, justdata = FALSE,
- pstar = TRUE) {
- ## Define numbers of genes and samples
- nGenes <- ncol(wgcna$datExpr)
- nSamples <- nrow(wgcna$datExpr)
- if ("stats" %in% names(wgcna)) {
- moduleTraitCor <- wgcna$stats$moduleTraitCor
- moduleTraitPvalue <- wgcna$stats$moduleTraitPvalue
- } else {
- moduleTraitCor <- cor(wgcna$net$MEs, wgcna$datTraits, use = "pairwise.complete")
- moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
+#' wgcna.plotDendroAndTraits for Consensus output
+#'
+#'
+wgcna.plotDendroAndTraitCorrelation_cons <- function(cons,
+ show.traits = TRUE,
+ traits = NULL,
+ main = NULL,
+ rm.na = TRUE,
+ use.tree = 0,
+ marAll = c(0.2, 8, 2, 0.2),
+ setLayout=TRUE,
+ ... )
+{
+
+ if(0) {
+ show.traits = TRUE
+ traits = NULL
+ main = NULL
+ rm.na = TRUE
+ use.tree = 0
+ marAll = c(0.2, 8, 2, 0.2)
+ setLayout=TRUE
}
- if (cluster) {
- ii <- hclust(dist(moduleTraitCor))$order
- jj <- hclust(dist(t(moduleTraitCor)))$order
+ ## quick hack to use wgcna.plotDendroAndTraitCorrelation_multi()
+ multi <- c( list(Consensus = cons), cons$layers )
+ use.tree0 <- use.tree
+ if(use.tree %in% 0:99) use.tree <- as.integer(use.tree)
+ if(is.character(use.tree)) {
+ use.tree <- match(use.tree, names(multi))
} else {
- ii <- rev(order(rownames(moduleTraitCor)))
- jj <- order(colnames(moduleTraitCor))
+ use.tree <- as.integer(use.tree) + 1
+ }
+ if(is.na(use.tree)) {
+ message("ERROR: invalid class(use.tree) = ", class(use.tree0))
+ message("ERROR: invalid use.tree = ", use.tree0)
+ return(NULL)
}
- moduleTraitCor <- moduleTraitCor[ii, jj]
- moduleTraitPvalue <- moduleTraitPvalue[ii, jj]
- if (justdata) {
- return(moduleTraitCor)
+ wgcna.plotDendroAndTraitCorrelation_multi(
+ multi,
+ show.traits = show.traits,
+ traits = traits,
+ main = main,
+ rm.na = rm.na,
+ use.tree = use.tree,
+ marAll = marAll,
+ setLayout = setLayout,
+ ...
+ )
+
+}
+
+
+#' @export
+wgcna.plotDendroAndTraitCorrelation_multi <- function(multi,
+ show.traits = TRUE,
+ traits = NULL,
+ main = NULL,
+ rm.na = TRUE,
+ use.tree = 1,
+ marAll = c(0.2, 8, 2, 0.2),
+ setLayout=TRUE,
+ ... )
+{
+
+ ## module colors
+ colors <- sapply( multi, function(m) m$net$colors )
+
+ if(show.traits) {
+
+ traitSig <- list()
+ nsets <- length(multi)
+ i=1
+ for(k in names(multi)) {
+ if( k == "Consensus") next
+ w <- multi[[k]]
+ Y <- w$datTraits
+ sel <- colnames(Y)
+ if(!is.null(traits)) sel <- intersect(sel, traits)
+ X <- w$datExpr
+ kk <- intersect(rownames(X), rownames(Y))
+ traitSig[[k]] <- cor(X[kk,], Y[kk,sel], use="pairwise")
+ }
+
+ if(rm.na) {
+ for(i in 1:length(traitSig)) {
+ sel <- colMeans(is.na(traitSig[[i]])) < 1
+ traitSig[[i]] <- traitSig[[i]][, sel, drop=FALSE]
+ }
+ }
+
+ ## prepend datatype/set name
+ for(k in names(traitSig)) {
+ colnames(traitSig[[k]]) <- paste0(k,":",colnames(traitSig[[k]]))
+ }
+
+ traitSig2 <- c()
+ for(i in 1:length(traitSig)) {
+ traitSig2 <- cbind(traitSig2, traitSig[[i]])
+ if(i < length(traitSig)) traitSig2 <- cbind(traitSig2, 0)
+ }
+
+ traitColors <- rho2bluered(traitSig2, f=0.95)
+ ii <- which(colnames(traitColors)=='')
+ if(length(ii)) traitColors[,ii] <- "#FFFFFF"
+ if(is.null(colors)) {
+ colors <- traitColors
+ } else {
+ colors <- cbind( colors, 0, traitColors)
+ }
}
- ## Will display correlations and their p-values
- if (pstar) {
- textPv <- cut(moduleTraitPvalue,
- breaks = c(-1, 0.001, 0.01, 0.05, 99),
- labels = c("★★★", "★★", "★", "")
- )
+ message("using tree of layer: ", names(multi)[use.tree] )
+ geneTree <- multi[[use.tree]]$net$dendrograms[[1]]
+
+ if(is.null(main)) main = "Gene Dendrogram, Modules and Trait Correlation"
+
+ WGCNA::plotDendroAndColors(
+ geneTree,
+ colors = colors,
+ colnames(colors),
+ dendroLabels = FALSE,
+ hang = 0.03,
+ addGuide = TRUE,
+ guideHang = 0.05,
+ marAll = marAll,
+ main = main,
+ ...
+ )
+
+}
+
+#' @export
+purpleGreyYellow <- function(n) {
+ colorRampPalette(c("purple","grey65","yellow"))(n)
+}
+
+#' Converts correlation values [-1;1] to blue-white-red colors. Good
+#' for creating color labels for labeledHeatmaps that expect colors.
+#' NOTE: use WGCNA::numbers2colors???
+#'
+rho2bluered <- function(R, a=1, f=0.95) {
+ BLUERED <- WGCNA::blueWhiteRed(100)
+ if(a!=1) R <- sign(R) * abs(R)**a
+ if(is.null(ncol(R))) {
+ col <- BLUERED[1+round(99*(1+R)/2)]
} else {
- textPv <- paste0("(", signif(moduleTraitPvalue, 1), ")")
+ col <- apply(R, 2, function(x) BLUERED[1+round(99*(1+x)/2)])
+ dimnames(col) <- dimnames(R)
+ }
+ if(f < 1) {
+ col <- apply(col, 2, adjustcolor, red.f=f, green.f=f, blue.f=f)
+ }
+ dimnames(col) <- dimnames(R)
+ col
+}
+
+
+#' Converts WGCNA labels (numeric or color) to colors.
+#'
+wgcna.labels2colors <- function(colors, ...) {
+ if (all(is.numeric(colors))) {
+ colors <- WGCNA::labels2colors(colors, ...)
+ return(colors)
+ }
+ stdColors <- c("grey", WGCNA::standardColors())
+ if (all(colors %in% stdColors)) {
+ return(colors)
}
-
- textMatrix <- paste0(signif(moduleTraitCor, 2), "\n", textPv)
- dim(textMatrix) <- dim(moduleTraitCor)
- if (setpar) par(mar = c(8, 8, 3, 3))
- if (is.null(main)) main <- "Module-trait relationships"
- ## Display the correlation values within a heatmap plot
- WGCNA::labeledHeatmap(
- Matrix = moduleTraitCor,
- xLabels = colnames(moduleTraitCor),
- yLabels = rownames(moduleTraitCor),
- ySymbols = rownames(moduleTraitCor),
- colorLabels = FALSE,
- colors = WGCNA::blueWhiteRed(50),
- textMatrix = textMatrix,
- setStdMargins = FALSE,
- cex.text = 0.7,
- zlim = c(-1, 1),
- main = main
- )
+ icolors <- as.integer(factor(as.character(colors)))
+ colors <- WGCNA::standardColors()[icolors]
+ return(colors)
}
+
+
#' Plot membership correlation vs gene signficance (correlation with
#' trait) to discover biomarkers/driver genes.
#'
@@ -1094,10 +3486,10 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE,
## Gene-trait significance (trait correlation) (with p-values)
if ("stats" %in% names(wgcna)) {
traitSignificance <- wgcna$stats$traitSignificance
- GSPvalue <- wgcna$stats$GSPvalue
+ TSPvalue <- wgcna$stats$TSPvalue
} else {
traitSignificance <- as.data.frame(cor(wgcna$datExpr, wgcna$datTraits, use = "p"))
- GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(traitSignificance), nSamples))
+ TSPvalue <- as.data.frame(corPvalueStudent(as.matrix(traitSignificance), nSamples))
}
x <- (moduleMembership[moduleGenes, module])
@@ -1108,7 +3500,7 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE,
}
##
px <- MMPvalue[moduleGenes, module]
- py <- GSPvalue[moduleGenes, trait]
+ py <- TSPvalue[moduleGenes, trait]
qx <- p.adjust(px, method = "fdr")
qy <- p.adjust(py, method = "fdr")
is.sig <- (qx < 0.05 & qy < 0.05)
@@ -1135,7 +3527,7 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE,
title = paste("Module membership vs. gene significance\n"),
cex.title = 0.9,
girafe = FALSE
- )
+ )
} else if (plotlib == "girafe") {
pgx.scatterPlotXY.GGPLOT(
pos,
@@ -1163,98 +3555,431 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE,
}
}
+#' @export
+wgcna.plotModuleTraitHeatmap <- function(wgcna, setpar = TRUE, cluster = FALSE,
+ multi = FALSE, main = NULL, justdata = FALSE,
+ transpose = FALSE, colorlabel = TRUE,
+ nmax = -1, tmax = -1,
+ text = TRUE, pstar = TRUE) {
+
+ if(!multi) wgcna <- list(wgcna)
+ nSamples <- nrow(wgcna[[1]]$datExpr)
+ MEs <- do.call(cbind, lapply(wgcna, function(w) as.matrix(w$net$MEs)))
+ Y <- wgcna[[1]]$datTraits
+
+ moduleTraitCor <- cor(MEs, Y, use = "pairwise.complete")
+ if(nmax > 0) {
+ sel <- head(order(-apply(abs(moduleTraitCor), 1, max, na.rm=TRUE)),nmax)
+ moduleTraitCor <- moduleTraitCor[sel,,drop=FALSE]
+ }
+ if(tmax > 0) {
+ sel <- head(order(-apply(abs(moduleTraitCor), 2, max, na.rm=TRUE)),tmax)
+ moduleTraitCor <- moduleTraitCor[,sel,drop=FALSE]
+ }
+
+ if(transpose) {
+ moduleTraitCor <- t(moduleTraitCor)
+ }
+
+ wgcna.plotLabeledCorrelationHeatmap(
+ R = moduleTraitCor,
+ nSamples = nSamples,
+ setpar = setpar,
+ cluster = cluster,
+ text = text,
+ main = main,
+ justdata = justdata,
+ colorlabel = colorlabel,
+ pstar = pstar
+ )
+}
+
#' Plot cluster dendrogram with eigengenes and traits.
#'
#' @export
-wgcna.plotEigenGeneClusterDendrogram <- function(wgcna, add_traits = TRUE,
+wgcna.plotEigenGeneClusterDendrogram <- function(wgcna = NULL,
+ ME = NULL,
+ add_traits = TRUE,
+ horiz = FALSE,
+ setMargins = TRUE,
+ method = 'wgcna',
+ showlabels = TRUE,
+ plot = TRUE,
+ multi = FALSE,
main = NULL) {
# Matrix with eigengenes and traits
- MET <- wgcna$net$MEs
- if (add_traits) {
- MET <- cbind(MET, wgcna$datTraits)
- }
- MET <- WGCNA::orderMEs(MET)
- if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!!
- if (is.null(main)) main <- "Eigengene cluster dendrogram"
- WGCNA::plotEigengeneNetworks(
- MET, main,
- marDendro = c(0, 4, 2, 0),
- plotHeatmaps = FALSE
- )
+ if(is.null(wgcna) && is.null(ME)) {
+ stop("ERROR: wgcna or ME must be given")
+ }
+
+ if(is.null(ME)) {
+
+ if(multi) {
+ ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs))
+ ME <- do.call(cbind, ME)
+ Y <- wgcna[[1]]$datTraits
+ } else {
+ ME <- wgcna$net$MEs
+ Y <- wgcna$datTraits
+ }
+
+ if (length(add_traits)==1 && is.logical(add_traits) && add_traits==TRUE) {
+ ME <- cbind(ME, Y)
+ } else if (length(add_traits) > 0 && !is.logical(add_traits)) {
+ sel <- intersect(add_traits, colnames(Y))
+ if(length(sel)) ME <- cbind(ME, Y[,sel])
+ }
+ }
+
+ impME <- svdImpute2(as.matrix(ME))
+ ME <- WGCNA::orderMEs(impME)
+
+ if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!!
+ if (is.null(main)) main <- "Eigengene Dendrogram"
+
+ hc <- NULL
+ if(method == 'wgcna') {
+ ## plot dendrogram with WGCNA function
+ WGCNA::plotEigengeneNetworks(
+ ME, main,
+ setMargins = setMargins,
+ marDendro = c(0, 4, 2, 0),
+ plotHeatmaps = FALSE
+ )
+ } else {
+ ## plot dendrogram with hclust function
+ if(setMargins && horiz) par(mar=c(4,4,4,8))
+ if(setMargins && !horiz) par(mar=c(8,4,4,1))
+ hc <- hclust( as.dist(1 - cor(ME)), method="average")
+ if(plot) {
+ save.labels <- hc$labels
+ if(!showlabels) hc$labels <- rep("",ncol(ME))
+ plot( as.dendrogram(hc), horiz = horiz, main = main)
+ hc$labels <- save.labels
+ }
+ }
+ invisible(hc)
}
+
#' Plot the adjacency correlation heatmap matrix of eigengenes with or
#' without traits. This can show how traits cluster together with the
#' eigengenes.
#'
#' @export
-wgcna.plotEigenGeneAdjacencyHeatmap <- function(wgcna, add_traits = TRUE,
+wgcna.plotEigenGeneAdjacencyHeatmap <- function(wgcna,
+ add_traits = TRUE,
+ traits = NULL,
+ add_me = TRUE,
marx = 1, main = NULL,
+ multi = FALSE,
+ phenotype = NULL,
+ colorlabel = TRUE,
+ text = FALSE,
+ pstar = TRUE,
+ setMargins = TRUE,
+ mar1 = c(5.6, 4.5, 1.8, 0),
+ mar2 = c(8, 10, 4, 2),
+ cex.lab = 0.8,
+ cex.text = 0.7,
+ plotDendro = TRUE,
+ plotHeatmap = TRUE,
+ dendro.horiz = TRUE,
+ dendro.width = 0.3,
+ dendro.labels = TRUE,
+ nmax = -1,
+ fixclust = FALSE,
+ mask.intra = FALSE,
justdata = FALSE) {
+
+ if(0) {
+ add_traits = TRUE;
+ traits = NULL;
+ marx = 1; main = NULL;
+ multi = FALSE;
+ phenotype = NULL;
+ colorlabel = TRUE;
+ text = FALSE;
+ pstar = TRUE;
+ setMargins = TRUE;
+ mar1 = c(5.5, 5, 1.6, 1);
+ mar2 = c(8, 10, 4, 2);
+ cex.lab = 0.8;
+ cex.text = 0.7;
+ plotDendro = TRUE;
+ plotHeatmap = TRUE;
+ dendro.horiz = TRUE;
+ dendro.width = 0.3;
+ dendro.labels = TRUE;
+ nmax = -1;
+ fixclust = FALSE;
+ mask.intra = FALSE;
+ justdata = FALSE
+ add_me = TRUE
+ }
+
+
+ if(!multi) wgcna <- list(gx=wgcna)
+
# Matrix with eigengenes and traits
- MET <- wgcna$net$MEs
+ ME <- c()
+ if(add_me) {
+ ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs))
+ ME <- do.call(cbind, ME)
+ }
+ Y <- wgcna[[1]]$datTraits
+
if (add_traits) {
- MET <- cbind(MET, wgcna$datTraits)
+ sel <- colnames(Y)
+ if(!is.null(traits)) {
+ sel <- intersect(traits, sel)
+ }
+ ME <- cbind(ME, Y[,sel,drop=FALSE])
+ }
+
+ if (!add_traits && !is.null(phenotype)) {
+ ME <- cbind(ME, Y[,phenotype,drop=FALSE])
+ }
+
+ if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!!
+
+ ## eigengene correlation
+ impME <- svdImpute2(as.matrix(ME))
+ R <- cor(impME, use="pairwise")
+ R0 <- R
+
+ ## If phenotype is given we condition the heatmap using the
+ ## correlation to the phenotype.
+ if(!is.null(phenotype)) {
+ ## proper sign in case of inhibitor layer (like miRNA)
+ layersign <- rep(1, length(wgcna))
+ names(layersign) <- names(wgcna)
+ layersign[grep("^mi",names(wgcna),ignore.case=TRUE)] <- -1
+ ff <- list()
+ for(k in names(wgcna)) {
+ rho <- cor(ME, Y[,phenotype], use="pairwise")[,1]
+ ##rho <- wgcna[[k]]$modTraits[,phenotype]
+ ff[[k]] <- layersign[k] * rho
+ }
+ names(ff) <- NULL
+ ff <- unlist(ff)
+ ff <- 0.5 * (1 + ff) ## signed...
+ ff <- ff[match(rownames(R),names(ff))]
+ names(ff) <- rownames(R)
+ ff[is.na(ff)] <- 1 ## really??? NEED RETHINK
+ ww <- outer(ff, ff)
+ ##ww[is.na(ww)] <- 0
+ ww <- ww / max(ww,na.rm=TRUE)
+ R <- R * ww
}
- colnames(MET) <- paste0(" ", colnames(MET))
- MET <- WGCNA::orderMEs(MET)
- if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!!
-
+
+ if(nmax>0) {
+ if(!is.null(phenotype)) {
+ rho <- cor(ME, Y[,phenotype], use="pairwise")[,1]
+ ii <- head(order(-abs(rho)), nmax)
+ } else {
+ ii <- head( order(-Matrix::rowMeans(R**2)), nmax)
+ }
+ R <- R[ii,ii]
+ }
+
if (justdata) {
- R <- (1 + cor(MET)) / 2
return(R)
}
-
+
# Plot the correlation heatmap matrix (note: this plot will overwrite
# the dendrogram plot)
- if (is.null(main)) main <- "Eigengene adjacency heatmap"
- WGCNA::plotEigengeneNetworks(
- MET, main,
- marHeatmap = c(8 * marx, 10 * marx, 2, 2),
- plotDendrograms = FALSE,
- colorLabels = TRUE,
- xLabelsAngle = 45
- )
+ if (is.null(main)) main <- "Eigengene Adjacency Heatmap"
+
+ if(plotDendro && plotHeatmap) {
+ layout.matrix <- matrix( 1:2, nrow = 1, ncol = 2)
+ layout(mat = layout.matrix, heights = 1, widths = c(dendro.width, 1))
+ if(dendro.horiz && dendro.labels) {
+ mar1[4] <- mar2[2] ## copy left margin
+ }
+ }
+ if(plotDendro) par(mar=mar1)
+
+ #fixclust=FALSE
+ if(fixclust) {
+ ii <- rownames(R)
+ hc <- hclust(as.dist(1 - R0[ii,ii]), method="average")
+ } else {
+ hc <- hclust(as.dist(1 - R), method="average")
+ }
+ if(plotDendro) {
+ par(cex=cex.lab)
+ plot( as.dendrogram(hc), horiz = TRUE,
+ ylab="Eigengene dendrogram")
+ par(cex=1)
+ }
+
+ if(plotHeatmap) {
+ ii <- hc$labels[hc$order]
+ ii <- intersect(ii, rownames(R))
+ R1 <- R[rev(ii), ii]
+ nsamples <- nrow(Y)
+ par(mar=mar2)
+ wgcna.plotLabeledCorrelationHeatmap(
+ R1,
+ nSamples = nsamples,
+ text = text,
+ pstar = pstar,
+ colorlabel = colorlabel,
+ cluster = FALSE,
+ setpar = FALSE,
+ main = main,
+ cex.lab = cex.lab,
+ cex.text = cex.text
+ )
+ }
}
#' @export
-wgcna.plotEigenGeneGraph <- function(wgcna, add_traits = TRUE, main = NULL) {
- ## require(igraph)
- net <- wgcna$net
- MET <- net$MEs
+wgcna.plotMultiEigengeneCorrelation <- function(wgcna, addtraits = TRUE,
+ phenotype=NULL, nmax=-1, main = NULL,
+ showvalues = FALSE, showsig = TRUE,
+ cex.text = 0.7, cex.lab = 0.8,
+ fixcluster = TRUE, setpar = TRUE) {
+
+ ## Show inter-correlation of modules
+ me <- lapply(wgcna, function(w) w$net$MEs)
+ if(length(me) == 1) {
+ me <- list(me[[1]], me[[1]])
+ }
+
+ comb <- combn(length(me),2)
+ ncomb <- ncol(comb)
+ nsamples <- nrow(wgcna[[1]]$datExpr)
+ Y <- wgcna[[1]]$datTraits
+
+ ## for miRNA we need to flip sign
+ msign <- c(1,-1)[1 + 1*(names(wgcna) %in% c("mi","mir"))]
+
+ if(setpar) {
+ nc <- ceiling(sqrt(ncomb))
+ nr <- ceiling(ncomb / nc)
+ par(mfrow=c(nr,nc), mar=c(8,10,3,1))
+ }
- if (add_traits) {
- MET <- cbind(MET, wgcna$datTraits)
+ k=1
+ for(k in 1:ncol(comb)) {
+ i <- comb[1,k]
+ j <- comb[2,k]
+ M1 <- me[[i]]
+ M2 <- me[[j]]
+
+ if(addtraits) {
+ M1 <- cbind(M1, Y)
+ M2 <- cbind(M2, Y)
+ }
+ if(FALSE && !addtraits && !is.null(phenotype)) {
+ y <- Y[,phenotype,drop=FALSE]
+ M1 <- cbind(M1, y)
+ M2 <- cbind(M2, y)
+ }
+
+
+ R1 <- cor(M1, M2, use="pairwise.complete")
+
+ if(nmax > 0) {
+ ii <- head(order(-apply(abs(R1), 1, max)), nmax)
+ jj <- head(order(-apply(abs(R1), 2, max)), nmax)
+ R1 <- R1[ii,jj]
+ }
+
+ ## cluster unweighted matrix
+ ii <- hclust(dist(R1), method="average")$order
+ jj <- hclust(dist(t(R1)), method="average")$order
+ R1 <- R1[ii, jj]
+
+ ## This conditions the correlation on phenotype. Important.
+ do.condition <- !is.null(phenotype)
+ if(do.condition) {
+ y <- Y[,phenotype]
+ w1 <- cor( M1[,rownames(R1)], y, use="pairwise")[,1]
+ w2 <- cor( M2[,colnames(R1)], y, use="pairwise")[,1]
+ if(msign[i]!=0) w1 <- msign[i] * w1
+ if(msign[j]!=0) w2 <- msign[j] * w2
+ w1 <- pmax(w1,0)
+ w2 <- pmax(w2,0)
+ ww <- outer(w1, w2)
+ ww <- ww / max(ww, na.rm=TRUE)
+ R1 <- R1 * ww
+ }
+
+ main <- paste(names(me)[i],"vs.",names(me)[j])
+ if(do.condition) main <- paste(main, "(conditioned)")
+
+ wgcna.plotLabeledCorrelationHeatmap(
+ R1,
+ nsamples,
+ text = showvalues,
+ pstar = showsig,
+ is.dist = FALSE,
+ cluster = !fixcluster,
+ setpar = FALSE,
+ main = main,
+ cex.text = cex.text,
+ cex.lab = cex.lab
+ )
+
}
- if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!!
+
+
+}
+
- sdx <- matrixStats::colSds(as.matrix(MET), na.rm = TRUE)
- if (any(sdx == 0)) MET <- MET + runif(length(MET), 0, 1e-5)
+#' @export
+wgcna.plotEigenGeneGraph <- function(wgcna, add_traits = TRUE, main = NULL,
+ multi=FALSE, vcex=1, labcex=1) {
+
+ ## require(igraph)
+ if(multi) {
+ ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs))
+ ME <- do.call(cbind, ME)
+ if (add_traits) ME <- cbind(ME, wgcna[[1]]$datTraits)
+ } else {
+ ME <- wgcna$net$MEs
+ if (add_traits) ME <- cbind(ME, wgcna$datTraits)
+ }
+ if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!!
+
+ sdx <- matrixStats::colSds(as.matrix(ME*1), na.rm = TRUE)
+ if (any(sdx == 0)) ME <- ME + runif(length(ME), 0, 1e-5)
+
## Recalculate MEs with color as labels
- clust <- hclust(dist(t(scale(MET))))
- clust
+ clust <- hclust(dist(t(scale(ME))))
phylo <- ape::as.phylo(clust)
gr <- igraph::as.igraph(phylo, directed = FALSE)
is.node <- grepl("Node", igraph::V(gr)$name)
module.name <- igraph::V(gr)$name
- module.size <- table(wgcna$net$labels)
+ if(multi) {
+ module.size <- lapply(wgcna, function(w) table(w$net$labels))
+ names(module.size) <- NULL
+ module.size <- unlist(module.size)
+ module.colors <- sapply(wgcna, function(w) w$me.colors)
+ names(module.colors) <- NULL
+ module.colors <- unlist(module.colors)
+ } else {
+ module.size <- table(wgcna$net$labels)
+ module.colors <- wgcna$me.colors
+ }
module.size <- module.size / mean(module.size)
- module.size
-
igraph::V(gr)$label <- igraph::V(gr)$name
igraph::V(gr)$label[is.node] <- NA
- igraph::V(gr)$color <- wgcna$me.colors[module.name]
- igraph::V(gr)$size <- 20 * (module.size[module.name])**0.4
+ igraph::V(gr)$color <- module.colors[module.name]
+ igraph::V(gr)$size <- vcex * 18 * (module.size[module.name])**0.4
igraph::V(gr)$size[is.na(igraph::V(gr)$size)] <- 0
## par(mfrow = c(1, 1), mar = c(1, 1, 1, 1) * 0)
igraph::plot.igraph(
gr,
layout = igraph::layout.kamada.kawai,
- vertex.label.cex = 0.8,
+ vertex.label.cex = 0.85 * labcex,
edge.width = 3
)
if (!is.null(main)) title(main, line = -1.5)
@@ -1295,24 +4020,32 @@ wgcna.plotFeatureUMAP <- function(wgcna, nhub = 3, method = "clust",
if (is.null(main)) main <- "Feature UMAP colored by module"
- ## get top hub genes
- mm <- wgcna$stats$moduleMembership
- hubgenes <- apply(mm, 2, function(x) head(names(sort(-x)), 3), simplify = FALSE)
- hubgenes
- sel <- which(names(hubgenes) != "MEgrey")
- hubgenes <- unlist(hubgenes[sel])
+ hubgenes <- NULL
+ if(nhub > 0) {
+ ## get top hub genes
+ mm <- wgcna$stats$moduleMembership
+ hubgenes <- apply(mm, 2, function(x) head(names(sort(-x)), nhub), simplify = FALSE)
+ sel <- which(names(hubgenes) != "MEgrey")
+ hubgenes <- unlist(hubgenes[sel])
+ }
+
col1 <- wgcna$net$colors
genes1 <- names(which(col1 != "grey"))
pgx.scatterPlotXY(
pos,
- var = col1, col = sort(unique(col1)),
- hilight = genes1, hilight2 = hubgenes,
- cex.lab = 1.2, label.clusters = FALSE,
+ var = col1,
+ col = sort(unique(col1)),
+ hilight = genes1,
+ hilight2 = hubgenes,
+ cex.lab = 1.2,
+ label.clusters = FALSE,
title = main,
plotlib = plotlib
)
+
}
+
#' Plot module significance.
#'
#' @export
@@ -1335,38 +4068,86 @@ wgcna.plotModuleSignificance <- function(wgcna, trait, main = NULL, abs = FALSE)
)
}
+
+
+#'
+#'
+#' @export
+wgcna.plotConsensusSampleDendroAndColors <- function(cons, i,
+ what = c("both","me","traits")[1],
+ clust.expr = TRUE,
+ setLayout = TRUE,
+ marAll = c(0.2, 7, 1.5, 0.5),
+ colorHeightMax = 0.6,
+ main = NULL)
+{
+
+ wgcna.plotSampleDendroAndColors(
+ wgcna = cons$layers[[i]],
+ main = main,
+ datExpr = cons$datExpr[[i]],
+ datTraits = cons$datTraits,
+ datME = cons$net$multiME[[i]]$data,
+ what = what,
+ marAll = marAll,
+ clust.expr = clust.expr,
+ setLayout = setLayout,
+ colorHeightMax = colorHeightMax
+ )
+
+}
+
#'
#'
#' @export
-wgcna.plotSampleDendroAndColors <- function(wgcna,
+wgcna.plotSampleDendroAndColors <- function(wgcna, input.type="wgcna",
what = c("me", "traits", "both")[3],
- clust.expr = TRUE,
+ datTraits = NULL, datExpr = NULL, datME = NULL,
+ clust.expr = TRUE, setLayout = TRUE,
+ marAll = c(0.2, 7, 1.5, 0.5),
+ colorHeightMax = 0.6,
main = NULL, justdata = FALSE) {
- MET0 <- wgcna$net$MEs
- MET <- MET0[, 0]
+
+ if(input.type == 'net') {
+ ME0 <- wgcna$MEs
+ if(is.null(datExpr)) stop("must supply datExpr")
+ if(is.null(datTraits)) stop("must supply datTraits")
+ } else {
+ ME0 <- wgcna$net$MEs
+ datTraits <- 1*wgcna$datTraits
+ datExpr <- wgcna$datExpr
+ }
+
+ if(!is.null(datME)) {
+ ME0 <- datME
+ }
+
+ ME <- ME0[, 0]
+ samples <- rownames(ME)
if (any(what %in% c("me", "both"))) {
- MET <- cbind(MET, MET0)
+ ME <- cbind(ME, ME0)
}
+
if (any(what %in% c("traits", "both"))) {
- MET <- cbind(MET, wgcna$datTraits)
+ ME <- cbind(ME, datTraits[samples,,drop=FALSE])
}
- if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!!
-
- sdx <- matrixStats::colSds(as.matrix(MET), na.rm = TRUE)
- if (any(sdx == 0)) MET <- MET + runif(length(MET), 0, 1e-5)
-
+
+ if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!!
+ sdx <- matrixStats::colSds(as.matrix(ME*1), na.rm = TRUE)
+ ME <- ME[, which(sdx>0), drop=FALSE]
+
## Recalculate MEs with color as labels
if (clust.expr) {
- sampleTree <- hclust(dist(wgcna$datExpr), method = "average")
+ sampleTree <- hclust(as.dist(1 - cor(t(datExpr))), method = "average")
} else {
- sampleTree <- hclust(dist(scale(MET0)), method = "average")
+ sampleTree <- hclust(as.dist(1 - cor(t(ME0))), method = "average")
}
ii <- sampleTree$order
- jj <- hclust(dist(t(scale(MET))))$order
- colors <- WGCNA::numbers2colors(MET[, jj])
+ jj <- hclust(dist(t(scale(ME))))$order
+ colors <- WGCNA::numbers2colors(ME[, jj])
if (justdata) {
- return(MET)
+ return(ME)
}
if (is.null(main)) {
@@ -1376,38 +4157,50 @@ wgcna.plotSampleDendroAndColors <- function(wgcna,
}
## Plot the dendrogram and the module colors underneath
- plotDendroAndColors(
+ WGCNA::plotDendroAndColors(
dendro = sampleTree,
colors = colors,
- groupLabels = colnames(MET)[jj],
- dendroLabels = rownames(MET),
+ groupLabels = colnames(ME)[jj],
+ dendroLabels = rownames(ME),
hang = 0.03,
addGuide = FALSE,
guideHang = 0.05,
- marAll = c(0.2, 7, 1.5, 0.5),
- main = main
+ setLayout = setLayout,
+ marAll = marAll,
+ main = main,
+ colorHeightMax = colorHeightMax
)
}
#' @export
-wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples, setpar = TRUE,
- cluster = TRUE,
+wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples,
+ cluster = TRUE, text = TRUE,
main = NULL, justdata = FALSE,
- pstar = TRUE) {
+ colorlabel = TRUE, pstar = TRUE,
+ zlim = NULL, colorpal = NULL,
+ cex.text = 0.7, cex.lab = NULL,
+ setpar = TRUE, is.dist = FALSE) {
+
## Define numbers of genes and samples
- Pvalue <- corPvalueStudent(R, nSamples)
- if (cluster) {
+ if (cluster && nrow(R)>1 && ncol(R)>1) {
R0 <- R
R0[is.na(R0)] <- 0
- ii <- hclust(dist(R0))$order
- jj <- hclust(dist(t(R0)))$order
- } else {
- ii <- rev(order(rownames(R)))
- jj <- order(colnames(R))
+ is.sym <- nrow(R) == ncol(R) && all(rownames(R)==colnames(R))
+ if(is.dist) {
+ ii <- hclust(as.dist(abs(R0)))$order
+ jj <- ii
+ } else if(is.sym) {
+ ii <- hclust(dist(R0), method="average")$order
+ jj <- ii
+ } else {
+ ii <- hclust(dist(R0), method="average")$order
+ jj <- hclust(dist(t(R0)), method="average")$order
+ }
+ R <- R[ii, jj]
}
- R <- R[ii, jj]
- Pvalue <- Pvalue[ii, jj]
+ R0 <- pmax(pmin(R, 1, na.rm=TRUE), -1, na.rm=TRUE)
+ Pvalue <- corPvalueStudent(R0, nSamples)
if (justdata) {
return(R)
@@ -1417,32 +4210,65 @@ wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples, setpar = TRUE,
if (pstar) {
textPv <- cut(Pvalue,
breaks = c(-1, 0.001, 0.01, 0.05, 99),
- labels = c("★★★", "★★", "★", "")
+ #labels = c("★★★", "★★", "★", "")
+ #labels = c("***", "**", "*", "")
+ labels = c("+++", "++", "+", "")
)
} else {
textPv <- paste0("(", signif(Pvalue, 1), ")")
}
- textMatrix <- paste0(signif(R, 2), "\n", textPv)
- textMatrix[which(is.na(R))] <- ""
- dim(textMatrix) <- dim(R)
+ textMatrix <- NULL
+ if(text && pstar) textMatrix <- paste0(signif(R, 2), "\n", textPv)
+ if(text && !pstar) textMatrix <- paste0(signif(R, 2))
+ if(!text && pstar) textMatrix <- textPv
+ if(!text && !pstar) textMatrix <- NULL
+
+ if(!is.null(textMatrix)) {
+ textMatrix[which(is.na(R))] <- ""
+ dim(textMatrix) <- dim(R)
+ }
+
+ if(!colorlabel) {
+ colnames(R) <- paste0(" ", colnames(R))
+ rownames(R) <- paste0(" ", rownames(R))
+ }
+
if (setpar) par(mar = c(8, 8, 3, 3))
if (is.null(main)) main <- "Correlation heatmap"
+
+ ## set colorscale. make sure 0 is white if non-symmetric
+ col1 <- "grey90"
+ if(is.null(zlim)) {
+ zlim <- c(min(R,na.rm=TRUE), max(R,na.rm=TRUE))
+ }
+ rlim <- max(abs(zlim),na.rm=TRUE)
+ if(is.null(colorpal)) colorpal <- WGCNA::blueWhiteRed
+ if(rlim > 0) {
+ rval <- seq(-rlim, rlim, length.out=201)
+ ii <- which( rval >= zlim[1] & rval <= zlim[2] )
+ col1 <- colorpal(201)[ii]
+ }
+
## Display the correlation values within a heatmap plot
WGCNA::labeledHeatmap(
Matrix = R,
+ #xLabels = paste0(1:ncol(R),":",colnames(R)),
xLabels = colnames(R),
+ #xLabels = paste0(" ",colnames(R)),
yLabels = rownames(R),
xSymbols = colnames(R),
ySymbols = rownames(R),
- colorLabels = FALSE,
- colors = WGCNA::blueWhiteRed(50),
+ colorLabels = TRUE,
+ colors = col1,
textMatrix = textMatrix,
setStdMargins = FALSE,
- cex.text = 0.7,
- zlim = c(-1, 1),
+ cex.text = cex.text,
+ cex.lab = cex.lab,
+ zlim = zlim,
main = main
)
+
}
@@ -1475,11 +4301,10 @@ wgcna.plotModuleHubGenes <- function(wgcna, modules = NULL,
mm.score <- head(sort(mm, decreasing = TRUE), 30)
topgenes <- names(mm.score)
A <- cor(wgcna$datExpr[, topgenes])
- diag(A) <- NA
+ diag(A) <- 0
A <- (A - min(A, na.rm = TRUE)) / (max(A, na.rm = TRUE) - min(A, na.rm = TRUE))
gr <- igraph::graph_from_adjacency_matrix(
- A,
- mode = "undirected", weighted = TRUE, diag = FALSE
+ A, mode = "undirected", weighted = TRUE, diag = FALSE
)
norm.mm.score <- (mm.score - min(mm.score)) / (max(mm.score) - min(mm.score))
clr <- sub("ME", "", k)
@@ -1496,45 +4321,97 @@ wgcna.plotModuleHubGenes <- function(wgcna, modules = NULL,
}
}
-#' @title Map module colors to rainbow palette
-#'
-#' @description
-#' Maps the module colors from a WGCNA network to a rainbow palette.
-#'
-#' @param net A WGCNA network object.
-#'
-#' @details
-#' This function takes a WGCNA network object and maps the module colors
-#' to a rainbow palette based on the modules' hierarchical clustering order.
-#'
-#' It extracts the hierarchical clustering dendrogram order, gets the number
-#' of unique module colors, ranks the module color means based on the
-#' dendrogram order, and assigns rainbow colors accordingly.
-#'
-#' This allows easier visualization and interpretation of modules in the
-#' standard rainbow palette order.
-#'
-#' @return
-#' A named character vector mapping the original module colors to rainbow colors.
-#'
#' @export
-labels2rainbow <- function(net) {
- hc <- net$dendrograms[[1]]
- nc <- length(unique(net$colors))
- n <- length(net$colors)
- ii <- rep(NA, n)
- ii[net$goodGenes] <- hc$order
- col1 <- WGCNA::labels2colors(net$colors)
- col.rnk <- rank(tapply(1:n, col1[ii], mean))
- new.col <- grDevices::rainbow(nc)[col.rnk]
- names(new.col) <- names(col.rnk)
- new.col["grey"] <- "#AAAAAA"
- new.col <- new.col[col1]
- names(new.col) <- net$colors
- return(new.col)
+wgcna.plotGeneNetwork <- function(wgcna, genes, col=NULL,
+ edge.alpha = 0.3,
+ rgamma = 4,
+ edge.width = 6,
+ alpha = 0.5,
+ min.rho = 0.5,
+ setpar = TRUE) {
+
+ A <- cor(wgcna$datExpr[, genes])
+ A <- A * (abs(A) > min.rho)
+ gr <- igraph::graph_from_adjacency_matrix(
+ A, mode = "undirected", weighted = TRUE, diag = FALSE
+ )
+ vcex <- matrixStats::colVars(wgcna$datExpr[, genes])
+ vcex <- vcex / max(abs(vcex))
+ if(is.null(col)) {
+ col <- wgcna$net$color[genes]
+ }
+ col <- sub("black","grey40",col)
+ ecol <- c("darkred","darkgreen")[ 1 + 1*(igraph::E(gr)$weight > 0)]
+ table(ecol)
+ ecol <- adjustcolor(ecol, alpha.f = edge.alpha)
+ ewt <- abs(igraph::E(gr)$weight)
+ ewt <- (ewt / max(abs(ewt)))**rgamma
+ plot(gr,
+ layout = igraph::layout_in_circle,
+ edge.width = edge.width * ewt,
+ edge.color = ecol,
+ vertex.size = 5 + 20 * vcex,
+ vertex.color = adjustcolor(col, alpha.f = alpha),
+ vertex.frame.color = col
+ )
+}
+
+#' @export
+wgcna.plotModuleHeatmap <- function(wgcna,
+ module,
+ genes=NULL,
+ rgamma = 4,
+ min.rho = 0,
+ cex = 0.8,
+ nmax = -1,
+ cluster = TRUE,
+ type = c("expression","correlation")[1],
+ heatmap.mar = c(7,7),
+ main = NULL) {
+
+ if(!is.null(module) && is.null(genes) ) {
+ genes <- wgcna$me.genes[[module]]
+ }
+ if(is.null(genes) || length(genes)==0) {
+ stop("must specify genes or module")
+ }
+
+ if(nmax > 0) {
+ sdx <- matrixStats::colSds(wgcna$datExpr[,genes])
+ genes <- head( genes[order(-sdx)], nmax)
+ }
+
+ if(type == "expression") {
+ X <- t(wgcna$datExpr[,genes])
+ annot <- wgcna$datTraits
+ gx.heatmap(X, nmax=nmax,
+ ##col.annot = annot,
+ key=FALSE, keysize=0.5, mar=heatmap.mar)
+
+ }
+
+ if(type == "correlation") {
+ R <- cor(wgcna$datExpr[,genes])
+ R <- sign(R) * abs(R)**rgamma
+ if(cluster) {
+ ii <- hclust(as.dist(1-R), method="average")$order
+ R <- R[ii,ii]
+ }
+ R[abs(R) < min.rho] <- NA
+ image(R)
+ mtext( rownames(R), side=4, at=seq(0,1,1/(nrow(R)-1)),
+ las=2, adj = 0, cex=cex, line=0.5 )
+ }
+
+ if(is.null(main) && !is.null(module)) main <- module
+ if(is.null(main)) main <- "Module Heatmap"
+ if(!is.null(main) && main!="") {
+ title( main, line=2, cex.main=1.3)
+ }
}
+
#' Filter color vector by minimum KME and mergeCutHeight. Set color of
#' features with KME smaller than minKME to grey (or 0) group. Merge
#' similar modules with (module) correlation larger than
@@ -1625,3 +4502,533 @@ wgcna.filterColors <- function(X, colors, minKME = 0.3, mergeCutHeight = 0.15,
return(new.colors)
}
+
+#' Wrapper to hclust from matrix using default WGCNA parameters.
+#'
+wgcna.tomclust <- function(X, power=6) {
+ A <- WGCNA::adjacency(t(X), power=power, type = "signed")
+ TOM <- fastTOMsimilarity(A, tomtype="signed", lowrank=40)
+ hc <- hclust(as.dist(1-TOM), method="average")
+ hc
+}
+
+
+wgcna.checkDendroHeights <- function(datExpr, n=200, powers=NULL, maxpower=20) {
+ ii <- 1:ncol(datExpr)
+ if(n < ncol(datExpr)) {
+ ##ii <- sample(1:ncol(datExpr), n)
+ ii <- head(order(-matrixStats::colSds(datExpr)),n)
+ }
+ tX <- datExpr[,ii]
+ ht <- list()
+ p=9
+ p=24
+ if(is.null(powers)) {
+ powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
+ if(maxpower>20) {
+ powers <- c(powers, seq(from = 20, to = maxpower, by = 5))
+ }
+ }
+ for(i in 1:length(powers)) {
+ A <- WGCNA::adjacency(tX, power=powers[i], type = "signed")
+ TOM <- fastTOMsimilarity(A, tomtype="signed", lowrank=40)
+ hc <- hclust(as.dist(1-TOM), method="average")
+ ht[[i]] <- hc$height
+ }
+ names(ht) <- paste0("p=",powers)
+ S <- sapply(ht, quantile, probs=c(0.25,0.5,0.75))
+ iqr <- (S[3,] - S[1,])
+ optK <- powers[which.max(iqr)]
+
+ list(
+ quantiles = S,
+ IQR = iqr,
+ optK = optK
+ )
+}
+
+#'
+#'
+#' @export
+wgcna.plotPowerAnalysis <- function(datExpr, networktype = "signed",
+ cex=1, maxpower = 20, nmax = 2000,
+ plots=c("sft.modelfit", "mean.k",
+ "dendro.IQR"), main=NULL,
+ RsquaredCut = 0.85, setPar=TRUE) {
+
+ RsquaredCut <- RsquaredCut[1]
+
+ ## Choose a set of soft-thresholding powers
+ powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
+ if(maxpower>20) {
+ powers <- c(powers, seq(from = 20, to = maxpower, by = 5))
+ }
+
+ ## subsample for speed
+ if(ncol(datExpr) > nmax && nmax > 0) {
+ ii <- sample(1:ncol(datExpr),nmax)
+ datExpr <- datExpr[,ii]
+ }
+
+ ## Call the network topology analysis function
+ sft <- WGCNA::pickSoftThreshold(
+ datExpr,
+ powerVector = powers,
+ RsquaredCut = RsquaredCut,
+ networkType = networktype,
+ verbose = 0
+ )
+
+ ## This is more robust
+
+ if(setPar) {
+ np <- length(plots)
+ nc <- ceiling(sqrt(np))
+ par(mfrow = c(nc, nc), mar = c(3.3, 3.5, 1, 1), mgp = c(2, 0.9, 0))
+ par(mfrow = c(1, np), mar = c(3.8, 3.8, 1, 1), mgp = c(2.4, 0.95, 0))
+ }
+
+ ## Plot the results:
+ if("sft.modelfit" %in% plots) {
+ ## Scale-free topology fit index as a function of the soft-thresholding power
+ y <- -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2]
+ base::plot(
+ x = sft$fitIndices[, 1],
+ y = y,
+ ylim = c(min(y),1),
+ type = "n",
+ xlab = "Soft threshold (power)",
+ ylab = "SFT model fit (signed R^2)",
+ main = main
+ )
+ abline(h = 0, col = "black", lty=3)
+ text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
+ labels = powers, cex = cex, col = "red"
+ )
+ ## this line corresponds to using an R^2 cut-off of h
+ abline(h = RsquaredCut, col = "red", lty=2)
+ ##if(legend) legend("bottomright", legend=paste("opt. power =",optPower))
+ }
+
+ ## Mean connectivity as a function of the soft-thresholding power
+ if("mean.k" %in% plots) {
+ base::plot(sft$fitIndices[, "Power"], sft$fitIndices[, "mean.k."],
+ type = "n",
+ xlab = "Soft threshold (power)",
+ ylab = "Mean connectivity",
+ main = main
+ )
+ text(sft$fitIndices[,"Power"], sft$fitIndices[, "mean.k."], labels = powers,
+ cex = cex, col = "red")
+ }
+
+ ht <- NULL
+ if("dendro.IQR" %in% plots) {
+ ht <- wgcna.checkDendroHeights(datExpr, n=200, powers=powers)
+ base::plot(
+ sft$fitIndices[, 1], ht$IQR,
+ type = "n",
+ xlab = "Soft threshold (power)",
+ ylab = "Dendrogram height IQR",
+ main = main
+ )
+ text(sft$fitIndices[, 1], ht$IQR, labels = powers,
+ cex = cex, col = "red")
+ }
+}
+
+#'
+#'
+#' @export
+wgcna.plotPowerAnalysis_multi <- function(exprList,
+ cex=1, maxpower = 20,
+ nmax = 2000,
+ networktype = "signed",
+ plots=c("sft.modelfit", "mean.k",
+ "dendro.IQR"),
+ main=NULL,
+ cex.legend = 1,
+ RsquaredCut = 0.85,
+ setPar=TRUE) {
+ if(0) {
+ networktype = "signed"
+ cex=1; maxpower = 20; nmax = 2000;
+ plots=c("sft.modelfit", "mean.k","dendro.IQR");
+ main=NULL;
+ RsquaredCut = 0.85
+ }
+
+ RsquaredCut <- RsquaredCut[1]
+
+ ## Choose a set of soft-thresholding powers
+ powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
+ if(maxpower>20) {
+ powers <- c(powers, seq(from = 20, to = maxpower, by = 5))
+ }
+
+ ## process each list
+ sft <- list()
+ for(i in 1:length(exprList)) {
+ datExpr <- Matrix::t(exprList[[i]])
+
+ ## subsample for speed
+ if( nmax > 0 && nrow(exprList[[i]]) > nmax ) {
+ ii <- sample(1:ncol(datExpr),nmax)
+ datExpr <- datExpr[,ii]
+ }
+
+ ## Call the network topology analysis function
+ k <- names(exprList)[i]
+ sft[[k]] <- WGCNA::pickSoftThreshold(
+ datExpr,
+ powerVector = powers,
+ RsquaredCut = RsquaredCut,
+ networkType = networktype,
+ verbose = 0
+ )
+ }
+
+ if(setPar) {
+ np <- length(plots)
+ par(mfrow = c(1, np), mar = c(3.8, 4.5, 3, 1), mgp = c(2.6, 0.95, 0))
+ }
+
+ ## Plot the results:
+ if("sft.modelfit" %in% plots) {
+ ## Scale-free topology fit index as a function of the soft-thresholding power
+ Y <- c()
+ for(i in 1:length(sft)) {
+ y1 <- -sign(sft[[i]]$fitIndices[,"slope"]) * sft[[i]]$fitIndices[,"SFT.R.sq"]
+ Y <- cbind(Y, y1)
+ }
+ colnames(Y) <- names(sft)
+ x <- sft[[1]]$fitIndices[, "Power"]
+ Y <- pmax(Y,0)
+ matplot(
+ x = x,
+ y = Y,
+ ylim = c(0,1),
+ type = "l",
+ col = 2:99,
+ lty = 1,
+ lwd = 0.6,
+ xlab = "Soft threshold (power)",
+ ylab = "SFT model fit (signed R^2)",
+ main = main
+ )
+ #abline(h = 0, col = "black", lty=1)
+ for(i in 1:ncol(Y)) {
+ text(powers, Y[,i], labels = "█", cex=cex, col="white")
+ text(powers, Y[,i], labels = powers, cex=cex, col = 1+i)
+ }
+ ## this line corresponds to using an R^2 cut-off of h
+ abline(h = RsquaredCut, col = "grey10", lty=2)
+ legend("bottomright", legend=colnames(Y), fill=2:10,
+ cex=cex.legend, y.intersp=0.9)
+ title("SFT model fit")
+ }
+
+ ## Mean connectivity as a function of the soft-thresholding power
+ if("mean.k" %in% plots) {
+ Y <- sapply(sft, function(s) s$fitIndices[, "mean.k."])
+ matplot(
+ powers,
+ Y,
+ type = "l",
+ col = 2:99,
+ lty = 1,
+ lwd = 0.6,
+ xlab = "Soft threshold (power)",
+ ylab = "Mean connectivity",
+ main = main
+ )
+ for(i in 1:ncol(Y)) {
+ text(powers, Y[,i], labels = "█", cex=cex, col="white")
+ text(powers, Y[,i], labels = powers, cex=cex, col = 1+i)
+ }
+ legend("topright", legend=colnames(Y), fill=2:10,
+ cex=cex.legend, y.intersp=0.9)
+ title("Mean connectivity")
+ }
+
+ ht <- NULL
+ if("dendro.IQR" %in% plots) {
+ ht <- list()
+ for(i in 1:length(exprList)) {
+ ht[[i]] <- wgcna.checkDendroHeights(
+ Matrix::t(exprList[[i]]), n=200, powers=powers)
+ }
+ Y <- sapply(ht, function(h) h$IQR)
+ matplot(
+ powers,
+ Y,
+ type = "l",
+ col = 2:99,
+ lty = 1,
+ lwd = 0.6,
+ xlab = "Soft threshold (power)",
+ ylab = "Dendrogram height IQR",
+ main = main
+ )
+ for(i in 1:ncol(Y)) {
+ text(powers, Y[,i], labels = "█", cex=cex, col="white")
+ text(powers, Y[,i], labels = powers, cex = cex, col = 1+i)
+ }
+ legend("bottomright", legend=names(exprList), fill=2:10,
+ cex=cex.legend, y.intersp=0.9)
+ title("Dendrogram IQR")
+ }
+
+}
+
+
+
+#' Better (?) method to pick soft threshold (aka power).
+#'
+#' @export
+wgcna.pickSoftThreshold <- function(datExpr, sft=NULL, rcut=0.85,
+ method = c("sft","iqr")[1],
+ nmax = -1, powers = NULL,
+ verbose = 1) {
+
+ if(is.null(powers)) {
+ powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
+ # powers <- c(powers, seq(from = 25, to = 50, by = 5))
+ }
+
+ ## subsample for speed
+ if(ncol(datExpr) > nmax && nmax > 0) {
+ ii <- sample(1:ncol(datExpr),nmax)
+ datExpr <- datExpr[,ii]
+ }
+
+ if(is.null(sft)) {
+ sft <- WGCNA::pickSoftThreshold(
+ datExpr,
+ powerVector = powers,
+ networkType = "signed",
+ verbose = verbose
+ )
+ }
+
+ optPower <- NULL
+ if(method == "sft") {
+ ## Pick power according to scale-free (SFT) parameter
+ sqr <- -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2]
+ if(max(sqr, na.rm=TRUE) >= rcut) {
+ optPower <- min(powers[which(sqr >= rcut)])
+ } else {
+ ## remove initial value that are possible negative
+ if(sqr[1] < 0.05) {
+ for(i in 1:length(sqr)) sqr[i] <- ifelse(sqr[i] < 0.05, NA, sqr[i])
+ }
+ ds <- 0.5*median(abs(diff(sqr)),na.rm=TRUE) ##small step
+ if(any(diff(sqr) < -ds, na.rm=TRUE)) {
+ i <- min(which(diff(sqr) < -ds))+1
+ sqr[i:length(sqr)] <- NA
+ }
+ optPower <- powers[which.max(sqr)]
+ }
+ } else if(method == "iqr") {
+ ## Pick power according to IQR
+ ht <- wgcna.checkDendroHeights(datExpr, n=200, powers = powers)
+ ##base::plot( powers, ht$IQR)
+ optPower <- powers[ which.max(ht$IQR) ]
+ } else {
+ stop("[wgcna.pickSoftThreshold] invalid method = ", method)
+ }
+
+ if(verbose>0) {
+ message("[wgcna.pickSoftThreshold] sft$powerEstimate = ", sft$powerEstimate)
+ message("[wgcna.pickSoftThreshold] optPower = ", optPower)
+ }
+ optPower
+}
+
+
+#' Scale a list of TOM matrices so that the quantiles (default p=0.95)
+#' are equal after scaling with respect to the first TOM matrix.
+#'
+#'
+wgcna.scaleTOMs <- function(TOMs, scaleP=0.95) {
+ nGenes <- nrow(TOMs[[1]])
+ nSets <- length(TOMs)
+ # Sample sufficiently large number of TOM entries
+ nSamples = as.integer(1/(1-scaleP) * 1000);
+ # Choose the sampled TOM entries
+ scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples)
+ TOMScalingSamples = list();
+ # These are TOM values at reference percentile
+ scaleQuant = rep(1, nSets)
+ # Scaling powers to equalize reference TOM values
+ scalePowers = rep(1, nSets)
+ # Loop over sets
+ set=1
+ for (set in 1:nSets)
+ {
+ # Select the sampled TOM entries
+ tval = as.dist(TOMs[[set]])[scaleSample]
+ # Calculate the 95th percentile
+ scaleQuant[set] = quantile(tval, probs = scaleP, type = 8);
+ TOMScalingSamples[[set]] <- tval
+
+ # Scale the TOM
+ if (set>1)
+ {
+ scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]);
+ TOMs[[set]] = TOMs[[set]]^scalePowers[set];
+ }
+ }
+ return(TOMs)
+}
+
+#' @export
+wgcna.getTopGenesAndSets <- function(wgcna, annot=NULL, module=NULL, ntop=25) {
+ if(!"stats" %in% names(wgcna)) stop("object has no stats")
+ if(!"gsea" %in% names(wgcna)) stop("object has no enrichment results (gsea)")
+
+ if("layers" %in% names(wgcna) && class(wgcna$datExpr) == "list") {
+ cons <- wgcna.getConsensusTopGenesAndSets(wgcna, annot=annot, module=module,
+ ntop=ntop)
+ return(cons)
+ }
+
+ ## get top genes (highest kME)
+ mm <- wgcna$stats$moduleMembership
+ ##mm <- cor( wgcna$datExpr, wgcna$net$MEs )
+ if(!is.null(annot)) mm <- rename_by2(mm, annot)
+ gg <- rownames(mm)
+ mm <- as.list(data.frame(mm))
+ if(!is.null(module)) mm <- mm[module]
+ sel.topgenes <- lapply(mm, function(x) head(order(-x),2*ntop) )
+ topgenes <- lapply( sel.topgenes, function(i) gg[i])
+
+ ## top genesets
+ ee <- wgcna$gsea
+ if(!is.null(module)) ee <- ee[module]
+ topsets <- lapply(ee,function(x) head(rownames(x),ntop))
+ topmembers <- lapply(ee, function(x) {
+ names(head(sort(-table(unlist(strsplit(x$genes,split="\\|")))),2*ntop))
+ })
+
+ M <- wgcna$modTraits
+ toppheno <- apply(M, 1, function(x) names(which(x > 0.8*max(x))))
+
+ ## take intersection (high MM, high set membership)
+ topgenes <- mapply(intersect, topmembers, topgenes, SIMPLIFY=FALSE)
+ topgenes <- lapply(topgenes, head, ntop)
+
+ list( sets = topsets, genes = topgenes, pheno=toppheno )
+}
+
+#' @export
+wgcna.getConsensusTopGenesAndSets <- function(wgcna, annot=NULL, module=NULL, ntop=20) {
+ if(!"stats" %in% names(wgcna)) stop("object has no stats")
+ if(!"gsea" %in% names(wgcna)) stop("object has no enrichment results (gsea)")
+
+ ## get top genes (highest kME)
+ topgenesx <- list()
+ for(i in 1:length(wgcna$stats)) {
+ mm <- wgcna$stats[[i]]$moduleMembership
+ if(!is.null(annot)) mm <- rename_by2(mm, annot, "symbol")
+ gg <- rownames(mm)
+ mm <- as.list(data.frame(mm))
+ if(!is.null(module)) mm <- mm[module]
+ sel.topgenes <- lapply(mm, function(x) head(order(-x), 3*ntop) )
+ topgenesx[[i]] <- lapply(sel.topgenes, function(i) gg[i])
+ }
+ topgenes <- topgenesx[[1]]
+ k=2
+ for(k in 2:length(topgenesx)) {
+ topgenes <- mapply(intersect, topgenes, topgenesx[[k]], SIMPLIFY=FALSE)
+ }
+
+ ## top genesets (as symbol!)
+ ee <- wgcna$gsea
+ if(!is.null(module)) ee <- ee[module]
+ topsets <- lapply(ee,function(x) head(rownames(x),ntop))
+ topmembers <- lapply(ee, function(x) {
+ names(head(sort(-table(unlist(strsplit(x$genes,split="\\|")))),3*ntop))
+ })
+
+ ## module traits
+ M <- lapply(wgcna$net$multiMEs, function(x) as.matrix(x$data))
+ Y <- lapply(M, function(m) wgcna$datTraits[rownames(m),])
+ R <- mapply( function(x,y) abs(cor(x,y,use="pairwise")), M, Y, SIMPLIFY=FALSE)
+ R <- Reduce('+', R)
+ toppheno <- apply(R, 1, function(x) names(which(x > 0.9*max(x,na.rm=TRUE))))
+ toppheno
+
+ ## take intersection (high MM, high set membership)
+ topgenes <- mapply(intersect, topmembers, topgenes, SIMPLIFY=FALSE)
+ topgenes <- lapply(topgenes, head, ntop)
+
+ list( sets = topsets, genes = topgenes, pheno=toppheno )
+}
+
+wgcna.describeModules <- function(wgcna, ntop=25, annot=NULL, multi=FALSE,
+ experiment="", verbose=1, model="gpt-5-nano",
+ modules=NULL) {
+ if(multi) {
+ top <- wgcna.getConsensusTopGenesAndSets(wgcna, annot=annot, ntop=ntop)
+ } else {
+ top <- wgcna.getTopGenesAndSets(wgcna, annot=annot, ntop=ntop)
+ }
+
+ if(is.null(modules)) modules <- names(top$genes)
+
+ modules <- intersect(modules, names(top$genes))
+ modules <- intersect(modules, names(top$sets))
+ ##modules <- intersect(modules, names(top$pheno))
+
+ if(length(modules)==0) return(NULL)
+
+ ## If no LLM is available we do just a manual summary
+ if(is.null(model) || model == "") {
+ desc <- list()
+ for(m in modules) {
+ ss=gg=pp=NULL
+ gg <- paste( top$genes[[m]], collapse=', ')
+ ss <- paste( sub(".*:","",top$sets[[m]]), collapse='; ')
+ if(m %in% names(top$pheno)) pp <- paste( top$pheno[[m]], collapse='; ')
+
+ d <- ""
+ if(!is.null(pp)) d <- paste(d, "Correlated phenotypes: ", pp, "
")
+ d <- paste(d, "Key genes: ", gg, "
")
+ d <- paste(d, "Top enriched gene sets: ", ss, "
")
+
+ desc[[m]] <- d
+ }
+ return(desc)
+ }
+
+ prompt <- "Give a short summary of the main overall biological function of the following top enriched genesets belonging to module . Discuss the possible relationship with phenotypes of this experiment about . Use maximum one paragraph. Do not use bullet points. \n\nHere is list of enriched gene sets: \n"
+ if(verbose) cat(prompt)
+
+ desc <- list()
+ for(m in modules) {
+ ss=gg=pp=""
+ ss <- paste( top$sets[[m]], collapse=';')
+ gg <- paste( top$genes[[m]], collapse=';')
+ if(m %in% names(top$pheno)) pp <- paste( top$pheno[[m]], collapse=';')
+
+ q <- prompt
+ if(length(top$genes[[m]])>0) {
+ q <- paste(q, "\n\nAfter that, shortly discuss if any of these key genes might be involved in the biological function. No need to mention all genes, just a few. Here is list of key genes: ")
+ }
+ if(verbose) cat(q)
+
+ q <- sub("", m, q)
+ q <- sub("", pp, q)
+ q <- sub("", experiment, q)
+ q <- sub("", ss, q)
+ q <- sub("", gg, q)
+
+ answer <- ai.ask(q, model=model)
+ answer <- paste0(answer, "\n\n[AI generated using ",model,"]")
+ desc[[m]] <- answer
+ }
+
+ return(desc)
+}
+
diff --git a/R/ui-colors.R b/R/ui-colors.R
index 5e983ba2..990d65e5 100644
--- a/R/ui-colors.R
+++ b/R/ui-colors.R
@@ -285,3 +285,4 @@ color_from_middle <- function(data, color1, color2) {
max_val <- max(abs(data), na.rm = TRUE)
DT::JS(sprintf("isNaN(parseFloat(value)) || value < 0 ? 'linear-gradient(90deg, transparent, transparent ' + (50 + value/%s * 50) + '%%, %s ' + (50 + value/%s * 50) + '%%,%s 50%%,transparent 50%%)': 'linear-gradient(90deg, transparent, transparent 50%%, %s 50%%, %s ' + (50 + value/%s * 50) + '%%, transparent ' + (50 + value/%s * 50) + '%%)'", max_val, color1, max_val, color1, color2, color2, max_val, max_val))
}
+
diff --git a/R/utils-deep.R b/R/utils-deep.R
deleted file mode 100644
index c317f40b..00000000
--- a/R/utils-deep.R
+++ /dev/null
@@ -1,16 +0,0 @@
-OLLAMA_MODELS <- c("gemma3:4b", "gemma3:27b", "deepseek-r1:1.5b", "deepseek-r1:7b")
-GPT_MODELS <- c("gpt-4o-mini")
-prompt <- NULL
-deep.ask <- function(question, model = "gpt-4o-mini", prompt = NULL) {
- if (model == "gpt-4o-mini") {
- chat <- ellmer::chat_openai(model = "gpt-4o-mini", system_prompt = prompt)
- } else if (model %in% OLLAMA_MODELS) {
- chat <- ellmer::chat_ollama(model = model, system_prompt = prompt)
- } else {
- message("ERROR. unknown model", model)
- message("valid models: ", paste(c(OLLAMA_MODELS, GPT_MODELS), collapse = ", "))
- return(NULL)
- }
- . <- chat$chat(question, echo = FALSE)
- chat$last_turn()@text
-}
diff --git a/man/collapseTraitMatrix.Rd b/man/collapseTraitMatrix.Rd
new file mode 100644
index 00000000..e8e57458
--- /dev/null
+++ b/man/collapseTraitMatrix.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-functions.R
+\name{collapseTraitMatrix}
+\alias{collapseTraitMatrix}
+\title{Collapses an expanded (binarized) trait matrix to its original
+categorical phenotype with levels. Colnames must be "pheno1=A",
+"pheno1=B" etc.}
+\usage{
+collapseTraitMatrix(Y)
+}
+\description{
+Collapses an expanded (binarized) trait matrix to its original
+categorical phenotype with levels. Colnames must be "pheno1=A",
+"pheno1=B" etc.
+}
diff --git a/man/fastTOMsimilarity.Rd b/man/fastTOMsimilarity.Rd
new file mode 100644
index 00000000..1ed1cc6c
--- /dev/null
+++ b/man/fastTOMsimilarity.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{fastTOMsimilarity}
+\alias{fastTOMsimilarity}
+\title{Faster implementation of TOM computation using low-rank SVD
+approximation.}
+\usage{
+fastTOMsimilarity(A, tomtype = "signed", lowrank = 20)
+}
+\description{
+Faster implementation of TOM computation using low-rank SVD
+approximation.
+}
diff --git a/man/gset.fisher.Rd b/man/gset.fisher.Rd
index 178dc84d..61601270 100644
--- a/man/gset.fisher.Rd
+++ b/man/gset.fisher.Rd
@@ -17,6 +17,7 @@ gset.fisher(
method = "fast.fisher",
check.background = TRUE,
common.genes = TRUE,
+ no.pass = NA,
verbose = 1
)
}
diff --git a/man/labels2rainbow.Rd b/man/labels2rainbow.Rd
deleted file mode 100644
index 07943af1..00000000
--- a/man/labels2rainbow.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/pgx-wgcna.R
-\name{labels2rainbow}
-\alias{labels2rainbow}
-\title{Map module colors to rainbow palette}
-\usage{
-labels2rainbow(net)
-}
-\arguments{
-\item{net}{A WGCNA network object.}
-}
-\value{
-A named character vector mapping the original module colors to rainbow colors.
-}
-\description{
-Maps the module colors from a WGCNA network to a rainbow palette.
-}
-\details{
-This function takes a WGCNA network object and maps the module colors
-to a rainbow palette based on the modules' hierarchical clustering order.
-
-It extracts the hierarchical clustering dendrogram order, gets the number
-of unique module colors, ranks the module color means based on the
-dendrogram order, and assigns rainbow colors accordingly.
-
-This allows easier visualization and interpretation of modules in the
-standard rainbow palette order.
-}
diff --git a/man/mofa.imputeMissing.Rd b/man/mofa.imputeMissing.Rd
new file mode 100644
index 00000000..947c3898
--- /dev/null
+++ b/man/mofa.imputeMissing.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-mofa.R
+\name{mofa.imputeMissing}
+\alias{mofa.imputeMissing}
+\title{Impute missing values for a multiomics expression matrix
+X. Features must be prefixed with datatype.}
+\usage{
+mofa.imputeMissing(X, method = "SVD2")
+}
+\description{
+Impute missing values for a multiomics expression matrix
+X. Features must be prefixed with datatype.
+}
diff --git a/man/mofa.merge_data2.Rd b/man/mofa.merge_data2.Rd
new file mode 100644
index 00000000..42bdf978
--- /dev/null
+++ b/man/mofa.merge_data2.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-mofa.R
+\name{mofa.merge_data2}
+\alias{mofa.merge_data2}
+\title{This merges list of multi-omics data to a single matrix. Note that
+it can handle non-matched data by taking union of rownames or
+colnames and extending the final matrix. Be careful it can
+introduce NA in such non-matched cases.}
+\usage{
+mofa.merge_data2(xdata, prefix.rows = NULL, prefix.cols = NULL)
+}
+\description{
+This merges list of multi-omics data to a single matrix. Note that
+it can handle non-matched data by taking union of rownames or
+colnames and extending the final matrix. Be careful it can
+introduce NA in such non-matched cases.
+}
diff --git a/man/mofa.normalizeExpression.Rd b/man/mofa.normalizeExpression.Rd
new file mode 100644
index 00000000..1dcde6bb
--- /dev/null
+++ b/man/mofa.normalizeExpression.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-mofa.R
+\name{mofa.normalizeExpression}
+\alias{mofa.normalizeExpression}
+\title{Normalize matrix for a multiomics expression matrix X. Features
+must be prefixed with datatype.}
+\usage{
+mofa.normalizeExpression(X, method1 = "maxMedian", method2 = "none")
+}
+\description{
+See also: normalizeMultiOmics()
+}
diff --git a/man/pgx.supercell.Rd b/man/pgx.supercell.Rd
index a8537914..1d7e6af1 100644
--- a/man/pgx.supercell.Rd
+++ b/man/pgx.supercell.Rd
@@ -4,7 +4,14 @@
\alias{pgx.supercell}
\title{SuperCell down sampling. Uniform down samplsing using gamma = 20.}
\usage{
-pgx.supercell(counts, meta, group = NULL, gamma = 20, log.transform = TRUE)
+pgx.supercell(
+ counts,
+ meta,
+ group = NULL,
+ gamma = 20,
+ nvargenes = 1000,
+ log.transform = TRUE
+)
}
\description{
SuperCell down sampling. Uniform down samplsing using gamma = 20.
diff --git a/man/pgx.wgcna.Rd b/man/pgx.wgcna.Rd
index dacff4ab..c11314c6 100644
--- a/man/pgx.wgcna.Rd
+++ b/man/pgx.wgcna.Rd
@@ -14,9 +14,13 @@ pgx.wgcna(
networktype = "signed",
tomtype = "signed",
numericlabels = FALSE,
- ngenes = 4000,
- maxBlockSize = 5000,
- gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]"
+ ngenes = 2000,
+ maxBlockSize = 9999,
+ gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]",
+ summary = TRUE,
+ ai_model = DEFAULT_LLM,
+ verbose = 1,
+ progress = NULL
)
}
\arguments{
diff --git a/man/probe2symbol.Rd b/man/probe2symbol.Rd
index 7c48606e..6bdec409 100644
--- a/man/probe2symbol.Rd
+++ b/man/probe2symbol.Rd
@@ -7,7 +7,7 @@
probe2symbol(
probes,
annot_table,
- query = c("symbol", "gene_name"),
+ query = "symbol",
key = NULL,
fill_na = FALSE
)
diff --git a/man/rho2bluered.Rd b/man/rho2bluered.Rd
new file mode 100644
index 00000000..7afbd59e
--- /dev/null
+++ b/man/rho2bluered.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{rho2bluered}
+\alias{rho2bluered}
+\title{Converts correlation values [-1;1] to blue-white-red colors. Good
+for creating color labels for labeledHeatmaps that expect colors.
+NOTE: use WGCNA::numbers2colors???}
+\usage{
+rho2bluered(R, a = 1, f = 0.95)
+}
+\description{
+Converts correlation values [-1;1] to blue-white-red colors. Good
+for creating color labels for labeledHeatmaps that expect colors.
+NOTE: use WGCNA::numbers2colors???
+}
diff --git a/man/wgcna.computeConsensusGeneStats.Rd b/man/wgcna.computeConsensusGeneStats.Rd
new file mode 100644
index 00000000..3229eaaf
--- /dev/null
+++ b/man/wgcna.computeConsensusGeneStats.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeConsensusGeneStats}
+\alias{wgcna.computeConsensusGeneStats}
+\title{Compute gene statistics with original datExpr but with consensus
+colors/labels for each layers. A separate function
+wgcna.getConsensusGeneStats() extracts clean tables from this
+results object.}
+\usage{
+wgcna.computeConsensusGeneStats(cons)
+}
+\description{
+Compute gene statistics with original datExpr but with consensus
+colors/labels for each layers. A separate function
+wgcna.getConsensusGeneStats() extracts clean tables from this
+results object.
+}
diff --git a/man/wgcna.computeConsensusMatrix.Rd b/man/wgcna.computeConsensusMatrix.Rd
new file mode 100644
index 00000000..3e6d893e
--- /dev/null
+++ b/man/wgcna.computeConsensusMatrix.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeConsensusMatrix}
+\alias{wgcna.computeConsensusMatrix}
+\title{Compute consensus matrix from list of matrices. The consensus
+matrix checks for consistent sign and minimal threshold for each
+matrix. Optionally filters on consistent p-value.}
+\usage{
+wgcna.computeConsensusMatrix(matlist, ydim, psig = 0.05, consfun = "min")
+}
+\arguments{
+\item{ydim}{original dimension of data}
+}
+\description{
+Compute consensus matrix from list of matrices. The consensus
+matrix checks for consistent sign and minimal threshold for each
+matrix. Optionally filters on consistent p-value.
+}
diff --git a/man/wgcna.computeConsensusModuleEnrichment.Rd b/man/wgcna.computeConsensusModuleEnrichment.Rd
new file mode 100644
index 00000000..a9c6faec
--- /dev/null
+++ b/man/wgcna.computeConsensusModuleEnrichment.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeConsensusModuleEnrichment}
+\alias{wgcna.computeConsensusModuleEnrichment}
+\title{Compute consensus enrichment by calculating overlapping enriched
+terms.}
+\usage{
+wgcna.computeConsensusModuleEnrichment(
+ cons,
+ GMT,
+ annot,
+ methods = c("fisher", "gsetcor", "xcor"),
+ min.genes = 10,
+ ntop = 400
+)
+}
+\description{
+Compute consensus enrichment by calculating overlapping enriched
+terms.
+}
diff --git a/man/wgcna.computeDistinctMatrix.Rd b/man/wgcna.computeDistinctMatrix.Rd
new file mode 100644
index 00000000..80b4b877
--- /dev/null
+++ b/man/wgcna.computeDistinctMatrix.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeDistinctMatrix}
+\alias{wgcna.computeDistinctMatrix}
+\title{Compute consensus matrix from list of matrices. The consensus
+matrix checks for consistent sign and minimal threshold for each
+matrix. Optionally filters on consistent p-value.}
+\usage{
+wgcna.computeDistinctMatrix(
+ matlist,
+ ydim,
+ psig = 0.05,
+ min.diff = 0.3,
+ consmax = 0
+)
+}
+\description{
+Compute consensus matrix from list of matrices. The consensus
+matrix checks for consistent sign and minimal threshold for each
+matrix. Optionally filters on consistent p-value.
+}
diff --git a/man/wgcna.computeGeneStats.Rd b/man/wgcna.computeGeneStats.Rd
new file mode 100644
index 00000000..21107b12
--- /dev/null
+++ b/man/wgcna.computeGeneStats.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeGeneStats}
+\alias{wgcna.computeGeneStats}
+\title{Compute general feature statistics after WGCNA results.}
+\usage{
+wgcna.computeGeneStats(net, datExpr, datTraits, TOM)
+}
+\description{
+Compute general feature statistics after WGCNA results.
+}
diff --git a/man/wgcna.computeModules.Rd b/man/wgcna.computeModules.Rd
new file mode 100644
index 00000000..08c440c5
--- /dev/null
+++ b/man/wgcna.computeModules.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.computeModules}
+\alias{wgcna.computeModules}
+\title{Reimplementation for WGCNA::blockwiseModules(). This returns exact
+same object as WGCNA::blockwiseModules() but is faster and allows
+different clustering linkage methods (ward, complete).}
+\usage{
+wgcna.computeModules(
+ datExpr,
+ power = 6,
+ networkType = "signed",
+ TOM = NULL,
+ TOMType = "signed",
+ calcMethod = "fast",
+ lowrank = 20,
+ clustMethod = "average",
+ cutMethod = "hybrid",
+ deepSplit = 2,
+ treeCut = 0.99,
+ treeCutCeiling = 1,
+ minModuleSize = 20,
+ minModuleSize2 = minModuleSize,
+ mergeCutHeight = 0.15,
+ minKMEtoStay = 0.3,
+ numericLabels = FALSE,
+ maxBlockSize = 9999,
+ returnTOM = FALSE,
+ verbose = 1
+)
+}
+\description{
+Reimplementation for WGCNA::blockwiseModules(). This returns exact
+same object as WGCNA::blockwiseModules() but is faster and allows
+different clustering linkage methods (ward, complete).
+}
diff --git a/man/wgcna.labels2colors.Rd b/man/wgcna.labels2colors.Rd
new file mode 100644
index 00000000..6805f511
--- /dev/null
+++ b/man/wgcna.labels2colors.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.labels2colors}
+\alias{wgcna.labels2colors}
+\title{Converts WGCNA labels (numeric or color) to colors.}
+\usage{
+wgcna.labels2colors(colors, ...)
+}
+\description{
+Converts WGCNA labels (numeric or color) to colors.
+}
diff --git a/man/wgcna.pickSoftThreshold.Rd b/man/wgcna.pickSoftThreshold.Rd
new file mode 100644
index 00000000..abfc9ba4
--- /dev/null
+++ b/man/wgcna.pickSoftThreshold.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.pickSoftThreshold}
+\alias{wgcna.pickSoftThreshold}
+\title{Better (?) method to pick soft threshold (aka power).}
+\usage{
+wgcna.pickSoftThreshold(
+ datExpr,
+ sft = NULL,
+ rcut = 0.85,
+ method = c("sft", "iqr")[1],
+ nmax = -1,
+ powers = NULL,
+ verbose = 1
+)
+}
+\description{
+Better (?) method to pick soft threshold (aka power).
+}
diff --git a/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd b/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd
new file mode 100644
index 00000000..35048d94
--- /dev/null
+++ b/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.plotDendroAndTraitCorrelation_cons}
+\alias{wgcna.plotDendroAndTraitCorrelation_cons}
+\title{wgcna.plotDendroAndTraits for Consensus output}
+\usage{
+wgcna.plotDendroAndTraitCorrelation_cons(
+ cons,
+ show.traits = TRUE,
+ traits = NULL,
+ main = NULL,
+ rm.na = TRUE,
+ use.tree = 0,
+ marAll = c(0.2, 8, 2, 0.2),
+ setLayout = TRUE,
+ ...
+)
+}
+\description{
+wgcna.plotDendroAndTraits for Consensus output
+}
diff --git a/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd b/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd
index 7f25ab40..4ae0ba3c 100644
--- a/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd
+++ b/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd
@@ -9,8 +9,28 @@ eigengenes.}
wgcna.plotEigenGeneAdjacencyHeatmap(
wgcna,
add_traits = TRUE,
+ traits = NULL,
+ add_me = TRUE,
marx = 1,
main = NULL,
+ multi = FALSE,
+ phenotype = NULL,
+ colorlabel = TRUE,
+ text = FALSE,
+ pstar = TRUE,
+ setMargins = TRUE,
+ mar1 = c(5.6, 4.5, 1.8, 0),
+ mar2 = c(8, 10, 4, 2),
+ cex.lab = 0.8,
+ cex.text = 0.7,
+ plotDendro = TRUE,
+ plotHeatmap = TRUE,
+ dendro.horiz = TRUE,
+ dendro.width = 0.3,
+ dendro.labels = TRUE,
+ nmax = -1,
+ fixclust = FALSE,
+ mask.intra = FALSE,
justdata = FALSE
)
}
diff --git a/man/wgcna.plotEigenGeneClusterDendrogram.Rd b/man/wgcna.plotEigenGeneClusterDendrogram.Rd
index 5e2b647c..738e967f 100644
--- a/man/wgcna.plotEigenGeneClusterDendrogram.Rd
+++ b/man/wgcna.plotEigenGeneClusterDendrogram.Rd
@@ -4,7 +4,18 @@
\alias{wgcna.plotEigenGeneClusterDendrogram}
\title{Plot cluster dendrogram with eigengenes and traits.}
\usage{
-wgcna.plotEigenGeneClusterDendrogram(wgcna, add_traits = TRUE, main = NULL)
+wgcna.plotEigenGeneClusterDendrogram(
+ wgcna = NULL,
+ ME = NULL,
+ add_traits = TRUE,
+ horiz = FALSE,
+ setMargins = TRUE,
+ method = "wgcna",
+ showlabels = TRUE,
+ plot = TRUE,
+ multi = FALSE,
+ main = NULL
+)
}
\description{
Plot cluster dendrogram with eigengenes and traits.
diff --git a/man/wgcna.plotModuleScores.Rd b/man/wgcna.plotModuleScores.Rd
new file mode 100644
index 00000000..80bc9171
--- /dev/null
+++ b/man/wgcna.plotModuleScores.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.plotModuleScores}
+\alias{wgcna.plotModuleScores}
+\title{Plot top modules most correlated with trait for multi expression
+data.}
+\usage{
+wgcna.plotModuleScores(
+ res,
+ trait,
+ multi = FALSE,
+ nmax = 16,
+ collapse.trait = FALSE,
+ plotlib = "base",
+ setpar = TRUE
+)
+}
+\description{
+Plot top modules most correlated with trait for multi expression
+data.
+}
diff --git a/man/wgcna.plotTopModules_multi.Rd b/man/wgcna.plotTopModules_multi.Rd
new file mode 100644
index 00000000..eb002953
--- /dev/null
+++ b/man/wgcna.plotTopModules_multi.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.plotTopModules_multi}
+\alias{wgcna.plotTopModules_multi}
+\title{Plot top modules most correlated with trait for multi expression
+data.}
+\usage{
+wgcna.plotTopModules_multi(
+ multi,
+ trait,
+ nmax = 16,
+ collapse = FALSE,
+ plotlib = "base",
+ setpar = TRUE
+)
+}
+\description{
+Plot top modules most correlated with trait for multi expression
+data.
+}
diff --git a/man/wgcna.scaleTOMs.Rd b/man/wgcna.scaleTOMs.Rd
new file mode 100644
index 00000000..ec5c859e
--- /dev/null
+++ b/man/wgcna.scaleTOMs.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.scaleTOMs}
+\alias{wgcna.scaleTOMs}
+\title{Scale a list of TOM matrices so that the quantiles (default p=0.95)
+are equal after scaling with respect to the first TOM matrix.}
+\usage{
+wgcna.scaleTOMs(TOMs, scaleP = 0.95)
+}
+\description{
+Scale a list of TOM matrices so that the quantiles (default p=0.95)
+are equal after scaling with respect to the first TOM matrix.
+}
diff --git a/man/wgcna.tomclust.Rd b/man/wgcna.tomclust.Rd
new file mode 100644
index 00000000..54bdbf13
--- /dev/null
+++ b/man/wgcna.tomclust.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pgx-wgcna.R
+\name{wgcna.tomclust}
+\alias{wgcna.tomclust}
+\title{Wrapper to hclust from matrix using default WGCNA parameters.}
+\usage{
+wgcna.tomclust(X, power = 6)
+}
+\description{
+Wrapper to hclust from matrix using default WGCNA parameters.
+}