diff --git a/NAMESPACE b/NAMESPACE index 1fc9f8ae..63e6839b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,13 @@ export(MNNcorrect) export(MultiOmicsSAE) export(abbreviate_pheno) export(add_opacity) +export(ai.ask) +export(ai.genesets_keywords) +export(ai.genesets_summary) +export(ai.get_models) +export(ai.get_ollama_models) +export(ai.get_remote_models) +export(ai.model_is_available) export(alias2hugo) export(allSpecies) export(allSpecies.ANNOTHUB) @@ -41,6 +48,7 @@ export(clean_probe_names) export(clean_symbols) export(clustermap) export(colSignedRanks) +export(collapseTraitMatrix) export(collapse_by_humansymbol) export(color_from_middle) export(colorscale) @@ -95,6 +103,7 @@ export(extend_metabolite_sets) export(extend_metabolite_sets2) export(extremeCorrelation) export(fastCor) +export(fastTOMsimilarity) export(filterProbes) export(first_feature) export(fixContrastMatrix) @@ -193,7 +202,6 @@ export(is_logged) export(isanumber) export(itercluster_louvain) export(justGSEA) -export(labels2rainbow) export(lasagna.create_from_pgx) export(lasagna.create_model) export(lasagna.plot3D) @@ -243,6 +251,7 @@ export(mofa.get_prefix) export(mofa.intNMF) export(mofa.log1s) export(mofa.merge_data) +export(mofa.merge_data2) export(mofa.plot_all_factortraits) export(mofa.plot_biplot) export(mofa.plot_centrality) @@ -484,6 +493,7 @@ export(pos.compact) export(probe2symbol) export(psort) export(pubmedlink) +export(purpleGreyYellow) export(rbind_sparse_matrix) export(read.as_matrix) export(read.gmt) @@ -565,23 +575,50 @@ export(validate_samples) export(visPrint) export(visplot.PCSF) export(wgcna.compute) +export(wgcna.computeConsensusGeneStats) +export(wgcna.computeConsensusMatrix) +export(wgcna.computeDistinctMatrix) +export(wgcna.computeModuleEnrichment) +export(wgcna.compute_multiomics) +export(wgcna.createConsensusLayers) export(wgcna.filterColors) +export(wgcna.getConsensusGeneStats) +export(wgcna.getConsensusTopGenesAndSets) export(wgcna.getGeneStats) -export(wgcna.labels2colors) +export(wgcna.getModuleCrossGenes) +export(wgcna.getTopGenesAndSets) +export(wgcna.matchColors) +export(wgcna.pickSoftThreshold) +export(wgcna.plotConsensusOverlapHeatmap) +export(wgcna.plotConsensusSampleDendroAndColors) export(wgcna.plotDendroAndColors) +export(wgcna.plotDendroAndTraitCorrelation) +export(wgcna.plotDendroAndTraitCorrelation_multi) export(wgcna.plotEigenGeneAdjacencyHeatmap) export(wgcna.plotEigenGeneClusterDendrogram) export(wgcna.plotEigenGeneGraph) export(wgcna.plotFeatureUMAP) +export(wgcna.plotGeneNetwork) export(wgcna.plotLabeledCorrelationHeatmap) export(wgcna.plotMDS) export(wgcna.plotMMvsGS) +export(wgcna.plotModuleHeatmap) export(wgcna.plotModuleHubGenes) +export(wgcna.plotModuleScores) export(wgcna.plotModuleSignificance) export(wgcna.plotModuleTraitHeatmap) +export(wgcna.plotMultiEigengeneCorrelation) +export(wgcna.plotPowerAnalysis) +export(wgcna.plotPowerAnalysis_multi) +export(wgcna.plotPreservationModuleTraits) +export(wgcna.plotPreservationSummaries) export(wgcna.plotSampleDendroAndColors) export(wgcna.plotTOM) +export(wgcna.plotTopModules) +export(wgcna.plotTopModules_multi) +export(wgcna.plotTraitCorrelationBarPlots) export(wgcna.runConsensusWGCNA) +export(wgcna.runPreservationWGCNA) export(wikipathview) export(wrapHyperLink) export(write.gmt) diff --git a/R/ai-llm.R b/R/ai-llm.R new file mode 100644 index 00000000..5a47da8b --- /dev/null +++ b/R/ai-llm.R @@ -0,0 +1,130 @@ +#' +#' @export +ai.get_ollama_models <- function(models=NULL) { + available.models <- system("ollama list | tail -n +2 | cut -d' ' -f 1", intern=TRUE) + if(!is.null(models)) available.models <- intersect(models,available.models) + return(available.models) +} + +OLLAMA_MODELS = ai.get_ollama_models() +DEFAULT_LLM = "gpt-5-nano" + +if(0) { + model="gpt-5-nano";prompt=NULL + model="gemma3:1b";prompt=NULL + model="grok-4-fast-non-reasoning";prompt=NULL +} + +#' @export +ai.get_remote_models <- function(models=NULL) { + keys <- NULL + + dbg("[ai.get_remote_models] models = ",models) + dbg("[ai.get_remote_models] len.models = ",length(models)) + dbg("[ai.get_remote_models] OPENAI_API_KEY = ",Sys.getenv("OPENAI_API_KEY")) + dbg("[ai.get_remote_models] XAI_API_KEY = ",Sys.getenv("XAI_API_KEY")) + dbg("[ai.get_remote_models] GROQ_API_KEY = ",Sys.getenv("GROQ_API_KEY")) + dbg("[ai.get_remote_models] GEMINI_API_KEY = ",Sys.getenv("GEMINI_API_KEY")) + + if (Sys.getenv("OPENAI_API_KEY")!="") keys <- c(keys,"gpt-.*") + if (Sys.getenv("XAI_API_KEY")!="") keys <- c(keys,"grok-.*") + if (Sys.getenv("GROQ_API_KEY")!="") keys <- c(keys,"groq:.*") + if (Sys.getenv("GEMINI_API_KEY")!="") keys <- c(keys,"gemini-.*") + + if(is.null(models) || length(models)==0 || models[1]=="" ) { + models <- keys + } else if(!is.null(keys)) { + regex <- paste0("^",keys,collapse="|") + models <- grep(regex,models,value=TRUE) + } else { + models <- NULL + } + models +} + +#' @export +ai.get_models <- function(models=NULL) { + local.models <- ai.get_ollama_models(models) + remote.models <- ai.get_remote_models(models) + if(!is.null(models)) { + models <- models[ models %in% c(local.models, remote.models)] + } else { + models <- c(local.models, remote.models) + } + return(models) +} + +#' @export +ai.model_is_available <- function(model) { + model %in% ai.get_models(models=model) +} + +#' @export +ai.ask <- function(question, model=DEFAULT_LLM, prompt=NULL) { + chat <- NULL + if(inherits(model, "Chat")) { + chat <- model + } else if(is.character(model)) { + if (model %in% OLLAMA_MODELS) { + chat <- ellmer::chat_ollama(model = model, system_prompt = prompt) + } else if (grepl("^gpt",model) && Sys.getenv("OPENAI_API_KEY")!="") { + message("warning: using remote GPT model:", model) + chat <- ellmer::chat_openai( + model = model, system_prompt = prompt, + api_key = Sys.getenv("OPENAI_API_KEY") ) + } else if (grepl("^grok",model) && Sys.getenv("XAI_API_KEY")!="") { + chat <- ellmer::chat_openai( + model = model, system_prompt = prompt, + api_key = Sys.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1/") + } else if (grepl("^groq",model) && Sys.getenv("GROQ_API_KEY")!="") { + model <- sub("groq:","",model) + chat <- ellmer::chat_groq( + model = model, system_prompt = prompt, + api_key = Sys.getenv("GROQ_API_KEY") + ) + } else if (grepl("^gemini",model) && Sys.getenv("GEMINI_API_KEY")!="") { + chat <- ellmer::chat_google_gemini( + model = model, system_prompt = prompt, + api_key = Sys.getenv("GEMINI_API_KEY") + ) + } + } + + if(is.null(chat)) { + message("ERROR. could not create model ", model) + return(NULL) + } + . <- chat$chat(question, echo=FALSE) + chat$last_turn()@text +} + +#' @export +ai.genesets_summary <- function(gsets, pheno=NULL, model=DEFAULT_LLM, + detail=1, html=FALSE, verbose=1) { + q <- "Extract the main biological function of this list of gene sets that were found by doing geneset enrichment. Just give the answer. Do not acknowledge." + if(!is.null(pheno)) q <- paste0(q, "Discuss in relation with the phenotype: '",pheno,"'.") + if(detail==0) q <- paste(q, "Be very very short.") + if(detail==1) q <- paste(q, "Describe in one short paragraph.") + if(detail>=2) q <- paste(q, "Describe in detail.") + if(html) q <- paste(q, "Use HTML formatting.") + if(verbose>0) cat("Question:",q,"... \n") + ss <- paste(gsets, collapse='; ') + q <- paste(q, "These are the genesets: ",ss,". ") + r <- ai.ask(q, model=model) + #r <- ai.ask(q, model="gemma3:270m") + #r <- ai.ask(q, model="gemma3:1b") + return(r) +} + +num=3 +#' @export +ai.genesets_keywords <- function(gsets, num=3, pheno=NULL, model=DEFAULT_LLM) { + ss <- paste(gsets, collapse='; ') + q <- paste0("Extract ",num," keywords describing the following collection of gene sets. ") + q <- paste0(q, "These are the genesets: ",ss,". ") + r <- ai.ask(q, model=model) + return(r) +} + + diff --git a/R/compute2-extra.R b/R/compute2-extra.R index 40c17735..d852f8da 100644 --- a/R/compute2-extra.R +++ b/R/compute2-extra.R @@ -236,7 +236,10 @@ compute_extra <- function(pgx, extra = c( tt <- system.time({ tryCatch( { - pgx$wgcna <- pgx.wgcna(pgx) + pgx$wgcna <- pgx.wgcna( + pgx, + ai_model = NULL ## no AI by default (yet) + ) }, error = function(e) { message("[ERROR_WGCNA] FATAL: ", as.character(e)) diff --git a/R/gset-fisher.r b/R/gset-fisher.r index dd0b35e1..3e2a81b5 100644 --- a/R/gset-fisher.r +++ b/R/gset-fisher.r @@ -104,7 +104,8 @@ gset.fisher2 <- function(genes.up, genes.dn, genesets, background = NULL, gset.fisher <- function(genes, genesets, background = NULL, fdr = 0.05, mc = TRUE, sort.by = "zratio", nmin = 3, min.genes = 15, max.genes = 500, method = "fast.fisher", - check.background = TRUE, common.genes = TRUE, verbose = 1) { + check.background = TRUE, common.genes = TRUE, + no.pass=NA, verbose = 1) { if (is.null(background)) { background <- unique(unlist(genesets)) if (verbose > 0) { @@ -221,6 +222,11 @@ gset.fisher <- function(genes, genesets, background = NULL, stop("unknown method") } + ## replace NA values + if(any(is.na(pv))) { + pv[is.na(pv)] <- no.pass + } + ## compute q-value qv <- rep(NA, length(pv)) qv <- stats::p.adjust(pv, method = "fdr") diff --git a/R/pgx-annot.R b/R/pgx-annot.R index 1fc9efab..98074a21 100644 --- a/R/pgx-annot.R +++ b/R/pgx-annot.R @@ -1194,25 +1194,40 @@ getHumanOrtholog.biomart <- function(organism, symbols, verbose = 1) { #' } #' @import data.table #' @export -probe2symbol <- function(probes, annot_table, query = c("symbol", "gene_name"), +probe2symbol <- function(probes, annot_table, query = "symbol", key = NULL, fill_na = FALSE) { - # Prepare inputs + + # Prepare inputs. add extra matching columns. annot_table <- cbind(rownames = rownames(annot_table), annot_table) + id.cols <- intersect(c("feature","gene_name","symbol"),colnames(annot_table)) + if(length(id.cols)>0) { + stripped_annot <- apply(annot_table[,id.cols,drop=FALSE],2,function(a) sub("^[A-Za-z]+:","",a)) + ##colnames(stripped_annot) <- paste0(colnames(stripped_annot),"_stripped") + annot_table <- cbind(annot_table, stripped_annot) + } + probes1 <- setdiff(probes, c(NA, "")) if (is.null(key) || !key %in% colnames(annot_table)) { key <- which.max(apply(annot_table, 2, function(a) sum(probes1 %in% a))) } if (is.null(key)) { - stop("[probe2symbol] FATAL. could not get key column.") + message("[probe2symbol] FATAL. could not get key column.") + return(NULL) } - # match query - ii <- match(probes, annot_table[, key]) - query <- intersect(query, colnames(annot_table)) + query <- head(intersect(query, colnames(annot_table)),1) if (length(query) == 0) { - stop("ERROR. no symbol column.") + message("ERROR. no symbol column.") + return(NULL) } - query_col <- annot_table[ii, query[1]] + + # fall back on old gene_name + if(query=="symbol" && !"symbol" %in% colnames(annot_table) && + "gene_name" %in% colnames(annot_table)) query <- "gene_name" + + # match query + ii <- match(probes, annot_table[, key]) + query_col <- annot_table[ii, query] # Deal with NA if (fill_na) { diff --git a/R/pgx-deconv.R b/R/pgx-deconv.R index 5b778f2c..ca67239b 100644 --- a/R/pgx-deconv.R +++ b/R/pgx-deconv.R @@ -500,7 +500,6 @@ pgx.deconvolution <- function(X, ref, dbg("WARNING:: pgx.deconvolution: is X really counts? (not logarithmic)\n") } - ## clean up matrix, remove duplicate names mat <- as.matrix(X) rownames(mat) <- gsub(".*:", "", rownames(mat)) ## strip prefix @@ -565,12 +564,14 @@ pgx.deconvolution <- function(X, ref, source(CIBERSORT.code) stime <- system.time( # The f CIBERSORT is likely from CIBERSORT package but better confirm - ## try(ciber.out <- CIBERSORT(ref, mat, perm = 0, QN = FALSE)) + ## try(ciber.out <- CIBERSORT(ref, mat, perm = 0, QN = FALSE)) + NULL ) } if (has.ciber2) { stime <- system.time( - ## try(ciber.out <- CIBERSORT::cibersort(ref, mat, perm = 0, QN = FALSE)) + ## try(ciber.out <- CIBERSORT::cibersort(ref, mat, perm = 0, QN = FALSE)) + NULL ) } if (!is.null(ciber.out)) { diff --git a/R/pgx-functions.R b/R/pgx-functions.R index b327a66c..d4ef4cda 100644 --- a/R/pgx-functions.R +++ b/R/pgx-functions.R @@ -1507,6 +1507,9 @@ filterProbes <- function(annot, genes) { #' @export rename_by2 <- function(counts, annot_table, new_id = "symbol", na.rm = TRUE, unique = TRUE, keep.prefix = FALSE) { + + ##new_id="symbol";na.rm=TRUE;unique=TRUE;keep.prefix=FALSE + ## add rownames annot_table$rownames <- rownames(annot_table) annot_table$rownames2 <- sub("^[A-Za-z]+:", "", rownames(annot_table)) ## strip prefix @@ -1528,8 +1531,11 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol", counts <- cbind(counts) } - ## dummy do-noting return from_id <- names(which.max(probe_match)) + if(new_id=="symbol" && !"symbol" %in% colnames(annot_table) && + "gene_name" %in% colnames(annot_table)) new_id <- "gene_name" + + ## dummy do-noting return if (new_id == from_id) { sel <- which(probes %in% annot_table[,from_id]) counts <- counts[sel, , drop=FALSE] @@ -1544,39 +1550,31 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol", keep.prefix <- (keep.prefix && all(grepl(":", probes))) from <- annot_table[, from_id] - if (!any(duplicated(from)) || unique) { - ii <- match(probes, from) - if (keep.prefix) { - dt <- mofa.get_prefix(probes) - new.name <- annot_table[ii, new_id] - new.name <- paste0(dt, ":", new.name) - } else { - new.name <- annot_table[ii, new_id] - } + ## if (!any(duplicated(from)) || unique) { + ii <- match(probes, from) + if (keep.prefix) { + dt <- mofa.get_prefix(probes) + new.name <- annot_table[ii, new_id] + new.name <- paste0(dt, ":", new.name) } else { - ## map probes to 'from' vector but retains duplicated entries in - ## 'from' - to <- lapply(probes, function(p) which(from == p)) - ii <- lapply(1:length(to), function(i) rep(i, length(to[[i]]))) - counts <- counts[unlist(ii), , drop = FALSE] - if (keep.prefix) { - dt <- mofa.get_prefix(unlist(to)) - new.name <- annot_table[unlist(to), new_id] - new.name <- paste0(dt, ":", new.name) - } else { - new.name <- annot_table[unlist(to), new_id] - } + new.name <- annot_table[ii, new_id] } rownames(counts) <- new.name - # Take out rows without name + # Remove rows with missing name if (na.rm) { counts <- counts[!rownames(counts) %in% c("", "NA", NA), , drop = FALSE] } - # Sum columns of rows with the same gene symbol - ## if (unique) rownames(counts) <- make_unique(rownames(counts)) - if (unique) { - counts <- rowmean(counts, rownames(counts)) + + # Average columns of rows with the same gene symbol + ndup <- sum(duplicated(rownames(counts))) + if (unique && ndup>0) { + rowdup <- rownames(counts)[which(duplicated(rownames(counts)))] + ii <- which( rownames(counts) %in% rowdup ) + nodup.counts <- rowmean(counts[ii,,drop = FALSE], rownames(counts)[ii]) + rown <- unique(rownames(counts)) + counts <- rbind( counts[-ii,,drop=FALSE], nodup.counts ) + counts <- counts[rown,] } if (type == "vector") { @@ -1595,6 +1593,9 @@ rename_by <- function(counts, annot_table, new_id = "symbol", unique = TRUE) { if (is.vector(counts)) { probes <- names(counts) } + + if(new_id=="symbol" && !"symbol" %in% colnames(annot_table) && + "gene_name" %in% colnames(annot_table)) new_id <- "gene_name" symbol <- annot_table[probes, new_id] # Guard against NA @@ -2349,6 +2350,29 @@ expandPhenoMatrix <- function(M, drop.ref = TRUE, keep.numeric = FALSE, check = } +#' Collapses an expanded (binarized) trait matrix to its original +#' categorical phenotype with levels. Colnames must be "pheno1=A", +#' "pheno1=B" etc. +#' +#' @export +collapseTraitMatrix <- function(Y) { + if(sum(grepl("=",colnames(Y))) < 2) return(Y) + is.cat <- grepl("=",colnames(Y)) + M <- Y[,which(!is.cat),drop=FALSE] + categories <- unique(sub("=.*","",colnames(Y)[which(is.cat)])) + y=categories[1] + for(y in categories) { + ii <- which(sub("=.*","",colnames(Y)) == y) + Y1 <- Y[,ii] + colnames(Y1) <- sub(".*=","",colnames(Y1)) + m1 <- colnames(Y1)[max.col(Y1)] + M <- cbind(M, m1) + colnames(M)[ncol(M)] <- y + } + return(M) +} + + #' @title P-value for Pearson's Correlation Coefficient #' #' @description This function calculates the p-value for Pearson's correlation coefficient. diff --git a/R/pgx-mofa.R b/R/pgx-mofa.R index 76e009ca..9290fd51 100644 --- a/R/pgx-mofa.R +++ b/R/pgx-mofa.R @@ -87,8 +87,11 @@ pgx.compute_mofa <- function(pgx, kernel = "MOFA", numfactors = 8, ) mofa$lasagna <- lasagna.create_model( las.data, - pheno = "contrasts", ntop = ntop, nc = 20, - annot = pgx$genes, use.graphite = FALSE + pheno = "contrasts", + ntop = ntop, + nc = 20, + annot = pgx$genes, + use.graphite = FALSE ) } @@ -345,7 +348,7 @@ mofa.compute <- function(xdata, X, samples, minmodsize = 10, power = 6, - cutheight = 0.15, + mergeCutHeight = 0.15, deepsplit = 2, networktype = "signed", tomtype = "signed", @@ -839,24 +842,26 @@ mofa.plot_all_factortraits <- function(meta.res) { #' @export mofa.topSD <- function(xdata, ntop) { - if (inherits(xdata, "list")) { + if(is.list(xdata)) { res <- lapply(xdata, function(x) { sdx <- matrixStats::rowSds(x, na.rm = TRUE) - head(x[order(-sdx), ], ntop) + head(x[order(-sdx),,drop=FALSE], ntop) }) - } - if (!is.null(dim(xdata))) { - if (all(grepl(":", rownames(xdata)))) { + } else if(is.matrix(xdata)) { + if(all(grepl(":",rownames(xdata)))) { xdata <- mofa.split_data(xdata) res <- lapply(xdata, function(x) { sdx <- matrixStats::rowSds(x, na.rm = TRUE) - head(x[order(-sdx), ], ntop) + head(x[order(-sdx),,drop=FALSE], ntop) }) res <- mofa.merge_data(res) } else { sdx <- matrixStats::rowSds(xdata, na.rm = TRUE) - res <- head(xdata[order(-sdx), ], ntop) + res <- head(xdata[order(-sdx),,drop=FALSE], ntop) } + } else { + message("[mofa.topSD] WARNING: could not detect type") + res <- xdata } return(res) } @@ -894,6 +899,63 @@ mofa.merge_data <- function(xx) { do.call(rbind, mofa.prefix(xx)) } + +#' This merges list of multi-omics data to a single matrix. Note that +#' it can handle non-matched data by taking union of rownames or +#' colnames and extending the final matrix. Be careful it can +#' introduce NA in such non-matched cases. +#' +#' @export +mofa.merge_data2 <- function(xdata, prefix.rows=NULL, prefix.cols=NULL) { + n1 <- length(Reduce(intersect,lapply(xdata,rownames))) + n2 <- length(Reduce(intersect,lapply(xdata,colnames))) + c(n1,n2) + if(n1 && n2) { + message("WARNING: matrices are overlapping both in rows and columns") + } + if(is.null(prefix.cols)) prefix.cols <- (n1 > 0 && n2 > 0) + if(is.null(prefix.rows)) prefix.rows <- (n1 > 0 && n2 > 0) + if(prefix.cols) { + ## if rows overlap (i.e. same genes), prefix the column names + ## (i.e. different datasets) + for(i in 1:length(xdata)) { + nn <- sub("[A-Za-z]+:","",colnames(xdata[[i]])) + colnames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn) + } + } + if(prefix.rows) { + ## if columns overlap (i.e. same samples), prefix the feature + ## names. + for(i in 1:length(xdata)) { + nn <- sub("[A-Za-z]+:","",rownames(xdata[[i]])) + rownames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn) + } + } + allfeatures <- unique(unlist(lapply(xdata, rownames))) + allsamples <- unique(unlist(lapply(xdata, colnames))) + D <- matrix(0, length(allfeatures), length(allsamples)) + nn <- matrix(0, length(allfeatures), length(allsamples)) + rownames(D) <- allfeatures + colnames(D) <- allsamples + i=1 + for(i in 1:length(xdata)) { + A <- xdata[[i]] + ii <- match(rownames(D),rownames(A)) + jj <- match(colnames(D),colnames(A)) + A1 <- A[ii,jj] + nn <- nn + !is.na(A1)*1 + A1[is.na(A1)] <- 0 + D <- D + A1 + } + D <- D / nn + D[which(nn==0)] <- NA + rownames(D) <- allfeatures + colnames(D) <- allsamples + return(D) +} + + + #' @export mofa.prefix <- function(xx) { xx <- mofa.strip_prefix(xx) @@ -1240,7 +1302,6 @@ mofa.plot_factor_boxplots <- function(mofa, k = 1, pheno = NULL, } } - mofa.combine_layers <- function(xx, weights = 1) { k <- 1 if (length(weights) == 1) weights <- rep(weights, length(xx)) @@ -2545,8 +2606,8 @@ lasagna.create_from_pgx <- function(pgx, xdata = NULL, layers = NULL, } names(xdata) <- tolower(names(xdata)) - if (is.null(layers)) { - layers <- c("hx", "mir", "gx", "px", "mx", "gset") + if(is.null(layers)) { + layers <- c("hx","mir","mi","gx","px","mx","gset") layers <- unique(layers, names(xdata)) } @@ -2554,37 +2615,48 @@ lasagna.create_from_pgx <- function(pgx, xdata = NULL, layers = NULL, layers <- intersect(layers, names(xdata)) xdata <- xdata[layers] } - - lasagna.data <- list( + model.data <- list( X = xdata, samples = pgx$samples, contrasts = pgx$contrasts ) model <- lasagna.create_model( - lasagna.data, - pheno = pheno, ntop = ntop, nc = nc, - annot = annot, use.gmt = use.gmt, use.graphite = use.graphite, - add.sink = add.sink, intra = intra, fully_connect = fully_connect, + model.data, + pheno = pheno, + ntop = ntop, + nc = nc, + annot = annot, + use.gmt = use.gmt, + use.graphite = use.graphite, + add.sink = add.sink, + intra = intra, + fully_connect = fully_connect, add.revpheno = add.revpheno ) return(model) } - - #' #' @export -lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, - annot = NULL, use.gmt = TRUE, use.graphite = TRUE, - add.sink = FALSE, intra = TRUE, fully_connect = FALSE, - add.revpheno = TRUE) { +lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20, + annot=NULL, use.gmt=TRUE, use.graphite=TRUE, + add.sink=FALSE, intra=TRUE, fully_connect=FALSE, + add.revpheno = TRUE, condition.edges=TRUE + ) { if (pheno == "pheno") { - Y <- expandPhenoMatrix(data$samples, drop.ref = FALSE) - } else { + Y <- expandPhenoMatrix(data$samples, drop.ref=FALSE) + } else if (pheno == "expanded") { + Y <- 1 * data$samples + } else if (pheno == "contrasts") { + if(!"contrasts" %in% names(data)) { + message("ERROR: contrasts missing in data") + return(NULL) + } Y <- makeContrastsFromLabelMatrix(data$contrasts) - if (any(grepl("^IA:", colnames(Y)))) { + Y <- sign(Y) + if(any(grepl("^IA:",colnames(Y)))) { ## drop interaction terms Y <- Y[, grep("^IA:", colnames(Y), invert = TRUE), drop = FALSE] } @@ -2593,6 +2665,9 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, colnames(revY) <- reverse.AvsB(colnames(Y)) Y <- cbind(Y, revY) } + } else { + message("[lasagna.create_model] ERROR invalid pheno type") + return(NULL) } data$X[["PHENO"]] <- t(Y) @@ -2600,11 +2675,16 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, xx <- data$X if (!is.null(ntop) && ntop > 0) { xx <- lapply(xx, function(x) head(x[order(-apply(x, 1, sd)), , drop = FALSE], ntop)) + xx <- mofa.topSD(xx, ntop) } - xx <- mofa.prefix(xx) - X <- do.call(rbind, xx) - remove(xx) + ## what about not overlapping samples?? + X <- mofa.merge_data2(xx, prefix.rows=TRUE, prefix.cols=FALSE) + ##remove(xx) + kk <- intersect(colnames(X),rownames(Y)) + X <- X[,kk] + Y <- Y[kk,] + ## add SOURCE/SINK if (add.sink) { X <- rbind(X, "SOURCE" = 1, "SINK" = 1) @@ -2612,13 +2692,40 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, ## Compute BIG correlation matrix. WARNING can become huge! NOTE: ## Needs optimization using SPARSE matrix. - suppressWarnings(R <- cor(t(X), use = "pairwise")) - R[is.na(R)] <- 0.01 - ii <- grep("SINK|SOURCE", rownames(R)) - if (length(ii)) R[ii, ii] <- 1 + suppressWarnings( R <- cor(t(X), use = "pairwise") ) + + ## Sink/source need to be connected allways + ii <- grep("SINK|SOURCE",rownames(R)) + if(length(ii)) { + R[ii,] <- 1 + R[,ii] <- 1 + } + + ## save 'pure' correlation + corrR <- R + + ## missing correlation values will be replaced with some constant + ## value. This in particular can happen for constant features or + ## inter-correlation edges if samples are not matched. NOTE: we + ## should ideally place correct sign. + R[is.na(R)] <- 0.1234 + + ## Weigh with pheno correlation or foldchange for conditioning. This + ## uses phenotype matrix Y. This is particularly important when + ## inter-correlation is unavailable and edge prioritaztion/pruning + ## is done. + if(condition.edges) { + message("conditioning edges...") + rho <- cor(t(X), Y, use='pairwise.complete.obs') + maxrho <- apply(abs(rho), 1, max, na.rm=TRUE) + ii <- grep("SINK|SOURCE",names(maxrho)) + if(length(ii)) maxrho[ii] <- 1 + rho.wt <- outer( maxrho, maxrho ) + R <- R * rho.wt + } ## mask for proteomic <> metabolics PPI - if (use.graphite) { + if (FALSE && use.graphite) { xtypes <- setdiff(names(data$X), "PHENO") xtypes has.mx <- ("mx" %in% xtypes) @@ -2659,8 +2766,9 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, if (add.sink) layers <- c("SOURCE", layers, "SINK") ## mask for inter-layer connections - if (!fully_connect) { - layer_mask <- as.matrix(R) * 0 + if(!fully_connect) { + layer_mask <- matrix(0, nrow(R), ncol(R)) + dimnames(layer_mask) <- dimnames(R) for (i in 1:(length(layers) - 1)) { ii <- which(dt == layers[i]) jj <- which(dt == layers[i + 1]) @@ -2675,8 +2783,10 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, } R <- R * layer_mask } - - ## Reduce inter-connections + + ## Reduce inter-connections to nc top most correlated edges per + ## node. This will avoid graph to be too large. NOTE: this will not + ## work well if inter-correlation is unavailable or disables!!! if (!is.null(nc) && nc > 0) { message(paste("reducing edges to maximum", nc, "connections")) ## NEED CHECK!!! SOMETHING WRONG. @@ -2706,14 +2816,21 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, R <- R * reduce_mask } + ##-------------------------------------- ## create graph + ##-------------------------------------- + + ## transfer masked zeroes. create graph from 'unweighted' + ## correlation. + corrR[which(R==0)] <- 0 gr <- igraph::graph_from_adjacency_matrix( - R, - diag = FALSE, weighted = TRUE, mode = "undirected" + corrR, diag = FALSE, weighted = TRUE, mode = "undirected" ) - igraph::E(gr)$rho <- igraph::E(gr)$weight ## copy - gr$layers <- layers + ## copy correlation from weights, put in 'rho' slot + igraph::E(gr)$rho <- igraph::E(gr)$weight + gr$layers <- layers + ## add edge connection type as attribute igraph::V(gr)$layer <- sub(":.*", "", igraph::V(gr)$name) ee <- igraph::as_edgelist(gr) @@ -2737,23 +2854,23 @@ lasagna.create_model <- function(data, pheno = "pheno", ntop = 1000, nc = 20, ) } -sp_edge_weight <- function(gr, layers) { - layers <- unique(c("SOURCE", layers, "SINK")) - wt <- abs(igraph::E(gr)$weight) +sp_edge_weight <- function(graph, layers) { + layers <- unique(c("SOURCE",layers,"SINK")) + wt <- abs(igraph::E(graph)$weight) wt[is.na(wt)] <- 0 - ee <- igraph::as_edgelist(gr) - v1 <- ee[, 1] - v2 <- ee[, 2] - l1 <- match(igraph::V(gr)[v1]$layer, layers) - l2 <- match(igraph::V(gr)[v2]$layer, layers) + ee <- igraph::as_edgelist(graph) + v1 <- ee[,1] + v2 <- ee[,2] + l1 <- match(igraph::V(graph)[v1]$layer, layers) + l2 <- match(igraph::V(graph)[v2]$layer, layers) p1 <- ifelse(l1 < l2, v1, v2) p2 <- ifelse(l1 < l2, v2, v1) wt <- wt + 1e-8 - s1 <- igraph::shortest_paths(gr, - from = "SOURCE", to = p1, weights = 1 / wt, output = "epath" + s1 <- igraph::shortest_paths(graph, + from = "SOURCE", to = p1, weights = 1/wt, output = "epath" ) - s2 <- igraph::shortest_paths(gr, - from = "SINK", to = p2, weights = 1 / wt, output = "epath" + s2 <- igraph::shortest_paths(graph, + from = "SINK", to = p2, weights = 1/wt, output = "epath" ) sp <- mapply(c, s1$epath, s2$epath) ## sp.score <- sapply(sp, function(e) exp(mean(log(wt[e])))) @@ -2761,13 +2878,13 @@ sp_edge_weight <- function(gr, layers) { sp.score } +#' +#' #' @export lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho", min_rho = 0, prune = TRUE, fc.weights = TRUE, sp.weight = FALSE) { if (!pheno %in% colnames(obj$Y)) { - dbg("[lasagna.solve] pheno = ", pheno) - dbg("[lasagna.solve] colnames(obj$Y) = ", colnames(obj$Y)) stop("pheno not in Y") } if (!"rho" %in% names(igraph::edge_attr(obj$graph))) { @@ -2776,14 +2893,23 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho", graph <- obj$graph X <- obj$X - y <- obj$Y[, pheno] - ii <- grep("PHENO", rownames(X)) - if (length(ii)) X[ii, ][which(X[ii, ] == 0)] <- NA - y[y == 0] <- NA - rho <- cor(t(X), y, use = "pairwise")[, 1] - i0 <- which(sign(y) == -1) - i1 <- which(sign(y) == +1) - fc <- rowMeans(X[, i1], na.rm = TRUE) - rowMeans(X[, i0], na.rm = TRUE) + y <- obj$Y[,pheno] + + ## check if phenotype was coded -1/0/1 + ii <- grep("PHENO",rownames(X)) + has.min1 <- (min(X[ii,], na.rm=TRUE) < 0) + has.min1 + if(length(ii) && has.min1) { + X[ii,][which(X[ii,]==0)] <- NA + y[y==0] <- NA + } + + rho <- cor(t(X), y, use="pairwise")[,1] + i0 <- which( y <= 0 ) + i1 <- which( y > 0 ) + m1 <- rowMeans(X[,i1,drop=FALSE],na.rm=TRUE) + m0 <- rowMeans(X[,i0,drop=FALSE],na.rm=TRUE) + fc <- m1 - m0 rho[is.na(rho)] <- 0 ## for PHENO nodes 'foldchange' does not make sense. replace with rho. @@ -2797,8 +2923,8 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho", igraph::V(graph)$value <- fc } graph$value.type <- value.type - - ## set edge weights + + ## set edge weights from node values ww <- 1 weight.type <- "rho" if (fc.weights) { @@ -2808,8 +2934,12 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho", ww <- abs(ff[ee[, 1]] * ff[ee[, 2]])^0.5 weight.type <- paste0(weight.type, "*vv") } - igraph::E(graph)$weight <- igraph::E(graph)$rho * ww + ## do the edge weighting. set NA correlation values to some constant + ee.rho <-igraph::E(graph)$rho + ee.rho[is.na(ee.rho)] <- 0.1234 + igraph::E(graph)$weight <- ee.rho * ww + ## set SINK/SOURCE edges to 1 if (any(grepl("SINK|SOURCE", igraph::V(graph)$name))) { igraph::E(graph)[.to("SINK")]$weight <- 1 @@ -2818,18 +2948,20 @@ lasagna.solve <- function(obj, pheno, max_edges = 100, value.type = "rho", if (sp.weight) { sp.wt <- sp_edge_weight(graph, obj$layers) - sp.wt <- (sp.wt / max(sp.wt))**2 + sp.wt <- (sp.wt / max(sp.wt, na.rm=TRUE))**2 ## why quadratic? igraph::E(graph)$weight <- igraph::E(graph)$weight * sp.wt weight.type <- paste0(weight.type, "*sp") } graph$weight.type <- weight.type ## take subgraph - if (min_rho > 0) { - dsel <- which(abs(igraph::E(graph)$weight) < min_rho) + if(min_rho > 0) { + dsel <- which(abs(igraph::E(graph)$weight) < min_rho) ## rho or weight?? igraph::E(graph)$weight[dsel] <- 0 } - if (max_edges > 0) { + + ## Global limit number of edges per type + if(max_edges > 0) { ewt <- igraph::E(graph)$weight esel <- tapply( 1:length(igraph::E(graph)), igraph::E(graph)$connection_type, @@ -2873,6 +3005,9 @@ lasagna.prune_graph <- function(graph, ntop = 100, layers = NULL, edge.sign = "both", edge.type = "both", filter = NULL, prune = TRUE) { + + if (is.null(layers)) + layers <- graph$layers if (is.null(layers)) layers <- unique(igraph::V(graph)$layer) layers <- setdiff(layers, c("SOURCE", "SINK")) graph <- igraph::subgraph(graph, igraph::V(graph)$layer %in% layers) @@ -2918,18 +3053,36 @@ lasagna.prune_graph <- function(graph, ntop = 100, layers = NULL, if (length(ii)) igraph::E(graph)$weight[ii] <- 0 } - if (edge.sign != "both") { - ewt <- igraph::E(graph)$weight - if (grepl("pos", edge.sign)) igraph::E(graph)$weight[ewt < 0] <- 0 - if (grepl("neg", edge.sign)) igraph::E(graph)$weight[ewt > 0] <- 0 - } - if (edge.type != "both") { - ic <- grepl("->", igraph::E(graph)$connection_type) - if (grepl("intra", edge.type)) igraph::E(graph)$weight[ic] <- 0 - if (grepl("inter", edge.type)) igraph::E(graph)$weight[!ic] <- 0 + ewt <- igraph::E(graph)$weight + if (grepl("pos", edge.sign)) { + igraph::E(graph)$weight[ewt < 0] <- 0 + } else if (grepl("neg", edge.sign)) { + igraph::E(graph)$weight[ewt > 0] <- 0 + } else if(edge.sign == "consensus") { + layersign <- rep(1, length(layers)) + names(layersign) <- layers + layersign[grep("^mi|^mir",layers)] <- -1 + v1 <- igraph::as_edgelist(graph)[,1] + esign <- layersign[ igraph::V(graph)[v1]$layer ] + vsign <- sign(igraph::V(graph)[v1]$value) + igraph::E(graph)$weight <- ewt * (sign(ewt) == esign & vsign == esign) + } + + ## delete intra or inter edges + ic <- grepl("->", igraph::E(graph)$connection_type) + if (edge.type == "inter") { + igraph::E(graph)$weight[!ic] <- 0 + } else if (edge.type == "intra") { + igraph::E(graph)$weight[ic] <- 0 + } else if (edge.type == "both2") { + sel <- (!ic & igraph::E(graph)$weight < 0) + igraph::E(graph)$weight[sel] <- 0 + } else { + ## nop } graph <- igraph::delete_edges(graph, which(igraph::E(graph)$weight == 0)) + if (prune) { graph <- igraph::subgraph_from_edges(graph, igraph::E(graph)) } @@ -3167,7 +3320,7 @@ plotly_lasagna <- function(pos, vars = NULL, edges = NULL, znames = NULL, ## ====================================================================== -## ====================================================================== +## ======================== MOFA FUNCTIONS ============================== ## ====================================================================== #' @@ -3331,6 +3484,81 @@ mofa.intNMF <- function(datasets, k = NULL, method = "RcppML", return(res) } + +#' Impute missing values for a multiomics expression matrix +#' X. Features must be prefixed with datatype. +#' +mofa.imputeMissing <- function(X, method="SVD2") { + xx <- mofa.split_data(X) + xx <- lapply(xx, function(x) imputeMissing(x, method=method)) + impX <- mofa.merge_data(xx) + impX[rownames(X),] +} + +#' Normalize matrix for a multiomics expression matrix X. Features +#' must be prefixed with datatype. +#' +#' See also: normalizeMultiOmics() +#' +mofa.normalizeExpression <- function(X, method1="maxMedian", method2="none") { + ##method1="maxMedian";method2="none" + + xx <- mofa.split_data(X) + + ## First normalization normalizes samples within each datatype but + ## not (yet) between datatypes. + xx <- lapply(xx, function(x) normalizeExpression(x, method=method1)) + + ## Second normalization + normX <- NULL + if(method2 != "none") { + if(method2=='median') { + ## Median normalization on datatypes. This will effectively + ## equalize the median for each datatype. + xmedian <- sapply(xx, function(x) mean(matrixStats::colMedians(x,na.rm=TRUE))) + xx <- lapply(xx, function(x) x - median(x,na.rm=TRUE) + mean(xmedian)) + } + if(method2=='combat') { + ## ComBat normalization on datatypes. This will effectively + ## equalize the mean and SD of each datatype in each sample. + normX <- mofa.merge_data(xx) + dtype <- mofa.get_prefix(rownames(normX)) + normX <- t(sva::ComBat( t(normX), batch=dtype)) + } + if(method2=='quantile') { + ## Quantile normalization on datatypes. We will need to cbind + ## and augment the datatypes so the number of features are + ## equal. We do this by repeating rows (so distribution not + ## affected). Then after quantile normalization we unpack again. + nr <- max(sapply(xx,nrow)) + mx <- list() + i=1 + for(i in 1:length(xx)) { + n <- ceiling(nr / nrow(xx[[i]])) + mx[[i]] <- do.call( rbind, rep( list(xx[[i]]), n)) + mx[[i]] <- head(mx[[i]], nr) + dim(mx[[i]]) + } + mxx <- do.call(cbind, mx) + mxx <- limma::normalizeQuantiles(mxx) + for(i in 1:length(xx)) { + kk <- ((i-1)*ncol(X)+1):(i*ncol(X)) + jj <- 1:nrow(xx[[i]]) + mx[[i]] <- mxx[jj,kk] + } + names(mx) <- names(xx) + normX <- mofa.merge_data(mx) + } + } + if(is.null(normX)) normX <- mofa.merge_data(xx) + normX <- normX[rownames(X),] + return(normX) +} + + + + + ## ====================================================================== ## ====================================================================== ## ====================================================================== diff --git a/R/pgx-plotting.R b/R/pgx-plotting.R index 599b1586..1c8b8e1a 100644 --- a/R/pgx-plotting.R +++ b/R/pgx-plotting.R @@ -7132,72 +7132,49 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL, edge.cex <- 1 edge.alpha <- 0.33 fc <- "value" + xdist = 1 + normalize.edges = FALSE + yheight = 2 + edge.sign = "both" edge.type <- "both" labpos <- NULL layout <- c("parallel", "hive")[1] normalize.edges <- FALSE value.name <- "rho" strip.prefix <- FALSE - } - - if (is.null(layers)) layers <- unique(igraph::V(graph)$layer) - layers <- setdiff(layers, c("SOURCE", "SINK")) - graph <- igraph::subgraph(graph, igraph::V(graph)$layer %in% layers) - if (!is.null(labpos) && length(labpos) < length(layers)) { - labpos <- head(rep(labpos, 99), length(layers)) - } + prune = FALSE + } + + vattr <- igraph::vertex_attr_names(graph) + edgeattr <- igraph::edge_attr_names(graph) + if(!"rho" %in% edgeattr) message("WARNING: no rho in edge attributes!") + if(!"weight" %in% edgeattr) message("WARNING: no weight in edge attributes!") + if(!"value" %in% vattr) stop("ERROR: no value in vertex attributes!") + if(!"layer" %in% vattr) stop("ERROR: no layer in vertex attributes!") + + graph <- lasagna.prune_graph( + graph, + ntop = ntop, + layers = layers, + normalize.edges = normalize.edges, + min.rho = min.rho, + edge.sign = edge.sign, + edge.type = edge.type, + filter = NULL, + prune = prune) + + layers <- graph$layers + layers <- setdiff(layers, c("SOURCE","SINK")) - if (!"value" %in% names(igraph::vertex_attr(graph))) { - stop("vertex must have 'value' attribute") - } fc <- igraph::V(graph)$value names(fc) <- igraph::V(graph)$name - - ## select ntop features - if (!is.null(ntop) && ntop > 0) { - ii <- tapply( - 1:length(fc), igraph::V(graph)$layer, - function(i) head(i[order(-abs(fc[i]))], ntop) - ) - ii <- unlist(ii[names(ii) %in% layers]) - fc <- fc[ii] - graph <- igraph::subgraph(graph, igraph::V(graph)[ii]) - } - - if (normalize.edges) { - for (e in unique(igraph::E(graph)$connection_type)) { - ii <- which(igraph::E(graph)$connection_type == e) - max.wt <- max(abs(igraph::E(graph)$weight[ii]), na.rm = TRUE) + 1e-3 - igraph::E(graph)$weight[ii] <- igraph::E(graph)$weight[ii] / max.wt - } - } - - if (min.rho > 0) { - ii <- which(abs(igraph::E(graph)$weight) < min.rho) - if (length(ii)) igraph::E(graph)$weight[ii] <- 0 - } - - if (edge.sign != "both") { - ewt <- igraph::E(graph)$weight - if (grepl("pos", edge.sign)) igraph::E(graph)$weight[ewt < 0] <- 0 - if (grepl("neg", edge.sign)) igraph::E(graph)$weight[ewt > 0] <- 0 - } - if (edge.type != "both") { - ic <- grepl("->", igraph::E(graph)$connection_type) - if (grepl("intra", edge.type)) igraph::E(graph)$weight[ic] <- 0 - if (grepl("inter", edge.type)) igraph::E(graph)$weight[!ic] <- 0 - } - graph <- igraph::delete_edges(graph, which(igraph::E(graph)$weight == 0)) - - if (prune) { - graph <- igraph::subgraph_from_edges(graph, igraph::E(graph)) - } - + ## layout vlayer <- igraph::V(graph)$layer if (layout == "parallel") { if (is.null(xpos)) xpos <- c(0:(length(layers) - 1)) xpos <- xpos * xdist + xpos <- head(rep(xpos,10),length(layers)) x <- xpos[match(vlayer, layers)] y <- fc[igraph::V(graph)$name] layout.xy <- cbind(x = x, y = y) @@ -7233,16 +7210,21 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL, max.wt <- max(ewt, na.rm = TRUE) + 1e-3 ew <- (ewt / max.wt)**2 - vx <- log(1000 * igraph::page_rank(graph, weights = ewt)$vector) - # vx <- abs(y) - vx <- (0.1 + abs(vx) / max(abs(vx)))**1 - vcol <- c("blue2", "red2")[1 + 1 * (fc[vv] > 0)] + ## vertex size relative to centrality + vx <- log(1000*igraph::page_rank(graph, weights=ewt)$vector) + vx <- (0.1+abs(vx)/max(abs(vx)))**1 + vcol <- c("blue2","red2")[ 1+1*(fc[vv] > 0)] - ecol <- c("darkorange3", "magenta4")[1 + 1 * (igraph::E(graph)$weight >= 0)] + ## color edges by sign or correlation. set NA edges to grey + ecol <- c("darkorange3","magenta4")[ 1+1*(igraph::E(graph)$rho >= 0)] + ii <- which(is.na(igraph::E(graph)$rho)) + if(length(ii)) ecol[ii] <- "grey70" ecol <- adjustcolor(ecol, edge.alpha) - ecurv <- c(-0.25, 0)[1 + 1 * grepl("->", igraph::E(graph)$connection_type)] - # ecurv <- c(TRUE,FALSE)[1 + 1*grepl("->",igraph::E(graph)$connection_type)] - + + ## add curvature for intra-edges + ecurv <- c(-0.25,0)[1 + 1*grepl("->",igraph::E(graph)$connection_type)] + #ecurv <- c(TRUE,FALSE)[1 + 1*grepl("->",igraph::E(graph)$connection_type)] + igraph::V(graph)$label <- "" if (is.null(xlim)) { xlim <- range(layout.xy[, 1]) @@ -7314,7 +7296,7 @@ plotMultiPartiteGraph2 <- function(graph, layers = NULL, labels <- mofa.strip_prefix(labels) } if (strip.prefix2) { - labels <- sub(".*:", "", labels) + labels <- sub("^[a-zA-Z]+:", "", labels) } labels <- gsub("^NA \\(", "(", labels) text(x, y, labels, cex = cex.label, pos = labposx, adj = 1, offset = 2.8) @@ -7365,9 +7347,9 @@ plotHivePlot <- function(X, f, group, groups = NULL, gr <- igraph::graph_from_edgelist(ee, directed = FALSE) igraph::E(gr)$weight <- abs(R[idx]) igraph::V(gr)$group <- group[igraph::V(gr)$name] - - if (length(gr) == 0) { - message("[plotMultiPartiteGraph] ERROR. empty graph.") + + if(length(gr)==0) { + message("[plotHivePlot] ERROR. empty graph.") return(NULL) } diff --git a/R/pgx-singlecell.R b/R/pgx-singlecell.R index 160a391e..a30485cc 100644 --- a/R/pgx-singlecell.R +++ b/R/pgx-singlecell.R @@ -479,6 +479,7 @@ pgx.supercell <- function(counts, meta, group = NULL, gamma = 20, + nvargenes = 1000, log.transform = TRUE) { if (log.transform) { ## supercell uses log2 matrix X <- logCPM(counts, total = 1e4) @@ -486,14 +487,21 @@ pgx.supercell <- function(counts, X <- counts } - if (!is.null(group) && "group" %in% colnames(meta)) { - message("using group detected in meta\n") + if (is.null(group) && "group" %in% colnames(meta)) { + message("using group column detected in meta\n") group <- meta[, "group"] } + if (!is.null(group) && any(group %in% colnames(meta))) { + group <- intersect(group, colnames(meta)) + message("using groups: ", paste(group,collapse=".")) + group <- meta[, group] + if(NCOL(group) > 1) group <- apply(group, 1, paste, collapse=".") + } + SC <- SuperCell::SCimplify(X, gamma = gamma, - n.var.genes = 1000, + n.var.genes = nvargenes, cell.split.condition = group ) message("[pgx.supercell] SuperCell::SCimplify completed") @@ -514,7 +522,16 @@ pgx.supercell <- function(counts, ## Compute metacall expression as sum of counts counts <- as.matrix(counts) - sc.counts <- SuperCell::supercell_GE(counts, mode = "sum", groups = SC$membership) + if(log.transform) { + sc.counts <- SuperCell::supercell_GE( + counts, mode = "sum", + groups = SC$membership) + } else { + sc.counts <- SuperCell::supercell_GE( + counts, mode = "average", + groups = SC$membership) + } + message("[pgx.supercell] SuperCell::supercell_GE completed") sc.membership <- paste0("mc", SC$membership) colnames(sc.counts) <- paste0("mc", 1:ncol(sc.counts)) diff --git a/R/pgx-wgcna.R b/R/pgx-wgcna.R index 2abbd6b1..714409cf 100644 --- a/R/pgx-wgcna.R +++ b/R/pgx-wgcna.R @@ -4,7 +4,6 @@ ## ## - #' @title WGCNA network construction and module detection #' #' @param pgx PGX object containing gene expression data @@ -40,10 +39,16 @@ pgx.wgcna <- function( networktype = "signed", tomtype = "signed", numericlabels = FALSE, - ngenes = 4000, - maxBlockSize = 5000, - gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]") { - ## minmodsize=10;power=NULL;cutheight=0.15;deepsplit=2;ngenes=4000;networktype="signed";tomtype="signed";numericlabels=FALSE;ngenes=2000;gset.filter=NULL;minKME=0.8;maxBlockSize=5000 + ngenes = 2000, + maxBlockSize = 9999, + gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]", + summary = TRUE, + ai_model = DEFAULT_LLM, + verbose = 1, + progress = NULL + ) { + + ##minmodsize=10;power=NULL;cutheight=0.15;deepsplit=2;ngenes=4000;networktype="signed";tomtype="signed";numericlabels=FALSE;ngenes=2000;gset.filter=NULL;minKME=0.8;maxBlockSize=5000 samples <- pgx$samples ## no dot pheno @@ -79,20 +84,22 @@ pgx.wgcna <- function( X <- normalizeMultiOmics(X) } + if(!is.null(progress)) progress$set(message = "Calculating WGCNA...", value=0.2) message("[pgx.wgcna] start wgcna.compute...") wgcna <- wgcna.compute( X = X, samples = samples, minmodsize = minmodsize, # default: min(20,...) power = power, # default: 12 (for signed) - cutheight = cutheight, # default: 0.15 + mergeCutHeight = cutheight, # default: 0.15 deepsplit = deepsplit, # default: 2 minKME = minKME, # default: 0.30 networktype = networktype, # default: unsigned (but signed is better...) tomtype = tomtype, # default: signed numericlabels = numericlabels, maxBlockSize = maxBlockSize, - ngenes = ngenes + ngenes = ngenes, + verbose = verbose ) message("[pgx.wgcna] finished computing wgcna.compute!") @@ -114,16 +121,33 @@ pgx.wgcna <- function( ## ---------------------------------------------------- ## Do geneset analysis ## ---------------------------------------------------- - res.gse <- wgcna.compute_enrichment( - wgcna, pgx, - method = c("fisher", "gsetcor", "mmcor"), + if(!is.null(progress)) progress$set(message = "Computing enrichment...", value=0.4) + message("computing module enrichment...") + wgcna$gsea <- wgcna.computeModuleEnrichment( + wgcna, + annot = pgx$genes, + # GMT = pgx$GMT, + # gsetX = pgx$gsetX, + methods = c("fisher", "gsetcor", "xcor"), ntop = 1000, + xtop = 100, filter = gset.filter ) + if(summary) { + if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6) + message("Annotating modules using ", ai_model) + wgcna$summary <- wgcna.describeModules( + wgcna, + ntop = 25, + model = ai_model, + annot = pgx$genes, + experiment = pgx$description, + verbose = 0) + } + + ## add to results object - wgcna$gse <- res.gse$gsets - wgcna$enriched_genes <- res.gse$genes wgcna$clust <- clust wgcna$networktype <- networktype wgcna$tomtype <- tomtype @@ -131,44 +155,90 @@ pgx.wgcna <- function( return(wgcna) } + #' @export wgcna.compute <- function(X, samples, + ngenes = 2000, minmodsize = 20, power = 12, - cutheight = 0.15, + mergeCutHeight = 0.15, deepsplit = 2, minKME = 0.3, + treeCut = 0.99, + treeCutCeiling = 1, networktype = "signed", tomtype = "signed", - reassignThreshold = 1e-6, - ngenes = 4000, + clustMethod = "average", + cutMethod = "hybrid", + calcMethod = "fast", + lowrank = 40, numericlabels = FALSE, - maxBlockSize = 8000, + maxBlockSize = 9999, merge.dendro = TRUE, + compute.stats = TRUE, prefix = "ME", - sv.dim = 80, - verbose = 0) { - ## minmodsize=20;power=12;cutheight=0.15;deepsplit=2;ngenes=2000;networktype="signed";tomtype="signed";numericlabels=FALSE;prefix="ME";minKME=0.3;reassignThreshold=1e-6;maxBlockSize=5000 - + sv.tom = 40, + drop.ref = FALSE, + net = NULL, + is.multiomics = NULL, + verbose = 0 + ) { + + if(0) { + ngenes = 2000; + minmodsize = 20; + power = 12; + mergeCutHeight = 0.15; + deepsplit = 2; + minKME = 0.3; + treeCut = 0.99; + treeCutCeiling = 1; + networktype = "signed"; + tomtype = "signed"; + clustMethod = "average"; + cutMethod = "hybrid"; + calcMethod = "fast"; + lowrank = 40; + numericlabels = FALSE; + maxBlockSize = 9999; + merge.dendro = TRUE; + compute.stats = TRUE; + prefix = "ME"; + sv.tom = 40; + drop.ref = FALSE; + net = NULL; + is.multiomics = NULL; + verbose = 0 + } + require(WGCNA) + if(nchar(prefix)!=2) { + stop("prefix must be two capital letters") + } + + kk <- intersect(colnames(X),rownames(samples)) + X <- as.matrix(X[,kk]) + samples <- as.data.frame(samples, check.names=FALSE) + samples <- samples[kk, , drop=FALSE] + nmissing <- sum(is.na(X)) if (nmissing > 0) { message("Found ", nmissing, " missing values in X. Imputing prior to WGCNA.") X <- svdImpute2(X) } - X <- X[!duplicated(rownames(X)), ] - + X <- X[!duplicated(rownames(X)), , drop = FALSE] + ## restrict number of genes if (ngenes > 0 && nrow(X) > ngenes) { - is.multiomics <- all(grepl(":", rownames(X))) - if (is.multiomics) { - message("[wgcna.compute] multiomics. topSD = ", ngenes) - X <- mofa.topSD(X, ngenes) + if(is.null(is.multiomics)) is.multiomics <- all(grepl(":",rownames(X))) ## not robust!! + if(is.multiomics) { + message("[wgcna.compute] topSD = ",ngenes, " (multi-omics)") + X <- mofa.topSD(X, ngenes) } else { - message("[wgcna.compute] topSD = ", ngenes) + message("[wgcna.compute] topSD = ", ngenes, " (single omics)") sdx <- matrixStats::rowSds(X, na.rm = TRUE) X <- X[sdx > 0.1 * mean(sdx, na.rm = TRUE), ] ## filter low SD?? X <- X[order(-matrixStats::rowSds(X, na.rm = TRUE)), ] @@ -176,82 +246,71 @@ wgcna.compute <- function(X, } } - message("[wgcna.compute] dim(X) = ", paste(dim(X), collapse = " x ")) - message("[wgcna.compute] dim(samples) = ", paste(dim(samples), collapse = " x ")) - datExpr <- t(X) - if (is.null(power)) { - ## Estimate best power - message("[wgcna.compute] estimating optimal power...") - powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) - sft <- WGCNA::pickSoftThreshold( - datExpr, - powerVector = powers, - networkType = networktype, - verbose = verbose - ) - power <- sft$powerEstimate - if (is.na(power)) power <- 20 - } - ## adapt for small datasets (also done in WGCNA package) minmodsize <- min(minmodsize, ncol(datExpr) / 2) - + if(!is.null(power)) power <- power[1] + message("[wgcna.compute] minmodsize = ", minmodsize) message("[wgcna.compute] number of features = ", nrow(X)) message("[wgcna.compute] minKME = ", minKME) message("[wgcna.compute] power = ", power) - message("[wgcna.compute] mergeCutHeight = ", cutheight) + message("[wgcna.compute] mergeCutHeight = ", mergeCutHeight) + message("[wgcna.compute] calcMethod = ", calcMethod) - message("[wgcna.compute] computing blockwise Modules...") - ## WGCNA::enableWGCNAThreads() - cor <- WGCNA::cor ## needed... - net <- WGCNA::blockwiseModules( - datExpr, - power = power, - networkType = networktype, - TOMType = tomtype, - minModuleSize = minmodsize, - reassignThreshold = reassignThreshold, - mergeCutHeight = cutheight, - minKMEtoStay = minKME, - numericLabels = numericlabels, ## numeric or 'color' labels - deepSplit = deepsplit, - maxBlockSize = maxBlockSize, - verbose = verbose - ) - cor <- stats::cor - message("[wgcna.compute] finished blockwise Modules...") + ##WGCNA::enableWGCNAThreads() + if(is.null(net)) { + message("[wgcna.compute] wgcna.computeModules....") + net <- wgcna.computeModules( + datExpr, + power = power, + networkType = networktype, + TOMType = tomtype, + calcMethod = calcMethod, + lowrank = lowrank, + clustMethod = clustMethod, + cutMethod = cutMethod, + deepSplit = deepsplit, + minModuleSize = minmodsize, + mergeCutHeight = mergeCutHeight, + minKMEtoStay = minKME, + treeCut = treeCut, + treeCutCeiling = treeCutCeiling, + numericLabels = numericlabels, + maxBlockSize = maxBlockSize, + returnTOM = TRUE, + verbose = verbose + ) + } - ## prefix="ME" - table(net$colors) - names(net$MEs) <- sub("^ME", prefix, names(net$MEs)) - net$labels <- paste0(prefix, net$colors) + if(!"MEs" %in% names(net)) { + message("[wgcna.compute]: running WGCNA::moduleEigengenes") + net$MEs = WGCNA::moduleEigengenes(datExpr, colors = net$colors)$eigengenes + } + ## Substitue prefix="ME" + if(prefix!="ME") names(net$MEs) <- sub("^ME", prefix, names(net$MEs)) + net$labels <- paste0(prefix, net$colors) + names(net$labels) <- colnames(datExpr) + ## clean up traits matrix datTraits <- data.frame(samples, check.names = FALSE) isdate <- apply(datTraits, 2, is.Date) datTraits <- datTraits[, !isdate, drop = FALSE] - ## rm unvarying phenotypes (cause NAs and errors in OPG plots) ## Expand multi-class discrete phenotypes into binary vectors datTraits <- utils::type.convert(datTraits, as.is = TRUE) - tr.class <- sapply(datTraits, class) - sel1 <- which(tr.class %in% c("factor", "character")) - sel2 <- which(tr.class %in% c("integer", "numeric")) - tr1 <- datTraits[, 0] - tr2 <- datTraits[, 0] - if (length(sel1)) { - tr1 <- expandPhenoMatrix(datTraits[, sel1, drop = FALSE], drop.ref = FALSE) - if (is.null(tr1)) tr1 <- datTraits[, 0] - } - if (length(sel2)) { - ## keeping numeric phenotypes - tr2 <- datTraits[, sel2, drop = FALSE] + datTraits <- expandPhenoMatrix(datTraits, keep.numeric=TRUE, drop.ref=drop.ref) + + if(is.null(datTraits)) { + message("WARNING:: no valid traits. creating random traits.") + ##random.trait <- sample(c(0,1), nrow(samples), replace=TRUE) + random.trait <- head(rep(c(0,1), nrow(samples)), nrow(samples)) + datTraits <- data.frame( random = random.trait ) + rownames(datTraits) <- rownames(samples) } - datTraits <- cbind(tr1, tr2) - + ## list of genes in modules me.genes <- tapply(names(net$colors), net$colors, list) names(me.genes) <- paste0(prefix, names(me.genes)) @@ -264,43 +323,60 @@ wgcna.compute <- function(X, names(me.colors) <- me.labels me.colors <- me.colors[names(net$MEs)] - ## compute clustering based on TOM matrix - message("[wgcna.compute] computing TOM matrix...") - TOM <- NULL - TOM <- WGCNA::TOMsimilarityFromExpr( - datExpr, - power = power, - TOMType = tomtype, - networkType = networktype, - verbose = verbose - ) + ## compute TOM matrix (we need for some plots) + if(!is.null(net$TOM)) { + TOM <- net$TOM + net$TOM <- NULL + } else { + message("[wgcna.compute] recomputing TOM matrix...") + TOM <- WGCNA::TOMsimilarityFromExpr( + datExpr, + power = net$power, + TOMType = tomtype, + networkType = networktype, + verbose = verbose + ) + } ## instead of the huge TOM matrix we save a smaller SVD. - message("[wgcna.compute] reducing TOM. sv.dim = ", sv.dim) - rownames(TOM) <- colnames(TOM) <- colnames(datExpr) - sv <- irlba::irlba(TOM, nv = min(sv.dim, dim(X))) - svTOM <- sv$v %*% diag(sqrt(sv$d)) - rownames(svTOM) <- colnames(datExpr) - + svTOM <- NULL + if(sv.tom>0) { + ## sv.tom <- ceiling(min(sv.tom,dim(datExpr)/2)) + message("[wgcna.compute] reducing TOM. sv.tom = ", sv.tom) + rownames(TOM) <- colnames(TOM) <- colnames(datExpr) + sv.tom <- min(sv.tom, ncol(TOM)-1) + sv <- irlba::irlba(TOM, nv = sv.tom) + svTOM <- sv$v %*% diag(sqrt(sv$d)) + rownames(svTOM) <- colnames(datExpr) + } + ## compute module eigenvectors (loading matrix) message("[wgcna.compute] computing module eigenvectors...") MVs <- list() for (clr in unique(net$colors)) { ii <- which(net$colors == clr) - mX <- datExpr[, ii] + mX <- datExpr[, ii, drop=FALSE] mX <- scale(mX) ## NOTE: seems WGCNA is using full scaling - sv1 <- irlba::irlba(mX, nv = 1, nu = 1) + if(ncol(mX)>1) { + res <- irlba::irlba(mX, nv = 1, nu = 1) + sv1 <- res$v[,1] * res$d[1] + } else { + sv1 <- 1 + } sv <- rep(0, ncol(datExpr)) - sv[ii] <- sv1$v[, 1] * sv1$d[1] + sv[ii] <- sv1 MVs[[paste0(prefix, clr)]] <- sv } MVs <- as.matrix(do.call(cbind, MVs[names(net$MEs)])) rownames(MVs) <- colnames(datExpr) ## compute gene statistics - message("[wgcna.compute] computing gene statistics...") - stats <- wgcna.compute_geneStats(net, datExpr, datTraits, TOM = TOM) - + stats <- NULL + if(compute.stats) { + message("[wgcna.compute] computing gene statistics...") + stats <- wgcna.computeGeneStats(net, datExpr, datTraits, TOM=TOM) + } + ## module-traits matrix message("[wgcna.compute] computing module-traits matrix...") modTraits <- cor(net$MEs, datTraits, use = "pairwise") @@ -315,16 +391,16 @@ wgcna.compute <- function(X, net$merged_dendro <- NULL } } - remove(TOM) - + remove(TOM) ## big + ## construct results object results <- list( datExpr = datExpr, datTraits = datTraits, - ## TOM = TOM, ## this can be BIG!!! - svTOM = svTOM, ## smaller singular vectors + ## TOM = TOM, ## this can be BIG!!! generally no need, just for plotting + svTOM = svTOM, ## smaller singular vectors net = net, - power = power, + #power = net$power, me.genes = me.genes, me.colors = me.colors, W = MVs, @@ -332,10 +408,540 @@ wgcna.compute <- function(X, stats = stats ) + message("[wgcna.compute] completed. \n\n") + return(results) + +} + +#' @export +wgcna.compute_multiomics <- function(dataX, + samples, + power = 12, + ngenes = 2000, + clustMethod = "average", + cutMethod = "hybrid", + minmodsize = 10, + minKME = 0.3, + deepsplit = 2, + compute.enrichment = TRUE, + annot = NULL, + GMT = NULL, + gsetX = NULL, + drop.ref = FALSE, + add.pheno = FALSE, + add.gsets = FALSE, + do.consensus = FALSE, + gset.methods = c("fisher","gsetcor","xcor"), + gset.ntop = 1000, + gset.xtop = 100, + summary = TRUE, + ai_model = DEFAULT_LLM, + ai_experiment = "", + verbose = 1, + progress = NULL + ) { + + if(0) { + do.consensus = 1 + cutMethod = "hybrid" + deepsplit = 2 + power = 12 + ngenes = 2000 + minmodsize = 10 + minKME = 0.3 + compute.enrichment = TRUE + gset.ntop = 1000 + gset.xtop = 100 + annot = pgx$genes + GMT = pgx$GMT ##?? + gsetX = pgx$gsetX + progress = NULL + } + + ## preprocessing + if(!is.null(annot)) { + dataX <- lapply(dataX, function(x) rename_by2(x, annot, "symbol")) + } + + ## add pheno matrix?? + if(add.gsets) { + if(is.null(GMT)) GMT <- Matrix::t(playdata::GSETxGENE) + if(is.null(gsetX)) { + X <- do.call(rbind, dataX) + if(!is.null(annot)) GMT <- rename_by2(GMT, annot, "symbol") + kk <- intersect(rownames(X), rownames(GMT)) + if(length(kk)) gsetX <- plaid::plaid(X[kk,], GMT[kk,]) + } + if(!is.null(gsetX)) dataX$gs <- gsetX + } + + ## add phenomatrix?? + if(add.pheno) { + phenoX <- expandPhenoMatrix(samples, keep.numeric=TRUE, drop.ref=drop.ref) + dataX$ph <- t(phenoX) + } + + dt.na <- which(unlist(lapply(dataX, function(x) sum(is.na(x)))) > 0) + if (any(dt.na)) { + dataX[dt.na] <- lapply(dataX[dt.na], imputeMissing, method="SVD2") + } + names(dataX) <- substring(names(dataX),1,2) + #dataX <- mofa.topSD(dataX, ngenes) + + if(!is.null(progress)) { + progress$set(message = paste("computing WGCNA modules..."), value = 0.33) + } + + if(is.null(power) || is.na(power) ) power <- "sft" + if(as.character(power[1]) %in% c("sft","iqr")) { + message("[wgcna.compute_multiomics] estimating power with method = ", power[1]) + est.power <- rep(NA, length(dataX)) + i=1 + for(i in 1:length(dataX)) { + p <- wgcna.pickSoftThreshold( + Matrix::t(dataX[[i]]), sft=NULL, rcut=0.85, powers = NULL, + method=power[1], nmax=1000, verbose=1) + if(length(p)==0 || is.null(p) ) p <- NA + est.power[i] <- p + } + est.power + power <- ifelse (is.na(est.power), 12, est.power) + message("[wgcna.compute_multiomics] estimated power = ", power) + } else { + power <- as.numeric(power) + } + nw <- length(dataX) + power <- head(rep(power, nw),nw) + names(power) <- names(dataX) + + ## This runs WGCNA on an expression list. + wgcna <- list() + has.gxpx <- all(c("gx","px") %in% names(dataX)) + if (do.consensus && has.gxpx) { + cat("[wgcna.compute_multiomics] computing consensus WGCNA for GX+PX --------\n") + nn <- mean(rownames(dataX[['gx']]) %in% rownames(dataX[['px']])) + if (nn < 0.10) { + message("[wgcna.compute_multiomics] ERROR: gx and px features do not overlap") + } else { + wgcna <- wgcna.createConsensusLayers( + dataX[c('gx','px')], + samples = samples, + prefix = c('GX','PX'), + ngenes = ngenes, + power = power[c('gx','px')], + minModuleSize = minmodsize, + deepSplit = deepsplit, + mergeCutHeight = 0.15, + minKME = minKME, + maxBlockSize = 9999, + verbose = 1 + ) + } + } + + dtlist <- setdiff(names(dataX), names(wgcna)) + for(dt in dtlist) { + cat("[wgcna.compute_multiomics] computing WGCNA for", dt, "-------------\n") + minKME <- ifelse(dt=='ph', 0, minKME) + minmodsize <- ifelse(dt=='ph', 1, minmodsize) + wgcna[[dt]] <- wgcna.compute( + X = dataX[[dt]], + samples = samples, + ngenes = ngenes, + calcMethod = "fast", + power = power[dt], + lowrank = 40, + clustMethod = clustMethod, + cutMethod = cutMethod, + deepsplit = deepsplit, + minKME = minKME, + minmodsize = minmodsize, + mergeCutHeight = 0.15, + compute.stats = TRUE, + sv.tom = 40, + prefix = toupper(dt), + drop.ref = drop.ref, + is.multiomics = FALSE, + verbose = verbose + ) + } + + wgcna <- wgcna[names(dataX)] + + ## Compute enrichment + if(compute.enrichment) { + + message("[wgcna.compute_multiomics] computing module enrichment...") + + if(!is.null(progress)) { + progress$set(message = paste("computing module enrichment..."), value = 0.66) + } + + gse <- wgcna.computeModuleEnrichment( + wgcna = wgcna, + multi = TRUE, + methods = gset.methods, + ntop = gset.ntop, + xtop = gset.xtop, + annot = annot, + GMT = GMT, + gsetX = gsetX, + filter = NULL + #filter = "^PATHWAY|^HALLMARK|^GO|^C[1-9]" + ) + + ## split up results + for(k in names(wgcna)) { + mm <- names(wgcna[[k]]$me.genes) + wgcna[[k]]$gsea <- gse[mm] + } + + if(summary) { + if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6) + message("Annotating modules using ", ai_model) + for(k in names(wgcna)) { + wgcna[[k]]$summary <- wgcna.describeModules( + wgcna[[k]], + multi = FALSE, + ntop = 25, + model = ai_model, + annot = annot, + experiment = ai_experiment, + verbose = 0 + ) + } + } + + + } + + return(wgcna) +} + + +#' Reimplementation for WGCNA::blockwiseModules(). This returns exact +#' same object as WGCNA::blockwiseModules() but is faster and allows +#' different clustering linkage methods (ward, complete). +#' +wgcna.computeModules <- function( + datExpr, + power = 6, + networkType = "signed", + TOM = NULL, + TOMType = "signed", + calcMethod = "fast", + lowrank = 20, + clustMethod = "average", + cutMethod = "hybrid", ## hybrid, tree, static + deepSplit = 2, + treeCut = 0.99, + treeCutCeiling = 1, + minModuleSize = 20, + minModuleSize2 = minModuleSize, + mergeCutHeight = 0.15, + minKMEtoStay = 0.3, + numericLabels = FALSE, ## numeric or 'color' labels + maxBlockSize = 9999, + returnTOM = FALSE, + verbose = 1 ) { + + #power=6;networkType=TOMType="signed";minModuleSize=20;mergeCutHeight=0.15;minKMEtoStay=0.3;numericLabels=FALSE;clustMethod="average";deepSplit = 2;treeCut = 0.99;treeCutCeiling = 1; + + cor <- WGCNA::cor ## needed... + deepSplit <- as.integer(deepSplit) + lowrank <- as.integer(lowrank) + + if(is.null(power) || is.na(power) ) power <- "sft" ## use iqr? + auto.power <- power[1] %in% c("sft","iqr") + if (auto.power) { + message("[wgcna.computeModules] estimating power with method = ", power[1]) + powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) + powers <- c(powers, seq(from = 25, to = 50, by = 5)) + power <- wgcna.pickSoftThreshold(datExpr, sft=NULL, rcut=0.85, + method=power[1], nmax=2000, verbose=0) + if (is.na(power)) power <- 6 + message("[wgcna.compute_multiomics] estimated power = ", power) + } + + if(calcMethod == "blockwise") { + message("[wgcna.computeModules] computing blockwiseModules...") + net <- WGCNA::blockwiseModules( + datExpr, + power = power, + networkType = networkType, + TOMType = TOMType, + minModuleSize = minModuleSize, + mergeCutHeight = mergeCutHeight, + minKMEtoStay = minKMEtoStay, + numericLabels = numericLabels, ## numeric or 'color' labels + deepSplit = deepSplit, + maxBlockSize = maxBlockSize, + + verbose = verbose + ) + return(net) + } + + clustMethod <- sub("^ward$","ward.D",clustMethod) + ## define distance matrix + if(is.null(TOM)) { + adjacency <- WGCNA::adjacency(datExpr, power = power, type = networkType) + adjacency[is.na(adjacency)] <- 0 + if (calcMethod == "fast") { + if(verbose>0) + message("[wgcna.computeModules] Computing TOM matrix using fast method...") + TOM <- fastTOMsimilarity(adjacency, tomtype=TOMType, lowrank=lowrank) + } else if (calcMethod == "adjacency") { + if(verbose>0) + message("[wgcna.computeModules] Computing using adjacency as TOM matrix...") + TOM <- adjacency + } else if (calcMethod == "full") { + if(verbose>0) message("[wgcna.computeModules] Computing full TOM matrix...") + ## SLOW!!! + TOM <- WGCNA::TOMsimilarity(adjacency, TOMType=TOMType, verbose=verbose) + } else { + stop("[wgcna.computeModules] ERROR: invalid calcMethod parameter:", calcMethod) + } + dimnames(TOM) <- dimnames(adjacency) + } + + ## transform to dissTOM + dissTOM <- 1 - TOM + + ## clustering + if(verbose>0) message("Clustering features using ", clustMethod, " linkage") + ##geneTree <- flashClust::flashClust(as.dist(dissTOM), method=clustMethod) + geneTree <- stats::hclust(as.dist(dissTOM), method=clustMethod) + + ## sometimes there is a height error. following is a fix. + geneTree$height <- round(geneTree$height, 6) + + ## exception1 + if(minModuleSize <= 1 && cutMethod!="static") { + message("WARNING: minModuleSize==1. Changing to static cutting") + cutMethod <- "static" + } + + if(treeCut > 1) cutMethod <- "static" + if(treeCut <= 1) { + ## transform from relative to actual + qq <- quantile( geneTree$height, probs = c(0.05, treeCutCeiling)) + treeCut <- qq[1] + treeCut * diff(qq) + } + + if(verbose>0) { + message("Tree cut method: ", cutMethod) + message("treeCut = ", treeCut) + message("deepSplit = ", deepSplit) + message("minModuleSize = ", minModuleSize) + } + + if(cutMethod %in% c("hybrid","tree")) { + + if(cutMethod=="tree") deepSplit <- (deepSplit > 0) + label <- dynamicTreeCut::cutreeDynamic( + geneTree, + method = cutMethod, + cutHeight = treeCut, + distM = dissTOM, + deepSplit = deepSplit, + minClusterSize = minModuleSize2 + ) + } else if( cutMethod == "static" && treeCut <= 1 ) { + if(verbose>0) message("Static cutting tree at fixed H = ", treeCut) + label <- cutree( geneTree, h = treeCut) + } else if(cutMethod == "static" && treeCut > 1 ) { + if(verbose>0) message("Static cutting tree with fixed K = ", treeCut) + label <- cutree( geneTree, k = treeCut) + } else { + stop("ERROR: could not determine cutMethod") + } + label <- as.integer(label) + table(label) + nmodules <- length(unique(label)) + if(verbose>0) message("Found ", nmodules, " modules") + + ## Eigengenes + if(verbose>0) message("Calculating eigengenes...") + colors <- WGCNA::labels2colors(label) + MEs <- WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes + + ## prune using minKME + if(minKMEtoStay > 0) { + if(verbose>0) message("Pruning features using minKME = ",minKMEtoStay) + ngrey <- sum(colors == 'grey') + for(k in unique(colors)) { + if (! paste0("ME",k) %in% colnames(MEs)) next + ii <- which(colors == k) + if(length(ii)>1) { + eg <- MEs[,paste0("ME",k)] + kme <- cor(datExpr[,ii, drop=FALSE], eg, use="pairwise")[,1] + kme.sign <- as.numeric(names(which.max(table(sign(kme))))) + kme <- kme * kme.sign + ii <- ii[which(kme < minKMEtoStay)] + if(length(ii)) colors[ii] <- 'grey' + } + } + ngrey <- sum(colors == 'grey') - ngrey + if(verbose>0) message("Pruned ",ngrey, " low KME features") + } + + ## Merge similar modules + unmergedColors <- colors + if(mergeCutHeight>0 && length(MEs)>1) { + if(verbose>0) message("Merging similar modules: mergeCutHeight = ",mergeCutHeight) + merge <- wgcna.mergeCloseModules(datExpr, colors, cutHeight=mergeCutHeight, MEs=MEs) + unmergedColors <- colors + colors <- merge$colors + MEs <- merge$MEs + if(verbose>0) { + n0 <- length(unique(unmergedColors)) + n1 <- length(unique(colors)) + message("Merged ", n0 - n1, " modules") + } + } + + ## filter on minModuleSize + if(minModuleSize>1) { + too.small <- names(which(table(colors) < minModuleSize)) + too.small + if(length(too.small)) { + if(verbose>0) message("Removing ",length(too.small)," too small modules") + colors[colors %in% too.small] <- "grey" + } + } + + ## Update MEs + if("grey" %in% colors && !"MEgrey" %in% names(MEs)) { + MEs <- WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes + } + MEs <- MEs[sub("^ME","",names(MEs)) %in% colors] + + # Rename to numeric + if(numericLabels) { + if(verbose>0) message("Renaming to numeric labels") + colorOrder <- names(sort(table(colors),decreasing=TRUE)) + colorOrder <- unique(c("grey", colorOrder)) + colors <- match(colors, colorOrder)-1 + unmergedColors <- match(unmergedColors, colorOrder)-1 + mecolor <- sub("^ME","",names(MEs)) + names(MEs) <- paste0("ME",match(mecolor, colorOrder)-1) + } else { + # Rename to standard colors, most frequent first + if(verbose>0) message("Renaming to standard colors") + colorOrder <- names(sort(table(colors),decreasing=TRUE)) + colorOrder <- unique(c("grey", colorOrder)) + newcolor <- setdiff( WGCNA::standardColors(), "grey") + n0 <- length(colorOrder) + n1 <- length(newcolor) + if(n1 < n0) newcolor <- make.unique(rep(newcolor,ceiling(n0/n1))) + newcolor <- unique(c("grey", newcolor)) + colors <- newcolor[match(colors, colorOrder)] + unmergedColors <- newcolor[match(unmergedColors, colorOrder)] + mecolor <- sub("^ME","",names(MEs)) + names(MEs) <- paste0("ME",newcolor[match(mecolor, colorOrder)]) + } + + names(colors) <- colnames(datExpr) + names(unmergedColors) <- colnames(datExpr) + + net <- list() + net$colors <- colors + net$unmergedColors <- unmergedColors + net$MEs <- MEs + net$goodSamples <- rep(TRUE, nrow(datExpr)) + net$goodGenes <- rep(TRUE, ncol(datExpr)) + net$dendrograms <- list( geneTree ) + net['TOMFiles'] <- list(NULL) + net$blockGenes <- list(1:ncol(datExpr)) + net$blocks <- rep(1, ncol(datExpr)) + net$MEsOK <- TRUE + net$power <- power + if(returnTOM) net$TOM <- TOM + return(net) +} + +#' Faster implementation of TOM computation using low-rank SVD +#' approximation. +#' @export +fastTOMsimilarity <- function(A, tomtype="signed", lowrank=20) { + #https://stackoverflow.com/questions/56574729 + # + #Given square symmetric adjacency matrix A, its possible to + #calculate the TOM matrix W without the use of for loops, which + #speeds up the process tremendously + if(!tomtype %in% c("signed","unsigned")) { + stop("only works for signed and unsigned tomtype") + } + + ## Adjacency matrix A can be approximated with SVD. This can make + ## TOM calculation much faster. + diag(A) <- 0 + if(lowrank > (ncol(A)/2) ) lowrank <- -1 + if(lowrank>0) { + res <- try( irlba::irlba(A, nv=lowrank) ) + if(!"try-error" %in% class(res)) { + U <- res$u %*% diag(sqrt(res$d)) + L <- U %*% (Matrix::t(U) %*% U) %*% Matrix::t(U) + } else { + message("[fastTOMsimilarity] Warning: irlba error. nv = ", lowrank) + L <- A %*% A ## full computation + } + } else { + L <- A %*% A + } + k <- Matrix::colSums(A) + Kmat <- outer(k, k, FUN = function(x, y) pmin(x, y)) + D <- (Kmat + 1 - A) + W <- (L + A) / D + diag(W) <- 1 + W <- as.matrix(W) + dimnames(W) <- dimnames(A) + W <- pmax(W, 0) ## sometimes has negative values... + return(W) } -wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) { + +wgcna.mergeCloseModules <- function(datExpr, colors, cutHeight, MEs=NULL) { + if(is.null(MEs)) { + MEs = WGCNA::moduleEigengenes(datExpr, colors = colors)$eigengenes + dim(MEs) + } + hc <- hclust(as.dist(1 - cor(MEs)), method="average") + idx <- cutree(hc, h=cutHeight) + names(idx) <- sub("^ME","",names(idx)) + table(idx) + new.colors <- colors + m=2 + for(m in unique(idx)) { + cc <- names(which(idx == m)) + cc <- setdiff(cc, "grey") ## never merge grey + ii <- which(colors %in% cc) + new.colors[ii] <- cc[1] + } + new.MEs = WGCNA::moduleEigengenes(datExpr, colors = new.colors)$eigengenes + list( + colors = new.colors, + MEs = new.MEs + ) +} + +##--------------------------------------------------------------------- +## Gene statistics +##--------------------------------------------------------------------- + +#' Compute general feature statistics after WGCNA results. +#' +#' +#' +wgcna.computeGeneStats <- function(net, datExpr, datTraits, TOM) { + + ## align + kk <- intersect(rownames(datExpr), rownames(datTraits)) + datExpr <- datExpr[kk, , drop = FALSE] + datTraits <- datTraits[kk, , drop = FALSE] + ## Define numbers of genes and samples nGenes <- ncol(datExpr) nSamples <- nrow(datExpr) @@ -345,79 +951,122 @@ wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) { moduleTraitPvalue <- WGCNA::corPvalueStudent(moduleTraitCor, nSamples) ## Module membership correlation (with p-values) - moduleMembership <- cor(datExpr, net$MEs, use = "p") + moduleMembership <- cor(datExpr, net$MEs, use = "pairwise.complete") MMPvalue <- WGCNA::corPvalueStudent(as.matrix(moduleMembership), nSamples) ## Gene-trait significance (trait correlation) (with p-values) - traitSignificance <- cor(datExpr, datTraits, use = "p") - GSPvalue <- WGCNA::corPvalueStudent(as.matrix(traitSignificance), nSamples) + traitSignificance <- cor(datExpr, datTraits, use = "pairwise.complete") + TSPvalue <- WGCNA::corPvalueStudent(as.matrix(traitSignificance), nSamples) ## Fold-change + foldChange <- NULL + foldChangePvalue <- NULL is.binary <- apply(datTraits, 2, function(a) length(unique(a[!is.na(a)])) == 2) - binY <- datTraits[, which(is.binary), drop = FALSE] - lm <- lapply(binY, function(y) { - gx.limma(t(datExpr), y, - lfc = 0, fdr = 1, - sort.by = "none", verbose = 0 - ) - }) - - foldChange <- sapply(lm, function(m) m$logFC) - foldChangePvalue <- sapply(lm, function(m) m$P.Value) - rownames(foldChange) <- rownames(lm[[1]]) - rownames(foldChangePvalue) <- rownames(lm[[1]]) - + is.binary + binY <- NULL + if(any(is.binary)) { + binY <- datTraits[, which(is.binary), drop = FALSE] + nmin <- apply(binY, 2, function(x) min(table(x))) + binY <- binY[,which(nmin>=2),drop=FALSE] + } + if(!is.null(binY) && NCOL(binY)>0) { + lm <- list() + i=1 + for(i in 1:ncol(binY)) { + y <- 1*binY[,i] + X <- t(datExpr) + suppressWarnings(suppressMessages( + res <- try(gx.limma(X, y, lfc=0, fdr=1, sort.by="none", verbose=0, max.na=1)) + )) + if(!"try-error" %in% class(res)) { + k <- colnames(binY)[i] + lm[[k]] <- res + } + } + lm <- lm[!sapply(lm,is.null)] + foldChange <- sapply(lm, function(m) m$logFC) + foldChangePvalue <- sapply(lm, function(m) m$P.Value) + if(length(lm)==1) { + foldChange <- cbind(foldChange) + foldChangePvalue <- cbind(foldChangePvalue) + } + rownames(foldChange) <- rownames(lm[[1]]) + rownames(foldChangePvalue) <- rownames(lm[[1]]) + } + # Continuous traits (not always present) contY <- datTraits[, which(!is.binary), drop = FALSE] - if (ncol(contY) > 0) { - contlm <- lapply(contY, function(y) { - rho <- cor(datExpr, y, use = "pairwise")[, 1] - P.Value <- cor.pvalue(rho, length(y)) + foldChange.cont <- NULL + foldChangePvalue.cont <- NULL + dim(contY) + if(NCOL(contY) > 0) { + contlm <- apply(contY, 2, function(y) { + rho <- cor(datExpr, y, use="pairwise")[,1] + P.Value <- WGCNA::corPvalueStudent(rho, n = length(y)) data.frame(rho, P.Value) }) + contlm <- contlm[!sapply(contlm, is.null)] foldChange.cont <- sapply(contlm, function(m) m$rho) foldChangePvalue.cont <- sapply(contlm, function(m) m$P.Value) + if(length(contlm)==1) { + foldChange.cont <- cbind(foldChange.cont) + foldChangePvalue.cont <- cbind(foldChangePvalue.cont) + } rownames(foldChange.cont) <- rownames(contlm[[1]]) rownames(foldChangePvalue.cont) <- rownames(contlm[[1]]) - - # Merge - foldChange <- cbind(foldChange, foldChange.cont) - foldChangePvalue <- cbind(foldChangePvalue, foldChangePvalue.cont) + colnames(foldChange.cont) <- colnames(contY) + colnames(foldChangePvalue.cont) <- colnames(contY) } + # Merge + foldChange <- cbind(foldChange, foldChange.cont) + foldChangePvalue <- cbind(foldChangePvalue, foldChangePvalue.cont) + ## Gene Centrality. Compute centrality of gene in Module subgraph ## using TOM matrix. - adj <- TOM -# diag(adj) <- NA - adj[which(abs(adj) < 0.01)] <- 0 - gr <- igraph::graph_from_adjacency_matrix( - adj, - mode = "undirected", weighted = TRUE, diag = FALSE - ) - geneCentrality <- rep(NA, nrow(adj)) - names(geneCentrality) <- rownames(adj) - me.genes <- tapply(names(net$colors), net$colors, list) - gg <- me.genes[[1]] - for (gg in me.genes) { - gr1 <- igraph::subgraph(gr, gg) - ct <- igraph::page_rank(gr1, weights = NULL)$vector - ct <- ct / mean(ct, na.rm = TRUE) - geneCentrality[gg] <- ct + geneCentrality <- NULL + if(!is.null(TOM)) { + if(is.null(dimnames(TOM))) dimnames(TOM) <- list(colnames(datExpr),colnames(datExpr)) + adj <- TOM + diag(adj) <- 0 + adj[which(abs(adj) < 0.01)] <- 0 + gr <- igraph::graph_from_adjacency_matrix( + adj, mode = "undirected", weighted = TRUE, diag = FALSE + ) + geneCentrality <- rep(NA, nrow(adj)) + names(geneCentrality) <- rownames(adj) + me.genes <- tapply(names(net$colors), net$colors, list) + for (gg in me.genes) { + gr1 <- igraph::subgraph(gr, gg) + ct <- igraph::page_rank(gr1, weights = NULL)$vector + ct <- ct / mean(ct, na.rm = TRUE) + geneCentrality[gg] <- ct + } } - ## propotion variance explained (PVE) per module - propVarExplained <- WGCNA::propVarExplained( - datExpr, - colors = net$colors, MEs = net$MEs - ) - + ## force align. Sometime they are shorter for some reason... + gg <- rownames(moduleMembership) + matMatch <- function(m, gg) { + if(is.null(m)) return(NULL) + m <- m[match(gg,rownames(m)),,drop=FALSE] + rownames(m) <- gg + return(m) + } + + ##moduleMembership <- matMatch(moduleMembership) + MMPvalue <- matMatch(MMPvalue, gg) + traitSignificance = matMatch(traitSignificance, gg) + TSPvalue = matMatch(TSPvalue, gg) + foldChange = matMatch(foldChange, gg) + foldChangePvalue = matMatch(foldChangePvalue, gg) + stats <- list( moduleTraitCor = moduleTraitCor, moduleTraitPvalue = moduleTraitPvalue, moduleMembership = moduleMembership, MMPvalue = MMPvalue, traitSignificance = traitSignificance, - GSPvalue = GSPvalue, + TSPvalue = TSPvalue, foldChange = foldChange, foldChangePvalue = foldChangePvalue, geneCentrality = geneCentrality @@ -426,42 +1075,271 @@ wgcna.compute_geneStats <- function(net, datExpr, datTraits, TOM) { return(stats) } +#' +#' +#' @export +wgcna.getGeneStats <- function(wgcna, trait, module=NULL, plot = TRUE, + stats = NULL, labels = NULL, showlogFC = TRUE, + col = NULL, main = NULL) { + + if(!is.null(stats)) { + features <- rownames(stats[['moduleMembership']]) + if(is.null(stats)) stop("must give stats") + if(is.null(labels)) stop("must give labels") + } else if(!is.null(wgcna)) { + labels <- wgcna$net$labels + features <- names(wgcna$net$colors) + stats <- wgcna$stats + } else { + stop("must supply wgcna or trait") + } + + is.color <- mean(labels %in% c("grey",WGCNA::standardColors())) > 0.9 + if(is.color) { + prefix <- substring(rownames(stats[['moduleTraitCor']]),1,2)[1] + labels <- paste0(prefix, labels) + } + + p1 <- c("moduleMembership", "MMPvalue") + p2 <- c("traitSignificance", "TSPvalue", "foldChange", "foldChangePvalue") + p3 <- c("geneCentrality") + + df <- data.frame( + feature = features, + module = labels + ) + head(df) + + ## get moduleMembership + mm.stats <- stats[p1] + mm.label <- colnames(mm.stats[[1]]) + idx <- cbind(1:length(labels), match(labels, mm.label)) + A1 <- sapply( mm.stats, function(x) x[idx]) + rownames(A1) <- labels + df <- cbind(df, A1) + + ## get traitSig columns for trait + tt.cols <- colnames(stats[[p2[1]]]) + if(is.null(trait)) trait <- tt.cols + trait <- intersect(trait, tt.cols) + + if (length(trait)>1) { + A2 <- lapply(stats[p2], function(x) x[,trait]) + for(i in 1:length(A2)) colnames(A2[[i]]) <- paste0(names(A2)[i],".",colnames(A2[[i]])) + A2 <- do.call(cbind, A2) + df <- cbind(df, A2) + } else if(length(trait)==1) { + A2 <- lapply(stats[p2], function(x) x[,trait]) + A2 <- do.call(cbind, A2) + df <- cbind(df, A2) + } else { + message("[wgcna.getGeneStats] ERROR: trait not in stats object") + return(NULL) + } + + A3 <- stats[[p3]] + if(!is.null(A3)) { + df <- cbind(df, centrality = A3) + } + + ## calculate score + sel <- c("moduleMembership", "traitSignificance", "foldChange", "centrality") + sel <- intersect(sel, colnames(df)) + #df1 <- pmax(as.matrix(df[, sel]), 1e-8) + df1 <- as.matrix(abs(df[, sel])) + score <- exp(rowMeans(log(1e-8 + df1))) * sign(df[,"foldChange"]) + df <- data.frame(df[,1:2], score = score, df[,-c(1,2)]) + + if (!is.null(module)) { + ##sel <- which(df$module == module) + sel <- grep( paste0(module,"$"), df$module) + df <- df[sel, , drop = FALSE] + } + + ## reorder, sort on score + score.sign <- sign(median(df$score, na.rm=TRUE)) + df <- df[order(-df$score * score.sign), ] + + if (plot) { + cols <- c("moduleMembership", "traitSignificance") + if(showlogFC) { + cols <- c(cols, "foldChange", "centrality") + } + cols <- intersect(cols, colnames(df)) + df1 <- df[, cols] + col1 <- wgcna.labels2colors(labels[rownames(df1)]) + if (!is.null(col)) col1 <- col + pairs(df1, col = col1, oma=c(1,1,1,1)*1.8) + if (is.null(main)) { + main <- paste("Gene significance for module", module, "and trait", trait) + } + title(main, line = 3, cex.main = 1.15) + } + + rownames(df) <- df$feature + df +} + +#' Compute gene statistics with original datExpr but with consensus +#' colors/labels for each layers. A separate function +#' wgcna.getConsensusGeneStats() extracts clean tables from this +#' results object. +#' +#' @export +wgcna.computeConsensusGeneStats <- function(cons) { + k <- names(cons$layers)[1] + stats <- list() + for(k in names(cons$layers)) { + w <- cons$layers[[k]] + colors <- cons$net$colors + wMEs <- cons$net$multiMEs[[k]]$data + wnet <- list( MEs = wMEs, colors = colors) + stats[[k]] <- wgcna.computeGeneStats( + wnet, w$datExpr, w$datTraits, TOM=NULL) + } + return(stats) +} + +#' +#' +#' @export +wgcna.getConsensusGeneStats <- function(cons, stats, trait, module=NULL) { + + ## create extended color vector + labels = paste0("ME",cons$net$colors) + gstats <- list() + for(k in names(stats)) { + gstats[[k]] <- wgcna.getGeneStats( + wgcna = NULL, + stats = stats[[k]], + labels = labels, + trait = trait, + plot = FALSE, + module = module, + col = NULL, + main = NULL + ) + } + + ## Align rows + ff <- gstats[[1]]$feature + for(k in names(gstats)) { + ii <- match(ff, gstats[[k]]$feature) + gstats[[k]] <- gstats[[k]][ii,] + } + + ## Compute consensus statistics. Consensus statistics are computed + ## as geometric mean of score variables, and/or maximum pvalue for + ## p.value columns. + xcols <- c(3,4,6,8) + pcols <- c(10,5,7,9) + pcols1 <- c(5,7,9) + xcols <- c("score","moduleMembership","traitSignificance","foldChange") + pcols <- c("scorePvalue","MMPvalue","TSPvalue","foldChangePvalue") + pcols1 <- pcols[-1] + for(i in 1:length(gstats)) { + gstats[[i]][,'scorePvalue'] <- apply(gstats[[i]][,pcols1],1,max,na.rm=TRUE) + } + ##xc <- lapply(gstats, function(x) log(abs(x[,xcols])*(x[,pcols]<0.05))) + xc <- lapply(gstats, function(x) log(abs(x[,xcols]))) + xc <- exp(Reduce('+', xc) / length(xc)) + xp <- Reduce(pmax, lapply(gstats, function(x) x[,pcols])) + df3 <- data.frame( gstats[[1]][,1:2], xc, xp) + df3 <- df3[,colnames(gstats[[1]])] + head(df3) + + ## Determine consensus status. Feature is 'C' (concordant) if sign + ## in all layers are equal and significant. 'D' (discordant) if sign + ## if not equal in all layers but significant. 'N' is any is + ## non-significant. + sign.pos <- Reduce('*',lapply(gstats,function(g) sign(g$score) == 1)) + sign.neg <- Reduce('*',lapply(gstats,function(g) sign(g$score) == -1)) + allsig <- Reduce('*',lapply(gstats,function(g) (g$scorePvalue) < 0.05)) + table(allsig) + consensus <- c("D","C")[ 1 + 1*(sign.pos | sign.neg)] + consensus[which(allsig==0)] <- 'N' + cons.df <- data.frame(df3[,1:2], consensus, df3[,-c(1,2)]) + head(cons.df) + + ## This creates the full stats matrix (all subgroups) + df1 <- gstats[[1]][,c("feature","module")] + df2 <- gstats[[1]][,0] + cols <- colnames(gstats[[1]])[-c(1:2)] + for(k in cols ) { + xx <- sapply(gstats, function(g) g[,k]) + df2[[k]] <- I(xx) + } + df2 <- do.call(cbind, lapply(df2,unclass)) + newcols <- unlist(lapply(cols, function(k) paste0(k,'.',names(gstats)))) + colnames(df2) <- newcols + full.df <- data.frame(df1, consensus=cons.df$consensus, df2) + + + ## sort?? + ii <- order(-cons.df$score * sign(mean(cons.df$score,na.rm=TRUE))) + cons.df <- cons.df[ii,] + full.df <- full.df[ii,] + + list( + consensus = cons.df, + full = full.df + ) +} + ## ---------------------------------------------------- ## Perform geneset analysis on modules ## ---------------------------------------------------- -wgcna.compute_enrichment <- function(wgcna, pgx, - method = c("fisher", "gsetcor", "mmcor", "wcor"), - ntop = 200, - annot = NULL, GMT = NULL, - filter = "^PATHWAY|^HALLMARK|^GO|^C[1-9]") { - ## collapse features to symbol - geneX <- t(wgcna$datExpr) - symbol.col <- NULL - ## annot=NULL;GMT=NULL - if (!is.null(pgx)) { - GMT <- pgx$GMT - gsetX <- pgx$gsetX - annot <- pgx$genes - if (!is.null(annot)) { - symbol.col <- intersect(c("symbol", "gene_name"), colnames(annot))[1] - } + +#' @export +wgcna.computeModuleEnrichment <- function(wgcna, + multi = FALSE, + methods = c("fisher","gsetcor","xcor"), + ntop = 200, + xtop = 100, + annot = NULL, + GMT = NULL, + gsetX = NULL, + filter = NULL + ) { + + if(!multi) { + wgcna <- list(gx = wgcna) } - if (is.null(GMT)) { - stop("FATAL. must supply GMT matrix") + if(!any( c("gx","px") %in% names(wgcna))) { + message("ERROR: datasets must have gx or px datatype!") return(NULL) } - if (is.null(gsetX)) { - stop("FATAL. must supply GMT matrix") - return(NULL) + + ## collapse features to symbol + selx <- intersect(c("gx","px"), names(wgcna))[1] + geneX <- t(as.matrix(wgcna[[selx]]$datExpr)) + symbol.col <- NULL + + if(is.null(annot)) { + gg <- lapply(wgcna, function(w) colnames(w$datExpr)) + gg1 <- lapply(names(wgcna), function(a) paste0(a,":",gg[[a]])) + gg <- as.character(unlist(gg)) + gg1 <- as.character(unlist(gg1)) + annot <- data.frame(feature = gg1, symbol = gg) } - if (!is.null(annot)) { - geneX <- rename_by(geneX, annot, symbol.col) - } else { - rownames(geneX) <- gsub(".*:|[_ -].*", "", rownames(geneX)) + + if(is.null(GMT)) { + message("Using playdata GSETxGENE genesets") + GMT <- Matrix::t(playdata::GSETxGENE) } + if(!is.null(annot)) { + symbol.col <- intersect(c("symbol","gene_name"),colnames(annot))[1] + geneX <- rename_by2(geneX, annot, symbol.col) + GMT <- rename_by2( GMT, annot, symbol.col) + } + if(length(intersect(rownames(geneX),rownames(GMT)))==0) { + message("ERROR: no overlap for geneX and GMT features. Please add annotation.") + return(NULL) + } + bg <- rownames(geneX) bg <- intersect(bg, rownames(GMT)) if (length(bg) == 0) { @@ -474,99 +1352,356 @@ wgcna.compute_enrichment <- function(wgcna, pgx, if (length(sel)) G1 <- G1[, sel, drop = FALSE] } G1 <- G1[, which(Matrix::colSums(G1 != 0) >= 4), drop = FALSE] - gmt <- mat2gmt(G1) - W <- as.matrix(wgcna$W) - if (!is.null(annot)) { - W <- rename_by2(W, annot, symbol.col) + if(is.null(gsetX)) { + gsetX <- plaid::plaid( geneX, G1) } - rho.list <- list() - pval.list <- list() - - ## Perform fisher-test on ME genes - if ("fisher" %in% method) { - message("[wgcna.compute_enrichment] calculating Fisher tests...") - i <- 1 - rho <- NULL - pval <- NULL - me.genes <- wgcna$me.genes - for (i in 1:length(me.genes)) { - gg <- me.genes[[i]] - if (!is.null(annot)) { - gg <- probe2symbol(me.genes[[i]], annot, query = symbol.col) - } - ## perform Fisher test - rr <- try(gset.fisher(gg, gmt, background = bg, fdr = 1, min.genes = 4, verbose = 0)) - if (!"try-error" %in% class(rr)) { - rr <- rr[match(names(gmt), rownames(rr)), ] - if (is.null(rho)) { - rho <- cbind(rr$odd.ratio) - pval <- cbind(rr$p.value) - colnames(rho) <- colnames(pval) <- names(me.genes)[i] - } else { - rho <- cbind(rho, rr$odd.ratio) - pval <- cbind(pval, rr$p.value) - me <- names(me.genes)[i] - colnames(rho)[ncol(rho)] <- me - colnames(pval)[ncol(pval)] <- me - } - rownames(rho) <- names(gmt) - rownames(pval) <- names(gmt) + ## align dimensions + ss <- intersect(rownames(gsetX), colnames(G1)) + G1 <- G1[,ss,drop=FALSE] + gsetX <- gsetX[ss,] + + ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs)) + me.genes <- lapply(wgcna, function(w) w$me.genes) + names(me.genes) <- NULL + me.genes <- do.call(c, me.genes) + me.genes <- lapply(me.genes, function(g) probe2symbol(g, annot, query = symbol.col)) + + ## compute most correlated gx/px genes. limit xtop if geneX is too + ## small + xtop <- min(xtop, round(nrow(geneX)/4)) + nbx.genes <- list() + for(i in 1:length(wgcna)) { + nbx.cor <- cor(t(geneX), ME[[i]]) + for(k in colnames(nbx.cor)) { + if(is.null(xtop)) { + n <- length(wgcna[[i]]$me.genes[[k]]) + } else { + n <- xtop } + nbx.genes[[k]] <- head(names(sort(-nbx.cor[,k])), n) } - ## handle infinite - rho[is.infinite(rho)] <- 2 * max(rho, na.rm = TRUE) - rho[is.na(rho)] <- 0 - pval[is.na(pval)] <- 1 - rho.list[["fisher"]] <- rho - pval.list[["fisher"]] <- pval } - ## Here we correlate geneset score (averageCLR) with the module + ## add nearest expression neighbors to module genes + nbx.genes <- nbx.genes[names(me.genes)] + for(k in names(me.genes)) { + me.genes[[k]] <- unique(c(me.genes[[k]], nbx.genes[[k]])) + } + + ## make single ME matrix + MEx <- do.call(cbind, ME) + + gsea <- wgcna.run_enrichment_methods( + ME = MEx, + me.genes = me.genes, + G = G1, + geneX = geneX, + gsetX = gsetX, + methods = methods, + ntop = ntop + ) + + return(gsea) +} + +#' +#' +#' @export +wgcna.getModuleCrossGenes <- function(wgcna, ref=NULL, ngenes = 100, + multi=TRUE, modules=NULL) +{ + + if(!multi) { + wgcna <- list(gx = wgcna) + ref <- 'gx' + } + + if(is.null(ref)) ref <- head(intersect(names(wgcna),c("gx","px")),1) + if(is.null(ref) || !ref %in% names(wgcna)) ref <- names(wgcna)[1] + + W <- wgcna[[ref]] + geneX <- W$datExpr + + MEx <- sapply(wgcna, function(w) as.matrix(w$net$MEs)) + MEx <- do.call(cbind, MEx) + + if(!is.null(modules)) { + modules <- intersect(modules, colnames(MEx)) + MEx <- MEx[,modules,drop=FALSE] + } + + nbx.cor <- cor(geneX, MEx) + + nbx.list <- list() + for(k in colnames(nbx.cor)) { + ii <- head(order(-nbx.cor[,k]), ngenes) + rho <- nbx.cor[ii,k] + gene <- rownames(nbx.cor)[ii] + me <- W$net$labels[gene] + nbx.list[[k]] <- data.frame( gene = gene, rho = rho, module = me) + } + + ##if(length(nbx.list)==1) nbx.list <- nbx.list[[1]] + return(nbx.list) +} + + +#' Compute consensus enrichment by calculating overlapping enriched +#' terms. +#' +wgcna.computeConsensusModuleEnrichment <- function(cons, + GMT, + annot, + methods = c("fisher","gsetcor","xcor"), + min.genes = 10, + ntop = 400 ) +{ + if(0) { + methods = c("fisher","gsetcor","xcor") + min.genes = 10 + ntop = 400 + annot = NULL + GMT <- Matrix::t(playdata::GSETxGENE) + } + + gseaX <- list() + i=1 + for(i in 1:length(cons$datExpr)) { + + geneX <- t(cons$datExpr[[i]]) + dim(geneX) + + ## Rename everything to symbols + if(!is.null(annot)) { + geneX <- rename_by2(geneX, annot, "symbol") + GMT <- rename_by2(GMT, annot, "symbol") + } + ng <- length(intersect(rownames(geneX), rownames(GMT))) + if(ng == 0) { + message("[wgcna.computeConsensusModuleEnrichment] ERROR. No symbol overlap.") + return(NULL) + } + symbols <- intersect(rownames(GMT),rownames(geneX)) + message("[wgcna.computeConsensusModuleEnrichment] number of symbols: ", length(symbols)) + geneX <- geneX[symbols,] + GMT <- GMT[symbols,] + + ## select on minimum gene sets size + sel <- which( Matrix::colSums(GMT!=0) >= min.genes ) + GMT <- GMT[,sel] + + ## Compute single-samples gene set expression + gsetX <- plaid::plaid(geneX, matG=GMT) + dim(gsetX) + + ## Create extended Eigengene matrix (ME). ME should be nicely + ## normalized/scaled so we just rbind across datasets + ME <- cons$net$multiMEs[[i]]$data + dim(ME) + + ## get genes in modules + me.genes <- tapply(names(cons$net$colors), cons$net$colors, list) + names(me.genes) <- paste0("ME",names(me.genes)) + if(!is.null(annot)) { + me.genes <- lapply(me.genes, function(gg) probe2symbol(gg, annot)) + } + me.genes <- lapply(me.genes, function(g) intersect(g, symbols)) + rownames(ME) + colnames(geneX) <- rownames(ME) + colnames(gsetX) <- rownames(ME) + + k <- names(cons$datExpr)[i] + gseaX[[k]] <- wgcna.run_enrichment_methods( + ME, + me.genes = me.genes, + GMT= GMT, + geneX = geneX, + gsetX = gsetX, + methods = methods, + min.genes = min.genes, + ntop = ntop) + } + + cons.gsea <- list() + m=1 + for(m in names(gseaX[[1]])) { + xx <- lapply( gseaX, function(g) g[[m]] ) + sel <- Reduce(intersect, lapply(xx, rownames)) + if(length(sel) > 0) { + if(length(sel)==1) sel <- c(sel,sel) ## length==1 crashes... + xx <- lapply(xx, function(x) x[sel,,drop=FALSE] ) + xx.score <- sapply(xx, function(x) x[,"score"]) + colnames(xx.score) <- paste0("score.",colnames(xx.score)) + + xx.pvalue <- lapply(xx, function(x) x[,grep("^p",colnames(x))]) + xx.pvalue <- do.call(cbind, xx.pvalue) + + m.score <- rowMeans(xx.score,na.rm=TRUE) + m.pvalue <- apply(sapply(xx, function(x) x[,"p.value"]), 1, max, na.rm=TRUE) + m.qvalue <- p.adjust( m.pvalue ) + df <- data.frame( + module = xx[[1]]$module, + geneset = xx[[1]]$geneset, + score = m.score, + xx.score, + p.value = m.pvalue, + q.value = m.qvalue, + overlap = xx[[1]]$overlap, + genes = xx[[1]]$genes, + xx.pvalue + ) + df <- df[order(df$p.value),] + #df <- df[!duplicated(df$geneset),,drop=FALSE] + cons.gsea[[m]] <- df + } + } + + return(cons.gsea) +} + + +wgcna.run_enrichment_methods <- function(ME, me.genes, GMT, geneX, gsetX, + methods = c("fisher","gsetcor","xcor"), + ntop = 400, min.genes = 3 + ) +{ + + rho.list <- list() + pval.list <- list() + + ## align matrices + bg <- intersect(rownames(GMT), rownames(geneX)) + ss <- intersect(colnames(GMT), rownames(gsetX)) + GMT <- GMT[bg,ss] + geneX <- geneX[bg,] + gsetX <- gsetX[ss,] + + ## select on minimum genes + sel <- which( Matrix::colSums(GMT!=0) >= min.genes ) + GMT <- GMT[,ss] + gsetX <- gsetX[ss,] + + message("Computing enrichment for ", length(ss), " genesets") + + ## Here we correlate geneset score (averageCLR) with the module ## eigengene (ME). This should select genesets correlated with the ## ME. - if ("gsetcor" %in% method) { - message("[wgcna.compute_enrichment] calculating geneset correlation...") - gsetX <- gsetX[colnames(G1), ] - ME <- as.matrix(wgcna$net$MEs) - if (ncol(gsetX) > nrow(ME)) { - kk <- sample(colnames(gsetX), nrow(ME)) - gsetX <- gsetX[, kk, drop = FALSE] - } - rc.rho <- cor(t(gsetX), ME, use = "pairwise") - rc.pvalue <- cor.pvalue(rc.rho, n = nrow(ME)) + if ("gsetcor" %in% methods && !is.null(gsetX)) { + message("[wgcna.run_enrichment_methods] calculating single-sample geneset correlation...") + rc.rho <- matrix(NA, ncol(GMT), ncol(ME)) + rc.pvalue <- matrix(NA, ncol(GMT), ncol(ME)) + dimnames(rc.rho) <- list( colnames(GMT), colnames(ME)) + dimnames(rc.pvalue) <- list( colnames(GMT), colnames(ME)) + jj <- which(rownames(gsetX) %in% colnames(GMT)) + kk <- intersect(colnames(gsetX), rownames(ME)) ## common samples + rho.jj <- cor(t(gsetX[jj,kk]), ME[kk,], use = "pairwise") + #rc.pvalue <- cor.pvalue(rc.rho, n = length(kk)) + pvalue.jj <- WGCNA::corPvalueStudent(rho.jj, n = length(kk)) + ii <- match(rownames(gsetX)[jj], rownames(rc.rho)) + rc.rho[ii,] <- rho.jj + rc.pvalue[ii,] <- pvalue.jj rho.list[["gsetcor"]] <- rc.rho pval.list[["gsetcor"]] <- rc.pvalue } - ## Here we correlate genes with the module eigengene (ME) then do - ## a gset.rankcor() on the ME correlation. - if ("mmcor" %in% method) { - message("[wgcna.compute_enrichment] calculating MM correlation...") - mm <- cor(t(geneX), wgcna$net$MEs, use = "pairwise") - rc <- gset.rankcor(mm, G1, compute.p = TRUE) ## NEEDS CHECK!!! - rho.list[["mmcor"]] <- rc$rho - pval.list[["mmcor"]] <- rc$p.value + ## Here we correlate the module eigengene (ME) with genes and then + ## do a gset.rankcor() on the ME correlation. + if ("xcor" %in% methods) { + message("[wgcna.run_enrichment_methods] calculating eigengene GBA correlation...") + gba <- cor(t(geneX), ME, use = "pairwise") + rc <- gset.rankcor(gba, GMT, compute.p = TRUE) ## NEEDS CHECK!!! + rho.list[["xcor"]] <- rc$rho + pval.list[["xcor"]] <- rc$p.value } - ## Here we correlate genes with the module eigengene (ME) then do - ## a gset.rankcor() on the ME correlation. - if ("wcor" %in% method) { - message("[wgcna.compute_enrichment] calculating W correlation...") - rc <- gset.rankcor(W, G1, compute.p = TRUE) ## NEEDS CHECK!!! - rho.list[["wcor"]] <- rc$rho - pval.list[["wcor"]] <- rc$p.value + gmt <- mat2gmt(GMT) + if(1) { + ## we pre-select to make this faster + Pmin <- sapply(pval.list, function(P) apply(P,1,min)) + sel <- head(order(rowMeans(apply(Pmin, 2, rank))), 5*ntop) + message("[wgcna.run_enrichment_methods] pre-selecting ",length(sel)," sets for fgsea/Fisher test...") + sel <- rownames(Pmin)[sel] + gmt <- gmt[sel] + } + + ## fGSEA + if ("fgsea" %in% methods) { + message("[wgcna.run_enrichment_methods] calculating module fgsea...") + xrho <- cor(t(geneX), ME, use = "pairwise") + res <- list() + i=1 + for(i in 1:ncol(xrho)) { + k <- colnames(xrho)[i] + res[[k]] <- fgsea::fgsea(gmt, xrho[,i]) ## NEEDS CHECK!!! + } + pw <- res[[1]]$pathway + res <- lapply(res, function(r) r[match(pw,r$pathway),]) + nes <- sapply(res, function(r) r$NES) + pval <- sapply(res, function(r) r$pval) + rownames(nes) <- rownames(pval) <- pw + colnames(nes) <- colnames(pval) <- names(res) + rho.list[["fgsea"]] <- nes + pval.list[["fgsea"]] <- pval } + + ## Perform fisher-test on ME genes + if ("fisher" %in% methods) { + + message("[wgcna.run_enrichment_methods] calculating Fisher tests...") + rho <- matrix(NA, length(gmt), ncol(ME)) + pval <- matrix(NA, length(gmt), ncol(ME)) + dimnames(rho) <- list( names(gmt), colnames(ME)) + dimnames(pval) <- list( names(gmt), colnames(ME)) + + ## perform Fisher test for all modules using the module genes + i=1 + for (i in 1:ncol(rho)) { + k <- colnames(rho)[i] + gg <- me.genes[[k]] + rr <- try(gset.fisher(gg, gmt, background = bg, fdr = 1, + min.genes = -1, verbose = 0, sort.by='none', no.pass=1)) + + if (!"try-error" %in% class(rr)) { + rr <- rr[match(rownames(rho), rownames(rr)), ] + rho[,i] <- rr$odd.ratio + pval[,i] <- rr$p.value + } + } + + ## handle infinite or NA + rho[is.infinite(rho)] <- 2*max(rho, na.rm=TRUE) ## Inf odd.ratio + pval[is.na(pval)] <- 1 + rho[is.na(rho)] <- 0 - ## Compute meta rank and pval - meta.p <- Reduce(pmax, pval.list) ## NEED RETHINK!!! + rho.list[["fisher"]] <- rho + pval.list[["fisher"]] <- pval + } + + lapply(rho.list, dim) + + ## ensure dimensions + gsets <- Reduce(intersect, lapply( rho.list, rownames)) + modules <- Reduce(intersect, lapply( rho.list, colnames)) + rho.list <- lapply( rho.list, function(x) x[gsets,modules,drop=FALSE]) + pval.list <- lapply( pval.list, function(x) x[gsets,modules,drop=FALSE]) + + ## Compute meta rank and pval. Handle NA for failing methods. + pvalNA <- lapply(pval.list, function(x) {x[is.na(x)]=0;x}) + ##pvalNA <- lapply(pval.list, function(x) {x[is.na(x)]=1;x}) + meta.p <- Reduce(pmax, pvalNA) ## NEED RETHINK!!! meta.q <- apply(meta.p, 2, p.adjust, method = "fdr") - rnk.list <- lapply(rho.list, function(x) apply(x, 2, rank) / nrow(x)) + + ## NEED RETHINK: how about negative FC??? + rnk.list <- lapply(rho.list, function(x) apply(x, 2, rank, na.last='keep') / nrow(x)) meta.rnk <- Reduce("+", rnk.list) / length(rnk.list) + rnk.NAZERO <- lapply(rnk.list, function(x) {x[is.na(x)]=0;x}) + rnk.NSUM <- Reduce('+', lapply(rnk.list, function(x) !is.na(x))) + meta.rnk <- Reduce('+', rnk.NAZERO) / rnk.NSUM ## create dataframe by module - message("[wgcna.compute_enrichment] creating dataframes...") + message("[wgcna.run_enrichment_methods] creating dataframes...") gse.list <- list() i <- 1 for (i in 1:ncol(meta.p)) { @@ -581,8 +1716,8 @@ wgcna.compute_enrichment <- function(wgcna, pgx, q.value = meta.q[, i], pv ) - df <- df[order(-df$score), ] - if (!is.null(ntop) && ntop > 0) df <- head(df, n = ntop) + df <- df[order(-abs(df$score)), ] + df <- head(df, ntop) gse.list[[k]] <- df } @@ -591,272 +1726,1280 @@ wgcna.compute_enrichment <- function(wgcna, pgx, k <- names(gse.list)[1] for (k in names(gse.list)) { gset <- rownames(gse.list[[k]]) - gg <- wgcna$me.genes[[k]] - if (!is.null(annot)) { - gg <- probe2symbol(gg, annot, query = symbol.col) - } else { - gg <- gsub(".*:", "", gg) - } + gg <- me.genes[[k]] set.genes <- lapply(gmt[gset], function(s) intersect(s, gg)) n0 <- sapply(gmt[gset], length) n1 <- sapply(set.genes, length) gse.genes[[k]] <- sort(table(unlist(set.genes)), decreasing = TRUE) set.genes <- sapply(set.genes, function(g) paste(sort(g), collapse = "|")) - gse.list[[k]]$genes <- set.genes gse.list[[k]]$overlap <- paste0(n1, "/", n0) + gse.list[[k]]$genes <- set.genes } - ## compute gene enrichment - gene.list <- list() - ## message("[wgcna.compute_enrichment] computing gene enrichment...") - ## k = names(gse.list)[1] - ## for(k in names(gse.list)) { - ## me <- gse.list[[k]] - ## rnk <- me$score - ## names(rnk) <- rownames(me) - ## me.genes <- strsplit(me$genes,split='\\|') - ## num.genes <- sapply(me.genes,length) - ## if(any(num.genes>0)) { - ## ii <- which(num.genes>0) - ## me.names <- unlist(mapply(rep, rownames(me)[ii], num.genes[ii])) - ## me.genes <- me.genes[ii] - ## me.gmt <- tapply(me.names, unlist(me.genes), list) - ## res <- fgsea::fgsea(me.gmt, rnk) - ## res <- res[order(-res$NES),] - ## gene.list[[k]] <- head(res,100) - ## } - ## } + return(gse.list) +} - res <- list( - gsets = gse.list, - genes = gene.list - ) - return(res) +wgcna.merge_block_dendrograms <- function(net, X, method = 1) { + ## This function is fragile: it can give a C stack limit error. In + ## case that happens you can increase the stack limit by running on + ## the cmd line: >>> ulimit -s unlimited + ## + hc <- net$dendrograms + + ## merge block dendrogram + mx <- list() + for (b in 1:length(net$dendrograms)) { + ii <- which(net$goodGenes & net$blocks == b) + mx[[b]] <- colMeans(X[ii, ]) + hc[[b]]$labels <- rownames(X)[ii] + } + + if (length(hc) == 1) { + return(hc[[1]]) + } + + ## compute parent dendrogram + M <- do.call(rbind, mx) + hclust_p <- hclust(dist(M), method = "average") + dend_p <- as.dendrogram(hclust_p) + dend.list <- lapply(hc, as.dendrogram) + + if (method == 1) { + merged <- ComplexHeatmap::merge_dendrogram(dend_p, dend.list) + } else { + mrg <- hclust_p$merge + merged_branch <- list() + k <- 1 + for (k in 1:nrow(mrg)) { + i <- mrg[k, 1] + j <- mrg[k, 2] + if (i < 0) d1 <- dend.list[[-i]] + if (i > 0) d1 <- merged_branch[[i]] + if (j < 0) d2 <- dend.list[[-j]] + if (j > 0) d2 <- merged_branch[[j]] + merged_branch[[k]] <- merge(d1, d2) ## actual merge + } + merged <- merged_branch[[k]] + } + + merged_hclust <- as.hclust(merged) + merged_hclust } +##========================================================================= +## CONSENSUS WGCNA +##========================================================================= + +#' +#' #' @export wgcna.runConsensusWGCNA <- function(exprList, - datTraits, + phenoData, + GMT = NULL, + annot = NULL, + ngenes = 2000, power = 12, - minKME = 0.8, - cutheight = 0.15, - deepsplit = 2, - maxBlockSize = 5000) { - ## exprList <- list(Set1 = V1, Set2 = V2) - multiExpr <- WGCNA::list2multiData(lapply(exprList, Matrix::t)) - - # The variable exprSize contains useful information about the sizes of all - # of the data sets now we run automatic module detection procedure - net.list <- list() - for (k in names(multiExpr)) { - message("[wgcna.runConsensusWGCNA] computing WGCNA for ", k) - net.list[[k]] <- WGCNA::blockwiseModules( - datExpr = multiExpr[[k]]$data, - power = power, - networkType = "signed", - TOMType = "signed", - minModuleSize = 20, - deepSplit = deepsplit, - mergeCutHeight = cutheight, - numericLabels = FALSE, - minKMEtoStay = minKME, - saveTOMs = FALSE, + minModuleSize = 20, + minKME = 0.3, + mergeCutHeight = 0.15, + deepSplit = 2, + maxBlockSize = 9999, + addCombined = FALSE, + calcMethod = "fast", + drop.ref = FALSE, + compute.stats = TRUE, + compute.enrichment = TRUE, + summary = TRUE, + ai_model = DEFAULT_LLM, + ai_experiment = "", + gsea.mingenes = 10, + gsea.ntop = 1000, + gset.methods = c("fisher","gsetcor","xcor"), + verbose = 1, + progress = NULL + ) { + + ## if(0) { + ## power=6;minKME=0.5;cutheight=0.15;deepSplit=2;maxBlockSize=5000;verbose=1;calcMethod="fast";addCombined=0;ngenes=2000;minModuleSize=20;mergeCutHeight=0.15 + ## gsea.mingenes=20;gset.methods = c("fisher","gsetcor","xcor") + ## } + + colors <- NULL + + ## Align and reduce matrices if needed + gg <- Reduce(intersect, lapply(exprList,rownames)) + exprList <- lapply(exprList, function(x) x[gg, , drop = FALSE]) + if( length(gg) > ngenes ) { + sdx <- Reduce('*',lapply(exprList, function(x) matrixStats::rowSds(x))) + ii <- head(order(-sdx), ngenes) + exprList <- lapply(exprList, function(x) x[ii, , drop = FALSE]) + } + + if(addCombined) { + exprList[['Combined']] <- do.call(cbind, exprList) + } + + exprsamples <- unlist(lapply(exprList, colnames)) + if(!all(exprsamples %in% rownames(phenoData))) { + stop("samples mismatch for exprList and phenoData") + } + + multiExpr = WGCNA::list2multiData(lapply(exprList, Matrix::t)) + cor <- WGCNA::cor ## needed... + + if(!is.null(power) && length(power)==1) { + power <- rep(power, length(multiExpr)) + } + + # module detection procedure + layers <- list() + if(!is.null(progress)) progress$inc(0.1, "Computing layers...") + for (i in 1:length(multiExpr)) { + k <- names(multiExpr)[i] + message("[wgcna.runConsensusWGCNA] >>> computing WGCNA for ", k) + X = Matrix::t(multiExpr[[i]]$data) + layers[[k]] <- wgcna.compute( + X = X, + samples = phenoData, + ngenes = ngenes, + power = power[i], + minmodsize = minModuleSize, + calcMethod = calcMethod, + deepsplit = deepSplit, + mergeCutHeight = mergeCutHeight, + numericlabels = FALSE, + minKME = minKME, maxBlockSize = maxBlockSize, - verbose = 0 + compute.stats = compute.stats, + sv.tom = 40, + verbose = verbose ) } # now we run automatic consensus module detection - message("[wgcna.runConsensusWGCNA] computing consensus modules...") - net <- WGCNA::blockwiseConsensusModules( - multiExpr[], - power = power, + message("[wgcna.runConsensusWGCNA] >>> computing CONSENSUS modules...") + if(!is.null(progress)) progress$inc(0.1, "Computing consensus...") + consensusPower <- unlist(sapply(layers, function(w) w$net$power)) + if(is.null(consensusPower) && !is.null(power)) { + consensusPower <- power + } + if(is.null(consensusPower)) { + consensusPower <- rep(12, length(layers)) + } + + sel <- setdiff(names(multiExpr), c("Combined")) + cons <- WGCNA::blockwiseConsensusModules( + multiExpr[sel], + power = as.numeric(consensusPower), networkType = "signed", TOMType = "signed", - minModuleSize = 20, - deepSplit = deepsplit, - mergeCutHeight = cutheight, + minModuleSize = as.integer(minModuleSize), + deepSplit = as.integer(deepSplit), + mergeCutHeight = as.numeric(mergeCutHeight), numericLabels = FALSE, - minKMEtoStay = minKME, - maxBlockSize = maxBlockSize, + minKMEtoStay = as.numeric(minKME), + maxBlockSize = as.integer(maxBlockSize), saveTOMs = FALSE, - verbose = 1 + useDiskCache = FALSE, + verbose = verbose ) - + cons$power = consensusPower + ## create and match colors - colors <- sapply(net.list, function(net) net$colors) - c0 <- net$colors - matched.colors <- apply(colors, 2, function(k) WGCNA::matchLabels(k, c0)) - colors <- cbind(Consensus = c0, matched.colors) + for(i in 1:length(layers)) { + layers[[i]] <- wgcna.matchColors(layers[[i]], cons$colors) + } + layers.colors <- sapply(layers, function(r) r$net$colors) + colors <- cbind(Consensus = cons$colors, layers.colors) + ## add labels to dendrogram - for (i in 1:length(net$dendrograms)) { - ii <- which(net$goodGenes & net$blocks == i) - xnames <- names(net$colors) - net$dendrograms[[i]]$labels <- xnames[ii] + for(i in 1:length(cons$dendrograms)) { + ii <- which(cons$goodGenes & cons$blocks==i) + xnames <- names(cons$colors) + cons$dendrograms[[i]]$labels <- xnames[ii] } - - ## merge dendrograms - message("[wgcna.compute] merge_block_dendrograms...") - multiX <- Matrix::t(do.call(rbind, lapply(exprList, function(x) scale(t(x))))) - merged <- try(wgcna.merge_block_dendrograms(net, multiX)) - if (!inherits(merged, "try-error")) { - net$merged_dendro <- merged + + ## merge dendrograms ???? + message("[wgcna.runConsensusWGCNA] merge_block_dendrograms...") + multiX <- Matrix::t(do.call(rbind,lapply(exprList,function(x)scale(t(x))))) + merged <- try(wgcna.merge_block_dendrograms(cons, multiX)) + if(!inherits(merged,"try-error")) { + cons$merged_dendro <- merged } else { - net$merged_dendro <- NULL + cons$merged_dendro <- NULL } - + ## create module-trait matrices for each set - message("[wgcna.compute] computing module-traits matrices...") - Z.list <- list() - k <- 1 - for (k in names(net$multiME)) { - M <- (net$multiME[[k]][[1]]) - Z.list[[k]] <- cor(M, datTraits[rownames(M), ], use = "pairwise") - } - z.check <- sapply(Z.list, function(z) colSums(z != 0, na.rm = TRUE) > 0) - sel <- names(which(rowMeans(z.check) == 1)) - sel - Z.list <- lapply(Z.list, function(z) z[, sel, drop = FALSE]) - Z.list + message("[wgcna.runConsensusWGCNA] >>> computing module-traits matrices...") + datTraits <- 1*expandPhenoMatrix( + phenoData, + drop.ref = drop.ref, + keep.numeric = TRUE + ) + zlist <- list() + k=1 + for(k in names(cons$multiME)) { + M <- (cons$multiME[[k]][[1]]) + Z <- datTraits + kk <- intersect(rownames(M),rownames(Z)) + zrho <- cor(M[kk,], Z[kk,], use="pairwise") + zrho[is.na(zrho)] <- 0 ## NEED RETHINK!! + zlist[[k]] <- zrho + } ## create consensus module-trait matrix - message("[wgcna.compute] computing consensus module-traits matrix...") - mdim <- sapply(exprList, ncol) - pv <- mapply(function(z, n) corPvalueStudent(z, n), Z.list, mdim, SIMPLIFY = FALSE) - all.sig <- Reduce("*", lapply(pv, function(p) 1 * (p < 0.05))) - all.sig - all.pos <- Reduce("*", lapply(Z.list, function(z) sign(z) == 1)) - all.neg <- Reduce("*", lapply(Z.list, function(z) sign(z) == -1)) - disconcordant <- !(all.pos | all.neg) - disconcordant - zsign <- sign(Reduce("+", lapply(Z.list, sign))) - consZ <- Reduce(pmin, lapply(Z.list, abs)) * zsign - consZ[disconcordant] <- NA - ## consZ[!all.sig] <- NA - ydim <- sapply(exprList, ncol) - + ydim <- sapply(exprList, ncol) + consZ <- wgcna.computeConsensusMatrix(zlist, ydim=ydim, psig=0.05) + + ## add slots + datExpr <- lapply(exprList, Matrix::t) + res <- list( - net = net, + net = cons, + layers = layers, + datExpr = datExpr, + datTraits = datTraits, modTraits = consZ, - dendro = net$merged_dendro, + dendro = cons$merged_dendro, colors = colors, - zlist = Z.list, - ydim = ydim + zlist = zlist, + ydim = ydim, + class = "consensus" ) + ## run stats + if(compute.stats) { + message("[wgcna.runConsensusWGCNA] >>> computing gene statistics...") + res$stats <- wgcna.computeConsensusGeneStats(res) + } + + ## run enrichment + if(compute.enrichment) { + if(!is.null(progress)) progress$inc(0.2, "Computing enrichment...") + message("[wgcna.runConsensusWGCNA] >>> computing module enrichment...") + if(is.null(GMT)) GMT <- Matrix::t(playdata::GSETxGENE) + res$gsea <- wgcna.computeConsensusModuleEnrichment( + res, + GMT = GMT, + method = gset.methods, + annot = annot, + min.genes = gsea.mingenes, + ntop = gsea.ntop + ) + if(summary) { + if(!is.null(progress)) progress$set(message = "Annotating modules...", value=0.6) + message("Annotating modules using ", ai_model) + res$summary <- wgcna.describeModules( + res, multi = TRUE, ntop = 25, model = ai_model, + annot = annot, experiment = ai_experiment, + verbose = 0 + ) + } + } + res } +#' @export +wgcna.matchColors <- function(wgcna, refcolors) { + + oldcolors <- wgcna$net$colors + newcolors <- WGCNA::matchLabels(oldcolors, refcolors) + lut <- table(oldcolors, newcolors) + old2new <- colnames(lut)[max.col(lut)] + names(old2new) <- rownames(lut) + prefix <- substring(names(wgcna$me.colors),1,2)[1] + old2newME <- paste0(prefix,old2new) + names(old2newME) <- paste0(prefix,names(old2new)) + old2new <- c(old2new, old2newME) + + newcol <- function(x) { + array( old2new[x], dimnames = list(names(x))) + } + + ## rename everything in net object + wgcna$net$colors <- newcol(wgcna$net$colors) + if("labels" %in% names(wgcna$net)) { + wgcna$net$labels <- newcol(wgcna$net$labels) + } + + ## rename unmergedColors + if("unmergedColors" %in% names(wgcna$net)) { + wgcna$net$unmergedColors <- newcol(wgcna$net$unmergedColors) + } + names(wgcna$net$MEs) <- newcol(names(wgcna$net$MEs)) + + ## rename everything in wgcna object + names(wgcna$me.genes) <- newcol(names(wgcna$me.genes)) + wgcna$me.colors <- newcol(wgcna$me.colors) + names(wgcna$me.colors) <- newcol(names(wgcna$me.colors)) + colnames(wgcna$W) <- newcol(colnames(wgcna$W)) + rownames(wgcna$modTraits) <- newcol(rownames(wgcna$modTraits)) + + ## rename everything in stats object + if("stats" %in% names(wgcna) && !is.null(wgcna$stats)) { + rownames(wgcna$stats[['moduleTraitCor']]) <- newcol(rownames(wgcna$stats[['moduleTraitCor']])) + rownames(wgcna$stats[['moduleTraitPvalue']]) <- newcol(rownames(wgcna$stats[['moduleTraitPvalue']])) + colnames(wgcna$stats[['moduleMembership']]) <- newcol(colnames(wgcna$stats[['moduleMembership']])) + colnames(wgcna$stats[['MMPvalue']]) <- newcol(colnames(wgcna$stats[['MMPvalue']])) + } + return(wgcna) +} + +#' @export +wgcna.createConsensusLayers <- function(exprList, + samples, + ngenes = 2000, + power = 12, + minModuleSize = 20, + deepSplit = 2, + mergeCutHeight = 0.15, + minKME = 0.3, + maxBlockSize = 9999, + prefix = NULL, + verbose = 1 + ) { + + if(0) { + ngenes = 2000 + power = 12 + minModuleSize = 5 + deepSplit = 2 + mergeCutHeight = 0.15 + minKME = 0.3 + maxBlockSize = 9999 + verbose = 1 + prefix=NULL + } + + if(is.null(prefix)) prefix <- names(exprList) + nx <- length(exprList) + prefix <- head(rep(prefix, nx),nx) + + ## reduce + message("[wgcna.computeConsensusLayers] Aligning matrices...") + gg <- Reduce(intersect, lapply(exprList,rownames)) + exprList <- lapply(exprList, function(x) x[gg,]) + + if( length(gg) > ngenes ) { + message("[wgcna.computeConsensusLayers] Reducing to ",ngenes," genes") + sdx <- Reduce('*',lapply(exprList, function(x) matrixStats::rowSds(x))) + ii <- head(order(-sdx), ngenes) + exprList <- lapply(exprList, function(x) x[ii,]) + } + multiExpr = WGCNA::list2multiData(lapply(exprList, Matrix::t)) + + ## determine power vector + if(is.null(power) || is.na(power) ) power <- "sft" + if(as.character(power[1]) %in% c("sft","iqr")) { + ## Estimate best power + power <- power[1] + message("[wgcna.createConsensusLayers] optimal power method = ", power) + est.power <- rep(NA, length(exprList)) + i=1 + for(i in 1:length(exprList)) { + p <- wgcna.pickSoftThreshold( + Matrix::t(exprList[[i]]), sft=NULL, rcut=0.85, powers = NULL, + method=power, nmax=1000, verbose=0) + if(length(p)==0 || is.null(p) ) p <- NA + est.power[i] <- p + } + est.power + power <- ifelse( is.na(est.power), 12, est.power) + } else { + power <- as.numeric(power) + } + nw <- length(exprList) + power <- head(rep(power, nw),nw) + names(power) <- names(exprList) + + message("[wgcna.computeConsensusLayers] Computing consensus modules...") + cons = WGCNA::blockwiseConsensusModules( + multiExpr, + power = as.numeric(power), + networkType = "signed", + TOMType = "signed", + minModuleSize = as.integer(minModuleSize), + deepSplit = as.integer(deepSplit), + mergeCutHeight = as.numeric(mergeCutHeight), + numericLabels = FALSE, + minKMEtoStay = as.numeric(minKME), + maxBlockSize = as.integer(maxBlockSize), + saveTOMs = FALSE, + useDiskCache = FALSE, + verbose = verbose + ) + + ## + message("[wgcna.computeConsensusLayers] Creating consensus layers...") + aligned <- list() + i=1 + for(i in 1:length(exprList)) { + k <- names(exprList)[i] + sel <- c("colors","unmergedColors","goodSamples","goodGenes", + "dendrograms","blockGenes","blocks") + net <- cons[sel] + net$power <- power[i] + X = exprList[[i]] + w <- wgcna.compute( + X = exprList[[i]], + samples, + prefix = prefix[i], + ngenes = -1, + net = net, + calcMethod = "fast", + sv.tom=0 + ) + aligned[[k]] <- w + } + + return(aligned) +} + +#' Compute consensus matrix from list of matrices. The consensus +#' matrix checks for consistent sign and minimal threshold for each +#' matrix. Optionally filters on consistent p-value. #' +#' @param ydim original dimension of data #' +#' #' @export -wgcna.getGeneStats <- function(wgcna, module, trait, plot = TRUE, - showallmodules = TRUE, col = NULL, - main = NULL) { - p1 <- c("moduleMembership", "MMPvalue") - p2 <- c("traitSignificance", "GSPvalue", "foldChange", "foldChangePvalue") - p3 <- c("geneCentrality") - nrow <- ncol(wgcna$datExpr) - df <- wgcna$stats[[p1[1]]][, 0] ## empty data.frame - rownames(df) <- colnames(wgcna$datExpr) +wgcna.computeConsensusMatrix <- function(matlist, ydim, psig = 0.05, consfun="min") { - mm.cols <- colnames(wgcna$stats[[p1[1]]]) - if (!is.null(module) && module %in% mm.cols) { - A1 <- sapply(wgcna$stats[p1], function(x) x[, module]) - df <- cbind(df, A1) + ## create consensus module-trait matrix + matsign <- lapply(matlist, sign) + matsign <- lapply(matsign, function(x) {x[is.na(x)]=0; x}) + all.pos <- Reduce("*", lapply(matsign, function(z) (z >= 0) )) + all.neg <- Reduce("*", lapply(matsign, function(z) (z <= 0) )) + concordant <- (all.pos | all.neg) + + matlistN <- Reduce("+", lapply(matlist, function(x) !is.na(x))) + matlist0 <- lapply(matlist, function(x) {x[is.na(x)]=0; x}) + + zsign <- sign(Reduce("+", matsign)) ## mean sign?? + if(consfun=="min") { + pminFUN <- function(...) pmin(..., na.rm=TRUE) + consZ <- Reduce(pminFUN, lapply(matlist, abs)) * zsign + } else if(consfun=="gmean") { + ## geometric mean + matlistG <- lapply(matlist, function(x) {x=log(abs(x));x[is.na(x)]=0;x}) + consZ <- exp(Reduce('+', matlistG) / matlistN) + consZ <- consZ * zsign + } else { + ## mean + consZ <- Reduce('+', matlist0) / matlistN } + consZ[!concordant] <- NA + + if(psig < 1) { + if(length(ydim) == 1) ydim <- rep(ydim[1], length(matlist)) + pv <- mapply(function(z, n) + WGCNA::corPvalueStudent(z, n), matlist, ydim, SIMPLIFY = FALSE) + for(i in 1:length(pv)) pv[[i]][is.na(pv[[i]])] <- 0 ## missing??? + all.sig <- Reduce("*", lapply(pv, function(p) 1 * (p < psig))) + consZ[!all.sig] <- NA + } + return(consZ) +} - tt.cols <- colnames(wgcna$stats[[p2[1]]]) - if (!is.null(trait) && trait %in% tt.cols) { - A2 <- sapply(wgcna$stats[p2], function(x) x[, trait]) - df <- cbind(df, A2) +#' Compute consensus matrix from list of matrices. The consensus +#' matrix checks for consistent sign and minimal threshold for each +#' matrix. Optionally filters on consistent p-value. +#' +#' @export +wgcna.computeDistinctMatrix <- function(matlist, ydim, psig = 0.05, min.diff=0.3, + consmax = 0) { + ## create difference module-trait matrix + pv <- mapply(function(z, n) corPvalueStudent(z, n), + matlist, ydim, SIMPLIFY = FALSE) + matsign <- lapply( matlist, sign ) + Q <- matlist + i=1 + for(i in 1:length(matlist)) { + + ## Any entry not significant is anyway invalid + notsig <- (pv[[i]] > psig) + Q[[i]][notsig] <- NA + + ## Any entry with too small difference with others is invalid + refmat <- Reduce("+", matlist[-i]) / (length(matlist)-1) + diff <- matlist[[i]] - refmat + notdiff <- ( abs(diff) < min.diff ) + Q[[i]][notdiff] <- NA + + ## any entry that has consensus is invalid + cons <- mapply(function(P, S) (P<0.05)*(S==matsign[[i]]), + pv[-i], matsign[-i], SIMPLIFY=FALSE) + cons <- (Reduce('+', cons) > consmax) ## or function + Q[[i]][cons] <- NA } + return(Q) +} - A3 <- wgcna$stats[[p3]] - df <- cbind(df, centrality = A3) - sel <- c("moduleMembership", "traitSignificance", "foldChange", "centrality") - sel <- intersect(sel, colnames(df)) - df1 <- pmax(as.matrix(df[, sel]), 1e-8) - score <- exp(rowMeans(log(df1))) - - labels <- wgcna$net$labels - df <- data.frame(module = labels, score = score, df) - df <- df[order(-df$score), ] - if (!is.null(module) && !showallmodules) { - sel <- which(df$module == module) - df <- df[sel, , drop = FALSE] +#' @export +wgcna.plotConsensusOverlapHeatmap <- function(net1, net2, + setLabels=NULL, + lab.line = c(8,8), + plotDendro = FALSE, + setpar=TRUE) { + + if(is.null(setLabels)) + setLabels <- c("Set1","Set2") + if(length(setLabels)==1) setLabels <- paste0(setLabels,1:2) + + if(plotDendro) { + + layout.matrix <- matrix( c(1,2,5,3,4,5), nrow = 3, ncol = 2) + layout(mat = layout.matrix, heights= c(0.8,0.2,2.5), widths = c(1,1)) + + WGCNA::plotDendroAndColors( + dendro = net1$dendrograms[[1]], + colors = net1$colors, + dendroLabels = FALSE, + hang = 0.03, + addGuide = FALSE, + guideHang = 0.05, + #marAll = marAll, + setLayout = FALSE, + main = setLabels[1] + ) + + WGCNA::plotDendroAndColors( + dendro = net2$dendrograms[[1]], + colors = net2$colors, + dendroLabels = FALSE, + hang = 0.03, + addGuide = FALSE, + guideHang = 0.05, + #marAll = marAll, + setLayout = FALSE, + main = setLabels[2] + ) } - if (plot) { - cols <- c("moduleMembership", "traitSignificance", "foldChange", "centrality") - cols <- intersect(cols, colnames(df)) - df1 <- df[, cols] - col1 <- wgcna.labels2colors(wgcna$net$colors[rownames(df1)]) - if (!is.null(col)) col1 <- col - pairs(df1, col = col1) - if (is.null(main)) { - main <- paste("Gene significance for module", module, "and trait", trait) - } - title(main, line = 3, cex.main = 1.15) + + firstColors <- wgcna.labels2colors(net1$colors) + secondColors <- wgcna.labels2colors(net2$colors) + overlap <- overlapTable(firstColors, secondColors) + names(overlap) + + T1 = overlap$countTable + T2 = table(firstColors, secondColors) + T3 = table(net1$colors, net2$colors) + + firstModTotals <- rowSums(overlap$countTable) + secondModTotals <- colSums(overlap$countTable) + firstModules <- rownames(overlap$countTable) + secondModules <- colnames(overlap$countTable) + + # Truncate p values smaller than 10^{-50} to 10^{-50} + pTable <- -log10(overlap$pTable) + pTable[is.infinite(pTable)] = 1.3*max(pTable[is.finite(pTable)]); + pTable[pTable>50 ] = 50 ; + + if(setpar) { + par(mfrow=c(1,1)); + par(cex = 1.0); + par(mar=c(10, 12.4, 2.7, 1)+0.3); } - df + # Use function labeledHeatmap to produce the color-coded table with all the trimmings + WGCNA::labeledHeatmap( + Matrix = t(pTable), + xLabels = paste(" ", firstModules), + yLabels = paste(" ", secondModules), + colorLabels = TRUE, + #xSymbols = paste0(setLabels[1],":", firstModules, " (", firstModTotals,")"), + #ySymbols = paste0(setLabels[2],":", secondModules, " (", secondModTotals, ")"), + xSymbols = paste0(firstModules, " (", firstModTotals,")"), + ySymbols = paste0(secondModules, " (", secondModTotals, ")"), + textMatrix = t(overlap$countTable), + colors = WGCNA::blueWhiteRed(100)[50:100], + main = paste("Correspondence of",setLabels[1],"and ", + setLabels[2],"modules"), + cex.text = 1.0, cex.lab = 1.0, setStdMargins = FALSE); + mtext(toupper(setLabels[1]), side=1, line=lab.line[1], cex=1.1) + mtext(toupper(setLabels[2]), side=2, line=lab.line[2], cex=1.1) + } -wgcna.merge_block_dendrograms <- function(net, X, method = 1) { - ## This function is fragile: it can give a C stack limit error. In - ## case that happens you can increase the stack limit by running on - ## the cmd line: >>> ulimit -s unlimited - ## - hc <- net$dendrograms +##========================================================================= +## PRESERVATION WGCNA +##========================================================================= - ## merge block dendrogram - mx <- list() - for (b in 1:length(net$dendrograms)) { - ii <- which(net$goodGenes & net$blocks == b) - mx[[b]] <- colMeans(X[ii, ]) - hc[[b]]$labels <- rownames(X)[ii] +#' @export +wgcna.runPreservationWGCNA <- function(exprList, + phenoData, + power = 12, + reference = 1, + add.merged=FALSE, + ngenes = 2000, + minModuleSize = 20, + deepSplit = 2, + annot = NULL, + compute.stats = TRUE, + compute.enrichment = TRUE, + gset.methods = c("fisher","gsetcor","xcor") + ) { + + if(is.character(reference)) { + reference <- match(reference, names(exprList)) + } + if(reference > 0) { + reference.name <- names(exprList)[reference] + } else { + reference.name <- "Consensus" + } + + ## multiset WGCNA + pres <- wgcna.runConsensusWGCNA( + exprList, + phenoData, + GMT = NULL, + annot = NULL, + ngenes = ngenes, + power = power, + minModuleSize = minModuleSize, + minKME = 0.3, + mergeCutHeight = 0.15, + deepSplit = deepSplit, + maxBlockSize = 9999, + addCombined = FALSE, + calcMethod = "fast", + drop.ref = FALSE, + compute.stats = FALSE, + compute.enrichment = FALSE, + gsea.mingenes = 10, + gset.methods = gset.methods + ) + + colorList <- lapply(pres$layers, function(w) w$net$colors) + names(colorList) <- names(pres$layers) + exprList <- lapply( pres$layers, function(w) t(w$datExpr)) + + if(add.merged || reference==0) { + message("[wgcna.runPreservationWGCNA] adding merged layer...") + cX <- lapply(exprList, function(x) x - rowMeans(x)) + merged <- do.call( cbind, cX) + exprList$Merged <- NULL + exprList <- c( list( Merged = merged), exprList) + cons.colors <- pres$net$colors + colorList <- c( list( Consensus = cons.colors ), colorList) + reference <- reference + 1 } + + message("[wgcna.runPreservationWGCNA] running WGCNA::modulePreservation...") + multiExpr = WGCNA::list2multiData(lapply(exprList,Matrix::t)) + mp <- WGCNA::modulePreservation( + multiExpr, + colorList, + referenceNetworks = reference, + nPermutations = 10, + networkType = "signed", + quickCor = 0, + verbose = 2, + indent = 0 + ) - if (length(hc) == 1) { - return(hc[[1]]) + ## Zsummary tables + mp.tables <- mp$preservation$Z[[1]][-reference] + Z <- sapply(mp.tables, function(mat) mat[,"Zsummary.pres"]) + rownames(Z) <- rownames(mp.tables[[1]]) + rownames(Z) <- paste0("ME",rownames(Z)) + colnames(Z) <- names(multiExpr)[-reference] + + ## median rank + mp.tables <- mp$preservation$observed[[1]][-reference] + M <- sapply(mp.tables, function(mat) mat[,"medianRank.pres"]) + rownames(M) <- rownames(mp.tables[[1]]) + rownames(M) <- paste0("ME",rownames(M)) + colnames(M) <- names(multiExpr)[-reference] + + ## module size + moduleSize <- mp.tables[[1]][,"moduleSize"] + names(moduleSize) <- rownames(Z) + + ## module-traits. We need to recompute the MEs (module eigengenes) + ## using the color coding of the reference set. + refColors <- colorList[[1]] + MEx = lapply(exprList, function(x) + WGCNA::moduleEigengenes(t(x), colors = refColors)$eigengenes + ) + + ## Compute module-trait correlation matrices + Y <- lapply(pres$layers, function(w) w$datTraits) + names(Y) + if("Merged" %in% names(MEx) && !"Merged" %in% names(Y) ) { + kk <- rownames(MEx[['Merged']]) + Y[['Merged']] <- pres$datTraits[kk,] + Y <-Y[names(MEx)] + } + kk <- Reduce( union, lapply(Y, colnames)) + Y <- lapply(Y, function(y) y[,match(kk,colnames(y))]) + for(i in 1:length(Y)) colnames(Y[[i]]) <- kk + R <- mapply(cor, MEx, Y, use="pairwise", SIMPLIFY=FALSE) + ##for(i in 1:length(R)) colnames(R[[i]]) <- paste0(names(R)[i],":",colnames(R[[i]])) + + ## gene statistics of reference layer + if(compute.stats) { + message("[wgcna.runPreservationWGCNA] computing gene statistics...") + ref = reference.name + wnet <- list( MEs = MEx[[ref]], colors = pres$colors[,ref]) + pres$stats <- wgcna.computeGeneStats(wnet, pres$datExpr[[ref]], + pres$datTraits, TOM=NULL) } + + ## geneset enrichment of reference layer + if(compute.enrichment) { + message("[wgcna.runPreservationWGCNA] computing geneset enrichment...") + pres$gsea <- wgcna.computeModuleEnrichment( + pres$layers[[ref]], + GMT = NULL, + gsetX = NULL, + annot = annot, + methods = gset.methods, + ntop = 1000, + xtop = 100, + filter = NULL + ) + } + + pres$modulePreservation <- mp + pres$Zsummary <- Z + pres$medianRank <- M + pres$moduleSize <- moduleSize + pres$modTraits <- R + pres$MEs <- MEx + + return(pres) +} - M <- do.call(rbind, mx) - hclust_p <- hclust(dist(M), method = "average") - dend_p <- as.dendrogram(hclust_p) - dend.list <- lapply(hc, as.dendrogram) +#' @export +wgcna.plotPreservationSummaries <- function(pres, setpar=TRUE) { - if (method == 1) { - merged <- ComplexHeatmap::merge_dendrogram(dend_p, dend.list) - } else { - mrg <- hclust_p$merge - merged_branch <- list() - k <- 1 - for (k in 1:nrow(mrg)) { - i <- mrg[k, 1] - j <- mrg[k, 2] - if (i < 0) d1 <- dend.list[[-i]] - if (i > 0) d1 <- merged_branch[[i]] - if (j < 0) d2 <- dend.list[[-j]] - if (j > 0) d2 <- merged_branch[[j]] - merged_branch[[k]] <- merge(d1, d2) ## actual merge + # Create a simple bar plot of Zsummary: + Z <- pres$Zsummary + ntest <- ncol(Z) + + if(setpar) { + par(mfrow=c(3, ntest), mar=c(5,5,4,1)) + } + xylist <- list( + c("moduleSize", "Zsummary.pres"), + c("moduleSize", "medianRank.pres"), + c("Zsummary.pres", "medianRank.pres") + ) + + for(xy in xylist) { + for(k in colnames(Z)) { + X <- data.frame( + color = substring(names(pres$moduleSize),3,99), + moduleSize = pres$moduleSize, + Zsummary.pres = pres$Zsummary[,k], + medianRank.pres = pres$medianRank[,k] + ) + xvar <- xy[1] + yvar <- xy[2] + ylim <- c(0,max(X[,yvar])) + if(yvar == "medianRank.pres") ylim <- rev(ylim) + plot( + X[,xvar], + X[,yvar], + pch = 21, + cex=2, + bg = X$color, + ylim = ylim, + xlab = xvar, + ylab = yvar + ) + title(yvar, cex.main=1.4, line=2.2) + sub <- paste( k, "vs.", "reference") + title(sub, cex.main=1, line=0.9) + if(yvar=="Zsummary.pres") abline(h = c(2,10), lty = 2) } - merged <- merged_branch[[k]] + } +} + +#' @export +wgcna.plotPreservationModuleTraits <- function(pres, + subplots = c("zsummary","consmt","wt.consmt"), + order.by = "name", + setpar=TRUE, rm.na=FALSE) { + + + if(all(is.numeric(subplots))) { + subplots <- c("zsummary","consmt","wt.consmt")[subplots] + } + + if(setpar) { + par(mfrow=c(2,2), mar=c(14,12,4,2)) + } + + ## compute consensus + Zsummary <- pres$Zsummary + + cR <- pres$modTraits + ydim <- sapply(pres$layers, function(w) nrow(w$datTraits)) + consZ <- wgcna.computeConsensusMatrix(cR, ydim, psig=1, consfun='gmean') + ##consZ <- consZ[rownames(cR[[1]]), colnames(cR[[1]])] + + ## match + ii <- intersect(rownames(Zsummary),rownames(consZ)) + Zsummary <- Zsummary[ii,,drop=FALSE] + consZ <- consZ[ii,,drop=FALSE] + + ## order + order.method="clust" + if(order.by == "name") { + ii <- order(rownames(Zsummary)) + Zsummary <- Zsummary[ii,,drop=FALSE] + consZ <- consZ[ii,,drop=FALSE] + } + if(order.by == "zsummary") { + ii <- order(-rowMeans(Zsummary**2)) + Zsummary <- Zsummary[ii,,drop=FALSE] + consZ <- consZ[ii,,drop=FALSE] + } + if(order.by == "clust") { + consZ1 <- consZ + consZ1[is.na(consZ1)] <- 0 + ii <- hclust(dist(consZ1))$order + jj <- hclust(dist(t(consZ1)))$order + Zsummary <- Zsummary[ii,,drop=FALSE] + consZ <- consZ[ii,jj,drop=FALSE] + } + + ##-------------------------------------- + ## Zsummary heatmap + ##-------------------------------------- + if("zsummary" %in% subplots) { + WGCNA::labeledHeatmap( + Matrix = Zsummary, + xLabels = colnames(Zsummary), + yLabels = rownames(Zsummary), + ySymbols = rownames(Zsummary), + colors = tail(WGCNA::blueWhiteRed(100),50), + colorLabels = TRUE, + setStdMargins = FALSE + ) + title("Module preservation (Zsummary)", line=1.2, cex.main=1.2) + } + + ##-------------------------------------- + ## Consensus Module-Trait + ##-------------------------------------- + validcol <- function(R) which( colMeans(is.na(R)) < 1 & + matrixStats::colSds(R,na.rm=TRUE) > 0.01 ) + + if("consmt" %in% subplots) { + clim <- max(abs(consZ),na.rm=TRUE) + cval <- seq( -clim, clim, length.out=201) + ii <- which( cval >= min(consZ,na.rm=TRUE) & cval <= max(consZ,na.rm=TRUE) ) + col2 <- WGCNA::blueWhiteRed(201)[ii] + jj <- 1:ncol(consZ) + if(rm.na) jj <- validcol(consZ) + WGCNA::labeledHeatmap( + Matrix = consZ[,jj,drop=FALSE], + xLabels = colnames(consZ)[jj], + yLabels = rownames(consZ), + ySymbols = rownames(consZ), + colors = col2, + colorLabels = TRUE, + setStdMargins = FALSE + ) + title("Consensus Module-Traits", line=1.2, cex.main=1.2) + } + + ##-------------------------------------- + ## preservation-weighted Consensus Module-Trait + ##-------------------------------------- + if("wt.consmt" %in% subplots) { + wz <- rowMeans(Zsummary**2, na.rm=TRUE) + wz <- wz / max(wz) + consW <- consZ * wz[rownames(consZ)] + + clim <- max(abs(consW),na.rm=TRUE) + cval <- seq( -clim, clim, length.out=201) + ii <- which( cval >= min(consW,na.rm=TRUE) & cval <= max(consW,na.rm=TRUE) ) + col2 <- WGCNA::blueWhiteRed(201)[ii] + + jj <- 1:ncol(consW) + if(rm.na) jj <- validcol(consW) + WGCNA::labeledHeatmap( + Matrix = consW[,jj,drop=FALSE], + xLabels = colnames(consW)[jj], + yLabels = rownames(consW), + ySymbols = rownames(consW), + colors = col2, + colorLabels = TRUE, + setStdMargins = FALSE + ) + title("Preservation-weighted Consensus\nModule-Traits", line=1, cex.main=1.2) } - merged_hclust <- as.hclust(merged) - merged_hclust } -## ========================================================================= + + +##========================================================================= ## PLOTTING FUNCTIONS -## ========================================================================= +##========================================================================= + + +#' @export +wgcna.plotTopModules <- function(wgcna, trait, nmax=16, setpar=TRUE) { + + MEx <- wgcna$net$MEs + Y <- wgcna$datTrait + kk <- intersect(rownames(MEx), rownames(Y)) + MEx <- MEx[kk,] + Y <- Y[kk,,drop=FALSE] + + rho <- cor(MEx, Y, use="pairwise") + sel <- order(-abs(rho[,trait])) + sel <- head(sel, nmax) + n <- length(sel) + nr <- ceiling(sqrt(n)) + nc <- ceiling(n / nr) + + yclass <- sapply(as.data.frame(Y), class) + is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA))) + yclass[which(is.binary)] <- "factor" + + if(setpar==1) par(mfrow=c(nr,nc), mgp=c(2.6,0.85,0), mar=c(4,4,2.5,1)) + if(setpar==2) par(mfrow=c(nc,nr), mgp=c(2.6,0.85,0), mar=c(4,4,2.5,1)) + + yclass <- sapply(as.data.frame(Y), class) + is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA))) + yclass[which(is.binary)] <- "logical" + yclass + + i=sel[1] + for(i in head(sel, nmax)) { + + x <- Y[, trait] + y <- MEx[,i] + label <- colnames(MEx)[i] + col <- substring(label, 3, 99) + col1 <- adjustcolor(col, alpha.f=0.5) + + if( yclass[trait] == "factor" ) { + boxplot(y ~ x, main = label, col = col1, + xlab = trait, ylab = "ME score") + points(1 + x + 0.04*rnorm(length(x)), y, + pch=21, bg=col1, lwd=0.5) + } + + if( yclass[trait] == "numeric" ) { + plot( x, y, main = label, + pch=21, cex = 1.1, col=1, bg = col1, lwd=0.25, + xlab = trait, ylab = "ME score") + abline(h=0, lty=2, lwd=0.5) + r <- cor(x,y,use="pairwise") + if(abs(r) > 0.3) { + abline( lm(y ~ x), col=1, lwd=0.6 ) + legend("bottomright", legend=paste("r=",round(r,3))) + } + } + } +} + + +#' Plot top modules most correlated with trait for multi expression +#' data. +#' +#' @export +wgcna.plotTopModules_multi <- function(multi, trait, nmax=16, collapse=FALSE, + plotlib = "base", setpar=TRUE) +{ + + if(!"MEs" %in% names(multi)) { + multi$MEs <- lapply(multi$net$multiMEs, function(m) m$data) + } + + ## we 'just' concatenate the ME matrices + multi$MEs <- lapply(multi$MEs, as.matrix) + MEx <- as.matrix(mofa.merge_data(multi$MEs)) + Y <- lapply(multi$MEs, function(m) multi$datTraits[rownames(m),,drop=FALSE]) + Y <- mofa.merge_data(Y) + + batch <- sub(":.*","",rownames(Y)) + names(batch) <- rownames(Y) + nbatch <- length(multi$MEs) + + ## select top modules + rho <- cor(MEx, Y, use="pairwise") + sel <- names(sort(-abs(rho[,trait]))) + sel <- head(sel, nmax) + + Y <- type.convert(data.frame(Y,check.names=FALSE),as.is=FALSE) + if(collapse) { + Y <- collapseTraitMatrix(Y) + trait <- sub("=.*","",trait) + } + + n <- length(sel) + nr <- ceiling(sqrt(n)) + nc <- ceiling(n / nr) + if(setpar==1) par(mfrow=c(nr,nc), mgp=c(2.6,0.85,0), mar=c(2.5,4,2.5,1)) + if(setpar==2) par(mfrow=c(nc,nr), mgp=c(2.6,0.85,0), mar=c(2.5,4,2.5,1)) + + yclass <- sapply(as.data.frame(Y), class) + is.binary <- apply(Y, 2, function(x) all(x %in% c(TRUE,FALSE,0,1,NA))) + yclass[which(is.binary)] <- "logical" + yclass + + i=sel[1] + for(i in head(sel, nmax)) { + + x <- Y[, trait] + y <- MEx[,i] + label <- i + col <- substring(label, 3, 99) + col1 <- adjustcolor(col, alpha.f=0.66) + col2 <- col1 + + yclass[trait] + if(yclass[trait] %in% c("factor","logical","binary")) { + + if(yclass[trait] %in% c("logical","binary")) { + x <- (x == 1) + } + df <- data.frame(x=x, y=y, group=factor(batch)) + + par(mgp=c(2.4,0.9,0)) + aa <- c(0.15, 0.55) + col1 <- sapply(aa, function(a) adjustcolor(col, alpha.f=a)) + + nb <- nbatch + atx <- c( seq(1,2, length.out=nb), seq(4,5, length.out=nb) ) + atx <- unlist(sapply(1:nbatch, function(i) i + c(-0.15, 0.15),simplify=FALSE)) + atmid <- 1:nbatch + + boxplot( + df$y ~ df$x + df$group, + #df$y ~ df$group + df$x, + at = atx, + xlim = c(0.6, nbatch+0.4), + cols = col1, + main = label, + col = col1, + boxwex = 0.24, + xaxt = 'n', + xlab = '', + ylab = "ME score" + ) + + mtext( levels(df$group), side=1, line=0.6, cex=1.0, at=atmid) + bb <- c("FALSE","TRUE") + legend("topright", legend=bb, fill = col1, + cex=0.8, y.intersp=0.82, title=trait, title.cex=1.1) + + } ## end of if factor + + if(yclass[trait] %in% c("numeric","integer")) { + + df <- data.frame(x=x, y=y, group=factor(batch)) + par(mgp=c(2.4,0.9,0)) + + aa <- seq(0.55, 0.15, length.out=nbatch) + col1 <- sapply(aa, function(a) adjustcolor(col, alpha.f=a)) + names(col1) <- sort(unique(batch)) + + nb <- nbatch + colx <- col1[as.integer(factor(batch))] + + plot( + df$x , df$y, + pch = 21, + cex = 1, + lwd = 0.4, + bg = colx, + main = label, + xlab = trait, + ylab = "ME score" + ) + bb <- names(multi$MEs) + legend("topright", legend=bb, fill=col1, cex=0.9, + y.intersp=0.85) + + ## add regression lines + b=df$group[1] + col2 <- adjustcolor(col1, red.f=0.5, green.f=0.5, blue.f=0.5) + names(col2) <- names(col1) + for(b in unique(df$group)) { + ii <- which(df$group == b) + abline( lm(df$y[ii] ~ df$x[ii]), lwd=1, lty=1, col=col2[1] ) + } + } ## end of if continuous + + } + +} + + + +#' Plot top modules most correlated with trait for multi expression +#' data. +#' +#' @export +wgcna.plotModuleScores <- function(res, trait, + multi=FALSE, nmax=16, + collapse.trait = FALSE, + plotlib = "base", setpar=TRUE) +{ + + Y <- res$datTrait + + ## For multi we 'just' concatenate the ME matrices + if(multi) { + MEx <- do.call(rbind, lapply(res$MEs, as.matrix)) + me.samples <- lapply(res$MEs, rownames) + batch <- max.col(sapply(me.samples, function(s) rownames(Y) %in% s)) + batch <- names(res$MEs)[batch] + names(batch) <- rownames(Y) + nbatch <- length(res$MEs) + } else { + MEx <- res$net$MEs + batch <- "" + nbatch <- 1 + } + + ## align + kk <- intersect(rownames(MEx), rownames(Y)) + MEx <- MEx[kk,] + Y <- Y[kk,] + if(!is.null(batch)) batch <- batch[kk] + + sel.modules <- colnames(MEx) + if(nmax > 0) { + ## select top modules + rho <- cor(MEx, Y, use="pairwise") + sel.modules <- names(sort(-abs(rho[,trait]))) + sel.modules <- head(sel.modules, nmax) + } + ncol <- ceiling(sqrt(length(sel.modules))) + + if(collapse.trait) { + Y <- type.convert(data.frame(Y,check.names=FALSE),as.is=FALSE) + Y <- collapseTraitMatrix(Y) + trait <- sub("=.*","",trait) + } + + module <- as.vector(sapply(sel.modules, rep, nrow(MEx))) + dfx <- data.frame( + sample = rownames(MEx), + trait = Y[,trait], + score = as.vector(unlist(MEx[,sel.modules])), + module = module, + group = batch + ) + + xtype <- class(type.convert(Y[,trait],as.is=TRUE)) + xtype + if(xtype != "numeric") { + + dfx$trait <- factor(dfx$trait) + if(nbatch== 1) { + ggplot2::ggplot(dfx, + ggplot2::aes(x = factor(trait), y = score, fill = trait)) + + #ggplot2::aes(y = score, x = trait)) + + ggplot2::geom_boxplot() + + ggplot2::xlab(trait) + + ggplot2::ylab("ME score") + + ggplot2::facet_wrap(~module, ncol=ncol) + + ggplot2::theme_bw(base_size = 18) + } + + if(nbatch > 1) { + ggplot2::ggplot(dfx, + ggplot2::aes(x = group, y = score, fill = trait)) + + ggplot2::geom_boxplot() + + ggplot2::xlab(trait) + + ggplot2::ylab("ME score") + + ggplot2::facet_wrap(~module, ncol=ncol) + + ggplot2::theme_bw(base_size = 18) + } + } else { + + dfx$trait <- as.numeric(dfx$trait) + ggplot2::ggplot(dfx, + #ggplot2::aes(x = trait, y = score, color = group)) + + ggplot2::aes(x = trait, y = score, color = group)) + + ggplot2::geom_point(size = 0.6) + + ggplot2::geom_smooth(method="lm", se = FALSE, linewidth=0.6) + + ggplot2::xlab(trait) + + ggplot2::ylab("ME score") + + ggplot2::facet_wrap(~module, ncol = ncol, scales = "free") + + ggplot2::theme_bw(base_size = 18) + + } +} + +#' +#' +#' @export +wgcna.plotTraitCorrelationBarPlots <- function(res, trait, multi=FALSE, + colored = TRUE, beside = TRUE, + main = NULL, cex.main = 1.3, + setpar=TRUE) { + if(setpar) { + nr <- ceiling( sqrt(length(trait))) + nc <- ceiling( length(trait)/nr) + par(mfrow=c(nr,nc)) + } + p=trait[1] + for(p in trait) { + groups <- NULL + if(multi) { + mt <- res$modTraits + groups <- names(mt) + m1 <- sapply(mt, function(x) x[,p]) + } else { + m1 <- res$modTraits[,p] + } + colnames(m1) <- paste0(p," (",colnames(m1),")") + me.col <- grey.colors(2) + if(colored) { + me.col <- sub("ME","",rownames(m1)) + me.col <- rbind(me.col, me.col) + aa <- seq(0.7,0.25,length.out=nrow(me.col)) + for(i in 1:nrow(me.col)) { + me.col[i,] <- adjustcolor(me.col[i,], alpha.f=aa[i]) + } + } + + if(beside) { + barplot( t(m1), las=3, beside=TRUE, col=me.col, + ylab = "trait correlation (rho)") + tt <- p + if(!is.null(main)) tt <- main + title(tt, cex.main = cex.main) + if(length(groups)>1) { + legend("topright", legend=groups, fill=grey.colors(length(groups))) + } + } else { + me.col <- NULL + if(colored) me.col <- sub("ME","",rownames(m1)) + for(i in 1:ncol(m1)) { + barplot( m1[,i], las=3, beside=TRUE, col=me.col, + ylab = "trait correlation (rho)") + tt <- colnames(m1)[i] + if(!is.null(main)) tt <- main + title(tt, cex.main = cex.main) + } + } + + + + + } +} #' #' @@ -953,125 +3096,374 @@ wgcna.plotTOM <- function(wgcna, justdata = FALSE, block = NULL, #' #' #' @export -wgcna.plotDendroAndColors <- function(wgcna, main = NULL, block = NULL, extra.colors = NULL) { - if (length(wgcna$net$dendrograms) > 1) { +wgcna.plotDendroAndColors <- function(wgcna, main=NULL, block=1, + extra.colors=NULL, + show.kme = FALSE, + show.traits = FALSE, + use.tree = 0, + rm.na = TRUE, + marAll = c(0.4, 5, 1, 0.2), + setLayout=TRUE, ... ) { + + if("net" %in% names(wgcna)) { + net <- wgcna$net + if("colors" %in% names(wgcna)) { + net$netcolors <- net$colors + net$colors <- wgcna$colors + } + } else { + net <- wgcna + } + dendro <- net$dendrograms + + if("layers" %in% names(wgcna) && use.tree>0) { + dendro <- wgcna$layers[[use.tree]]$net$dendrograms + } + + if(length(dendro)>1) { message("warning: this wgcna has multiple blocks") } + geneTree <- dendro[[block]] + + colors <- cbind(wgcna.labels2colors(net$colors)) + if(NCOL(colors)==1) colnames(colors)[1] <- "Module colors" - if (is.null(block) && "merged_dendro" %in% names(wgcna$net)) { - geneTree <- wgcna$net$merged_dendro - } else { - if (is.null(block)) block <- 1 - geneTree <- wgcna$net$dendrograms[[block]] - } - colors <- wgcna.labels2colors(wgcna$net$colors) gg <- geneTree$labels - if (is.null(gg) && !is.null(block)) { - ii <- which(wgcna$net$blocks == block & wgcna$net$goodGenes == TRUE) - gg <- names(wgcna$net$color)[ii] + if(is.null(gg) && !is.null(block)) { + ii <- which(net$blocks == block & net$goodGenes==TRUE) + gg <- names(net$color)[ii] } - if (is.null(gg) && is.null(block)) { - ii <- which(wgcna$net$goodGenes == TRUE) - gg <- names(wgcna$net$color)[ii] + if(is.null(gg) && is.null(block)) { + ii <- which(net$goodGenes==TRUE) + gg <- names(net$color)[ii] } - colors <- colors[gg] - groupLabels <- "Module colors" - if (!is.null(extra.colors)) { + colors <- colors[gg,,drop=FALSE] + if(!is.null(extra.colors)) { jj <- match(gg, rownames(extra.colors)) - colors <- cbind(colors, extra.colors[jj, ]) - groupLabels <- c("Module colors", colnames(extra.colors)) + colors <- cbind(colors, 0, extra.colors[jj,]) + } + + is.multi <- is.list(wgcna$datExpr) + is.multi + if(!is.multi) { + if(show.kme) { + X <- wgcna$datExpr + Y <- net$MEs + kme <- cor(X, Y, use="pairwise") + kmeColors <- rho2bluered(kme) + kmeColors <- kmeColors[gg,] + colors <- cbind(colors, 0, kmeColors) + } + if(show.traits) { + X <- wgcna$datExpr + Y <- wgcna$datTraits + kme <- cor(X, Y, use="pairwise") + kmeColors <- rho2bluered(kme) + kmeColors <- kmeColors[gg,,drop=FALSE] + colors <- cbind(colors, 0, kmeColors) + } + } + + if(is.multi) { + if(show.kme) { + X <- wgcna$datExpr + Y <- wgcna$net$multiMEs + for(i in 1:length(X)) { + kme <- cor(X[[i]], Y[[i]]$data, use="pairwise") + kmeColors <- rho2bluered(kme) + kmeColors <- kmeColors[gg,,drop=FALSE] + colnames(kmeColors) <- paste0(names(X)[i],":",colnames(kmeColors)) + colors <- cbind(colors, 0, kmeColors) + } + } + if(show.traits) { + X <- wgcna$datExpr + Y <- wgcna$datTraits + for(i in 1:length(X)) { + Y1 <- Y[rownames(X[[i]]),] + kme <- cor(X[[i]], Y1, use="pairwise") + kmeColors <- rho2bluered(kme) + kmeColors <- kmeColors[gg,] + colnames(kmeColors) <- paste0(names(X)[i],":",colnames(kmeColors)) + colors <- cbind(colors, 0, kmeColors) + } + } } - if (is.null(main)) main <- "Gene dendrogram and module colors" + if(rm.na) { + all.eq <- rowMeans(t(colors) == colors[1,]) == 1 + sel <- colMeans(is.na(colors)) < 1 & !all.eq + sel <- sel | colnames(colors) %in% c("",NA) + colors <- colors[, sel, drop=FALSE] + } + + if(is.null(main)) main <- "Gene dendrogram and module colors" ## Plot the dendrogram and the module colors underneath WGCNA::plotDendroAndColors( dendro = geneTree, colors = colors, - groupLabels = groupLabels, + groupLabels = colnames(colors), dendroLabels = FALSE, hang = 0.03, - addGuide = FALSE, guideHang = 0.05, - marAll = c(0.2, 5, 1, 0.2), - main = main + addGuide = FALSE, + guideHang = 0.05, + marAll = marAll, + setLayout = setLayout, + main = main, + ... ) } - +#' +#' #' @export -wgcna.labels2colors <- function(colors, ...) { - if (all(is.numeric(colors))) { - colors <- WGCNA::labels2colors(colors, ...) - return(colors) +wgcna.plotDendroAndTraitCorrelation <- function(wgcna, + traits = NULL, + show.traits = TRUE, + show.kme = FALSE, + main=NULL, + block=NULL, + rm.na = TRUE, + use.tree = 0, + marAll = c(0.2, 8, 2, 0.2), + setLayout=TRUE, + ... ) +{ + + ## if consensus output do this + is.cons <- ("class" %in% names(wgcna) && wgcna$class == "cons") + is.cons2 <- (all(c("layers","zlist") %in% names(wgcna))) + if(is.cons || is.cons2) { + message("object is consensus result") + wgcna.plotDendroAndTraitCorrelation_cons( + cons = wgcna, + traits = traits, + main = main, + rm.na = rm.na, + show.traits = show.traits, + marAll = marAll, + use.tree = use.tree, + setLayout = setLayout, + ... + ) + return() } - stdColors <- c("grey", WGCNA::standardColors()) - if (all(colors %in% stdColors)) { - return(colors) + + moduleColors <- cbind(wgcna$net$colors) + if(NCOL(moduleColors)==1) colnames(moduleColors) <- "Module" + colors <- moduleColors + + if(show.traits) { + X <- wgcna$datExpr + Y <- wgcna$datTraits + traitSig <- cor(X, Y, use="pairwise") + if(rm.na) { + sel <- colMeans(is.na(traitSig)) < 1 + traitSig <- traitSig[, sel, drop=FALSE] + } + traitColors <- rho2bluered(traitSig) + colors <- cbind(moduleColors, 0, traitColors) } - icolors <- as.integer(factor(as.character(colors))) - colors <- WGCNA::standardColors()[icolors] - return(colors) -} + + geneTree <- wgcna$net$dendrograms[[1]] + geneTree$labels <- names(wgcna$net$colors) + colors <- colors[geneTree$labels, ] + + if(is.null(main)) main = "Gene Dendrogram, Modules and Trait Correlation" + + WGCNA::plotDendroAndColors( + geneTree, + colors = colors, + colnames(colors), + dendroLabels = FALSE, + hang = 0.03, + addGuide = TRUE, + guideHang = 0.05, + marAll = marAll, + main = main, + setLayout = setLayout, + ... + ) +} -#' @export -wgcna.plotModuleTraitHeatmap <- function(wgcna, setpar = TRUE, cluster = FALSE, - main = NULL, justdata = FALSE, - pstar = TRUE) { - ## Define numbers of genes and samples - nGenes <- ncol(wgcna$datExpr) - nSamples <- nrow(wgcna$datExpr) - if ("stats" %in% names(wgcna)) { - moduleTraitCor <- wgcna$stats$moduleTraitCor - moduleTraitPvalue <- wgcna$stats$moduleTraitPvalue - } else { - moduleTraitCor <- cor(wgcna$net$MEs, wgcna$datTraits, use = "pairwise.complete") - moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples) +#' wgcna.plotDendroAndTraits for Consensus output +#' +#' +wgcna.plotDendroAndTraitCorrelation_cons <- function(cons, + show.traits = TRUE, + traits = NULL, + main = NULL, + rm.na = TRUE, + use.tree = 0, + marAll = c(0.2, 8, 2, 0.2), + setLayout=TRUE, + ... ) +{ + + if(0) { + show.traits = TRUE + traits = NULL + main = NULL + rm.na = TRUE + use.tree = 0 + marAll = c(0.2, 8, 2, 0.2) + setLayout=TRUE } - if (cluster) { - ii <- hclust(dist(moduleTraitCor))$order - jj <- hclust(dist(t(moduleTraitCor)))$order + ## quick hack to use wgcna.plotDendroAndTraitCorrelation_multi() + multi <- c( list(Consensus = cons), cons$layers ) + use.tree0 <- use.tree + if(use.tree %in% 0:99) use.tree <- as.integer(use.tree) + if(is.character(use.tree)) { + use.tree <- match(use.tree, names(multi)) } else { - ii <- rev(order(rownames(moduleTraitCor))) - jj <- order(colnames(moduleTraitCor)) + use.tree <- as.integer(use.tree) + 1 + } + if(is.na(use.tree)) { + message("ERROR: invalid class(use.tree) = ", class(use.tree0)) + message("ERROR: invalid use.tree = ", use.tree0) + return(NULL) } - moduleTraitCor <- moduleTraitCor[ii, jj] - moduleTraitPvalue <- moduleTraitPvalue[ii, jj] - if (justdata) { - return(moduleTraitCor) + wgcna.plotDendroAndTraitCorrelation_multi( + multi, + show.traits = show.traits, + traits = traits, + main = main, + rm.na = rm.na, + use.tree = use.tree, + marAll = marAll, + setLayout = setLayout, + ... + ) + +} + + +#' @export +wgcna.plotDendroAndTraitCorrelation_multi <- function(multi, + show.traits = TRUE, + traits = NULL, + main = NULL, + rm.na = TRUE, + use.tree = 1, + marAll = c(0.2, 8, 2, 0.2), + setLayout=TRUE, + ... ) +{ + + ## module colors + colors <- sapply( multi, function(m) m$net$colors ) + + if(show.traits) { + + traitSig <- list() + nsets <- length(multi) + i=1 + for(k in names(multi)) { + if( k == "Consensus") next + w <- multi[[k]] + Y <- w$datTraits + sel <- colnames(Y) + if(!is.null(traits)) sel <- intersect(sel, traits) + X <- w$datExpr + kk <- intersect(rownames(X), rownames(Y)) + traitSig[[k]] <- cor(X[kk,], Y[kk,sel], use="pairwise") + } + + if(rm.na) { + for(i in 1:length(traitSig)) { + sel <- colMeans(is.na(traitSig[[i]])) < 1 + traitSig[[i]] <- traitSig[[i]][, sel, drop=FALSE] + } + } + + ## prepend datatype/set name + for(k in names(traitSig)) { + colnames(traitSig[[k]]) <- paste0(k,":",colnames(traitSig[[k]])) + } + + traitSig2 <- c() + for(i in 1:length(traitSig)) { + traitSig2 <- cbind(traitSig2, traitSig[[i]]) + if(i < length(traitSig)) traitSig2 <- cbind(traitSig2, 0) + } + + traitColors <- rho2bluered(traitSig2, f=0.95) + ii <- which(colnames(traitColors)=='') + if(length(ii)) traitColors[,ii] <- "#FFFFFF" + if(is.null(colors)) { + colors <- traitColors + } else { + colors <- cbind( colors, 0, traitColors) + } } - ## Will display correlations and their p-values - if (pstar) { - textPv <- cut(moduleTraitPvalue, - breaks = c(-1, 0.001, 0.01, 0.05, 99), - labels = c("★★★", "★★", "★", "") - ) + message("using tree of layer: ", names(multi)[use.tree] ) + geneTree <- multi[[use.tree]]$net$dendrograms[[1]] + + if(is.null(main)) main = "Gene Dendrogram, Modules and Trait Correlation" + + WGCNA::plotDendroAndColors( + geneTree, + colors = colors, + colnames(colors), + dendroLabels = FALSE, + hang = 0.03, + addGuide = TRUE, + guideHang = 0.05, + marAll = marAll, + main = main, + ... + ) + +} + +#' @export +purpleGreyYellow <- function(n) { + colorRampPalette(c("purple","grey65","yellow"))(n) +} + +#' Converts correlation values [-1;1] to blue-white-red colors. Good +#' for creating color labels for labeledHeatmaps that expect colors. +#' NOTE: use WGCNA::numbers2colors??? +#' +rho2bluered <- function(R, a=1, f=0.95) { + BLUERED <- WGCNA::blueWhiteRed(100) + if(a!=1) R <- sign(R) * abs(R)**a + if(is.null(ncol(R))) { + col <- BLUERED[1+round(99*(1+R)/2)] } else { - textPv <- paste0("(", signif(moduleTraitPvalue, 1), ")") + col <- apply(R, 2, function(x) BLUERED[1+round(99*(1+x)/2)]) + dimnames(col) <- dimnames(R) + } + if(f < 1) { + col <- apply(col, 2, adjustcolor, red.f=f, green.f=f, blue.f=f) + } + dimnames(col) <- dimnames(R) + col +} + + +#' Converts WGCNA labels (numeric or color) to colors. +#' +wgcna.labels2colors <- function(colors, ...) { + if (all(is.numeric(colors))) { + colors <- WGCNA::labels2colors(colors, ...) + return(colors) + } + stdColors <- c("grey", WGCNA::standardColors()) + if (all(colors %in% stdColors)) { + return(colors) } - - textMatrix <- paste0(signif(moduleTraitCor, 2), "\n", textPv) - dim(textMatrix) <- dim(moduleTraitCor) - if (setpar) par(mar = c(8, 8, 3, 3)) - if (is.null(main)) main <- "Module-trait relationships" - ## Display the correlation values within a heatmap plot - WGCNA::labeledHeatmap( - Matrix = moduleTraitCor, - xLabels = colnames(moduleTraitCor), - yLabels = rownames(moduleTraitCor), - ySymbols = rownames(moduleTraitCor), - colorLabels = FALSE, - colors = WGCNA::blueWhiteRed(50), - textMatrix = textMatrix, - setStdMargins = FALSE, - cex.text = 0.7, - zlim = c(-1, 1), - main = main - ) + icolors <- as.integer(factor(as.character(colors))) + colors <- WGCNA::standardColors()[icolors] + return(colors) } + + #' Plot membership correlation vs gene signficance (correlation with #' trait) to discover biomarkers/driver genes. #' @@ -1094,10 +3486,10 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE, ## Gene-trait significance (trait correlation) (with p-values) if ("stats" %in% names(wgcna)) { traitSignificance <- wgcna$stats$traitSignificance - GSPvalue <- wgcna$stats$GSPvalue + TSPvalue <- wgcna$stats$TSPvalue } else { traitSignificance <- as.data.frame(cor(wgcna$datExpr, wgcna$datTraits, use = "p")) - GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(traitSignificance), nSamples)) + TSPvalue <- as.data.frame(corPvalueStudent(as.matrix(traitSignificance), nSamples)) } x <- (moduleMembership[moduleGenes, module]) @@ -1108,7 +3500,7 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE, } ## px <- MMPvalue[moduleGenes, module] - py <- GSPvalue[moduleGenes, trait] + py <- TSPvalue[moduleGenes, trait] qx <- p.adjust(px, method = "fdr") qy <- p.adjust(py, method = "fdr") is.sig <- (qx < 0.05 & qy < 0.05) @@ -1135,7 +3527,7 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE, title = paste("Module membership vs. gene significance\n"), cex.title = 0.9, girafe = FALSE - ) + ) } else if (plotlib == "girafe") { pgx.scatterPlotXY.GGPLOT( pos, @@ -1163,98 +3555,431 @@ wgcna.plotMMvsGS <- function(wgcna, module, trait, abs = TRUE, par = TRUE, } } +#' @export +wgcna.plotModuleTraitHeatmap <- function(wgcna, setpar = TRUE, cluster = FALSE, + multi = FALSE, main = NULL, justdata = FALSE, + transpose = FALSE, colorlabel = TRUE, + nmax = -1, tmax = -1, + text = TRUE, pstar = TRUE) { + + if(!multi) wgcna <- list(wgcna) + nSamples <- nrow(wgcna[[1]]$datExpr) + MEs <- do.call(cbind, lapply(wgcna, function(w) as.matrix(w$net$MEs))) + Y <- wgcna[[1]]$datTraits + + moduleTraitCor <- cor(MEs, Y, use = "pairwise.complete") + if(nmax > 0) { + sel <- head(order(-apply(abs(moduleTraitCor), 1, max, na.rm=TRUE)),nmax) + moduleTraitCor <- moduleTraitCor[sel,,drop=FALSE] + } + if(tmax > 0) { + sel <- head(order(-apply(abs(moduleTraitCor), 2, max, na.rm=TRUE)),tmax) + moduleTraitCor <- moduleTraitCor[,sel,drop=FALSE] + } + + if(transpose) { + moduleTraitCor <- t(moduleTraitCor) + } + + wgcna.plotLabeledCorrelationHeatmap( + R = moduleTraitCor, + nSamples = nSamples, + setpar = setpar, + cluster = cluster, + text = text, + main = main, + justdata = justdata, + colorlabel = colorlabel, + pstar = pstar + ) +} + #' Plot cluster dendrogram with eigengenes and traits. #' #' @export -wgcna.plotEigenGeneClusterDendrogram <- function(wgcna, add_traits = TRUE, +wgcna.plotEigenGeneClusterDendrogram <- function(wgcna = NULL, + ME = NULL, + add_traits = TRUE, + horiz = FALSE, + setMargins = TRUE, + method = 'wgcna', + showlabels = TRUE, + plot = TRUE, + multi = FALSE, main = NULL) { # Matrix with eigengenes and traits - MET <- wgcna$net$MEs - if (add_traits) { - MET <- cbind(MET, wgcna$datTraits) - } - MET <- WGCNA::orderMEs(MET) - if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!! - if (is.null(main)) main <- "Eigengene cluster dendrogram" - WGCNA::plotEigengeneNetworks( - MET, main, - marDendro = c(0, 4, 2, 0), - plotHeatmaps = FALSE - ) + if(is.null(wgcna) && is.null(ME)) { + stop("ERROR: wgcna or ME must be given") + } + + if(is.null(ME)) { + + if(multi) { + ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs)) + ME <- do.call(cbind, ME) + Y <- wgcna[[1]]$datTraits + } else { + ME <- wgcna$net$MEs + Y <- wgcna$datTraits + } + + if (length(add_traits)==1 && is.logical(add_traits) && add_traits==TRUE) { + ME <- cbind(ME, Y) + } else if (length(add_traits) > 0 && !is.logical(add_traits)) { + sel <- intersect(add_traits, colnames(Y)) + if(length(sel)) ME <- cbind(ME, Y[,sel]) + } + } + + impME <- svdImpute2(as.matrix(ME)) + ME <- WGCNA::orderMEs(impME) + + if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!! + if (is.null(main)) main <- "Eigengene Dendrogram" + + hc <- NULL + if(method == 'wgcna') { + ## plot dendrogram with WGCNA function + WGCNA::plotEigengeneNetworks( + ME, main, + setMargins = setMargins, + marDendro = c(0, 4, 2, 0), + plotHeatmaps = FALSE + ) + } else { + ## plot dendrogram with hclust function + if(setMargins && horiz) par(mar=c(4,4,4,8)) + if(setMargins && !horiz) par(mar=c(8,4,4,1)) + hc <- hclust( as.dist(1 - cor(ME)), method="average") + if(plot) { + save.labels <- hc$labels + if(!showlabels) hc$labels <- rep("",ncol(ME)) + plot( as.dendrogram(hc), horiz = horiz, main = main) + hc$labels <- save.labels + } + } + invisible(hc) } + #' Plot the adjacency correlation heatmap matrix of eigengenes with or #' without traits. This can show how traits cluster together with the #' eigengenes. #' #' @export -wgcna.plotEigenGeneAdjacencyHeatmap <- function(wgcna, add_traits = TRUE, +wgcna.plotEigenGeneAdjacencyHeatmap <- function(wgcna, + add_traits = TRUE, + traits = NULL, + add_me = TRUE, marx = 1, main = NULL, + multi = FALSE, + phenotype = NULL, + colorlabel = TRUE, + text = FALSE, + pstar = TRUE, + setMargins = TRUE, + mar1 = c(5.6, 4.5, 1.8, 0), + mar2 = c(8, 10, 4, 2), + cex.lab = 0.8, + cex.text = 0.7, + plotDendro = TRUE, + plotHeatmap = TRUE, + dendro.horiz = TRUE, + dendro.width = 0.3, + dendro.labels = TRUE, + nmax = -1, + fixclust = FALSE, + mask.intra = FALSE, justdata = FALSE) { + + if(0) { + add_traits = TRUE; + traits = NULL; + marx = 1; main = NULL; + multi = FALSE; + phenotype = NULL; + colorlabel = TRUE; + text = FALSE; + pstar = TRUE; + setMargins = TRUE; + mar1 = c(5.5, 5, 1.6, 1); + mar2 = c(8, 10, 4, 2); + cex.lab = 0.8; + cex.text = 0.7; + plotDendro = TRUE; + plotHeatmap = TRUE; + dendro.horiz = TRUE; + dendro.width = 0.3; + dendro.labels = TRUE; + nmax = -1; + fixclust = FALSE; + mask.intra = FALSE; + justdata = FALSE + add_me = TRUE + } + + + if(!multi) wgcna <- list(gx=wgcna) + # Matrix with eigengenes and traits - MET <- wgcna$net$MEs + ME <- c() + if(add_me) { + ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs)) + ME <- do.call(cbind, ME) + } + Y <- wgcna[[1]]$datTraits + if (add_traits) { - MET <- cbind(MET, wgcna$datTraits) + sel <- colnames(Y) + if(!is.null(traits)) { + sel <- intersect(traits, sel) + } + ME <- cbind(ME, Y[,sel,drop=FALSE]) + } + + if (!add_traits && !is.null(phenotype)) { + ME <- cbind(ME, Y[,phenotype,drop=FALSE]) + } + + if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!! + + ## eigengene correlation + impME <- svdImpute2(as.matrix(ME)) + R <- cor(impME, use="pairwise") + R0 <- R + + ## If phenotype is given we condition the heatmap using the + ## correlation to the phenotype. + if(!is.null(phenotype)) { + ## proper sign in case of inhibitor layer (like miRNA) + layersign <- rep(1, length(wgcna)) + names(layersign) <- names(wgcna) + layersign[grep("^mi",names(wgcna),ignore.case=TRUE)] <- -1 + ff <- list() + for(k in names(wgcna)) { + rho <- cor(ME, Y[,phenotype], use="pairwise")[,1] + ##rho <- wgcna[[k]]$modTraits[,phenotype] + ff[[k]] <- layersign[k] * rho + } + names(ff) <- NULL + ff <- unlist(ff) + ff <- 0.5 * (1 + ff) ## signed... + ff <- ff[match(rownames(R),names(ff))] + names(ff) <- rownames(R) + ff[is.na(ff)] <- 1 ## really??? NEED RETHINK + ww <- outer(ff, ff) + ##ww[is.na(ww)] <- 0 + ww <- ww / max(ww,na.rm=TRUE) + R <- R * ww } - colnames(MET) <- paste0(" ", colnames(MET)) - MET <- WGCNA::orderMEs(MET) - if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!! - + + if(nmax>0) { + if(!is.null(phenotype)) { + rho <- cor(ME, Y[,phenotype], use="pairwise")[,1] + ii <- head(order(-abs(rho)), nmax) + } else { + ii <- head( order(-Matrix::rowMeans(R**2)), nmax) + } + R <- R[ii,ii] + } + if (justdata) { - R <- (1 + cor(MET)) / 2 return(R) } - + # Plot the correlation heatmap matrix (note: this plot will overwrite # the dendrogram plot) - if (is.null(main)) main <- "Eigengene adjacency heatmap" - WGCNA::plotEigengeneNetworks( - MET, main, - marHeatmap = c(8 * marx, 10 * marx, 2, 2), - plotDendrograms = FALSE, - colorLabels = TRUE, - xLabelsAngle = 45 - ) + if (is.null(main)) main <- "Eigengene Adjacency Heatmap" + + if(plotDendro && plotHeatmap) { + layout.matrix <- matrix( 1:2, nrow = 1, ncol = 2) + layout(mat = layout.matrix, heights = 1, widths = c(dendro.width, 1)) + if(dendro.horiz && dendro.labels) { + mar1[4] <- mar2[2] ## copy left margin + } + } + if(plotDendro) par(mar=mar1) + + #fixclust=FALSE + if(fixclust) { + ii <- rownames(R) + hc <- hclust(as.dist(1 - R0[ii,ii]), method="average") + } else { + hc <- hclust(as.dist(1 - R), method="average") + } + if(plotDendro) { + par(cex=cex.lab) + plot( as.dendrogram(hc), horiz = TRUE, + ylab="Eigengene dendrogram") + par(cex=1) + } + + if(plotHeatmap) { + ii <- hc$labels[hc$order] + ii <- intersect(ii, rownames(R)) + R1 <- R[rev(ii), ii] + nsamples <- nrow(Y) + par(mar=mar2) + wgcna.plotLabeledCorrelationHeatmap( + R1, + nSamples = nsamples, + text = text, + pstar = pstar, + colorlabel = colorlabel, + cluster = FALSE, + setpar = FALSE, + main = main, + cex.lab = cex.lab, + cex.text = cex.text + ) + } } #' @export -wgcna.plotEigenGeneGraph <- function(wgcna, add_traits = TRUE, main = NULL) { - ## require(igraph) - net <- wgcna$net - MET <- net$MEs +wgcna.plotMultiEigengeneCorrelation <- function(wgcna, addtraits = TRUE, + phenotype=NULL, nmax=-1, main = NULL, + showvalues = FALSE, showsig = TRUE, + cex.text = 0.7, cex.lab = 0.8, + fixcluster = TRUE, setpar = TRUE) { + + ## Show inter-correlation of modules + me <- lapply(wgcna, function(w) w$net$MEs) + if(length(me) == 1) { + me <- list(me[[1]], me[[1]]) + } + + comb <- combn(length(me),2) + ncomb <- ncol(comb) + nsamples <- nrow(wgcna[[1]]$datExpr) + Y <- wgcna[[1]]$datTraits + + ## for miRNA we need to flip sign + msign <- c(1,-1)[1 + 1*(names(wgcna) %in% c("mi","mir"))] + + if(setpar) { + nc <- ceiling(sqrt(ncomb)) + nr <- ceiling(ncomb / nc) + par(mfrow=c(nr,nc), mar=c(8,10,3,1)) + } - if (add_traits) { - MET <- cbind(MET, wgcna$datTraits) + k=1 + for(k in 1:ncol(comb)) { + i <- comb[1,k] + j <- comb[2,k] + M1 <- me[[i]] + M2 <- me[[j]] + + if(addtraits) { + M1 <- cbind(M1, Y) + M2 <- cbind(M2, Y) + } + if(FALSE && !addtraits && !is.null(phenotype)) { + y <- Y[,phenotype,drop=FALSE] + M1 <- cbind(M1, y) + M2 <- cbind(M2, y) + } + + + R1 <- cor(M1, M2, use="pairwise.complete") + + if(nmax > 0) { + ii <- head(order(-apply(abs(R1), 1, max)), nmax) + jj <- head(order(-apply(abs(R1), 2, max)), nmax) + R1 <- R1[ii,jj] + } + + ## cluster unweighted matrix + ii <- hclust(dist(R1), method="average")$order + jj <- hclust(dist(t(R1)), method="average")$order + R1 <- R1[ii, jj] + + ## This conditions the correlation on phenotype. Important. + do.condition <- !is.null(phenotype) + if(do.condition) { + y <- Y[,phenotype] + w1 <- cor( M1[,rownames(R1)], y, use="pairwise")[,1] + w2 <- cor( M2[,colnames(R1)], y, use="pairwise")[,1] + if(msign[i]!=0) w1 <- msign[i] * w1 + if(msign[j]!=0) w2 <- msign[j] * w2 + w1 <- pmax(w1,0) + w2 <- pmax(w2,0) + ww <- outer(w1, w2) + ww <- ww / max(ww, na.rm=TRUE) + R1 <- R1 * ww + } + + main <- paste(names(me)[i],"vs.",names(me)[j]) + if(do.condition) main <- paste(main, "(conditioned)") + + wgcna.plotLabeledCorrelationHeatmap( + R1, + nsamples, + text = showvalues, + pstar = showsig, + is.dist = FALSE, + cluster = !fixcluster, + setpar = FALSE, + main = main, + cex.text = cex.text, + cex.lab = cex.lab + ) + } - if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!! + + +} + - sdx <- matrixStats::colSds(as.matrix(MET), na.rm = TRUE) - if (any(sdx == 0)) MET <- MET + runif(length(MET), 0, 1e-5) +#' @export +wgcna.plotEigenGeneGraph <- function(wgcna, add_traits = TRUE, main = NULL, + multi=FALSE, vcex=1, labcex=1) { + + ## require(igraph) + if(multi) { + ME <- lapply(wgcna, function(w) as.matrix(w$net$MEs)) + ME <- do.call(cbind, ME) + if (add_traits) ME <- cbind(ME, wgcna[[1]]$datTraits) + } else { + ME <- wgcna$net$MEs + if (add_traits) ME <- cbind(ME, wgcna$datTraits) + } + if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!! + + sdx <- matrixStats::colSds(as.matrix(ME*1), na.rm = TRUE) + if (any(sdx == 0)) ME <- ME + runif(length(ME), 0, 1e-5) + ## Recalculate MEs with color as labels - clust <- hclust(dist(t(scale(MET)))) - clust + clust <- hclust(dist(t(scale(ME)))) phylo <- ape::as.phylo(clust) gr <- igraph::as.igraph(phylo, directed = FALSE) is.node <- grepl("Node", igraph::V(gr)$name) module.name <- igraph::V(gr)$name - module.size <- table(wgcna$net$labels) + if(multi) { + module.size <- lapply(wgcna, function(w) table(w$net$labels)) + names(module.size) <- NULL + module.size <- unlist(module.size) + module.colors <- sapply(wgcna, function(w) w$me.colors) + names(module.colors) <- NULL + module.colors <- unlist(module.colors) + } else { + module.size <- table(wgcna$net$labels) + module.colors <- wgcna$me.colors + } module.size <- module.size / mean(module.size) - module.size - igraph::V(gr)$label <- igraph::V(gr)$name igraph::V(gr)$label[is.node] <- NA - igraph::V(gr)$color <- wgcna$me.colors[module.name] - igraph::V(gr)$size <- 20 * (module.size[module.name])**0.4 + igraph::V(gr)$color <- module.colors[module.name] + igraph::V(gr)$size <- vcex * 18 * (module.size[module.name])**0.4 igraph::V(gr)$size[is.na(igraph::V(gr)$size)] <- 0 ## par(mfrow = c(1, 1), mar = c(1, 1, 1, 1) * 0) igraph::plot.igraph( gr, layout = igraph::layout.kamada.kawai, - vertex.label.cex = 0.8, + vertex.label.cex = 0.85 * labcex, edge.width = 3 ) if (!is.null(main)) title(main, line = -1.5) @@ -1295,24 +4020,32 @@ wgcna.plotFeatureUMAP <- function(wgcna, nhub = 3, method = "clust", if (is.null(main)) main <- "Feature UMAP colored by module" - ## get top hub genes - mm <- wgcna$stats$moduleMembership - hubgenes <- apply(mm, 2, function(x) head(names(sort(-x)), 3), simplify = FALSE) - hubgenes - sel <- which(names(hubgenes) != "MEgrey") - hubgenes <- unlist(hubgenes[sel]) + hubgenes <- NULL + if(nhub > 0) { + ## get top hub genes + mm <- wgcna$stats$moduleMembership + hubgenes <- apply(mm, 2, function(x) head(names(sort(-x)), nhub), simplify = FALSE) + sel <- which(names(hubgenes) != "MEgrey") + hubgenes <- unlist(hubgenes[sel]) + } + col1 <- wgcna$net$colors genes1 <- names(which(col1 != "grey")) pgx.scatterPlotXY( pos, - var = col1, col = sort(unique(col1)), - hilight = genes1, hilight2 = hubgenes, - cex.lab = 1.2, label.clusters = FALSE, + var = col1, + col = sort(unique(col1)), + hilight = genes1, + hilight2 = hubgenes, + cex.lab = 1.2, + label.clusters = FALSE, title = main, plotlib = plotlib ) + } + #' Plot module significance. #' #' @export @@ -1335,38 +4068,86 @@ wgcna.plotModuleSignificance <- function(wgcna, trait, main = NULL, abs = FALSE) ) } + + +#' +#' +#' @export +wgcna.plotConsensusSampleDendroAndColors <- function(cons, i, + what = c("both","me","traits")[1], + clust.expr = TRUE, + setLayout = TRUE, + marAll = c(0.2, 7, 1.5, 0.5), + colorHeightMax = 0.6, + main = NULL) +{ + + wgcna.plotSampleDendroAndColors( + wgcna = cons$layers[[i]], + main = main, + datExpr = cons$datExpr[[i]], + datTraits = cons$datTraits, + datME = cons$net$multiME[[i]]$data, + what = what, + marAll = marAll, + clust.expr = clust.expr, + setLayout = setLayout, + colorHeightMax = colorHeightMax + ) + +} + #' #' #' @export -wgcna.plotSampleDendroAndColors <- function(wgcna, +wgcna.plotSampleDendroAndColors <- function(wgcna, input.type="wgcna", what = c("me", "traits", "both")[3], - clust.expr = TRUE, + datTraits = NULL, datExpr = NULL, datME = NULL, + clust.expr = TRUE, setLayout = TRUE, + marAll = c(0.2, 7, 1.5, 0.5), + colorHeightMax = 0.6, main = NULL, justdata = FALSE) { - MET0 <- wgcna$net$MEs - MET <- MET0[, 0] + + if(input.type == 'net') { + ME0 <- wgcna$MEs + if(is.null(datExpr)) stop("must supply datExpr") + if(is.null(datTraits)) stop("must supply datTraits") + } else { + ME0 <- wgcna$net$MEs + datTraits <- 1*wgcna$datTraits + datExpr <- wgcna$datExpr + } + + if(!is.null(datME)) { + ME0 <- datME + } + + ME <- ME0[, 0] + samples <- rownames(ME) if (any(what %in% c("me", "both"))) { - MET <- cbind(MET, MET0) + ME <- cbind(ME, ME0) } + if (any(what %in% c("traits", "both"))) { - MET <- cbind(MET, wgcna$datTraits) + ME <- cbind(ME, datTraits[samples,,drop=FALSE]) } - if (NCOL(MET) <= 2) MET <- cbind(MET, MET) ## error if ncol(MET)<=2 !!!! - - sdx <- matrixStats::colSds(as.matrix(MET), na.rm = TRUE) - if (any(sdx == 0)) MET <- MET + runif(length(MET), 0, 1e-5) - + + if (NCOL(ME) <= 2) ME <- cbind(ME, ME) ## error if ncol(ME)<=2 !!!! + sdx <- matrixStats::colSds(as.matrix(ME*1), na.rm = TRUE) + ME <- ME[, which(sdx>0), drop=FALSE] + ## Recalculate MEs with color as labels if (clust.expr) { - sampleTree <- hclust(dist(wgcna$datExpr), method = "average") + sampleTree <- hclust(as.dist(1 - cor(t(datExpr))), method = "average") } else { - sampleTree <- hclust(dist(scale(MET0)), method = "average") + sampleTree <- hclust(as.dist(1 - cor(t(ME0))), method = "average") } ii <- sampleTree$order - jj <- hclust(dist(t(scale(MET))))$order - colors <- WGCNA::numbers2colors(MET[, jj]) + jj <- hclust(dist(t(scale(ME))))$order + colors <- WGCNA::numbers2colors(ME[, jj]) if (justdata) { - return(MET) + return(ME) } if (is.null(main)) { @@ -1376,38 +4157,50 @@ wgcna.plotSampleDendroAndColors <- function(wgcna, } ## Plot the dendrogram and the module colors underneath - plotDendroAndColors( + WGCNA::plotDendroAndColors( dendro = sampleTree, colors = colors, - groupLabels = colnames(MET)[jj], - dendroLabels = rownames(MET), + groupLabels = colnames(ME)[jj], + dendroLabels = rownames(ME), hang = 0.03, addGuide = FALSE, guideHang = 0.05, - marAll = c(0.2, 7, 1.5, 0.5), - main = main + setLayout = setLayout, + marAll = marAll, + main = main, + colorHeightMax = colorHeightMax ) } #' @export -wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples, setpar = TRUE, - cluster = TRUE, +wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples, + cluster = TRUE, text = TRUE, main = NULL, justdata = FALSE, - pstar = TRUE) { + colorlabel = TRUE, pstar = TRUE, + zlim = NULL, colorpal = NULL, + cex.text = 0.7, cex.lab = NULL, + setpar = TRUE, is.dist = FALSE) { + ## Define numbers of genes and samples - Pvalue <- corPvalueStudent(R, nSamples) - if (cluster) { + if (cluster && nrow(R)>1 && ncol(R)>1) { R0 <- R R0[is.na(R0)] <- 0 - ii <- hclust(dist(R0))$order - jj <- hclust(dist(t(R0)))$order - } else { - ii <- rev(order(rownames(R))) - jj <- order(colnames(R)) + is.sym <- nrow(R) == ncol(R) && all(rownames(R)==colnames(R)) + if(is.dist) { + ii <- hclust(as.dist(abs(R0)))$order + jj <- ii + } else if(is.sym) { + ii <- hclust(dist(R0), method="average")$order + jj <- ii + } else { + ii <- hclust(dist(R0), method="average")$order + jj <- hclust(dist(t(R0)), method="average")$order + } + R <- R[ii, jj] } - R <- R[ii, jj] - Pvalue <- Pvalue[ii, jj] + R0 <- pmax(pmin(R, 1, na.rm=TRUE), -1, na.rm=TRUE) + Pvalue <- corPvalueStudent(R0, nSamples) if (justdata) { return(R) @@ -1417,32 +4210,65 @@ wgcna.plotLabeledCorrelationHeatmap <- function(R, nSamples, setpar = TRUE, if (pstar) { textPv <- cut(Pvalue, breaks = c(-1, 0.001, 0.01, 0.05, 99), - labels = c("★★★", "★★", "★", "") + #labels = c("★★★", "★★", "★", "") + #labels = c("***", "**", "*", "") + labels = c("+++", "++", "+", "") ) } else { textPv <- paste0("(", signif(Pvalue, 1), ")") } - textMatrix <- paste0(signif(R, 2), "\n", textPv) - textMatrix[which(is.na(R))] <- "" - dim(textMatrix) <- dim(R) + textMatrix <- NULL + if(text && pstar) textMatrix <- paste0(signif(R, 2), "\n", textPv) + if(text && !pstar) textMatrix <- paste0(signif(R, 2)) + if(!text && pstar) textMatrix <- textPv + if(!text && !pstar) textMatrix <- NULL + + if(!is.null(textMatrix)) { + textMatrix[which(is.na(R))] <- "" + dim(textMatrix) <- dim(R) + } + + if(!colorlabel) { + colnames(R) <- paste0(" ", colnames(R)) + rownames(R) <- paste0(" ", rownames(R)) + } + if (setpar) par(mar = c(8, 8, 3, 3)) if (is.null(main)) main <- "Correlation heatmap" + + ## set colorscale. make sure 0 is white if non-symmetric + col1 <- "grey90" + if(is.null(zlim)) { + zlim <- c(min(R,na.rm=TRUE), max(R,na.rm=TRUE)) + } + rlim <- max(abs(zlim),na.rm=TRUE) + if(is.null(colorpal)) colorpal <- WGCNA::blueWhiteRed + if(rlim > 0) { + rval <- seq(-rlim, rlim, length.out=201) + ii <- which( rval >= zlim[1] & rval <= zlim[2] ) + col1 <- colorpal(201)[ii] + } + ## Display the correlation values within a heatmap plot WGCNA::labeledHeatmap( Matrix = R, + #xLabels = paste0(1:ncol(R),":",colnames(R)), xLabels = colnames(R), + #xLabels = paste0(" ",colnames(R)), yLabels = rownames(R), xSymbols = colnames(R), ySymbols = rownames(R), - colorLabels = FALSE, - colors = WGCNA::blueWhiteRed(50), + colorLabels = TRUE, + colors = col1, textMatrix = textMatrix, setStdMargins = FALSE, - cex.text = 0.7, - zlim = c(-1, 1), + cex.text = cex.text, + cex.lab = cex.lab, + zlim = zlim, main = main ) + } @@ -1475,11 +4301,10 @@ wgcna.plotModuleHubGenes <- function(wgcna, modules = NULL, mm.score <- head(sort(mm, decreasing = TRUE), 30) topgenes <- names(mm.score) A <- cor(wgcna$datExpr[, topgenes]) - diag(A) <- NA + diag(A) <- 0 A <- (A - min(A, na.rm = TRUE)) / (max(A, na.rm = TRUE) - min(A, na.rm = TRUE)) gr <- igraph::graph_from_adjacency_matrix( - A, - mode = "undirected", weighted = TRUE, diag = FALSE + A, mode = "undirected", weighted = TRUE, diag = FALSE ) norm.mm.score <- (mm.score - min(mm.score)) / (max(mm.score) - min(mm.score)) clr <- sub("ME", "", k) @@ -1496,45 +4321,97 @@ wgcna.plotModuleHubGenes <- function(wgcna, modules = NULL, } } -#' @title Map module colors to rainbow palette -#' -#' @description -#' Maps the module colors from a WGCNA network to a rainbow palette. -#' -#' @param net A WGCNA network object. -#' -#' @details -#' This function takes a WGCNA network object and maps the module colors -#' to a rainbow palette based on the modules' hierarchical clustering order. -#' -#' It extracts the hierarchical clustering dendrogram order, gets the number -#' of unique module colors, ranks the module color means based on the -#' dendrogram order, and assigns rainbow colors accordingly. -#' -#' This allows easier visualization and interpretation of modules in the -#' standard rainbow palette order. -#' -#' @return -#' A named character vector mapping the original module colors to rainbow colors. -#' #' @export -labels2rainbow <- function(net) { - hc <- net$dendrograms[[1]] - nc <- length(unique(net$colors)) - n <- length(net$colors) - ii <- rep(NA, n) - ii[net$goodGenes] <- hc$order - col1 <- WGCNA::labels2colors(net$colors) - col.rnk <- rank(tapply(1:n, col1[ii], mean)) - new.col <- grDevices::rainbow(nc)[col.rnk] - names(new.col) <- names(col.rnk) - new.col["grey"] <- "#AAAAAA" - new.col <- new.col[col1] - names(new.col) <- net$colors - return(new.col) +wgcna.plotGeneNetwork <- function(wgcna, genes, col=NULL, + edge.alpha = 0.3, + rgamma = 4, + edge.width = 6, + alpha = 0.5, + min.rho = 0.5, + setpar = TRUE) { + + A <- cor(wgcna$datExpr[, genes]) + A <- A * (abs(A) > min.rho) + gr <- igraph::graph_from_adjacency_matrix( + A, mode = "undirected", weighted = TRUE, diag = FALSE + ) + vcex <- matrixStats::colVars(wgcna$datExpr[, genes]) + vcex <- vcex / max(abs(vcex)) + if(is.null(col)) { + col <- wgcna$net$color[genes] + } + col <- sub("black","grey40",col) + ecol <- c("darkred","darkgreen")[ 1 + 1*(igraph::E(gr)$weight > 0)] + table(ecol) + ecol <- adjustcolor(ecol, alpha.f = edge.alpha) + ewt <- abs(igraph::E(gr)$weight) + ewt <- (ewt / max(abs(ewt)))**rgamma + plot(gr, + layout = igraph::layout_in_circle, + edge.width = edge.width * ewt, + edge.color = ecol, + vertex.size = 5 + 20 * vcex, + vertex.color = adjustcolor(col, alpha.f = alpha), + vertex.frame.color = col + ) +} + +#' @export +wgcna.plotModuleHeatmap <- function(wgcna, + module, + genes=NULL, + rgamma = 4, + min.rho = 0, + cex = 0.8, + nmax = -1, + cluster = TRUE, + type = c("expression","correlation")[1], + heatmap.mar = c(7,7), + main = NULL) { + + if(!is.null(module) && is.null(genes) ) { + genes <- wgcna$me.genes[[module]] + } + if(is.null(genes) || length(genes)==0) { + stop("must specify genes or module") + } + + if(nmax > 0) { + sdx <- matrixStats::colSds(wgcna$datExpr[,genes]) + genes <- head( genes[order(-sdx)], nmax) + } + + if(type == "expression") { + X <- t(wgcna$datExpr[,genes]) + annot <- wgcna$datTraits + gx.heatmap(X, nmax=nmax, + ##col.annot = annot, + key=FALSE, keysize=0.5, mar=heatmap.mar) + + } + + if(type == "correlation") { + R <- cor(wgcna$datExpr[,genes]) + R <- sign(R) * abs(R)**rgamma + if(cluster) { + ii <- hclust(as.dist(1-R), method="average")$order + R <- R[ii,ii] + } + R[abs(R) < min.rho] <- NA + image(R) + mtext( rownames(R), side=4, at=seq(0,1,1/(nrow(R)-1)), + las=2, adj = 0, cex=cex, line=0.5 ) + } + + if(is.null(main) && !is.null(module)) main <- module + if(is.null(main)) main <- "Module Heatmap" + if(!is.null(main) && main!="") { + title( main, line=2, cex.main=1.3) + } } + #' Filter color vector by minimum KME and mergeCutHeight. Set color of #' features with KME smaller than minKME to grey (or 0) group. Merge #' similar modules with (module) correlation larger than @@ -1625,3 +4502,533 @@ wgcna.filterColors <- function(X, colors, minKME = 0.3, mergeCutHeight = 0.15, return(new.colors) } + +#' Wrapper to hclust from matrix using default WGCNA parameters. +#' +wgcna.tomclust <- function(X, power=6) { + A <- WGCNA::adjacency(t(X), power=power, type = "signed") + TOM <- fastTOMsimilarity(A, tomtype="signed", lowrank=40) + hc <- hclust(as.dist(1-TOM), method="average") + hc +} + + +wgcna.checkDendroHeights <- function(datExpr, n=200, powers=NULL, maxpower=20) { + ii <- 1:ncol(datExpr) + if(n < ncol(datExpr)) { + ##ii <- sample(1:ncol(datExpr), n) + ii <- head(order(-matrixStats::colSds(datExpr)),n) + } + tX <- datExpr[,ii] + ht <- list() + p=9 + p=24 + if(is.null(powers)) { + powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) + if(maxpower>20) { + powers <- c(powers, seq(from = 20, to = maxpower, by = 5)) + } + } + for(i in 1:length(powers)) { + A <- WGCNA::adjacency(tX, power=powers[i], type = "signed") + TOM <- fastTOMsimilarity(A, tomtype="signed", lowrank=40) + hc <- hclust(as.dist(1-TOM), method="average") + ht[[i]] <- hc$height + } + names(ht) <- paste0("p=",powers) + S <- sapply(ht, quantile, probs=c(0.25,0.5,0.75)) + iqr <- (S[3,] - S[1,]) + optK <- powers[which.max(iqr)] + + list( + quantiles = S, + IQR = iqr, + optK = optK + ) +} + +#' +#' +#' @export +wgcna.plotPowerAnalysis <- function(datExpr, networktype = "signed", + cex=1, maxpower = 20, nmax = 2000, + plots=c("sft.modelfit", "mean.k", + "dendro.IQR"), main=NULL, + RsquaredCut = 0.85, setPar=TRUE) { + + RsquaredCut <- RsquaredCut[1] + + ## Choose a set of soft-thresholding powers + powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) + if(maxpower>20) { + powers <- c(powers, seq(from = 20, to = maxpower, by = 5)) + } + + ## subsample for speed + if(ncol(datExpr) > nmax && nmax > 0) { + ii <- sample(1:ncol(datExpr),nmax) + datExpr <- datExpr[,ii] + } + + ## Call the network topology analysis function + sft <- WGCNA::pickSoftThreshold( + datExpr, + powerVector = powers, + RsquaredCut = RsquaredCut, + networkType = networktype, + verbose = 0 + ) + + ## This is more robust + + if(setPar) { + np <- length(plots) + nc <- ceiling(sqrt(np)) + par(mfrow = c(nc, nc), mar = c(3.3, 3.5, 1, 1), mgp = c(2, 0.9, 0)) + par(mfrow = c(1, np), mar = c(3.8, 3.8, 1, 1), mgp = c(2.4, 0.95, 0)) + } + + ## Plot the results: + if("sft.modelfit" %in% plots) { + ## Scale-free topology fit index as a function of the soft-thresholding power + y <- -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2] + base::plot( + x = sft$fitIndices[, 1], + y = y, + ylim = c(min(y),1), + type = "n", + xlab = "Soft threshold (power)", + ylab = "SFT model fit (signed R^2)", + main = main + ) + abline(h = 0, col = "black", lty=3) + text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2], + labels = powers, cex = cex, col = "red" + ) + ## this line corresponds to using an R^2 cut-off of h + abline(h = RsquaredCut, col = "red", lty=2) + ##if(legend) legend("bottomright", legend=paste("opt. power =",optPower)) + } + + ## Mean connectivity as a function of the soft-thresholding power + if("mean.k" %in% plots) { + base::plot(sft$fitIndices[, "Power"], sft$fitIndices[, "mean.k."], + type = "n", + xlab = "Soft threshold (power)", + ylab = "Mean connectivity", + main = main + ) + text(sft$fitIndices[,"Power"], sft$fitIndices[, "mean.k."], labels = powers, + cex = cex, col = "red") + } + + ht <- NULL + if("dendro.IQR" %in% plots) { + ht <- wgcna.checkDendroHeights(datExpr, n=200, powers=powers) + base::plot( + sft$fitIndices[, 1], ht$IQR, + type = "n", + xlab = "Soft threshold (power)", + ylab = "Dendrogram height IQR", + main = main + ) + text(sft$fitIndices[, 1], ht$IQR, labels = powers, + cex = cex, col = "red") + } +} + +#' +#' +#' @export +wgcna.plotPowerAnalysis_multi <- function(exprList, + cex=1, maxpower = 20, + nmax = 2000, + networktype = "signed", + plots=c("sft.modelfit", "mean.k", + "dendro.IQR"), + main=NULL, + cex.legend = 1, + RsquaredCut = 0.85, + setPar=TRUE) { + if(0) { + networktype = "signed" + cex=1; maxpower = 20; nmax = 2000; + plots=c("sft.modelfit", "mean.k","dendro.IQR"); + main=NULL; + RsquaredCut = 0.85 + } + + RsquaredCut <- RsquaredCut[1] + + ## Choose a set of soft-thresholding powers + powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) + if(maxpower>20) { + powers <- c(powers, seq(from = 20, to = maxpower, by = 5)) + } + + ## process each list + sft <- list() + for(i in 1:length(exprList)) { + datExpr <- Matrix::t(exprList[[i]]) + + ## subsample for speed + if( nmax > 0 && nrow(exprList[[i]]) > nmax ) { + ii <- sample(1:ncol(datExpr),nmax) + datExpr <- datExpr[,ii] + } + + ## Call the network topology analysis function + k <- names(exprList)[i] + sft[[k]] <- WGCNA::pickSoftThreshold( + datExpr, + powerVector = powers, + RsquaredCut = RsquaredCut, + networkType = networktype, + verbose = 0 + ) + } + + if(setPar) { + np <- length(plots) + par(mfrow = c(1, np), mar = c(3.8, 4.5, 3, 1), mgp = c(2.6, 0.95, 0)) + } + + ## Plot the results: + if("sft.modelfit" %in% plots) { + ## Scale-free topology fit index as a function of the soft-thresholding power + Y <- c() + for(i in 1:length(sft)) { + y1 <- -sign(sft[[i]]$fitIndices[,"slope"]) * sft[[i]]$fitIndices[,"SFT.R.sq"] + Y <- cbind(Y, y1) + } + colnames(Y) <- names(sft) + x <- sft[[1]]$fitIndices[, "Power"] + Y <- pmax(Y,0) + matplot( + x = x, + y = Y, + ylim = c(0,1), + type = "l", + col = 2:99, + lty = 1, + lwd = 0.6, + xlab = "Soft threshold (power)", + ylab = "SFT model fit (signed R^2)", + main = main + ) + #abline(h = 0, col = "black", lty=1) + for(i in 1:ncol(Y)) { + text(powers, Y[,i], labels = "█", cex=cex, col="white") + text(powers, Y[,i], labels = powers, cex=cex, col = 1+i) + } + ## this line corresponds to using an R^2 cut-off of h + abline(h = RsquaredCut, col = "grey10", lty=2) + legend("bottomright", legend=colnames(Y), fill=2:10, + cex=cex.legend, y.intersp=0.9) + title("SFT model fit") + } + + ## Mean connectivity as a function of the soft-thresholding power + if("mean.k" %in% plots) { + Y <- sapply(sft, function(s) s$fitIndices[, "mean.k."]) + matplot( + powers, + Y, + type = "l", + col = 2:99, + lty = 1, + lwd = 0.6, + xlab = "Soft threshold (power)", + ylab = "Mean connectivity", + main = main + ) + for(i in 1:ncol(Y)) { + text(powers, Y[,i], labels = "█", cex=cex, col="white") + text(powers, Y[,i], labels = powers, cex=cex, col = 1+i) + } + legend("topright", legend=colnames(Y), fill=2:10, + cex=cex.legend, y.intersp=0.9) + title("Mean connectivity") + } + + ht <- NULL + if("dendro.IQR" %in% plots) { + ht <- list() + for(i in 1:length(exprList)) { + ht[[i]] <- wgcna.checkDendroHeights( + Matrix::t(exprList[[i]]), n=200, powers=powers) + } + Y <- sapply(ht, function(h) h$IQR) + matplot( + powers, + Y, + type = "l", + col = 2:99, + lty = 1, + lwd = 0.6, + xlab = "Soft threshold (power)", + ylab = "Dendrogram height IQR", + main = main + ) + for(i in 1:ncol(Y)) { + text(powers, Y[,i], labels = "█", cex=cex, col="white") + text(powers, Y[,i], labels = powers, cex = cex, col = 1+i) + } + legend("bottomright", legend=names(exprList), fill=2:10, + cex=cex.legend, y.intersp=0.9) + title("Dendrogram IQR") + } + +} + + + +#' Better (?) method to pick soft threshold (aka power). +#' +#' @export +wgcna.pickSoftThreshold <- function(datExpr, sft=NULL, rcut=0.85, + method = c("sft","iqr")[1], + nmax = -1, powers = NULL, + verbose = 1) { + + if(is.null(powers)) { + powers <- c(c(1:10), seq(from = 12, to = 20, by = 2)) + # powers <- c(powers, seq(from = 25, to = 50, by = 5)) + } + + ## subsample for speed + if(ncol(datExpr) > nmax && nmax > 0) { + ii <- sample(1:ncol(datExpr),nmax) + datExpr <- datExpr[,ii] + } + + if(is.null(sft)) { + sft <- WGCNA::pickSoftThreshold( + datExpr, + powerVector = powers, + networkType = "signed", + verbose = verbose + ) + } + + optPower <- NULL + if(method == "sft") { + ## Pick power according to scale-free (SFT) parameter + sqr <- -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2] + if(max(sqr, na.rm=TRUE) >= rcut) { + optPower <- min(powers[which(sqr >= rcut)]) + } else { + ## remove initial value that are possible negative + if(sqr[1] < 0.05) { + for(i in 1:length(sqr)) sqr[i] <- ifelse(sqr[i] < 0.05, NA, sqr[i]) + } + ds <- 0.5*median(abs(diff(sqr)),na.rm=TRUE) ##small step + if(any(diff(sqr) < -ds, na.rm=TRUE)) { + i <- min(which(diff(sqr) < -ds))+1 + sqr[i:length(sqr)] <- NA + } + optPower <- powers[which.max(sqr)] + } + } else if(method == "iqr") { + ## Pick power according to IQR + ht <- wgcna.checkDendroHeights(datExpr, n=200, powers = powers) + ##base::plot( powers, ht$IQR) + optPower <- powers[ which.max(ht$IQR) ] + } else { + stop("[wgcna.pickSoftThreshold] invalid method = ", method) + } + + if(verbose>0) { + message("[wgcna.pickSoftThreshold] sft$powerEstimate = ", sft$powerEstimate) + message("[wgcna.pickSoftThreshold] optPower = ", optPower) + } + optPower +} + + +#' Scale a list of TOM matrices so that the quantiles (default p=0.95) +#' are equal after scaling with respect to the first TOM matrix. +#' +#' +wgcna.scaleTOMs <- function(TOMs, scaleP=0.95) { + nGenes <- nrow(TOMs[[1]]) + nSets <- length(TOMs) + # Sample sufficiently large number of TOM entries + nSamples = as.integer(1/(1-scaleP) * 1000); + # Choose the sampled TOM entries + scaleSample = sample(nGenes*(nGenes-1)/2, size = nSamples) + TOMScalingSamples = list(); + # These are TOM values at reference percentile + scaleQuant = rep(1, nSets) + # Scaling powers to equalize reference TOM values + scalePowers = rep(1, nSets) + # Loop over sets + set=1 + for (set in 1:nSets) + { + # Select the sampled TOM entries + tval = as.dist(TOMs[[set]])[scaleSample] + # Calculate the 95th percentile + scaleQuant[set] = quantile(tval, probs = scaleP, type = 8); + TOMScalingSamples[[set]] <- tval + + # Scale the TOM + if (set>1) + { + scalePowers[set] = log(scaleQuant[1])/log(scaleQuant[set]); + TOMs[[set]] = TOMs[[set]]^scalePowers[set]; + } + } + return(TOMs) +} + +#' @export +wgcna.getTopGenesAndSets <- function(wgcna, annot=NULL, module=NULL, ntop=25) { + if(!"stats" %in% names(wgcna)) stop("object has no stats") + if(!"gsea" %in% names(wgcna)) stop("object has no enrichment results (gsea)") + + if("layers" %in% names(wgcna) && class(wgcna$datExpr) == "list") { + cons <- wgcna.getConsensusTopGenesAndSets(wgcna, annot=annot, module=module, + ntop=ntop) + return(cons) + } + + ## get top genes (highest kME) + mm <- wgcna$stats$moduleMembership + ##mm <- cor( wgcna$datExpr, wgcna$net$MEs ) + if(!is.null(annot)) mm <- rename_by2(mm, annot) + gg <- rownames(mm) + mm <- as.list(data.frame(mm)) + if(!is.null(module)) mm <- mm[module] + sel.topgenes <- lapply(mm, function(x) head(order(-x),2*ntop) ) + topgenes <- lapply( sel.topgenes, function(i) gg[i]) + + ## top genesets + ee <- wgcna$gsea + if(!is.null(module)) ee <- ee[module] + topsets <- lapply(ee,function(x) head(rownames(x),ntop)) + topmembers <- lapply(ee, function(x) { + names(head(sort(-table(unlist(strsplit(x$genes,split="\\|")))),2*ntop)) + }) + + M <- wgcna$modTraits + toppheno <- apply(M, 1, function(x) names(which(x > 0.8*max(x)))) + + ## take intersection (high MM, high set membership) + topgenes <- mapply(intersect, topmembers, topgenes, SIMPLIFY=FALSE) + topgenes <- lapply(topgenes, head, ntop) + + list( sets = topsets, genes = topgenes, pheno=toppheno ) +} + +#' @export +wgcna.getConsensusTopGenesAndSets <- function(wgcna, annot=NULL, module=NULL, ntop=20) { + if(!"stats" %in% names(wgcna)) stop("object has no stats") + if(!"gsea" %in% names(wgcna)) stop("object has no enrichment results (gsea)") + + ## get top genes (highest kME) + topgenesx <- list() + for(i in 1:length(wgcna$stats)) { + mm <- wgcna$stats[[i]]$moduleMembership + if(!is.null(annot)) mm <- rename_by2(mm, annot, "symbol") + gg <- rownames(mm) + mm <- as.list(data.frame(mm)) + if(!is.null(module)) mm <- mm[module] + sel.topgenes <- lapply(mm, function(x) head(order(-x), 3*ntop) ) + topgenesx[[i]] <- lapply(sel.topgenes, function(i) gg[i]) + } + topgenes <- topgenesx[[1]] + k=2 + for(k in 2:length(topgenesx)) { + topgenes <- mapply(intersect, topgenes, topgenesx[[k]], SIMPLIFY=FALSE) + } + + ## top genesets (as symbol!) + ee <- wgcna$gsea + if(!is.null(module)) ee <- ee[module] + topsets <- lapply(ee,function(x) head(rownames(x),ntop)) + topmembers <- lapply(ee, function(x) { + names(head(sort(-table(unlist(strsplit(x$genes,split="\\|")))),3*ntop)) + }) + + ## module traits + M <- lapply(wgcna$net$multiMEs, function(x) as.matrix(x$data)) + Y <- lapply(M, function(m) wgcna$datTraits[rownames(m),]) + R <- mapply( function(x,y) abs(cor(x,y,use="pairwise")), M, Y, SIMPLIFY=FALSE) + R <- Reduce('+', R) + toppheno <- apply(R, 1, function(x) names(which(x > 0.9*max(x,na.rm=TRUE)))) + toppheno + + ## take intersection (high MM, high set membership) + topgenes <- mapply(intersect, topmembers, topgenes, SIMPLIFY=FALSE) + topgenes <- lapply(topgenes, head, ntop) + + list( sets = topsets, genes = topgenes, pheno=toppheno ) +} + +wgcna.describeModules <- function(wgcna, ntop=25, annot=NULL, multi=FALSE, + experiment="", verbose=1, model="gpt-5-nano", + modules=NULL) { + if(multi) { + top <- wgcna.getConsensusTopGenesAndSets(wgcna, annot=annot, ntop=ntop) + } else { + top <- wgcna.getTopGenesAndSets(wgcna, annot=annot, ntop=ntop) + } + + if(is.null(modules)) modules <- names(top$genes) + + modules <- intersect(modules, names(top$genes)) + modules <- intersect(modules, names(top$sets)) + ##modules <- intersect(modules, names(top$pheno)) + + if(length(modules)==0) return(NULL) + + ## If no LLM is available we do just a manual summary + if(is.null(model) || model == "") { + desc <- list() + for(m in modules) { + ss=gg=pp=NULL + gg <- paste( top$genes[[m]], collapse=', ') + ss <- paste( sub(".*:","",top$sets[[m]]), collapse='; ') + if(m %in% names(top$pheno)) pp <- paste( top$pheno[[m]], collapse='; ') + + d <- "" + if(!is.null(pp)) d <- paste(d, "Correlated phenotypes: ", pp, "

") + d <- paste(d, "Key genes: ", gg, "

") + d <- paste(d, "Top enriched gene sets: ", ss, "

") + + desc[[m]] <- d + } + return(desc) + } + + prompt <- "Give a short summary of the main overall biological function of the following top enriched genesets belonging to module . Discuss the possible relationship with phenotypes of this experiment about . Use maximum one paragraph. Do not use bullet points. \n\nHere is list of enriched gene sets: \n" + if(verbose) cat(prompt) + + desc <- list() + for(m in modules) { + ss=gg=pp="" + ss <- paste( top$sets[[m]], collapse=';') + gg <- paste( top$genes[[m]], collapse=';') + if(m %in% names(top$pheno)) pp <- paste( top$pheno[[m]], collapse=';') + + q <- prompt + if(length(top$genes[[m]])>0) { + q <- paste(q, "\n\nAfter that, shortly discuss if any of these key genes might be involved in the biological function. No need to mention all genes, just a few. Here is list of key genes: ") + } + if(verbose) cat(q) + + q <- sub("", m, q) + q <- sub("", pp, q) + q <- sub("", experiment, q) + q <- sub("", ss, q) + q <- sub("", gg, q) + + answer <- ai.ask(q, model=model) + answer <- paste0(answer, "\n\n[AI generated using ",model,"]") + desc[[m]] <- answer + } + + return(desc) +} + diff --git a/R/ui-colors.R b/R/ui-colors.R index 5e983ba2..990d65e5 100644 --- a/R/ui-colors.R +++ b/R/ui-colors.R @@ -285,3 +285,4 @@ color_from_middle <- function(data, color1, color2) { max_val <- max(abs(data), na.rm = TRUE) DT::JS(sprintf("isNaN(parseFloat(value)) || value < 0 ? 'linear-gradient(90deg, transparent, transparent ' + (50 + value/%s * 50) + '%%, %s ' + (50 + value/%s * 50) + '%%,%s 50%%,transparent 50%%)': 'linear-gradient(90deg, transparent, transparent 50%%, %s 50%%, %s ' + (50 + value/%s * 50) + '%%, transparent ' + (50 + value/%s * 50) + '%%)'", max_val, color1, max_val, color1, color2, color2, max_val, max_val)) } + diff --git a/R/utils-deep.R b/R/utils-deep.R deleted file mode 100644 index c317f40b..00000000 --- a/R/utils-deep.R +++ /dev/null @@ -1,16 +0,0 @@ -OLLAMA_MODELS <- c("gemma3:4b", "gemma3:27b", "deepseek-r1:1.5b", "deepseek-r1:7b") -GPT_MODELS <- c("gpt-4o-mini") -prompt <- NULL -deep.ask <- function(question, model = "gpt-4o-mini", prompt = NULL) { - if (model == "gpt-4o-mini") { - chat <- ellmer::chat_openai(model = "gpt-4o-mini", system_prompt = prompt) - } else if (model %in% OLLAMA_MODELS) { - chat <- ellmer::chat_ollama(model = model, system_prompt = prompt) - } else { - message("ERROR. unknown model", model) - message("valid models: ", paste(c(OLLAMA_MODELS, GPT_MODELS), collapse = ", ")) - return(NULL) - } - . <- chat$chat(question, echo = FALSE) - chat$last_turn()@text -} diff --git a/man/collapseTraitMatrix.Rd b/man/collapseTraitMatrix.Rd new file mode 100644 index 00000000..e8e57458 --- /dev/null +++ b/man/collapseTraitMatrix.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-functions.R +\name{collapseTraitMatrix} +\alias{collapseTraitMatrix} +\title{Collapses an expanded (binarized) trait matrix to its original +categorical phenotype with levels. Colnames must be "pheno1=A", +"pheno1=B" etc.} +\usage{ +collapseTraitMatrix(Y) +} +\description{ +Collapses an expanded (binarized) trait matrix to its original +categorical phenotype with levels. Colnames must be "pheno1=A", +"pheno1=B" etc. +} diff --git a/man/fastTOMsimilarity.Rd b/man/fastTOMsimilarity.Rd new file mode 100644 index 00000000..1ed1cc6c --- /dev/null +++ b/man/fastTOMsimilarity.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{fastTOMsimilarity} +\alias{fastTOMsimilarity} +\title{Faster implementation of TOM computation using low-rank SVD +approximation.} +\usage{ +fastTOMsimilarity(A, tomtype = "signed", lowrank = 20) +} +\description{ +Faster implementation of TOM computation using low-rank SVD +approximation. +} diff --git a/man/gset.fisher.Rd b/man/gset.fisher.Rd index 178dc84d..61601270 100644 --- a/man/gset.fisher.Rd +++ b/man/gset.fisher.Rd @@ -17,6 +17,7 @@ gset.fisher( method = "fast.fisher", check.background = TRUE, common.genes = TRUE, + no.pass = NA, verbose = 1 ) } diff --git a/man/labels2rainbow.Rd b/man/labels2rainbow.Rd deleted file mode 100644 index 07943af1..00000000 --- a/man/labels2rainbow.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pgx-wgcna.R -\name{labels2rainbow} -\alias{labels2rainbow} -\title{Map module colors to rainbow palette} -\usage{ -labels2rainbow(net) -} -\arguments{ -\item{net}{A WGCNA network object.} -} -\value{ -A named character vector mapping the original module colors to rainbow colors. -} -\description{ -Maps the module colors from a WGCNA network to a rainbow palette. -} -\details{ -This function takes a WGCNA network object and maps the module colors -to a rainbow palette based on the modules' hierarchical clustering order. - -It extracts the hierarchical clustering dendrogram order, gets the number -of unique module colors, ranks the module color means based on the -dendrogram order, and assigns rainbow colors accordingly. - -This allows easier visualization and interpretation of modules in the -standard rainbow palette order. -} diff --git a/man/mofa.imputeMissing.Rd b/man/mofa.imputeMissing.Rd new file mode 100644 index 00000000..947c3898 --- /dev/null +++ b/man/mofa.imputeMissing.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-mofa.R +\name{mofa.imputeMissing} +\alias{mofa.imputeMissing} +\title{Impute missing values for a multiomics expression matrix +X. Features must be prefixed with datatype.} +\usage{ +mofa.imputeMissing(X, method = "SVD2") +} +\description{ +Impute missing values for a multiomics expression matrix +X. Features must be prefixed with datatype. +} diff --git a/man/mofa.merge_data2.Rd b/man/mofa.merge_data2.Rd new file mode 100644 index 00000000..42bdf978 --- /dev/null +++ b/man/mofa.merge_data2.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-mofa.R +\name{mofa.merge_data2} +\alias{mofa.merge_data2} +\title{This merges list of multi-omics data to a single matrix. Note that +it can handle non-matched data by taking union of rownames or +colnames and extending the final matrix. Be careful it can +introduce NA in such non-matched cases.} +\usage{ +mofa.merge_data2(xdata, prefix.rows = NULL, prefix.cols = NULL) +} +\description{ +This merges list of multi-omics data to a single matrix. Note that +it can handle non-matched data by taking union of rownames or +colnames and extending the final matrix. Be careful it can +introduce NA in such non-matched cases. +} diff --git a/man/mofa.normalizeExpression.Rd b/man/mofa.normalizeExpression.Rd new file mode 100644 index 00000000..1dcde6bb --- /dev/null +++ b/man/mofa.normalizeExpression.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-mofa.R +\name{mofa.normalizeExpression} +\alias{mofa.normalizeExpression} +\title{Normalize matrix for a multiomics expression matrix X. Features +must be prefixed with datatype.} +\usage{ +mofa.normalizeExpression(X, method1 = "maxMedian", method2 = "none") +} +\description{ +See also: normalizeMultiOmics() +} diff --git a/man/pgx.supercell.Rd b/man/pgx.supercell.Rd index a8537914..1d7e6af1 100644 --- a/man/pgx.supercell.Rd +++ b/man/pgx.supercell.Rd @@ -4,7 +4,14 @@ \alias{pgx.supercell} \title{SuperCell down sampling. Uniform down samplsing using gamma = 20.} \usage{ -pgx.supercell(counts, meta, group = NULL, gamma = 20, log.transform = TRUE) +pgx.supercell( + counts, + meta, + group = NULL, + gamma = 20, + nvargenes = 1000, + log.transform = TRUE +) } \description{ SuperCell down sampling. Uniform down samplsing using gamma = 20. diff --git a/man/pgx.wgcna.Rd b/man/pgx.wgcna.Rd index dacff4ab..c11314c6 100644 --- a/man/pgx.wgcna.Rd +++ b/man/pgx.wgcna.Rd @@ -14,9 +14,13 @@ pgx.wgcna( networktype = "signed", tomtype = "signed", numericlabels = FALSE, - ngenes = 4000, - maxBlockSize = 5000, - gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]" + ngenes = 2000, + maxBlockSize = 9999, + gset.filter = "PATHWAY|HALLMARK|^GO|^C[1-9]", + summary = TRUE, + ai_model = DEFAULT_LLM, + verbose = 1, + progress = NULL ) } \arguments{ diff --git a/man/probe2symbol.Rd b/man/probe2symbol.Rd index 7c48606e..6bdec409 100644 --- a/man/probe2symbol.Rd +++ b/man/probe2symbol.Rd @@ -7,7 +7,7 @@ probe2symbol( probes, annot_table, - query = c("symbol", "gene_name"), + query = "symbol", key = NULL, fill_na = FALSE ) diff --git a/man/rho2bluered.Rd b/man/rho2bluered.Rd new file mode 100644 index 00000000..7afbd59e --- /dev/null +++ b/man/rho2bluered.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{rho2bluered} +\alias{rho2bluered} +\title{Converts correlation values [-1;1] to blue-white-red colors. Good +for creating color labels for labeledHeatmaps that expect colors. +NOTE: use WGCNA::numbers2colors???} +\usage{ +rho2bluered(R, a = 1, f = 0.95) +} +\description{ +Converts correlation values [-1;1] to blue-white-red colors. Good +for creating color labels for labeledHeatmaps that expect colors. +NOTE: use WGCNA::numbers2colors??? +} diff --git a/man/wgcna.computeConsensusGeneStats.Rd b/man/wgcna.computeConsensusGeneStats.Rd new file mode 100644 index 00000000..3229eaaf --- /dev/null +++ b/man/wgcna.computeConsensusGeneStats.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeConsensusGeneStats} +\alias{wgcna.computeConsensusGeneStats} +\title{Compute gene statistics with original datExpr but with consensus +colors/labels for each layers. A separate function +wgcna.getConsensusGeneStats() extracts clean tables from this +results object.} +\usage{ +wgcna.computeConsensusGeneStats(cons) +} +\description{ +Compute gene statistics with original datExpr but with consensus +colors/labels for each layers. A separate function +wgcna.getConsensusGeneStats() extracts clean tables from this +results object. +} diff --git a/man/wgcna.computeConsensusMatrix.Rd b/man/wgcna.computeConsensusMatrix.Rd new file mode 100644 index 00000000..3e6d893e --- /dev/null +++ b/man/wgcna.computeConsensusMatrix.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeConsensusMatrix} +\alias{wgcna.computeConsensusMatrix} +\title{Compute consensus matrix from list of matrices. The consensus +matrix checks for consistent sign and minimal threshold for each +matrix. Optionally filters on consistent p-value.} +\usage{ +wgcna.computeConsensusMatrix(matlist, ydim, psig = 0.05, consfun = "min") +} +\arguments{ +\item{ydim}{original dimension of data} +} +\description{ +Compute consensus matrix from list of matrices. The consensus +matrix checks for consistent sign and minimal threshold for each +matrix. Optionally filters on consistent p-value. +} diff --git a/man/wgcna.computeConsensusModuleEnrichment.Rd b/man/wgcna.computeConsensusModuleEnrichment.Rd new file mode 100644 index 00000000..a9c6faec --- /dev/null +++ b/man/wgcna.computeConsensusModuleEnrichment.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeConsensusModuleEnrichment} +\alias{wgcna.computeConsensusModuleEnrichment} +\title{Compute consensus enrichment by calculating overlapping enriched +terms.} +\usage{ +wgcna.computeConsensusModuleEnrichment( + cons, + GMT, + annot, + methods = c("fisher", "gsetcor", "xcor"), + min.genes = 10, + ntop = 400 +) +} +\description{ +Compute consensus enrichment by calculating overlapping enriched +terms. +} diff --git a/man/wgcna.computeDistinctMatrix.Rd b/man/wgcna.computeDistinctMatrix.Rd new file mode 100644 index 00000000..80b4b877 --- /dev/null +++ b/man/wgcna.computeDistinctMatrix.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeDistinctMatrix} +\alias{wgcna.computeDistinctMatrix} +\title{Compute consensus matrix from list of matrices. The consensus +matrix checks for consistent sign and minimal threshold for each +matrix. Optionally filters on consistent p-value.} +\usage{ +wgcna.computeDistinctMatrix( + matlist, + ydim, + psig = 0.05, + min.diff = 0.3, + consmax = 0 +) +} +\description{ +Compute consensus matrix from list of matrices. The consensus +matrix checks for consistent sign and minimal threshold for each +matrix. Optionally filters on consistent p-value. +} diff --git a/man/wgcna.computeGeneStats.Rd b/man/wgcna.computeGeneStats.Rd new file mode 100644 index 00000000..21107b12 --- /dev/null +++ b/man/wgcna.computeGeneStats.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeGeneStats} +\alias{wgcna.computeGeneStats} +\title{Compute general feature statistics after WGCNA results.} +\usage{ +wgcna.computeGeneStats(net, datExpr, datTraits, TOM) +} +\description{ +Compute general feature statistics after WGCNA results. +} diff --git a/man/wgcna.computeModules.Rd b/man/wgcna.computeModules.Rd new file mode 100644 index 00000000..08c440c5 --- /dev/null +++ b/man/wgcna.computeModules.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.computeModules} +\alias{wgcna.computeModules} +\title{Reimplementation for WGCNA::blockwiseModules(). This returns exact +same object as WGCNA::blockwiseModules() but is faster and allows +different clustering linkage methods (ward, complete).} +\usage{ +wgcna.computeModules( + datExpr, + power = 6, + networkType = "signed", + TOM = NULL, + TOMType = "signed", + calcMethod = "fast", + lowrank = 20, + clustMethod = "average", + cutMethod = "hybrid", + deepSplit = 2, + treeCut = 0.99, + treeCutCeiling = 1, + minModuleSize = 20, + minModuleSize2 = minModuleSize, + mergeCutHeight = 0.15, + minKMEtoStay = 0.3, + numericLabels = FALSE, + maxBlockSize = 9999, + returnTOM = FALSE, + verbose = 1 +) +} +\description{ +Reimplementation for WGCNA::blockwiseModules(). This returns exact +same object as WGCNA::blockwiseModules() but is faster and allows +different clustering linkage methods (ward, complete). +} diff --git a/man/wgcna.labels2colors.Rd b/man/wgcna.labels2colors.Rd new file mode 100644 index 00000000..6805f511 --- /dev/null +++ b/man/wgcna.labels2colors.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.labels2colors} +\alias{wgcna.labels2colors} +\title{Converts WGCNA labels (numeric or color) to colors.} +\usage{ +wgcna.labels2colors(colors, ...) +} +\description{ +Converts WGCNA labels (numeric or color) to colors. +} diff --git a/man/wgcna.pickSoftThreshold.Rd b/man/wgcna.pickSoftThreshold.Rd new file mode 100644 index 00000000..abfc9ba4 --- /dev/null +++ b/man/wgcna.pickSoftThreshold.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.pickSoftThreshold} +\alias{wgcna.pickSoftThreshold} +\title{Better (?) method to pick soft threshold (aka power).} +\usage{ +wgcna.pickSoftThreshold( + datExpr, + sft = NULL, + rcut = 0.85, + method = c("sft", "iqr")[1], + nmax = -1, + powers = NULL, + verbose = 1 +) +} +\description{ +Better (?) method to pick soft threshold (aka power). +} diff --git a/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd b/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd new file mode 100644 index 00000000..35048d94 --- /dev/null +++ b/man/wgcna.plotDendroAndTraitCorrelation_cons.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.plotDendroAndTraitCorrelation_cons} +\alias{wgcna.plotDendroAndTraitCorrelation_cons} +\title{wgcna.plotDendroAndTraits for Consensus output} +\usage{ +wgcna.plotDendroAndTraitCorrelation_cons( + cons, + show.traits = TRUE, + traits = NULL, + main = NULL, + rm.na = TRUE, + use.tree = 0, + marAll = c(0.2, 8, 2, 0.2), + setLayout = TRUE, + ... +) +} +\description{ +wgcna.plotDendroAndTraits for Consensus output +} diff --git a/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd b/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd index 7f25ab40..4ae0ba3c 100644 --- a/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd +++ b/man/wgcna.plotEigenGeneAdjacencyHeatmap.Rd @@ -9,8 +9,28 @@ eigengenes.} wgcna.plotEigenGeneAdjacencyHeatmap( wgcna, add_traits = TRUE, + traits = NULL, + add_me = TRUE, marx = 1, main = NULL, + multi = FALSE, + phenotype = NULL, + colorlabel = TRUE, + text = FALSE, + pstar = TRUE, + setMargins = TRUE, + mar1 = c(5.6, 4.5, 1.8, 0), + mar2 = c(8, 10, 4, 2), + cex.lab = 0.8, + cex.text = 0.7, + plotDendro = TRUE, + plotHeatmap = TRUE, + dendro.horiz = TRUE, + dendro.width = 0.3, + dendro.labels = TRUE, + nmax = -1, + fixclust = FALSE, + mask.intra = FALSE, justdata = FALSE ) } diff --git a/man/wgcna.plotEigenGeneClusterDendrogram.Rd b/man/wgcna.plotEigenGeneClusterDendrogram.Rd index 5e2b647c..738e967f 100644 --- a/man/wgcna.plotEigenGeneClusterDendrogram.Rd +++ b/man/wgcna.plotEigenGeneClusterDendrogram.Rd @@ -4,7 +4,18 @@ \alias{wgcna.plotEigenGeneClusterDendrogram} \title{Plot cluster dendrogram with eigengenes and traits.} \usage{ -wgcna.plotEigenGeneClusterDendrogram(wgcna, add_traits = TRUE, main = NULL) +wgcna.plotEigenGeneClusterDendrogram( + wgcna = NULL, + ME = NULL, + add_traits = TRUE, + horiz = FALSE, + setMargins = TRUE, + method = "wgcna", + showlabels = TRUE, + plot = TRUE, + multi = FALSE, + main = NULL +) } \description{ Plot cluster dendrogram with eigengenes and traits. diff --git a/man/wgcna.plotModuleScores.Rd b/man/wgcna.plotModuleScores.Rd new file mode 100644 index 00000000..80bc9171 --- /dev/null +++ b/man/wgcna.plotModuleScores.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.plotModuleScores} +\alias{wgcna.plotModuleScores} +\title{Plot top modules most correlated with trait for multi expression +data.} +\usage{ +wgcna.plotModuleScores( + res, + trait, + multi = FALSE, + nmax = 16, + collapse.trait = FALSE, + plotlib = "base", + setpar = TRUE +) +} +\description{ +Plot top modules most correlated with trait for multi expression +data. +} diff --git a/man/wgcna.plotTopModules_multi.Rd b/man/wgcna.plotTopModules_multi.Rd new file mode 100644 index 00000000..eb002953 --- /dev/null +++ b/man/wgcna.plotTopModules_multi.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.plotTopModules_multi} +\alias{wgcna.plotTopModules_multi} +\title{Plot top modules most correlated with trait for multi expression +data.} +\usage{ +wgcna.plotTopModules_multi( + multi, + trait, + nmax = 16, + collapse = FALSE, + plotlib = "base", + setpar = TRUE +) +} +\description{ +Plot top modules most correlated with trait for multi expression +data. +} diff --git a/man/wgcna.scaleTOMs.Rd b/man/wgcna.scaleTOMs.Rd new file mode 100644 index 00000000..ec5c859e --- /dev/null +++ b/man/wgcna.scaleTOMs.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.scaleTOMs} +\alias{wgcna.scaleTOMs} +\title{Scale a list of TOM matrices so that the quantiles (default p=0.95) +are equal after scaling with respect to the first TOM matrix.} +\usage{ +wgcna.scaleTOMs(TOMs, scaleP = 0.95) +} +\description{ +Scale a list of TOM matrices so that the quantiles (default p=0.95) +are equal after scaling with respect to the first TOM matrix. +} diff --git a/man/wgcna.tomclust.Rd b/man/wgcna.tomclust.Rd new file mode 100644 index 00000000..54bdbf13 --- /dev/null +++ b/man/wgcna.tomclust.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pgx-wgcna.R +\name{wgcna.tomclust} +\alias{wgcna.tomclust} +\title{Wrapper to hclust from matrix using default WGCNA parameters.} +\usage{ +wgcna.tomclust(X, power = 6) +} +\description{ +Wrapper to hclust from matrix using default WGCNA parameters. +}