Skip to content

Commit 45e28de

Browse files
authored
Merge branch 'JRaviLab:main' into rworkflow
2 parents 1002e81 + aacda7d commit 45e28de

8 files changed

+287
-120
lines changed

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ importFrom(readr,write_file)
232232
importFrom(readr,write_lines)
233233
importFrom(readr,write_tsv)
234234
importFrom(rentrez,entrez_fetch)
235+
importFrom(rlang,":=")
235236
importFrom(rlang,.data)
236237
importFrom(rlang,abort)
237238
importFrom(rlang,as_string)
@@ -274,6 +275,7 @@ importFrom(tidyr,pivot_wider)
274275
importFrom(tidyr,replace_na)
275276
importFrom(tidyr,separate)
276277
importFrom(tidyr,unite)
278+
importFrom(utils,combn)
277279
importFrom(viridis,scale_fill_viridis)
278280
importFrom(visNetwork,visEdges)
279281
importFrom(visNetwork,visGroups)

R/assign_job_queue.R

+7-6
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,8 @@ assignJobQueue <- function(
581581
#' @importFrom dplyr mutate select
582582
#' @importFrom ggplot2 aes geom_line ggplot labs
583583
#' @importFrom tibble as_tibble
584-
#' @importFrom rlang warn abort inform
584+
#' @importFrom utils combn
585+
#' @importFrom rlang .data warn abort inform
585586
#'
586587
#' @return line plot object
587588
#'
@@ -657,13 +658,13 @@ plotEstimatedWallTimes <- function() {
657658
df_walltimes <- tidyr::gather(df_walltimes,
658659
key = "advanced_opts",
659660
value = "est_walltime",
660-
n_inputs)
661+
.data$n_inputs)
661662
# sec to hrs
662663
df_walltimes <- df_walltimes |>
663-
dplyr::mutate(est_walltime = est_walltime / 3600)
664-
p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs,
665-
y = est_walltime,
666-
color = advanced_opts)) +
664+
dplyr::mutate(est_walltime = .data$est_walltime / 3600)
665+
p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = .data$n_inputs,
666+
y = .data$est_walltime,
667+
color = .data$advanced_opts)) +
667668
ggplot2::geom_line() +
668669
ggplot2::labs(
669670
title = "MolEvolvR estimated runtimes",

R/cleanup.R

+35-47
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,9 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
190190
#' @return A data frame with condensed repeated domains in the specified column.
191191
#' @export
192192
#'
193-
#' @importFrom dplyr pull
193+
#' @importFrom dplyr pull mutate
194194
#' @importFrom stringr str_replace_all
195+
#' @importFrom rlang .data :=
195196
#'
196197
#' @examples
197198
#' \dontrun{
@@ -206,36 +207,23 @@ condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots
206207
regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([a-z0-9_-]+)\\b(?:\\s+\\1\\b)+")
207208

208209
# !! FUNS is soft-deprecated. FIX!!!
209-
prot[, by_column] <- prot %>%
210-
pull(by_column) %>%
211-
str_replace_all(., pattern = "\\.", replacement = "_d_") %>%
212-
# str_replace_all(., pattern = " ", replacement = "_s_") %>%
213-
str_replace_all(., pattern = " ", replacement = "_") %>%
214-
str_replace_all(.,
215-
pattern = "\\+",
216-
replacement = " "
217-
) %>% # Use a different placeholder other than space
218-
str_replace_all(.,
219-
pattern = "-",
220-
replacement = "__"
221-
) %>%
222-
str_replace_all(.,
223-
pattern = regex_identify_repeats,
224-
replacement = "\\1(s)"
225-
) %>%
226-
str_replace_all(.,
227-
pattern = "__",
228-
replacement = "-"
229-
) %>%
230-
str_replace_all(.,
231-
pattern = " ",
232-
replacement = "+"
233-
) %>%
234-
# str_replace_all(., pattern = "_s_", replacement = " ") %>%
235-
str_replace_all(., pattern = "_d_", replacement = ".")
236-
210+
prot <- prot %>%
211+
dplyr::mutate(!!by_column := stringr::str_replace_all(
212+
.data[[by_column]],
213+
c(
214+
"\\." = "_d_",
215+
" " = "_",
216+
"\\+" = " ",
217+
"-" = "__",
218+
regex_identify_repeats = "\\1(s)",
219+
"__" = "-",
220+
" " = "+",
221+
"_d_" = "."
222+
)
223+
))
237224

238225
return(prot)
226+
239227
}
240228

241229

@@ -701,8 +689,8 @@ cleanGeneDescription <- function(prot, column) {
701689
#' @param column The name of the column from which the longest entry among
702690
#' duplicates will be selected.
703691
#'
704-
#' @importFrom dplyr arrange filter group_by pull n select summarize
705-
#' @importFrom rlang sym
692+
#' @importFrom dplyr arrange filter group_by pull n select summarize mutate
693+
#' @importFrom rlang sym .data
706694
#'
707695
#' @return A data frame containing only the longest entries among duplicates
708696
#' based on the specified column.
@@ -713,37 +701,37 @@ cleanGeneDescription <- function(prot, column) {
713701
#' selectLongestDuplicate()
714702
#' }
715703
selectLongestDuplicate <- function(prot, column) {
716-
col <- sym(column)
717-
718-
prot$row.orig <- 1:nrow(prot)
719-
704+
col <- rlang::sym(column)
705+
prot <- prot %>%
706+
mutate(row.orig = seq_len(n()))
720707
# Get list of duplicates
721708
dups <- prot %>%
722-
group_by(AccNum) %>%
723-
summarize("count" = n()) %>%
709+
group_by(.data$AccNum) %>%
710+
summarize(count = n()) %>%
724711
filter(count > 1) %>%
725-
arrange(-count) %>%
726-
merge(prot, by = "AccNum")
712+
arrange(desc(count)) %>%
713+
left_join(prot, by = "AccNum")
727714

728-
dup_acc <- dups$AccNum
715+
dup_acc <- unique(dups$AccNum)
729716

730-
longest_rows <- c()
731-
remove_rows <- c()
717+
longest_rows <- integer()
718+
remove_rows <- integer()
732719
for (acc in dup_acc) {
733-
dup_rows <- dups %>% filter(AccNum == acc)
720+
dup_rows <- dups %>% filter(.data$AccNum == acc)
734721

735-
longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"]
722+
longest <- dup_rows$row.orig[which.max(nchar(pull(dup_rows, !!col)))]
736723

737724
longest_rows <- c(longest_rows, longest)
738725

739-
to_remove <- dup_rows[which(dup_rows$row.orig != longest), "row.orig"][]
726+
to_remove <- dup_rows$row.orig[dup_rows$row.orig != longest]
740727

741-
# dup_rows[which(nchar(pull(dup_rows,{{col}})) == max(nchar(pull(dup_rows,{{col}}))))[2:nrow(dup_rows)], "row.orig"]
742728
remove_rows <- c(remove_rows, to_remove)
743729
}
744730

745731
# grab all the longest rows
746-
unique_dups <- prot[-remove_rows, ] %>% select(-row.orig)
732+
unique_dups <- prot %>%
733+
filter(!.data$row.orig %in% remove_rows) %>%
734+
select(-.data$row.orig)
747735

748736
return(unique_dups)
749737
}

R/plotting.R

+7-1
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size
882882
#' @param coord_flip Logical. Whether to flip the coordinates of the plot
883883
#' (default is TRUE).
884884
#' @param legend Logical. Whether to display the legend (default is TRUE).
885+
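#' @param cpcols Character vector of colors (default is NULL, in which case a built-in palette of eight hex colors is used).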
885886
#'
886887
#' @importFrom dplyr pull select
887888
#' @importFrom ggplot2 aes_string coord_flip element_blank element_line element_rect element_text geom_bar ggplot guides guide_legend scale_fill_manual xlab ylab theme theme_minimal
@@ -903,8 +904,13 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "
903904
legend.text.size = 10,
904905
legend.cols = 2,
905906
legend.size = 0.7,
906-
coord_flip = TRUE, legend = TRUE) {
907+
coord_flip = TRUE, legend = TRUE,
908+
cpcols = NULL) {
907909
col <- sym(column)
910+
911+
if (is.null(cpcols)) {
912+
cpcols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF")
913+
}
908914

909915
if (reduce_lineage) {
910916
prot <- shortenLineage(prot, Lineage_col, abr_len = 3)

0 commit comments

Comments
 (0)