JRaviLab
diff --git a/‎NAMESPACE
+2 b/‎NAMESPACE
+2
diff --git a/‎R/assign_job_queue.R
+7-6 b/‎R/assign_job_queue.R
+7-6
diff --git a/‎R/cleanup.R
+35-47 b/‎R/cleanup.R
+35-47
diff --git a/‎R/plotting.R
+7-1 b/‎R/plotting.R
+7-1
@@ -232,6 +232,7 @@ importFrom(readr,write_file)
 importFrom(readr,write_lines)
 importFrom(readr,write_tsv)
 importFrom(rentrez,entrez_fetch)
+importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,abort)
 importFrom(rlang,as_string)
@@ -274,6 +275,7 @@ importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,separate)
 importFrom(tidyr,unite)
+importFrom(utils,combn)
 importFrom(viridis,scale_fill_viridis)
 importFrom(visNetwork,visEdges)
 importFrom(visNetwork,visGroups)
 
@@ -581,7 +581,8 @@ assignJobQueue <- function(
 #' @importFrom dplyr mutate select
 #' @importFrom ggplot2 aes geom_line ggplot labs
 #' @importFrom tibble as_tibble
-#' @importFrom rlang warn abort inform
+#' @importFrom utils combn
+#' @importFrom rlang .data warn abort inform
 #'
 #' @return line plot object
 #'
@@ -657,13 +658,13 @@ plotEstimatedWallTimes <- function() {
     df_walltimes <- tidyr::gather(df_walltimes,
                                   key = "advanced_opts",
                                   value = "est_walltime",
-                                  n_inputs)
+                                  "n_inputs")
     # sec to hrs
     df_walltimes <- df_walltimes |>
-      dplyr::mutate(est_walltime = est_walltime / 3600)
-    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = n_inputs,
-                                                    y = est_walltime,
-                                                    color = advanced_opts)) +
+      dplyr::mutate(est_walltime = .data$est_walltime / 3600)
+    p <- ggplot2::ggplot(df_walltimes, ggplot2::aes(x = .data$n_inputs, 
+                                                    y = .data$est_walltime, 
+                                                    color = .data$advanced_opts)) +
       ggplot2::geom_line() +
       ggplot2::labs(
         title = "MolEvolvR estimated runtimes",
 
@@ -190,8 +190,9 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
 #' @return A data frame with condensed repeated domains in the specified column.
 #' @export
 #'
-#' @importFrom dplyr pull
+#' @importFrom dplyr pull mutate
 #' @importFrom stringr str_replace_all
+#' @importFrom rlang .data :=
 #'
 #' @examples
 #' \dontrun{
@@ -206,36 +207,23 @@ condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots
     regex_identify_repeats <- paste0("(?i)", regex_exclude, "\\b([a-z0-9_-]+)\\b(?:\\s+\\1\\b)+")
 
     # !! FUNS is soft-deprecated. FIX!!!
-    prot[, by_column] <- prot %>%
-        pull(by_column) %>%
-        str_replace_all(., pattern = "\\.", replacement = "_d_") %>%
-        #  str_replace_all(., pattern = " ", replacement = "_s_") %>%
-        str_replace_all(., pattern = " ", replacement = "_") %>%
-        str_replace_all(.,
-            pattern = "\\+",
-            replacement = " "
-        ) %>% # Use a different placeholder other than space
-        str_replace_all(.,
-            pattern = "-",
-            replacement = "__"
-        ) %>%
-        str_replace_all(.,
-            pattern = regex_identify_repeats,
-            replacement = "\\1(s)"
-        ) %>%
-        str_replace_all(.,
-            pattern = "__",
-            replacement = "-"
-        ) %>%
-        str_replace_all(.,
-            pattern = " ",
-            replacement = "+"
-        ) %>%
-        # 			    str_replace_all(., pattern = "_s_", replacement = " ") %>%
-        str_replace_all(., pattern = "_d_", replacement = ".")
-
+    # Ordered pattern -> replacement steps; a named vector makes
+    # str_replace_all() apply them one after another. The names are set via
+    # names<- because inside c() `regex_identify_repeats = ...` would use the
+    # literal text "regex_identify_repeats" as the pattern, not the regex.
+    # "+" is parked as a space and "-" as "__" while repeats are collapsed.
+    replacements <- c("_d_", "_", " ", "__", "\\1(s)", "-", "+", ".")
+    names(replacements) <- c(
+        "\\.", " ", "\\+", "-", regex_identify_repeats, "__", " ", "_d_"
+    )
+    prot <- prot %>%
+        dplyr::mutate(!!by_column := stringr::str_replace_all(
+            .data[[by_column]],
+            replacements
+        ))
 
     return(prot)
+
 }
 
 
@@ -701,8 +689,8 @@ cleanGeneDescription <- function(prot, column) {
 #' @param column The name of the column from which the longest entry among 
 #' duplicates will be selected.
 #'
-#' @importFrom dplyr arrange filter group_by pull n select summarize
-#' @importFrom rlang sym
+#' @importFrom dplyr arrange filter group_by pull n select summarize mutate
+#' @importFrom rlang sym .data
 #'
 #' @return A data frame containing only the longest entries among duplicates 
 #' based on the specified column. 
@@ -713,37 +701,37 @@ cleanGeneDescription <- function(prot, column) {
 #' selectLongestDuplicate()
 #' }
 selectLongestDuplicate <- function(prot, column) {
-    col <- sym(column)
-
-    prot$row.orig <- 1:nrow(prot)
-
+    col <- rlang::sym(column)
+    # Global (not per-group) row index: a unique key for every input row.
+    prot$row.orig <- seq_len(nrow(prot))
     # Get list of duplicates
     dups <- prot %>%
-        group_by(AccNum) %>%
-        summarize("count" = n()) %>%
+        group_by(.data$AccNum) %>%
+        summarize(count = n()) %>%
         filter(count > 1) %>%
-        arrange(-count) %>%
-        merge(prot, by = "AccNum")
+        arrange(dplyr::desc(count)) %>%
+        dplyr::left_join(prot, by = "AccNum")
 
-    dup_acc <- dups$AccNum
+    dup_acc <- unique(dups$AccNum)
 
-    longest_rows <- c()
-    remove_rows <- c()
+    longest_rows <- integer()
+    remove_rows <- integer()
     for (acc in dup_acc) {
-        dup_rows <- dups %>% filter(AccNum == acc)
+        dup_rows <- dups %>% filter(.data$AccNum == acc)
 
-        longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"]
+        longest <- dup_rows$row.orig[which.max(nchar(pull(dup_rows, !!col)))]
 
         longest_rows <- c(longest_rows, longest)
 
-        to_remove <- dup_rows[which(dup_rows$row.orig != longest), "row.orig"][]
+        to_remove <- dup_rows$row.orig[dup_rows$row.orig != longest]
 
-        # dup_rows[which(nchar(pull(dup_rows,{{col}})) == max(nchar(pull(dup_rows,{{col}}))))[2:nrow(dup_rows)], "row.orig"]
         remove_rows <- c(remove_rows, to_remove)
     }
 
     # grab all the longest rows
-    unique_dups <- prot[-remove_rows, ] %>% select(-row.orig)
+    # Quoted name, not .data$: the pronoun is deprecated inside select().
+    unique_dups <- prot %>%
+        filter(!.data$row.orig %in% remove_rows) %>% select(-"row.orig")
 
     return(unique_dups)
 }
 
@@ -882,6 +882,7 @@ plotLineageHeatmap <- function(prot, domains_of_interest, level = 3, label.size
 #' @param coord_flip Logical. Whether to flip the coordinates of the plot
 #' (default is TRUE).
 #' @param legend Logical. Whether to display the legend (default is TRUE).
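+#' @param cpcols Optional character vector of colors (e.g. hex codes). If `NULL` (the default), a built-in palette of eight hex colors is used.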
 #'
 #' @importFrom dplyr pull select
 #' @importFrom ggplot2 aes_string coord_flip element_blank element_line element_rect element_text geom_bar ggplot guides guide_legend scale_fill_manual xlab ylab theme theme_minimal
@@ -903,8 +904,13 @@ plotStackedLineage <- function(prot, column = "DomArch", cutoff, Lineage_col = "
     legend.text.size = 10,
     legend.cols = 2,
     legend.size = 0.7,
-    coord_flip = TRUE, legend = TRUE) {
+    coord_flip = TRUE, legend = TRUE,
+    cpcols = NULL) {
     col <- sym(column)
+    
+    if (is.null(cpcols)) {
+        cpcols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF")
+    }
 
     if (reduce_lineage) {
         prot <- shortenLineage(prot, Lineage_col, abr_len = 3)