From 6b03555f5d232edb85f9db7aed9e7ca297462f75 Mon Sep 17 00:00:00 2001
From: Kritika Verma <95202116+SunSummoner@users.noreply.github.com>
Date: Mon, 7 Oct 2024 03:07:40 +0530
Subject: [PATCH] Binding for acc2lin.R, assign_job_queue.R, cleanup.R,
 ipr2viz.R, lineage.R, msa.R, plotting.R, summarize.R and tree.R

---
 MolEvolvR.Rproj      | 22 -------------------
 R/acc2lin.R          |  4 ++--
 R/assign_job_queue.R |  4 ++--
 R/cleanup.R          | 14 ++++++------
 R/ipr2viz.R          | 36 +++++++++++++++---------------
 R/lineage.R          | 16 +++++++-------
 R/msa.R              |  2 +-
 R/plotting.R         | 42 +++++++++++++++++------------------
 R/summarize.R        | 52 ++++++++++++++++++++++----------------------
 R/tree.R             |  8 +++----
 10 files changed, 89 insertions(+), 111 deletions(-)
 delete mode 100644 MolEvolvR.Rproj

diff --git a/MolEvolvR.Rproj b/MolEvolvR.Rproj
deleted file mode 100644
index 69fafd4b..00000000
--- a/MolEvolvR.Rproj
+++ /dev/null
@@ -1,22 +0,0 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-LineEndingConversion: Posix
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
diff --git a/R/acc2lin.R b/R/acc2lin.R
index f8d71949..017e5587 100644
--- a/R/acc2lin.R
+++ b/R/acc2lin.R
@@ -197,12 +197,12 @@ efetch_ipg <- function(accnums, out_path, plan = "sequential") {
 ipg2lin <- function(accessions, ipg_file, assembly_path, lineagelookup_path) {
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
 
-    ipg_dt <- ipg_dt[Protein %in% accessions]
+    ipg_dt <- ipg_dt[.data$Protein %in% accessions]
 
     ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID")
 
     lins <- GCA2Lins(prot_data = ipg_dt, assembly_path, lineagelookup_path)
-    lins <- lins[!is.na(Lineage)] %>% unique()
+    lins <- lins[!is.na(.data$Lineage)] %>% unique()
 
     return(lins)
 }
diff --git a/R/assign_job_queue.R b/R/assign_job_queue.R
index bc5253d4..89992810 100644
--- a/R/assign_job_queue.R
+++ b/R/assign_job_queue.R
@@ -94,7 +94,7 @@ get_proc_medians <- function(dir_job_results) {
         dplyr::summarise(
             dplyr::across(
                 dplyr::everything(),
-                \(x) median(x, na.rm = TRUE)
+                \(x) .data$median(x, na.rm = TRUE)
             )
         ) |>
         as.list()
@@ -126,7 +126,7 @@ write_proc_medians_table <- function(dir_job_results, filepath) {
             names_to = "process",
             values_to = "median_seconds"
         ) |>
-        dplyr::arrange(dplyr::desc(median_seconds))
+        dplyr::arrange(dplyr::desc(.data$median_seconds))
     readr::write_tsv(df_proc_medians, file = filepath)
     return(df_proc_medians)
 }
diff --git a/R/cleanup.R b/R/cleanup.R
index 3a708415..cbe360e9 100755
--- a/R/cleanup.R
+++ b/R/cleanup.R
@@ -342,7 +342,7 @@ remove_tails <- function(prot, by_column = "DomArch",
         # !! Insert line to read domains_keep
 
         # Contains all domains separated by "|"
-        domains_for_grep <- paste(domains_keep$domains, collapse = "|")
+        domains_for_grep <- paste(.data$domains_keep$domains, collapse = "|")
         # Remove rows with no domains contained within domains_keep
         # Redundant for ClustName since we already set the filter to only these doms.
         tails <- tails %>%
@@ -693,35 +693,35 @@ cleanup_GeneDesc <- function(prot, column) {
 pick_longer_duplicate <- function(prot, column) {
     col <- sym(column)
 
-    prot$row.orig <- 1:nrow(prot)
+    prot$.data$row.orig <- 1:nrow(prot)
 
     # Get list of duplicates
     dups <- prot %>%
-        group_by(AccNum) %>%
+        group_by(.data$AccNum) %>%
         summarize("count" = n()) %>%
         filter(count > 1) %>%
         arrange(-count) %>%
         merge(prot, by = "AccNum")
 
-    dup_acc <- dups$AccNum
+    dup_acc <- dups$.data$AccNum
 
     longest_rows <- c()
     remove_rows <- c()
     for (acc in dup_acc) {
-        dup_rows <- dups %>% filter(AccNum == acc)
+        dup_rows <- dups %>% filter(.data$AccNum == acc)
 
         longest <- dup_rows[which(nchar(pull(dup_rows, {{ col }})) == max(nchar(pull(dup_rows, {{ col }}))))[1], "row.orig"]
 
         longest_rows <- c(longest_rows, longest)
 
-        to_remove <- dup_rows[which(dup_rows$row.orig != longest), "row.orig"][]
+        to_remove <- dup_rows[which(dup_rows$.data$row.orig != longest), "row.orig"][]
 
         # dup_rows[which(nchar(pull(dup_rows,{{col}})) == max(nchar(pull(dup_rows,{{col}}))))[2:nrow(dup_rows)], "row.orig"]
         remove_rows <- c(remove_rows, to_remove)
     }
 
     # grab all the longest rows
-    unique_dups <- prot[-remove_rows, ] %>% select(-row.orig)
+    unique_dups <- prot[-remove_rows, ] %>% select(-.data$row.orig)
 
     return(unique_dups)
 }
diff --git a/R/ipr2viz.R b/R/ipr2viz.R
index bf3650f7..a84decf3 100644
--- a/R/ipr2viz.R
+++ b/R/ipr2viz.R
@@ -134,10 +134,10 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     ADDITIONAL_COLORS <- sample(CPCOLS, 1000, replace = TRUE)
     CPCOLS <- append(x = CPCOLS, values = ADDITIONAL_COLORS)
     ## Read IPR file
-    ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = iprscan_cols)
-    ipr_out <- ipr_out %>% filter(Name %in% accessions)
+    ipr_out <- read_tsv(infile_ipr, col_names = T, col_types = .data$iprscan_cols)
+    ipr_out <- ipr_out %>% filter(.data$Name %in% accessions)
     analysis_cols <- paste0("DomArch.", analysis)
-    infile_full <- infile_full %>% select(analysis_cols, Lineage_short, QueryName, PcPositive, AccNum)
+    infile_full <- infile_full %>% select(analysis_cols, .data$Lineage_short, .data$QueryName, .data$PcPositive, .data$AccNum)
     ## To filter by Analysis
     analysis <- paste(analysis, collapse = "|")
     ## @SAM: This can't be set in stone since the analysis may change!
@@ -152,35 +152,35 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
     # Filter by Top Accessions per Accession per DomArch and Lineage
     ipr_out <- subset(
         ipr_out,
-        ipr_out$AccNum %in% top_acc
+        ipr_out$.data$AccNum %in% top_acc
     )
     ## Need to fix this eventually based on the 'real' gene orientation! :)
     ipr_out$Strand <- rep("forward", nrow(ipr_out))
 
-    ipr_out <- ipr_out %>% arrange(AccNum, StartLoc, StopLoc)
+    ipr_out <- ipr_out %>% arrange(.data$AccNum, .data$StartLoc, .data$StopLoc)
     ipr_out_sub <- filter(
         ipr_out,
-        grepl(pattern = analysis, x = Analysis)
+        grepl(pattern = analysis, x = .data$Analysis)
     )
     # dynamic analysis labeller
     analyses <- ipr_out_sub %>%
-        select(Analysis) %>%
+        select(.data$Analysis) %>%
         distinct()
     analysis_labeler <- analyses %>%
-        pivot_wider(names_from = Analysis, values_from = Analysis)
+        pivot_wider(names_from = .data$Analysis, values_from = .data$Analysis)
 
     lookup_tbl_path <- "/data/research/jravilab/common_data/cln_lookup_tbl.tsv"
-    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = lookup_table_cols)
+    lookup_tbl <- read_tsv(lookup_tbl_path, col_names = T, col_types = .data$lookup_table_cols)
 
-    lookup_tbl <- lookup_tbl %>% select(-ShortName) # Already has ShortName -- Just needs SignDesc
-    # ipr_out_sub = ipr_out_sub %>% select(-ShortName)
+    lookup_tbl <- lookup_tbl %>% select(-.data$ShortName) # Already has ShortName -- Just needs SignDesc
+    # ipr_out_sub = ipr_out_sub %>% select(-.data$ShortName)
     # TODO: Fix lookup table and uncomment below
     # ipr_out_sub <- merge(ipr_out_sub, lookup_tbl, by.x = "DB.ID", by.y = "DB.ID")
 
     ## PLOTTING
     ## domains as separate arrows
     # For odering with tree
-    # ipr_out_sub$Name <- paste0(" ", ipr_out_sub$Name)
+    # ipr_out_sub$.data$Name <- paste0(" ", ipr_out_sub$.data$Name)
     if (group_by == "Analysis") {
         plot <- ggplot(ipr_out_sub,
             aes_string(
@@ -195,7 +195,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
             ), color = "white") +
             geom_gene_arrow(fill = NA, color = "grey") +
             # geom_blank(data = dummies) +
-            facet_wrap(~Analysis,
+            facet_wrap(~.data$Analysis,
                 strip.position = "top", ncol = 5,
                 labeller = as_labeller(analysis_labeler)
             ) +
@@ -206,7 +206,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
             theme(
                 legend.position = "bottom",
                 legend.box = "horizontal",
-                legend.key.size = unit(0.02, "npc"),
+                legend.key.size = .data$unit(0.02, "npc"),
                 legend.box.margin = margin(),
                 text = element_text(size = text_size)
             ) +
@@ -216,9 +216,9 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
         plot <- ggplot(
             ipr_out_sub,
             aes(
-                xmin = 1, xmax = SLength,
-                y = Analysis, # y = AccNum
-                label = ShortName
+                xmin = 1, xmax = .data$SLength,
+                y = .data$Analysis, # y = .data$AccNum
+                label = .data$ShortName
             )
         ) +
             geom_subgene_arrow(data = ipr_out_sub, aes_string(
@@ -236,7 +236,7 @@ ipr2viz <- function(infile_ipr = NULL, infile_full = NULL, accessions = c(),
             theme(
                 legend.position = "bottom",
                 legend.box = "horizontal",
-                legend.key.size = unit(0.02, "npc"),
+                legend.key.size = .data$unit(0.02, "npc"),
                 legend.box.margin = margin(),
                 text = element_text(size = text_size)
             ) +
diff --git a/R/lineage.R b/R/lineage.R
index 20acec04..a1c4b670 100644
--- a/R/lineage.R
+++ b/R/lineage.R
@@ -323,7 +323,7 @@ ipg2lin <- function(accessions, ipg_file,
     ipg_dt <- fread(ipg_file, sep = "\t", fill = T)
 
     accessions <- unique(accessions)
-    ipg_dt <- ipg_dt[Protein %in% accessions]
+    ipg_dt <- ipg_dt[.data$Protein %in% accessions]
 
     ipg_dt <- setnames(ipg_dt, "Assembly", "GCA_ID")
 
@@ -335,10 +335,10 @@ ipg2lin <- function(accessions, ipg_file,
     {
         # browser()
         acc <- accessions[i]
-        acc_inds <- which(mergedTax$Protein == acc)
+        acc_inds <- which(.data$mergedTax$.data$Protein == acc)
         if (length(acc_inds) != 0) {
             # refseq inds take precedence
-            refseq_inds <- acc_inds[which(mergedTax[acc_inds, ]$Source == "RefSeq")]
+            refseq_inds <- acc_inds[which(.data$mergedTax[acc_inds, ]$Source == "RefSeq")]
             if (length(refseq_inds) != 0) {
                 # Take the first first row of the refseq (smallest index)
                 refseq_rows[i] <- refseq_inds[1]
@@ -358,21 +358,21 @@ ipg2lin <- function(accessions, ipg_file,
     if (length(refseq_rows) != 0) {
         refseq_ipg_dt <- ipg_dt[refseq_rows, ]
         refseq_lins <- GCA2lin(refseq_ipg_dt,
-            assembly_path = refseq_assembly_path,
+            .data$assembly_path = refseq_assembly_path,
             lineagelookup_path
         )
     }
     if (length(genbank_rows) != 0) {
         genbank_ipg_dt <- ipg_dt[genbank_rows, ]
-        genbank_lins <- GCA2lin(gca_ipg_dt,
-            assembly_path = genbank_assembly_path,
+        genbank_lins <- GCA2lin(.data$gca_ipg_dt,
+            .data$assembly_path = genbank_assembly_path,
             lineagelookup_path
         )
     }
 
 
-    lins <- GCA2lin(prot_data = ipg_dt, assembly_path, lineagelookup_path)
-    lins <- lins[!is.na(Lineage)] %>% unique()
+    lins <- GCA2lin(prot_data = ipg_dt, .data$assembly_path, lineagelookup_path)
+    lins <- lins[!is.na(.data$Lineage)] %>% unique()
 
     return(lins)
 }
diff --git a/R/msa.R b/R/msa.R
index e56cc32c..bc24a9ad 100644
--- a/R/msa.R
+++ b/R/msa.R
@@ -210,7 +210,7 @@ generate_msa <- function(fa_file = "", outfile = "") {
     # source("scripts/c2r.R")
 
     ## align the sequences
-    al <- kalign(prot_aa) # !! won't work!
+    al <- .data$kalign(prot_aa) # !! won't work!
     al
 }
 
diff --git a/R/plotting.R b/R/plotting.R
index 7abd06d4..891901ee 100644
--- a/R/plotting.R
+++ b/R/plotting.R
@@ -574,7 +574,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
     # colname <- "SIG.TM.LADB"
 
     ## Create columns for domains/DAs and fill them with 1/0
-    for (i in query.DAdoms$domains)
+    for (i in .data$query.DAdoms$.data$domains)
     {
         j <- str_replace_all(string = i, pattern = "\\(", replacement = "\\\\(")
         j <- str_replace_all(string = j, pattern = "\\)", replacement = "\\\\)")
@@ -592,9 +592,9 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
     ggplot.data <- query_data %>%
         # filter(grepl(queryname, Query)) %>%
         select(
-            DomArch.norep, Lineage, GenContext.norep,
-            SIG.TM.LADB, GenContext, AccNum,
-            query.DAdoms$domains
+            .data$DomArch.norep, .data$Lineage, .data$GenContext.norep,
+            .data$SIG.TM.LADB, .data$GenContext, .data$AccNum,
+            .data$query.DAdoms$.data$domains
         ) %>% # words.gecutoff$words
         # mutate_all(list(~ if (is.numeric(.)) as.integer(.) else .)) %>%
         mutate(across(where(is.numeric), as.integer)) %>%
@@ -606,8 +606,8 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
 
     ## Gathering element/word columns
     ggplot.data.gather <- ggplot.data %>%
-        gather(key = domains, value = count, 7:ncol(ggplot.data)) # %>%
-    # select(DomArch.norep, Lineage, domains, count)
+        gather(key = .data$domains, value = count, 7:ncol(ggplot.data)) # %>%
+    # select(.data$DomArch.norep, .data$Lineage, .data$domains, count)
 
     # ## written on Sep 4
     # write_delim(ggplot.data.gather,
@@ -615,7 +615,7 @@ lineage.domain_repeats.plot <- function(query_data, colname) {
     # 						delim="\t", col_names=TRUE)
 
     ## Stacked column plot
-    ggplot(data = ggplot.data.gather, aes(x = Lineage, y = domains)) + # aes_string # plot <- (
+    ggplot(data = ggplot.data.gather, aes(x = .data$Lineage, y = domains)) + # aes_string # plot <- (
         # geom_col(position="fill") +
         geom_tile(
             data = subset(ggplot.data.gather, !is.na(count)),
@@ -868,16 +868,16 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li
                 xlab("Group") +
                 ylab("Number of proteins") +
                 theme_minimal() +
-                scale_fill_manual(values = cpcols, na.value = "#A9A9A9") +
+                scale_fill_manual(values = .data$cpcols, na.value = "#A9A9A9") +
                 theme(
                     legend.position = legend.position,
                     legend.background = element_rect(fill = "white", color = "white"),
                     legend.text = element_text(size = legend.text.size),
                     legend.title = element_text(size = legend.text.size + 2),
-                    legend.key.size = unit(legend.size, "cm"),
-                    # legend.key.height = unit(2, "cm"),
-                    # legend.key.width = unit(0.9, "cm"),
-                    legend.spacing = unit(0.4, "cm"),
+                    legend.key.size = .data$unit(legend.size, "cm"),
+                    # legend.key.height = .data$unit(2, "cm"),
+                    # legend.key.width = .data$unit(0.9, "cm"),
+                    legend.spacing = .data$unit(0.4, "cm"),
                     axis.text = element_text(size = label.size),
                     panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     panel.background = element_blank(),
@@ -894,10 +894,10 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li
                 xlab("Group") +
                 ylab("Number of proteins") +
                 theme_minimal() +
-                scale_fill_manual(values = cpcols, na.value = "#A9A9A9") +
+                scale_fill_manual(values = .data$cpcols, na.value = "#A9A9A9") +
                 theme(
                     legend.position = "none",
-                    legend.spacing = unit(0.4, "cm"),
+                    legend.spacing = .data$unit(0.4, "cm"),
                     axis.text = element_text(size = label.size),
                     panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                     panel.background = element_blank(),
@@ -918,7 +918,7 @@ stacked_lin_plot <- function(prot, column = "DomArch", cutoff, Lineage_col = "Li
                 legend.background = element_rect(fill = "white", color = "white"),
                 legend.text = element_text(size = legend.text.size),
                 legend.title = element_text(size = legend.text.size + 2),
-                legend.key.size = unit(legend.size, "cm"),
+                legend.key.size = .data$unit(legend.size, "cm"),
                 axis.text = element_text(size = label.size),
                 axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
                 panel.grid.major = element_blank(), panel.grid.minor = element_blank()
@@ -1145,24 +1145,24 @@ wordcloud2_element <- function(query_data = "prot",
         type <- "gc2da"
     }
 
-    words.tc <- query_data %>%
+    .data$words.tc <- query_data %>%
         elements2words(
             column = colname,
             conversion_type = type
         ) %>%
         words2wc()
 
-    names(words.tc) <- c("words", "freq")
+    names(.data$words.tc) <- c("words", "freq")
 
     # need a label column for actual frequencies, and frequencies will be the
     # normalized sizes
-    words.tc$label <- words.tc$freq
+    .data$words.tc$.data$label <- .data$words.tc$.data$freq
 
-    words.tc <- words.tc %>% mutate(freq = log10(freq))
+    .data$words.tc <- .data$words.tc %>% mutate(.data$freq = log10(.data$freq))
 
-    words.tc <- words.tc %>% select(words, freq, label)
+    .data$words.tc <- .data$words.tc %>% select(words, .data$freq, .data$label)
 
-    wordcloud3(words.tc, minSize = 0)
+    wordcloud3(.data$words.tc, minSize = 0)
 }
 
 
diff --git a/R/summarize.R b/R/summarize.R
index a9b13e43..b17ba485 100644
--- a/R/summarize.R
+++ b/R/summarize.R
@@ -371,11 +371,11 @@ summ.GC.byDALin <- function(x) {
 summ.GC.byLin <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
-        filter(!grepl("^-$", GenContext)) %>%
-        filter(!grepl("^-$", DomArch)) %>%
-        filter(!grepl("^-$", Lineage)) %>%
-        filter(!grepl("^NA$", DomArch)) %>%
-        group_by(GenContext, Lineage) %>% # DomArch.norep,
+        filter(!grepl("^-$", .data$GenContext)) %>%
+        filter(!grepl("^-$", .data$DomArch)) %>%
+        filter(!grepl("^-$", .data$Lineage)) %>%
+        filter(!grepl("^NA$", .data$DomArch)) %>%
+        group_by(.data$GenContext, .data$Lineage) %>% # .data$DomArch.norep,
         summarise(count = n()) %>% # , bin=as.numeric(as.logical(n()))
         arrange(desc(count))
 }
@@ -396,15 +396,15 @@ summ.GC.byLin <- function(x) {
 summ.GC <- function(x) {
     ## Note: it is better to reserve dots for S3 Objects. Consider replacing '.' with '_'
     x %>%
-        group_by(GenContext) %>%
+        group_by(.data$GenContext) %>%
         summarise(
-            totalcount = sum(count),
-            totalDA = n_distinct(DomArch),
-            totallin = n_distinct(Lineage)
-        ) %>% # totallin=n_distinct(Lineage),
-        arrange(desc(totalcount), desc(totalDA), desc(totallin)) %>%
-        filter(!grepl(" \\{n\\}", GenContext)) %>%
-        filter(!grepl("^-$", GenContext))
+            .data$totalcount = sum(count),
+            .data$totalDA = n_distinct(.data$DomArch),
+            .data$totallin = n_distinct(.data$Lineage)
+        ) %>% # .data$totallin=n_distinct(.data$Lineage),
+        arrange(desc(.data$totalcount), desc(.data$totalDA), desc(.data$totallin)) %>%
+        filter(!grepl(" \\{n\\}", .data$GenContext)) %>%
+        filter(!grepl("^-$", .data$GenContext))
 }
 
 
@@ -451,52 +451,52 @@ total_counts <- function(prot, column = "DomArch", lineage_col = "Lineage",
     prot <- summarize_bylin(prot, column, by = lineage_col, query = "all")
     col_count <- prot %>%
         group_by({{ column }}) %>%
-        summarise(totalcount = sum(count))
+        summarise(.data$totalcount = sum(count))
 
     total <- left_join(prot, col_count, by = as_string(column))
 
     sum_count <- sum(total$count)
     total <- total %>%
-        mutate("IndividualCountPercent" = totalcount / sum_count * 100) %>%
-        arrange(-totalcount, -count)
+        mutate("IndividualCountPercent" = .data$totalcount / sum_count * 100) %>%
+        arrange(-.data$totalcount, -count)
 
     cumm_percent <- total %>%
-        select({{ column }}, totalcount) %>%
+        select({{ column }}, .data$totalcount) %>%
         distinct() %>%
         mutate("CumulativePercent" = 0)
     total_counter <- 0
-    for (x in length(cumm_percent$totalcount):1) {
-        total_counter <- total_counter + cumm_percent$totalcount[x]
-        cumm_percent$CumulativePercent[x] <- total_counter / sum_count * 100
+    for (x in length(cumm_percent$.data$totalcount):1) {
+        total_counter <- total_counter + cumm_percent$.data$totalcount[x]
+        cumm_percent$.data$CumulativePercent[x] <- total_counter / sum_count * 100
     }
 
-    cumm_percent <- cumm_percent %>% select(CumulativePercent, {{ column }})
+    cumm_percent <- cumm_percent %>% select(.data$CumulativePercent, {{ column }})
 
     total <- total %>% left_join(cumm_percent, by = as_string(column))
 
     # Round the percentage columns
-    total$CumulativePercent <- total$CumulativePercent %>% round(digits = digits)
+    total$.data$CumulativePercent <- total$.data$CumulativePercent %>% round(digits = digits)
     total$IndividualCountPercent <- total$IndividualCountPercent %>% round(digits = digits)
 
     if (RowsCutoff) {
         # If total counts is being used for plotting based on number of rows,
         # don't include other observations that fall below the cummulative percent cutoff
         # , but that have the same 'totalcount' number as the cutoff observation
-        total <- total %>% filter(CumulativePercent >= 100 - cutoff - .0001)
+        total <- total %>% filter(.data$CumulativePercent >= 100 - cutoff - .0001)
         return(total)
     }
 
     # Include observations that fall below the cummulative percent cutoff,
     # but that have the same 'totalcount' as the cutoff observation
-    t <- total %>% filter(CumulativePercent >= 100 - cutoff)
+    t <- total %>% filter(.data$CumulativePercent >= 100 - cutoff)
     if (length(t) == 0) {
         cutoff_count <- 0
     } else {
-        cutoff_count <- t$totalcount[nrow(t)]
+        cutoff_count <- t$.data$totalcount[nrow(t)]
     }
 
     total <- total %>%
-        filter(totalcount >= cutoff_count) %>%
+        filter(.data$totalcount >= cutoff_count) %>%
         ungroup()
 
     return(total)
diff --git a/R/tree.R b/R/tree.R
index 01e9ead5..932927df 100755
--- a/R/tree.R
+++ b/R/tree.R
@@ -149,7 +149,7 @@ generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa",
     prot_dist <- dist.alignment(prot_aln, matrix = "similarity")
     prot_nj <- NJ(prot_dist)
     prot_nj_tree <- plot(prot_nj, main = "Neighbor Joining")
-    write.tree(phy = prot_nj_tree, file = tre_path)
+    write.tree(phy = prot_nj_tree, file = .data$tre_path)
 
     ###########################
     ## Approach 2
@@ -177,7 +177,7 @@ generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa",
     plot(prot_NJ, main = "Neighbor Joining")
 
     # parsimony searches
-    prot_optim <- optim.parsimony(prot_NJ, prot)
+    prot_optim <- optim.parsimony(prot_NJ, .data$prot)
     prot_pratchet <- pratchet(prot10) # returning error
     plot(prot_optim)
     plot(prot_pratchet)
@@ -192,7 +192,7 @@ generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa",
         bs = 100, optNni = TRUE, multicore = TRUE,
         control = pml.control(trace = 0)
     )
-    plotBS(midpoint(fitJC$tree), bs, p = 50, type = "p")
+    plotBS(.data$midpoint(fitJC$tree), bs, p = 50, type = "p")
 
     # subsetted alignment bs example
     prot10_dm <- dist.ml(prot10)
@@ -205,7 +205,7 @@ generate_fa2tre <- function(fa_file = "data/alns/pspa_snf7.fa",
         bs = 100, optNni = TRUE, multicore = TRUE,
         control = pml.control(trace = 0)
     )
-    bs2 <- plotBS(midpoint(fitJC2$tree), bs, p = 50, type = "p")
+    bs2 <- plotBS(.data$midpoint(fitJC2$tree), bs, p = 50, type = "p")
 
     ## Exporting Trees
     write.tree(bs2, file = "bootstrap_example.tre")