@@ -190,8 +190,9 @@ removeEmptyRows <- function(prot, by_column = "DomArch") {
190
190
# ' @return A data frame with condensed repeated domains in the specified column.
191
191
# ' @export
192
192
# '
193
- # ' @importFrom dplyr pull
193
+ # ' @importFrom dplyr pull mutate
194
194
# ' @importFrom stringr str_replace_all
195
+ # ' @importFrom rlang .data :=
195
196
# '
196
197
# ' @examples
197
198
# ' \dontrun{
@@ -206,36 +207,23 @@ condenseRepeatedDomains <- function(prot, by_column = "DomArch", excluded_prots
206
207
regex_identify_repeats <- paste0(" (?i)" , regex_exclude , " \\ b([a-z0-9_-]+)\\ b(?:\\ s+\\ 1\\ b)+" )
207
208
208
209
# !! FUNS is soft-deprecated. FIX!!!
209
- prot [, by_column ] <- prot %> %
210
- pull(by_column ) %> %
211
- str_replace_all(. , pattern = " \\ ." , replacement = " _d_" ) %> %
212
- # str_replace_all(., pattern = " ", replacement = "_s_") %>%
213
- str_replace_all(. , pattern = " " , replacement = " _" ) %> %
214
- str_replace_all(. ,
215
- pattern = " \\ +" ,
216
- replacement = " "
217
- ) %> % # Use a different placeholder other than space
218
- str_replace_all(. ,
219
- pattern = " -" ,
220
- replacement = " __"
221
- ) %> %
222
- str_replace_all(. ,
223
- pattern = regex_identify_repeats ,
224
- replacement = " \\ 1(s)"
225
- ) %> %
226
- str_replace_all(. ,
227
- pattern = " __" ,
228
- replacement = " -"
229
- ) %> %
230
- str_replace_all(. ,
231
- pattern = " " ,
232
- replacement = " +"
233
- ) %> %
234
- # str_replace_all(., pattern = "_s_", replacement = " ") %>%
235
- str_replace_all(. , pattern = " _d_" , replacement = " ." )
236
-
210
# Ordered substitutions applied to the `by_column` values. str_replace_all()
# treats a named vector as pattern (name) -> replacement (value) and applies
# the pairs one after another, so the order below matters:
#   1. "."  -> "_d_"  protect literal dots
#   2. " "  -> "_"    join words that belong to one domain name
#   3. "+"  -> " "    turn the domain separator into whitespace, which is
#                     what the `\\s+` in regex_identify_repeats matches on
#   4. "-"  -> "__"   protect hyphens
#   5. collapse runs of a repeated domain into "<domain>(s)"
#   6-8. undo steps 4, 3 and 1
# The names are assigned separately because writing
# `regex_identify_repeats = "\\1(s)"` inside c() would use the literal text
# "regex_identify_repeats" as the pattern instead of the regex it holds.
substitutions <- c("_d_", "_", " ", "__", "\\1(s)", "-", "+", ".")
names(substitutions) <- c(
    "\\.", " ", "\\+", "-", regex_identify_repeats, "__", " ", "_d_"
)

prot <- prot %>%
    dplyr::mutate(!!by_column := stringr::str_replace_all(
        .data[[by_column]],
        substitutions
    ))
237
224
238
225
return (prot )
226
+
239
227
}
240
228
241
229
@@ -701,8 +689,8 @@ cleanGeneDescription <- function(prot, column) {
701
689
# ' @param column The name of the column from which the longest entry among
702
690
# ' duplicates will be selected.
703
691
# '
704
- # ' @importFrom dplyr arrange filter group_by pull n select summarize
705
- # ' @importFrom rlang sym
692
+ # ' @importFrom dplyr arrange filter group_by pull n select summarize mutate
693
+ # ' @importFrom rlang sym .data
706
694
# '
707
695
# ' @return A data frame containing only the longest entries among duplicates
708
696
# ' based on the specified column.
@@ -713,37 +701,37 @@ cleanGeneDescription <- function(prot, column) {
713
701
# ' selectLongestDuplicate()
714
702
# ' }
715
703
selectLongestDuplicate <- function(prot, column) {
    # For every accession number (`AccNum`) that occurs more than once, keep
    # only the row whose `column` entry has the most characters (the first
    # such row on ties) and drop the other rows of that accession. Rows whose
    # accession is unique or missing are returned untouched, and the original
    # row order is preserved.
    #
    # prot:   data frame with an `AccNum` column.
    # column: name (string) of the column whose entry length decides which
    #         duplicate is kept.
    # Returns `prot` without the shorter duplicate rows.

    # Fail early with a clear message instead of an error from deep inside
    # the subsetting code.
    missing_cols <- setdiff(c("AccNum", column), names(prot))
    if (length(missing_cols) > 0) {
        stop(
            "Column(s) not found in `prot`: ",
            paste(missing_cols, collapse = ", "),
            call. = FALSE
        )
    }

    row_id <- seq_len(nrow(prot))

    # Length of every entry. Missing entries rank below any real string
    # (including ""), so a group consisting only of NAs still collapses to
    # its first row instead of keeping all of its duplicates.
    entry_len <- nchar(as.character(prot[[column]]))
    entry_len[is.na(entry_len)] <- -1L

    # Row positions per accession; split() drops rows with a missing AccNum,
    # so those rows are never candidates for removal.
    rows_by_acc <- split(row_id, prot[["AccNum"]])

    # Within each duplicated accession, every row except the first longest
    # one is marked for removal.
    remove_rows <- unlist(
        lapply(rows_by_acc, function(ids) {
            if (length(ids) < 2L) {
                return(integer())
            }
            ids[-which.max(entry_len[ids])]
        }),
        use.names = FALSE
    )

    # Logical subsetting is safe when nothing has to be removed (a negative
    # index built from an empty vector would select zero rows).
    unique_dups <- prot[!(row_id %in% remove_rows), , drop = FALSE]

    # Automatic row names are renumbered so the result is indexed 1..n;
    # explicit (character) row names are left as they are.
    if (.row_names_info(prot) < 0L) {
        rownames(unique_dups) <- NULL
    }

    return(unique_dups)
}
0 commit comments