Skip to content

Commit a96c945

Browse files
committed
v0.7.0
1 parent eb0b3c3 commit a96c945

30 files changed

+809
-105
lines changed

DESCRIPTION

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ggcoverage
22
Type: Package
33
Title: Visualize Genome Coverage with Various Annotations
4-
Version: 0.6.0
4+
Version: 0.7.0
55
Authors@R:
66
person(given = "Yabing",
77
family = "Song",
@@ -10,7 +10,7 @@ Authors@R:
1010
Maintainer: Yabing Song <[email protected]>
1111
Description: The goal of 'ggcoverage' is to simplify the process of visualizing genome coverage. It contains functions to
1212
load data from 'BAM', 'BigWig' or 'BedGraph' files, create genome coverage plot, add various annotations to
13-
the coverage plot, including GC annotation, gene annotation, transcript annotation, ideogram annotation and peak annotation.
13+
the coverage plot, including base and amino acid annotaion, GC annotation, gene annotation, transcript annotation, ideogram annotation and peak annotation.
1414
License: MIT + file LICENSE
1515
Encoding: UTF-8
1616
RoxygenNote: 7.1.1
@@ -37,7 +37,10 @@ Imports:
3737
GenomeInfoDb,
3838
S4Vectors,
3939
Biostrings,
40-
BSgenome
40+
BSgenome,
41+
GenomicAlignments,
42+
reshape2,
43+
seqinr
4144
Suggests:
4245
rmarkdown,
4346
knitr,

NAMESPACE

+8
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(ggplot_add,base)
34
S3method(ggplot_add,gc)
45
S3method(ggplot_add,gene)
56
S3method(ggplot_add,ideogram)
67
S3method(ggplot_add,peak)
78
S3method(ggplot_add,transcript)
89
export(FormatTrack)
910
export(LoadTrackFile)
11+
export(geom_base)
1012
export(geom_coverage)
1113
export(geom_gc)
1214
export(geom_gene)
@@ -22,6 +24,7 @@ importFrom(Biostrings,readDNAStringSet)
2224
importFrom(GenomeInfoDb,"seqlengths<-")
2325
importFrom(GenomeInfoDb,seqlengths)
2426
importFrom(GenomeInfoDb,seqnames)
27+
importFrom(GenomicAlignments,alphabetFrequencyFromBam)
2528
importFrom(GenomicRanges,GRanges)
2629
importFrom(GenomicRanges,makeGRangesFromDataFrame)
2730
importFrom(GenomicRanges,setdiff)
@@ -51,12 +54,15 @@ importFrom(ggplot2,element_blank)
5154
importFrom(ggplot2,element_rect)
5255
importFrom(ggplot2,element_text)
5356
importFrom(ggplot2,expansion)
57+
importFrom(ggplot2,geom_bar)
5458
importFrom(ggplot2,geom_hline)
59+
importFrom(ggplot2,geom_label)
5560
importFrom(ggplot2,geom_line)
5661
importFrom(ggplot2,geom_polygon)
5762
importFrom(ggplot2,geom_rect)
5863
importFrom(ggplot2,geom_segment)
5964
importFrom(ggplot2,geom_text)
65+
importFrom(ggplot2,geom_tile)
6066
importFrom(ggplot2,ggplot)
6167
importFrom(ggplot2,ggplot_add)
6268
importFrom(ggplot2,labs)
@@ -74,10 +80,12 @@ importFrom(grDevices,colorRampPalette)
7480
importFrom(magrittr,"%>%")
7581
importFrom(methods,extends)
7682
importFrom(patchwork,wrap_plots)
83+
importFrom(reshape2,melt)
7784
importFrom(rlang,.data)
7885
importFrom(rlang,as_label)
7986
importFrom(rtracklayer,import)
8087
importFrom(scales,comma)
88+
importFrom(seqinr,translate)
8189
importFrom(stats,as.formula)
8290
importFrom(utils,menu)
8391
importFrom(utils,read.csv)

NEWS.md

+15-7
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
1-
# ggcoverage 0.6.0
1+
# ggcoverage 0.7.0
22
## New features
3-
* Add `geom_gc` to annotate genome coverage with GC content.
3+
* Added `geom_base` to annotate genome coverage with base and amino acids.
4+
5+
## Minor changes
6+
* `LoadTrackFile` supported visualization at single-nucleotide level.
7+
* Added `rect.color` for `geom_coverage` to control rect border color.
8+
* Fixed bug in track file format identification.
49

510
-------------
611

7-
# ggcoverage 0.5.0
12+
# ggcoverage 0.6.0
813
## New features
9-
* First CRAN release.
14+
* Added `geom_gc` to annotate genome coverage with GC content.
15+
16+
-------------
1017

18+
# ggcoverage 0.5.0
1119
## Minor changes
12-
* Fix bug in `getIdeogram`.
20+
* Fixed bug in `getIdeogram`.
1321

1422
-------------
1523

1624
# ggcoverage 0.4.0
1725
## New features
1826
* Added `geom_peak` to enhance its usage in ChIP-seq or ATAC-seq data.
19-
* Change Y axis theme.
27+
* Changed Y axis theme.
2028

2129
-------------
2230

@@ -25,7 +33,7 @@
2533
* Added `geom_transcript`, `geom_ideogram`.
2634

2735
## Minor changes
28-
* Fix bug in `GetGeneGroup`.
36+
* Fixed bug in `GetGeneGroup`.
2937

3038
-------------
3139

R/LoadTrack.R

+82-41
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,18 @@
1010
#' @param bamcoverage.path The path to \code{bamCoverage}, used when \code{format} is bam. Default: NULL (auto-detect).
1111
#' @param norm.method Methods to normalize the number of reads per bin, chosen from "RPKM", "CPM", "BPM", "RPGC", "None".
1212
#' Default: RPKM.
13+
#' @param single.nuc Logical value, whether to visualize at single nucleotide level. Default: FALSE.
14+
#' @param single.nuc.region Region for \code{single.nuc}. Default: NULL
1315
#' @param bin.size Size of the bins, in bases. Default: 50.
1416
#' @param bc.extra.para Extra parameters for \code{bamCoverage}, eg: "--effectiveGenomeSize 2700000000 --ignoreForNormalization chrX"
1517
#'
1618
#' @return A dataframe.
1719
#' @importFrom rtracklayer import
1820
#' @importFrom Rsamtools indexBam
1921
#' @importFrom utils read.csv
22+
#' @importFrom GenomicAlignments alphabetFrequencyFromBam
23+
#' @importFrom magrittr %>%
24+
#' @importFrom dplyr select
2025
#' @export
2126
#'
2227
#' @examples
@@ -35,37 +40,32 @@
3540
#' )
3641
LoadTrackFile <- function(track.file, track.folder = NULL, format = c("bam", "wig", "bw", "bedgraph"), meta.info = NULL, meta.file = "",
3742
bamcoverage.path = NULL, norm.method = c("RPKM", "CPM", "BPM", "RPGC", "None"),
38-
bin.size = 10, bc.extra.para = NULL) {
43+
single.nuc = FALSE, single.nuc.region = NULL, bin.size = 10, bc.extra.para = NULL) {
3944
# check parameters
4045
format <- match.arg(arg = format)
4146
norm.method <- match.arg(arg = norm.method)
4247

4348
# prepare track files
4449
if (!is.null(track.folder)) {
45-
track.file <- list.files(path = track.folder, full.names = TRUE, pattern = format)
50+
track.file <- list.files(path = track.folder, full.names = TRUE, pattern = paste0(format, "$"))
4651
}
4752

4853
# get track dataframe
4954
if (format %in% c("wig", "bw", "bedgraph")) {
50-
# read track file
51-
track.list <- lapply(track.file, function(x) {
52-
# get basename
53-
track.file.base <- basename(x)
54-
# import wig, bigwig and bedgraph file
55-
single.track.df <- as.data.frame(rtracklayer::import(x))
56-
single.track.df$TrackFile <- track.file.base
57-
return(single.track.df)
58-
})
59-
} else if (format == "bam") {
60-
# require deeptools
61-
if (is.null(bamcoverage.path)) {
62-
bamcoverage.path <- Sys.which("bamCoverage")
63-
if (bamcoverage.path == "") {
64-
stop("Can not find bamCoverage automatically, please specify the path!")
65-
}
55+
if (single.nuc) {
56+
stop("To visualize single nucleotide, please use bam file!")
6657
} else {
67-
bamcoverage.path <- bamcoverage.path
58+
# read track file
59+
track.list <- lapply(track.file, function(x) {
60+
# get basename
61+
track.file.base <- basename(x)
62+
# import wig, bigwig and bedgraph file
63+
single.track.df <- as.data.frame(rtracklayer::import(x))
64+
single.track.df$TrackFile <- track.file.base
65+
return(single.track.df)
66+
})
6867
}
68+
} else if (format == "bam") {
6969
# create index
7070
for (bam in track.file) {
7171
bam.index.file <- paste(bam, "bai", sep = ".")
@@ -74,29 +74,70 @@ LoadTrackFile <- function(track.file, track.folder = NULL, format = c("bam", "wi
7474
Rsamtools::indexBam(bam)
7575
}
7676
}
77-
# read track file
78-
track.list <- lapply(track.file, function(x) {
79-
# get basename
80-
track.file.base <- basename(x)
81-
# bigwig file
82-
out.bw.file <- tempfile(fileext = c(".bw"))
83-
# prepare bamCoverage cmd
84-
bamcoverage.cmd <- paste(
85-
bamcoverage.path, "-b", x, "-o", out.bw.file,
86-
"--binSize", bin.size, "--normalizeUsing", norm.method, bc.extra.para
87-
)
88-
# run command
89-
message(paste("Calling bamCoverage: ", bamcoverage.cmd))
90-
bamcoverage.status <- system(bamcoverage.cmd, intern = TRUE)
91-
bamcoverage.status.code <- attr(bamcoverage.status, "status")
92-
if (!is.null(bamcoverage.status.code)) {
93-
stop("Run bamCoverage error!")
77+
if (single.nuc) {
78+
if (!is.null(single.nuc.region)) {
79+
single.nuc.region <- gsub(pattern = ",", replacement = "", x = single.nuc.region)
80+
single.nuc.region.chr <- unlist(strsplit(x = single.nuc.region, split = ":"))[1]
81+
single.nuc.region.se <- unlist(strsplit(x = single.nuc.region, split = ":"))[2]
82+
single.nuc.region.start <- unlist(strsplit(x = single.nuc.region.se, split = "-"))[1]
83+
single.nuc.region.end <- unlist(strsplit(x = single.nuc.region.se, split = "-"))[2]
84+
track.list <- lapply(track.file, function(x) {
85+
single.track.df <- GenomicAlignments::alphabetFrequencyFromBam(x, param = single.nuc.region, baseOnly = TRUE) %>% as.data.frame()
86+
single.track.df <- single.track.df[, c("A", "G", "C", "T")]
87+
single.track.df$score <- rowSums(single.track.df)
88+
single.track.df$seqnames <- single.nuc.region.chr
89+
single.track.df$start <- single.nuc.region.start:single.nuc.region.end
90+
single.track.df$end <- single.track.df$start + 1
91+
single.track.df$width <- 1
92+
single.track.df$strand <- "*"
93+
single.track.df <- single.track.df %>% dplyr::select(-c("A", "G", "C", "T"))
94+
# get basename
95+
track.file.base <- basename(x)
96+
single.track.df$TrackFile <- track.file.base
97+
single.track.df <- single.track.df[c(
98+
"seqnames", "start", "end", "width",
99+
"strand", "score", "TrackFile"
100+
)]
101+
return(single.track.df)
102+
})
103+
} else {
104+
stop("Please provide region for visualizing single nucleotide!")
94105
}
95-
# import wig, bigwig and bedgraph file
96-
single.track.df <- as.data.frame(rtracklayer::import(out.bw.file))
97-
single.track.df$TrackFile <- track.file.base
98-
return(single.track.df)
99-
})
106+
} else {
107+
# require deeptools
108+
if (is.null(bamcoverage.path)) {
109+
bamcoverage.path <- Sys.which("bamCoverage")
110+
if (bamcoverage.path == "") {
111+
stop("Can not find bamCoverage automatically, please specify the path!")
112+
}
113+
} else {
114+
bamcoverage.path <- bamcoverage.path
115+
}
116+
117+
# read track file
118+
track.list <- lapply(track.file, function(x) {
119+
# get basename
120+
track.file.base <- basename(x)
121+
# bigwig file
122+
out.bw.file <- tempfile(fileext = c(".bw"))
123+
# prepare bamCoverage cmd
124+
bamcoverage.cmd <- paste(
125+
bamcoverage.path, "-b", x, "-o", out.bw.file,
126+
"--binSize", bin.size, "--normalizeUsing", norm.method, bc.extra.para
127+
)
128+
# run command
129+
message(paste("Calling bamCoverage: ", bamcoverage.cmd))
130+
bamcoverage.status <- system(bamcoverage.cmd, intern = TRUE)
131+
bamcoverage.status.code <- attr(bamcoverage.status, "status")
132+
if (!is.null(bamcoverage.status.code)) {
133+
stop("Run bamCoverage error!")
134+
}
135+
# import wig, bigwig and bedgraph file
136+
single.track.df <- as.data.frame(rtracklayer::import(out.bw.file))
137+
single.track.df$TrackFile <- track.file.base
138+
return(single.track.df)
139+
})
140+
}
100141
}
101142
# get track dataframe
102143
track.df <- do.call(rbind, track.list)

0 commit comments

Comments
 (0)