-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathTEQC.R
74 lines (62 loc) · 2.67 KB
/
TEQC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env Rscript
# read bam file to create R workspace data file
suppressPackageStartupMessages(library("optparse"));
suppressPackageStartupMessages(library("TEQC"));
optList <- list(
make_option("--ref", default = "hg19", help ="Reference genome [default %default]"),
make_option("--offset", default = 0, help ="target offset [default %default]"),
make_option("--outFile", default = NULL, type = "character", action = "store", help ="Output file (required)"))
parser <- OptionParser(usage = "%prog [options] bamFile targetsFile", option_list = optList);
arguments <- parse_args(parser, positional_arguments = T);
opt <- arguments$options;
if (length(arguments$args) != 2) {
cat("Need reads file and targets file\n\n");
print_help(parser);
stop();
} else if (is.null(opt$outFile)) {
cat("Need output file\n\n");
print_help(parser);
stop();
} else {
files <- arguments$args;
}
print("reading bam file...")
reads <- get.reads(files[1], filetype = "bam");
x <- sapply(reads, function(x) nrow(x) == 0)
if (any(x)) {
reads <- reads[-which(x)];
}
print("collapsing reads to pairs...")
readpairs <- reads2pairs(reads)
print("reading targets file...")
targets <- get.targets(files[2]);
baits <- get.baits(files[2]);
print("calculating fraction on targets...")
ft <- fraction.target(targets, genome = opt$ref)
fr <- fraction.reads.target(readpairs, targets, Offset = opt$offset)
enrichment <- fr/ft
print("calculating coverage...")
Coverage <- coverage.target(reads, targets, Offset = opt$offset)
avgcov <- data.frame(round(Coverage$avgTargetCoverage, 2),
round(Coverage$targetCoverageSD, 2), matrix(Coverage$targetCoverageQuantiles,
ncol = 5))
names(avgcov) <- c("avgTargetCoverage", "targetCoverageSD",
paste(names(Coverage$targetCoverageQuantiles), "quantile"))
print("counting reads per target...")
targetcov0 <- Coverage$targetCoverages
targetcov <- readsPerTarget(reads, targetcov0, Offset = opt$offset)
print("counting reads per bait...")
covercounts.baits <- RleList()
baitcov <- NULL
for (chr in names(baits)) {
cov.chr <- Coverage$coverageAll[[chr]]
ir.chr <- ranges(baits)[[chr]]
tmp <- lapply(ir.chr, function(x) seqselect(cov.chr, x))
avgcov <- sapply(tmp, mean)
baitcov <- c(baitcov, avgcov)
cov.chr <- seqselect(cov.chr, reduce(ir.chr))
covercounts.baits <- c(covercounts.baits, RleList(cov.chr))
}
avgcov <- mean(as.integer(unlist(covercounts.baits)))
baitcov.norm <- baitcov/avgcov
save(targetcov, baitcov, baitcov.norm, avgcov, Coverage, fr, ft, enrichment, targets, baits, file = opt$outFile)