-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
2,087 additions
and
935 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/bin/bash | ||
#Author: Dr Charles Foster http://github.com/charlesfoster | ||
|
||
i_flag='' | ||
g_flag='' | ||
h_flag='' | ||
n_flag='' | ||
o_flag='' | ||
|
||
print_usage() { | ||
printf "Usage: bash add_artificial_genotype.sh -i in.vcf.gz [-g genotype] [-n sample_name] -o out.vcf.gz\n" | ||
printf "Genotype defaults to 1 if not specified\n" | ||
printf "Sample name gussed from infile name if not specified\n" | ||
} | ||
|
||
if [[ $# -eq 0 ]] ; then | ||
print_usage | ||
exit 1 | ||
fi | ||
|
||
while getopts 'i:g:n:ho:' flag; do | ||
case "${flag}" in | ||
i) IN="${OPTARG}" ;; | ||
n) NAME="${OPTARG}" ;; | ||
g) GENOTYPE="${OPTARG}" ;; | ||
h) print_usage | ||
exit 1 ;; | ||
o) OUT="${OPTARG}" ;; | ||
*) print_usage | ||
exit 1 ;; | ||
esac | ||
done | ||
|
||
if [ ! -f ${IN} ]; then | ||
printf "\nError: input file not found\n" | ||
print_usage | ||
exit 1 | ||
fi | ||
|
||
if [ -z "${GENOTYPE}" ] | ||
then | ||
printf "\nNo genotype specified: setting to 1" | ||
GENOTYPE=1 | ||
fi | ||
|
||
if [ -z "${NAME}" ] | ||
then | ||
NAME=$(basename ${IN} | cut -f1 -d ".") | ||
printf "\nNo name specified: guessed it to be ${NAME}" | ||
fi | ||
|
||
if [ -z "${OUT}" ] | ||
then | ||
OUT=$(echo ${IN} | sed "s/.vcf.gz/_withGT.vcf.gz/") | ||
printf "\nNo outfile specified: setting to ${OUT}\n" | ||
fi | ||
|
||
gunzip -kc ${IN} | \ | ||
sed -e '6i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' \ | ||
-e "s|FILTER\tINFO|FILTER\tINFO\tFORMAT\t${NAME}|g" | \ | ||
awk -F'\t' -v genotype=${GENOTYPE} -v OFS="\t" '/^[^#]/{ $9 = "GT"; $10 = genotype }1' | \ | ||
bgzip -c > ${OUT} | ||
tabix -p vcf ${OUT} | ||
printf "VCF with artificial genotype written to ${OUT}\n" | ||
exit 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
####### | ||
# | ||
#R Script for ASCAT | ||
# | ||
###### | ||
library(ASCAT) | ||
library(RColorBrewer) | ||
|
||
args = commandArgs(trailingOnly=TRUE) | ||
tumor_bam=args[1] | ||
normal_bam=args[2] | ||
|
||
genome="hg38" | ||
|
||
ascat.prepareHTS( | ||
tumourseqfile = tumor_bam, | ||
normalseqfile = normal_bam, | ||
tumourname = tumor_name, | ||
normalname = normal_name, | ||
allelecounter_exe = "/PATH/TO/allelecounter", | ||
alleles.prefix = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT/G1000_alleles", | ||
loci.prefix = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/ASCAT_G1000_loci", | ||
nthreads = 10 | ||
gender = "XX", | ||
genomeVersion = genome, | ||
nthreads = 8, | ||
tumourLogR_file = "Tumor_LogR.txt", | ||
tumourBAF_file = "Tumor_BAF.txt", | ||
normalLogR_file = "Germline_LogR.txt", | ||
normalBAF_file = "Germline_BAF.txt") | ||
|
||
ascat.bc = ascat.loadData(Tumor_LogR_file = "Tumor_LogR.txt", Tumor_BAF_file = "Tumor_BAF.txt", | ||
Germline_LogR_file = "Germline_LogR.txt", Germline_BAF_file = "Germline_BAF.txt", gender = 'XX', genomeVersion = "hg19") | ||
|
||
ascat.plotRawData(ascat.bc, img.prefix = "Before_correction_") | ||
ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = "GC_file.txt", replictimingfile = "RT_file.txt") | ||
ascat.plotRawData(ascat.bc, img.prefix = "After_correction_") | ||
ascat.bc = ascat.aspcf(ascat.bc) | ||
ascat.plotSegmentedData(ascat.bc) | ||
ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = T) | ||
QC = ascat.metrics(ascat.bc,ascat.output) | ||
save(ascat.bc, ascat.output, QC, file = 'ASCAT_objects.Rdata') | ||
|
||
|
||
##### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
library(rtracklayer) | ||
|
||
args <- commandArgs() | ||
|
||
dataTable <-read.table(args[5], header=TRUE); | ||
ratio<-data.frame(dataTable) | ||
|
||
dataTable <- read.table(args[4], header=FALSE) | ||
cnvs<- data.frame(dataTable) | ||
|
||
ratio$Ratio[which(ratio$Ratio==-1)]=NA | ||
|
||
cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3])) | ||
ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio) | ||
|
||
overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed) | ||
normals <- setdiff(ratio.bed,cnvs.bed) | ||
normals <- subsetByOverlaps(ratio.bed,normals) | ||
|
||
#mu <- mean(score(normals),na.rm=TRUE) | ||
#sigma<- sd(score(normals),na.rm=TRUE) | ||
|
||
#hist(score(normals),n=500,xlim=c(0,2)) | ||
#hist(log(score(normals)),n=500,xlim=c(-1,1)) | ||
|
||
#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000]) | ||
#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10))) | ||
#qqline(score(normals)[which(!is.na(score(normals)))], col = 2) | ||
|
||
#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000]) | ||
#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10))) | ||
#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2) | ||
|
||
numberOfCol=length(cnvs) | ||
wscore=c() | ||
kscore=c() | ||
for (i in c(1:length(cnvs[,1]))) { | ||
values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i])) | ||
resultw <- class(try(wilcox.test(values,score(normals)), silent = TRUE)) | ||
ifelse(resultw == "try-error", wscore <- c(wscore, "NA"), wscore <- c(wscore, wilcox.test(values,score(normals))$p.value)) | ||
resultks <- class(try(ks.test(values,score(normals)), silent = TRUE)) | ||
ifelse(resultks == "try-error",kscore <- c(kscore, "NA"),kscore <- c(kscore, ks.test(values,score(normals))$p.value)) | ||
} | ||
cnvs = cbind(cnvs, as.numeric(wscore), as.numeric(kscore)) | ||
|
||
if (numberOfCol==7) { | ||
names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") | ||
} | ||
if (numberOfCol==9) { | ||
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") | ||
} | ||
if (numberOfCol==11) { | ||
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue") | ||
} | ||
write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/perl -w | ||
use strict; | ||
use List::Util 'shuffle'; | ||
|
||
#INPUT | ||
|
||
#my $mergedmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix... | ||
#open C, ">$mergedmaf"; | ||
|
||
my $outfile = $ARGV[0] . '/freec_genome_config.txt'; | ||
my $chrLenFile = $ARGV[1]; | ||
my $chrFiles = $ARGV[2]; | ||
my $tumormateFile = $ARGV[3]; | ||
my $controlmateFile = $ARGV[4]; | ||
my $makePileup = $ARGV[5]; | ||
my $fastaFile = $ARGV[6]; | ||
my $SNPfile = $ARGV[7]; | ||
|
||
open C, ">$outfile"; | ||
|
||
print C '[general]' . "\n\n"; | ||
|
||
print C "BedGraphOutput = TRUE\ndegree = 3\nforceGCcontentNormalization = 0\nminCNAlength = 1\nreadCountThreshold = 10\n"; | ||
print C "chrLenFile = $chrLenFile\n"; | ||
print C "ploidy = 2,3,4\nbreakPointThreshold = 0.8\nwindow = 50000\n"; | ||
print C "chrFiles = $chrFiles\n"; | ||
print C "minimalSubclonePresence = 20\nmaxThreads = 8\n"; | ||
print C "outputDir = $ARGV[0]\n\n"; | ||
|
||
print C '[sample]' . "\n\n"; | ||
|
||
print C "mateFile = $tumormateFile\n"; | ||
print C "inputFormat = BAM\nmateOrientation = FR\n\n"; | ||
|
||
print C '[BAF]' . "\n\n"; | ||
|
||
print C "mateFile = $controlmateFile\n"; | ||
print C "inputFormat = BAM\nmateOrientation = FR\n\n"; | ||
|
||
print C "makePileup = $makePileup\n"; | ||
print C "fastaFile = $fastaFile\n"; | ||
print C "minimalCoveragePerPosition = 20\nminimalQualityPerPosition = 20\n"; | ||
print C "SNPfile = $SNPfile"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
INPUT_FILE="$1" | ||
TUMOR_NAME="$2" | ||
export TUMOR_NAME | ||
|
||
zcat "${INPUT_FILE}" \ | ||
| awk '($4=="A" || $4 == "C" || $4=="T" || $4=="G" || /^\#/)' \ | ||
| perl -ne 'print if /^#|^(chr)*[\dX]+\s.+/' \ | ||
| perl -ne 's/AF=/VAF=/g;s/ID=AF/ID=VAF/;print;' \ | ||
| perl -ne ' | ||
# Add 2 new rows to the description and 2 new columns in the header | ||
if(/^#/){ | ||
if(/##INFO=<ID=DP,.+\n/){ | ||
$DP=$&; | ||
}; | ||
$DP =~ s/INFO/FORMAT/; | ||
print $DP if /min_dp/; | ||
if(/##INFO=<ID=DP4,.+\n/){ | ||
$DP4=$&; | ||
}; | ||
$DP4 =~ s/INFO/FORMAT/; | ||
print $DP4 if /min_dp/; | ||
if(/^#CHROM.+/){ | ||
s/$&/$&\tFORMAT\t$ENV{'TUMOR_NAME'}/; | ||
}; | ||
print; | ||
} | ||
# For each feature, add FORMAT column with descriptors and populate TUMOUR column with depth, reads counts | ||
else{ | ||
my @data = map { chomp; [ split /=|;/ ] } $_; | ||
$NEW_ROW = "$_\tDP:DP4\t$data[0][1]:$data[0][7]\n"; | ||
print $NEW_ROW; | ||
}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
args <- commandArgs() | ||
|
||
dataTable <-read.table(args[5], header=TRUE); | ||
|
||
ratio<-data.frame(dataTable) | ||
ploidy <- type.convert(args[4]) | ||
|
||
|
||
png(filename = paste(args[5],".log2.png",sep = ""), width = 1180, height = 1180, | ||
units = "px", pointsize = 20, bg = "white", res = NA) | ||
plot(1:10) | ||
op <- par(mfrow = c(5,5)) | ||
|
||
for (i in c(1:22,'X','Y')) { | ||
tt <- which(ratio$Chromosome==i) | ||
if (length(tt)>0) { | ||
plot(ratio$Start[tt],log2(ratio$Ratio[tt]),xlab = paste ("position, chr",i),ylab = "normalized copy number profile (log2)",pch = ".",col = colors()[88]) | ||
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy ) | ||
points(ratio$Start[tt],log2(ratio$Ratio[tt]),pch = ".",col = colors()[136]) | ||
|
||
|
||
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1) | ||
points(ratio$Start[tt],log2(ratio$Ratio[tt]),pch = ".",col = colors()[461]) | ||
tt <- which(ratio$Chromosome==i) | ||
|
||
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL: | ||
#points(ratio$Start[tt],log2(ratio$CopyNumber[tt]/ploidy), pch = ".", col = colors()[24],cex=4) | ||
|
||
} | ||
tt <- which(ratio$Chromosome==i) | ||
|
||
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT: | ||
#points(ratio$Start[tt],log2(ratio$MedianRatio[tt]), pch = ".", col = colors()[463],cex=4) | ||
|
||
} | ||
|
||
dev.off() | ||
|
||
|
||
png(filename = paste(args[5],".png",sep = ""), width = 1180, height = 1180, | ||
units = "px", pointsize = 20, bg = "white", res = NA) | ||
plot(1:10) | ||
op <- par(mfrow = c(5,5)) | ||
|
||
maxLevelToPlot <- 3 | ||
for (i in c(1:length(ratio$Ratio))) { | ||
if (ratio$Ratio[i]>maxLevelToPlot) { | ||
ratio$Ratio[i]=maxLevelToPlot; | ||
} | ||
} | ||
|
||
|
||
for (i in c(1:22,'X','Y')) { | ||
tt <- which(ratio$Chromosome==i) | ||
if (length(tt)>0) { | ||
plot(ratio$Start[tt],ratio$Ratio[tt]*ploidy,ylim = c(0,maxLevelToPlot*ploidy),xlab = paste ("position, chr",i),ylab = "normalized copy number profile",pch = ".",col = colors()[88]) | ||
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy ) | ||
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136]) | ||
|
||
tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy) | ||
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4) | ||
|
||
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1) | ||
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461]) | ||
tt <- which(ratio$Chromosome==i) | ||
|
||
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL: | ||
#points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4) | ||
|
||
} | ||
tt <- which(ratio$Chromosome==i) | ||
|
||
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT: | ||
#points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4) | ||
|
||
} | ||
|
||
dev.off() | ||
|
||
|
||
|
||
|
||
if (length(args)>5) { | ||
dataTable <-read.table(args[6], header=TRUE); | ||
BAF<-data.frame(dataTable) | ||
|
||
png(filename = paste(args[6],".png",sep = ""), width = 1180, height = 1180, | ||
units = "px", pointsize = 20, bg = "white", res = NA) | ||
plot(1:10) | ||
op <- par(mfrow = c(5,5)) | ||
|
||
for (i in c(1:22,'X','Y')) { | ||
tt <- which(BAF$Chromosome==i) | ||
if (length(tt)>0){ | ||
lBAF <-BAF[tt,] | ||
plot(lBAF$Position,lBAF$BAF,ylim = c(-0.1,1.1),xlab = paste ("position, chr",i),ylab = "BAF",pch = ".",col = colors()[1]) | ||
|
||
tt <- which(lBAF$A==0.5) | ||
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92]) | ||
tt <- which(lBAF$A!=0.5 & lBAF$A>=0) | ||
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62]) | ||
tt <- 1 | ||
pres <- 1 | ||
|
||
if (length(lBAF$A)>4) { | ||
for (j in c(2:(length(lBAF$A)-pres-1))) { | ||
if (lBAF$A[j]==lBAF$A[j+pres]) { | ||
tt[length(tt)+1] <- j | ||
} | ||
} | ||
points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4) | ||
points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4) | ||
} | ||
|
||
tt <- 1 | ||
pres <- 1 | ||
if (length(lBAF$FittedA)>4) { | ||
for (j in c(2:(length(lBAF$FittedA)-pres-1))) { | ||
if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) { | ||
tt[length(tt)+1] <- j | ||
} | ||
} | ||
points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4) | ||
points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4) | ||
} | ||
|
||
} | ||
|
||
} | ||
dev.off() | ||
|
||
} |
Oops, something went wrong.