Skip to content

Commit 85202b2

Browse files
committed
switched clove latest build from git code
1 parent 4c2c20b commit 85202b2

File tree

5 files changed

+188
-31
lines changed

5 files changed

+188
-31
lines changed

clove/Dockerfile

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ RUN apt-get -y install default-jre
1313

1414
WORKDIR /bin
1515
#RUN wget https://github.com/PapenfussLab/clove/releases/download/v0.17/clove-0.17-jar-with-dependencies.jar
16-
RUN wget https://github.com/PapenfussLab/clove/releases/download/v0.15/clove-0.15-jar-with-dependencies.jar
16+
#RUN wget https://github.com/PapenfussLab/clove/releases/download/v0.15/clove-0.15-jar-with-dependencies.jar
17+
ADD clove.jar /bin

clove/clove.jar

2.49 MB
Binary file not shown.

clove/clove.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ GRIDSS_VCF="${outdir/clove/gridss}/svs.vcf"
1616
GRIDSS_Filtered_VCF="${outdir/clove/gridss}/svs_filtered.vcf"
1717
DELLY_VCF="${outdir/clove/delly_0.7.9}/delly_0.7.9.vcf"
1818

19-
CLOVE="/bin/clove-0.15-jar-with-dependencies.jar"
19+
#CLOVE="/bin/clove-0.15-jar-with-dependencies.jar"
20+
CLOVE="/bin/clove.jar"
2021

2122
#if [ -s "$GRIDSS_VCF" ] && [ -s "$DELLY_VCF" ]; then
2223
if [ -s "$GRIDSS_VCF" ]; then

sv_sim/HackBedToComplexVCF.py

+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
from bed_maker_supplementary import *
2+
3+
import sys
4+
import argparse
5+
import csv
6+
from Bio import SeqIO
7+
8+
# adapted from : https://github.com/PapenfussLab/sv_benchmark/blob/master/breakdancer2vcf.py
9+
10+
if __name__ == '__main__':
11+
parser = argparse.ArgumentParser(
12+
prog='HockBedToVCF', description='Reformat the bedmaker output to VCF')
13+
parser.add_argument('-i', '--inFile', type=str, nargs='+',
14+
help='file containing summary bed file')
15+
parser.add_argument('-r', '--reference', type=str, nargs='+',
16+
help='base reference fasta')
17+
parser.add_argument('-p', '--hasPlasmid', type=bool, nargs='?',
18+
const=True, default=False,
19+
help='indicate if ref has plasmids')
20+
# parser.add_argument('-c', '--contigs', type=str, nargs='+',
21+
# help='contigs and legths of contigs in form id:length')
22+
args = parser.parse_args()
23+
24+
in_file = vars(args).get("inFile")[0]
25+
reference = vars(args).get("reference")[0]
26+
has_plasmids = vars(args).get("hasPlasmid")
27+
plasmid_length = 0
28+
29+
print(args)
30+
31+
if in_file is None or reference is None:
32+
parser.print_help()
33+
sys.exit(1)
34+
35+
import os
36+
37+
vcf_path = in_file.replace(".bed","-complex.vcf")
38+
39+
40+
vcf_file = open(vcf_path, "w")
41+
42+
vcf_file.write("##fileformat=VCFv4.2\n")
43+
44+
vcf_file.write("##ALT=<ID=DEL,Description=\"Deletion\">\n")
45+
vcf_file.write("##ALT=<ID=DUP,Description=\"Duplication\">\n")
46+
vcf_file.write("##ALT=<ID=INV,Description=\"Inversion\">\n")
47+
vcf_file.write("##ALT=<ID=BND,Description=\"Translocation\">\n")
48+
vcf_file.write("##ALT=<ID=INS,Description=\"Insertion\">\n")
49+
50+
vcf_file.write("##FILTER=<ID=PASS,Description=\"All filters passed\">\n")
51+
52+
vcf_file.write("##INFO=<ID=CHR2,Number=1,Type=String,Description=\"Chromosome for POS2 coordinate in case of an inter-chromosomal translocation\">\n")
53+
vcf_file.write("##INFO=<ID=POS2,Number=1,Type=Integer,Description=\"Genomic position for CHR2 in case of an inter-chromosomal translocation\">\n")
54+
vcf_file.write("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the structural variant\">\n")
55+
vcf_file.write("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"BE for breakend and BND for breakpoint \">\n")
56+
vcf_file.write("##INFO=<ID=SV_DEF,Number=1,Type=String,Description=\"Definition of complex SV \">\n")
57+
vcf_file.write("##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of SV \">\n")
58+
vcf_file.write("##INFO=<ID=EVENT,Number=1,Type=String,Description=\"EVENT the SV belongs to \">\n")
59+
vcf_file.write("##INFO=<ID=MATEID,Number=1,Type=String,Description=\"ID of mate breakend or breakpoint \">\n")
60+
vcf_file.write("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n")
61+
62+
vcf_file.write(f"##reference={vars(args).get('reference')[0]}\n")
63+
64+
# for cont in vars(args).get("contigs"):
65+
# conts = cont.split(":")
66+
# vcf_file.write(f"##contig=<ID={conts[0]},length={conts[1]}>\n")
67+
68+
referenceFile = open(reference, 'r')
69+
70+
for rec in SeqIO.parse(referenceFile, 'fasta'):
71+
name = rec.id
72+
seq = rec.seq
73+
seqLen = len(rec)
74+
vcf_file.write(f"##contig=<ID={name},length={seqLen}>\n")
75+
if has_plasmids and name == "plasmid":
76+
plasmid_length = seqLen
77+
print(plasmid_length)
78+
79+
vcf_file.write("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tCHR1\n")
80+
def toVcfBreakend(localChr, localPos, localPositive, remoteChr, remotePos, remotePositive):
81+
if remotePositive:
82+
remote = "]" + remoteChr + ":" + str(remotePos) + "]"
83+
else:
84+
remote = "[" + remoteChr + ":" + str(remotePos) + "["
85+
if localPositive:
86+
return "N" + remote
87+
else:
88+
return remote + "N"
89+
90+
with open(in_file) as file:
91+
bed = csv.reader(file, delimiter="\t")
92+
for idx, line in enumerate(bed):
93+
chr1 = line[0]
94+
start = int(line[1])
95+
end = int(line[2])
96+
sv_def = line[3]
97+
insertion = line[4]
98+
99+
if sv_def == options_dict.get("del"):
100+
vcf_file.write(f"{chr1}\t{start}\tDEL_{str(idx)}\tN\t<DEL>\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=DEL;SV_DEF={sv_def};EVENT=DEL_{str(idx)}\tGT\t1/1\n")
101+
elif sv_def == options_dict.get("ins"):
102+
if has_plasmids:
103+
vcf_file.write(f"{chr1}\t{start}\tINS_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,'plasmid',0,False)}\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=BND;SV_DEF={sv_def};EVENT=INS_{str(idx)}\tGT\t1/1\n")
104+
vcf_file.write(f"{chr1}\t{end}\tINS_{str(idx)}b\tN\t{toVcfBreakend(chr1,end,False,'plasmid',plasmid_length,True)}\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=BND;SV_DEF={sv_def};EVENT=INS_{str(idx)}\tGT\t1/1\n")
105+
else:
106+
vcf_file.write(f"{chr1}\t{start}\tINS_{str(idx)}\tN\t<INS>\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=INS;SV_DEF={sv_def};EVENT=INS_{str(idx)}\tGT\t1/1\n")
107+
elif sv_def == options_dict.get("inv"):
108+
vcf_file.write(f"{chr1}\t{start}\tINV_{str(idx)}\tN\t<INV>\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=INV;SV_DEF={sv_def};EVENT=INV_{str(idx)}\tGT\t1/1\n")
109+
elif sv_def == options_dict.get("tdu"):
110+
vcf_file.write(f"{chr1}\t{start}\tTDU_{str(idx)}\tN\t<DUP>\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=DUP;SV_DEF={sv_def};EVENT=TDU_{str(idx)}\tGT\t1/1\n")
111+
elif sv_def == options_dict.get("itd"):
112+
vcf_file.write(f"{chr1}\t{start}\tITD_{str(idx)}b\tN\t<DUP>\t.\tPASS\tCHR2={chr1};END={end};SVLEN={str(end-start)};SVTYPE=BND;SV_DEF={sv_def};EVENT=ITD_{str(idx)}\tGT\t1/1\n")
113+
elif sv_def in (options_dict.get("tcp"), options_dict.get("txp"), options_dict.get("tci"), options_dict.get("txi"), options_dict.get("chr")):
114+
inserts = insertion.split(":")
115+
chr2 = inserts[1]
116+
pos2 = int(inserts[2])
117+
direction = inserts[3]
118+
119+
if sv_def in {options_dict.get("tcp"), options_dict.get("tci")}:
120+
vcf_file.write(f"{chr2}\t{pos2}\tTCP_{str(idx)}a\tN\t<DUP>\t.\tPASS\tCHR2={chr2};END={pos2+1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TCP_{str(idx)}\tGT\t1/1\n")
121+
elif sv_def in {options_dict.get("txp"), options_dict.get("txi")}:
122+
vcf_file.write(f"{chr2}\t{pos2}\tTCP_{str(idx)}a\tN\t<TRA>\t.\tPASS\tCHR2={chr2};END={pos2+1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TCP_{str(idx)}\tGT\t1/1\n")
123+
elif sv_def == options_dict.get("chr"):
124+
if direction == "forward":
125+
vcf_file.write(f"{chr1}\t{start}\tCHR_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,chr2,pos2+1,False)}\t.\tPASS\tCHR2={chr2};END={pos2+1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=CHR_{str(idx)}\tGT\t1/1\n")
126+
vcf_file.write(f"{chr1}\t{end+1}\tCHR_{str(idx)}b\tN\t{toVcfBreakend(chr1,end+1,False,chr2,pos2,True)}\t.\tPASS\tCHR2={chr2};END={pos2};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=CHR_{str(idx)}\tGT\t1/1\n")
127+
else:
128+
vcf_file.write(f"{chr1}\t{start}\tCHR_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,chr2,pos2,True)}\t.\tPASS\tCHR2={chr2};END={pos2};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=CHR_{str(idx)}\tGT\t1/1\n")
129+
vcf_file.write(f"{chr1}\t{end+1}\tCHR_{str(idx)}b\tN\t{toVcfBreakend(chr1,end+1,False,chr2,pos2+1,False)}\t.\tPASS\tCHR2={chr2};END={pos2+1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=CHR_{str(idx)}\tGT\t1/1\n")
130+
elif sv_def == options_dict.get("sub"):
131+
if has_plasmids:
132+
vcf_file.write(f"{chr1}\t{start}\tSUB_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,'plasmid',0,False)}\t.\tPASS\tCHR2={chr1};END={start};SVLEN=0;SVTYPE=BE;SV_DEF={sv_def};EVENT=SUB_{str(idx)}\tGT\t1/1\n")
133+
vcf_file.write(f"{chr1}\t{end}\tSUB_{str(idx)}b\tN\t{toVcfBreakend(chr1,start,False,'plasmid',plasmid_length,True)}\t.\tPASS\tCHR2={chr1};END={end};SVLEN=0;SVTYPE=BE;SV_DEF={sv_def};EVENT=SUB_{str(idx)}\tGT\t1/1\n")
134+
else:
135+
vcf_file.write(f"{chr1}\t{start}\tSUB_{str(idx)}a\tN\tN]{chr1}:{start}]\t.\tPASS\tCHR2={chr1};END={start};SVLEN=0;SVTYPE=BE;SV_DEF={sv_def};EVENT=SUB_{str(idx)}\tGT\t1/1\n")
136+
vcf_file.write(f"{chr1}\t{end}\tSUB_{str(idx)}b\tN\t[{chr1}:{end}[N\t.\tPASS\tCHR2={chr1};END={end};SVLEN=0;SVTYPE=BE;SV_DEF={sv_def};EVENT=SUB_{str(idx)}\tGT\t1/1\n")
137+
elif sv_def == options_dict.get("tcs"):
138+
inserts = insertion.split(":")
139+
chr2 = inserts[1]
140+
pos1 = int(inserts[0])
141+
pos2 = int(inserts[2])
142+
direction = inserts[3]
143+
144+
if direction == "forward":
145+
vcf_file.write(f"{chr1}\t{start}\tTCS_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,chr2,pos1,False)}\t.\tPASS\tCHR2={chr2};END={pos1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TCS_{str(idx)}\tGT\t1/1\n")
146+
vcf_file.write(f"{chr1}\t{end}\tTCS_{str(idx)}b\tN\t{toVcfBreakend(chr1,end,False,chr2,pos2+1,True)}\t.\tPASS\tCHR2={chr2};END={pos2+1};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TCS_{str(idx)}\tGT\t1/1\n")
147+
else:
148+
vcf_file.write(f"{chr1}\t{start}\tTCS_{str(idx)}a\tN\t{toVcfBreakend(chr1,start,True,chr2,pos2+1,True)}\t.\tPASS\tCHR2={chr2};END={end};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TSI_{str(idx)}\tGT\t1/1\n")
149+
vcf_file.write(f"{chr1}\t{end}\tTCS_{str(idx)}b\tN\t{toVcfBreakend(chr1,end,False,chr2,pos1,False)}\t.\tPASS\tCHR2={chr2};END={end};SVLEN=0;SVTYPE=BND;SV_DEF={sv_def};EVENT=TSI_{str(idx)}\tGT\t1/1\n")
150+
else:
151+
print("UNHANDLED VARIANT CALL: " + sv_def + " " + options_dict.get("tci"))
152+
153+
vcf_file.close()

0 commit comments

Comments
 (0)