From 1898d8595476bf3fca34d35e6d42d2796289529f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 25 Aug 2019 18:47:28 -0400 Subject: [PATCH 001/766] Update qmake.pl --- scripts/qmake.pl | 61 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 89738127..c9ac6319 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -149,26 +149,49 @@ sub slack { } $mail_subject .= " Attempt " . ($n + 1) if $n > 0; } - - my $pipeline_channel_msg = "\@${slackname} $project_name :"; - if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { - if ($retcode == 0) { - # op success - my $slack_msg = "*COMPLETE* $name :ok_hand:"; - &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); - &slack($opt{c}, $slack_msg) if $opt{c}; - } else { - # op failure - my $slack_msg = "*FAILURE* $cwd/$logfile"; - if ($n + 1 == $attempts) { - # final attempt - $slack_msg = ":troll: $slack_msg"; - &slack($opt{c}, $slack_msg) if $opt{c}; - } - &slack($err_slack, "$pipeline_channel_msg $slack_msg"); - # wait a bit before retrying to allow cleanup - sleep 30; + + if ($username eq "selenicp") { + my $pipeline_channel_msg = "\@${slackname} $project_name :"; + if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { + if ($retcode == 0) { + # op success + my $slack_msg = "*FAILURE* $cwd/$logfile"; + $slack_msg = "$slack_msg :troll:"; + &slack($opt{c}, $slack_msg) if $opt{c}; + } else { + # op failure + my $slack_msg = "*FAILURE* $cwd/$logfile"; + if ($n + 1 == $attempts) { + # final attempt + $slack_msg = "$slack_msg :troll:"; + &slack($opt{c}, $slack_msg) if $opt{c}; + } + &slack($err_slack, "$pipeline_channel_msg $slack_msg"); + # wait a bit before retrying to allow cleanup + sleep 30; + } + } + } else { + my $pipeline_channel_msg = "\@${slackname} $project_name :"; + if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { + if ($retcode == 0) { + # op success + my $slack_msg = "*COMPLETE* $name :the_horns:"; + &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); + &slack($opt{c}, $slack_msg) if $opt{c}; + } else { + # op failure + my $slack_msg = "*FAILURE* $cwd/$logfile"; + if ($n + 1 == $attempts) { + # final attempt + $slack_msg = "$slack_msg :troll:"; + &slack($opt{c}, $slack_msg) if $opt{c}; + } + &slack($err_slack, "$pipeline_channel_msg $slack_msg"); + # wait a bit before retrying to allow cleanup + sleep 30; } + } } } } while ($retcode && ++$n < $attempts); From 1bfae115aa72ec1ddd3932c4f24f4c5cf0e5a879 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 25 Aug 2019 18:48:31 -0400 Subject: [PATCH 002/766] Update qmake.pl --- scripts/qmake.pl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index c9ac6319..10b90b03 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -6,10 +6,6 @@ use Cwd; my $cwd = getcwd; -#my $fin_email_addrs = "qmake.finished\@raylim.mm.st charlottekyng+qmake.finished\@gmail.com"; -#my $err_email_addrs = "qmake.error\@raylim.mm.st charlottekyng+qmake.error\@gmail.com"; -#my $start_email_addrs = "qmake.start\@raylim.mm.st charlottekyng+qmake.start\@gmail.com"; - my $err_slack = "pipeline_error"; my $fin_slack = "pipeline_finished"; From 7fc3991039d4ee4b3aae2b598c7d814dc4406f0a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 25 Aug 2019 18:51:56 -0400 Subject: [PATCH 003/766] Update knit.R --- scripts/knit.R | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/knit.R b/scripts/knit.R index c63e70e4..f6c77bf3 100644 --- a/scripts/knit.R +++ b/scripts/knit.R @@ -12,7 +12,6 @@ input <- args[1] outPrefix <- args[2] args <- args[c(-1,-2)] -#create output dirs figPath <- file.path(outPrefix, 'figure/') cachePath <- file.path(outPrefix, 'cache/') dir.create(figPath, showWarnings = F, recursive = T) From e092a8ed39689e76a64a4b18dac92d9016d157bb Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 12:00:18 -0400 Subject: [PATCH 004/766] Update qmake.pl --- scripts/qmake.pl | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 10b90b03..e752ffa7 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -1,5 +1,4 @@ #!/usr/bin/env perl -# wrapper script for qmake to remove newlines use strict; use warnings; @@ -13,7 +12,6 @@ limr => "raylim", debruiji => "debruiji", brownd7 => "brownd7", - selenicp => "selenicp", lees19 => "lees19", ferrandl => "ferrandl", dacruzpa => "dacruzpa" @@ -61,48 +59,8 @@ sub slack { $attempts = $opt{r} if defined $opt{r}; $name = $opt{n} if defined $opt{n}; $logparent = $opt{l} if defined $opt{l}; - my $qmake = shift @ARGV; - my $args = join " ", @ARGV; - -# makefile processing -=pod -my $orig_args = $args; - -$args =~ s;-f (\S+);"-f " . dirname($1) . "/." . basename($1) . ".tmp";e; -my $optf = $1; - -my @makefiles; -if (defined $optf) { - push @makefiles, $optf; -} else { - if ($args =~ /--/) { - $args .= " -f .Makefile.tmp"; - } else { - $args .= "-- -f .Makefile.tmp"; - } - push @makefiles, "Makefile"; -} - - - -do { - my $makefile = glob(shift(@makefiles)); - - open IN, "<$makefile" or die "Unable to open $makefile\n"; - my $tmpfile = glob(dirname($makefile) . "/." . basename($makefile) . ".tmp"); - open OUT, ">$tmpfile" or die "Unable to open $tmpfile\n"; - while () { - s/\\\n$//; - if (!/^include \S+\.tmp/ && s;^include (\S+);"include " . dirname($1) . "/." . basename($1) . ".tmp";e) { - push @makefiles, $1; - } - print OUT $_; - } -} until (scalar @makefiles == 0); -=cut - my $n = 0; my $retcode; do { @@ -146,7 +104,7 @@ sub slack { $mail_subject .= " Attempt " . ($n + 1) if $n > 0; } - if ($username eq "selenicp") { + if ($username eq "") { my $pipeline_channel_msg = "\@${slackname} $project_name :"; if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { if ($retcode == 0) { From 23a76364064baf40c41618f2316a54b086644030 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 12:26:39 -0400 Subject: [PATCH 005/766] Update qmake.pl --- scripts/qmake.pl | 121 +++++++++++++++-------------------------------- 1 file changed, 37 insertions(+), 84 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index e752ffa7..c310cf88 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -9,7 +9,6 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - limr => "raylim", debruiji => "debruiji", brownd7 => "brownd7", lees19 => "lees19", @@ -17,7 +16,6 @@ dacruzpa => "dacruzpa" ); - sub HELP_MESSAGE { print "Usage: qmake.pl -n [name] -m -r [numAttempts]\n"; print "-m: e-mail notifications\n"; @@ -51,7 +49,6 @@ sub slack { my $project_name = $cwd; $project_name =~ s:.*/projects/::; $project_name =~ s:.*/data/::; -$project_name =~ s:.*kinglab/::; $project_name =~ s:/:_:g; my $attempts = 1; my $name = "qmake"; @@ -64,88 +61,44 @@ sub slack { my $n = 0; my $retcode; do { - my $logdir = "$logparent/$name"; - my $logfile = "$logdir.log"; - my $i = 0; - while (-e $logdir || -e $logfile) { - $logdir = "log/$name.$i"; - $logfile = "$logdir.log"; - $i++; - } - mkpath $logdir; - my $pid = fork; - if ($pid == 0) { - #print "$qmake $args &> $logfile\n"; - exec "$qmake $args LOGDIR=$logdir &> $logfile"; - } else { - my $mail_msg = "Command: $qmake $args\n"; - $mail_msg .= "Attempt #: " . ($n + 1) . " of $attempts\n"; - $mail_msg .= "Hostname: " . $ENV{HOSTNAME}. "\n"; - $mail_msg .= "PID: $pid\n"; - $mail_msg .= "Dir: $cwd\n"; - $mail_msg .= "Log dir: $cwd/$logdir\n"; - $mail_msg .= "Log file: $cwd/$logfile\n"; - - if ($opt{m} && ($n == 0 || $n == 1 || $n + 1 == $attempts)) { - my $mail_subject = "$name: job started ($cwd)"; - $mail_subject .= " Attempt " . ($n + 1) if $n > 0; - #open(MAIL, "| mail -s '$mail_subject' $start_email_addrs"); - #print MAIL "$mail_msg"; - #close MAIL; - } - waitpid(-1, 0); - $retcode = $? >> 8; # shift bits to get the real return code - if ($opt{m} && ($retcode == 0 || $n == 0 || $n == 1 || $n + 1 == $attempts)) { - #my $addrs = ($retcode > 0)? $err_email_addrs : $fin_email_addrs; - my $mail_subject = "[$retcode] $name: job finished ($cwd)"; - if ($n + 1 == $attempts) { - $mail_subject = "**FINAL** $mail_subject"; - } - $mail_subject .= " Attempt " . ($n + 1) if $n > 0; - } - - if ($username eq "") { - my $pipeline_channel_msg = "\@${slackname} $project_name :"; - if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { - if ($retcode == 0) { - # op success - my $slack_msg = "*FAILURE* $cwd/$logfile"; - $slack_msg = "$slack_msg :troll:"; - &slack($opt{c}, $slack_msg) if $opt{c}; - } else { - # op failure - my $slack_msg = "*FAILURE* $cwd/$logfile"; - if ($n + 1 == $attempts) { - # final attempt - $slack_msg = "$slack_msg :troll:"; - &slack($opt{c}, $slack_msg) if $opt{c}; - } - &slack($err_slack, "$pipeline_channel_msg $slack_msg"); - # wait a bit before retrying to allow cleanup - sleep 30; - } - } - } else { - my $pipeline_channel_msg = "\@${slackname} $project_name :"; - if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { - if ($retcode == 0) { - # op success - my $slack_msg = "*COMPLETE* $name :the_horns:"; - &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); - &slack($opt{c}, $slack_msg) if $opt{c}; - } else { - # op failure - my $slack_msg = "*FAILURE* $cwd/$logfile"; - if ($n + 1 == $attempts) { - # final attempt - $slack_msg = "$slack_msg :troll:"; - &slack($opt{c}, $slack_msg) if $opt{c}; - } - &slack($err_slack, "$pipeline_channel_msg $slack_msg"); - # wait a bit before retrying to allow cleanup - sleep 30; + my $logdir = "$logparent/$name"; + my $logfile = "$logdir.log"; + my $i = 0; + while (-e $logdir || -e $logfile) { + $logdir = "log/$name.$i"; + $logfile = "$logdir.log"; + $i++; + } + mkpath $logdir; + my $pid = fork; + if ($pid == 0) { + exec "$qmake $args LOGDIR=$logdir &> $logfile"; + } else { + waitpid(-1, 0); + $retcode = $? >> 8; + my $flag = 0; + for my $auth_user (keys %slack_map) { + if ($username eq $auth_user) { + $flag = 1; + } + } + if ($flag) { + my $pipeline_channel_msg = "\@${slackname} $project_name :"; + if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { + if ($retcode == 0) { + my $slack_msg = "*COMPLETE* $name :the_horns:"; + &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); + &slack($opt{c}, $slack_msg) if $opt{c}; + } else { + my $slack_msg = "*FAILURE* $cwd/$logfile"; + if ($n + 1 == $attempts) { + $slack_msg = "$slack_msg :troll:"; + &slack($opt{c}, $slack_msg) if $opt{c}; + } + &slack($err_slack, "$pipeline_channel_msg $slack_msg"); + sleep 30; + } } - } } } } while ($retcode && ++$n < $attempts); From 09051a8390a988cf6899cb293513498daf7e008e Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 12:28:41 -0400 Subject: [PATCH 006/766] Update README.md --- README.md | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index df602c2f..a9c39b47 100644 --- a/README.md +++ b/README.md @@ -12,44 +12,12 @@ git clone https://github.com/jrflab/modules.git ``` ## Dependencies -- An instance of [anaconda](https://www.anaconda.com) or [miniconda](https://conda.io/en/latest/miniconda.html) -- IMB's Platform Load Sharing Facility (LSF) or Oracle's Sun Grid Engine (SGE) for resource management +- [xxx](https://) -### Following R Packages +## Following R Packages - [xxx](https://) -## Best practices - -### Conventions -- Sample names cannot have "/" or "." in them -- Fastq files end in ".fastq.gz" -- Fastq files are stored in DATA_DIR (Set as Environment Variable) - -### Whole genome, whole exome and targeted sequencing -- QC -- BWA -- Broad Standard Practices on bwa bam -- Haplotype Caller, Platypus, MuTect, Strelka -- snpEff, Annovar, SIFT, pph2, vcf2maf, VEP, OncoKB, ClinVar -- Copy number, tumor purity using Facets -- Contamination using -- HLA Typing - * [xxx](http://) - -### RNA transcriptome sequencing -- QC -- Tophat, STAR -- Cufflinks (ENS and UCSC) -- In-house Exon Expression (ENS and UCSC) -- fusion-catcher, tophat-fusion, deFuse -- OncoFuse actionable fusion classification - -### Patient: -- Genotyping On Patient. - 1000g sites are evaluated for every library and then compared (all vs all) - If two libraries come from a patient the match should be pretty good >80% -- Still to develop: - If the match is below a certain threshold, break the pipeline for patient + ## Detailed usage [wiki](https://github.com/jrflab/modules/wiki) From 2dfbf7ef610b49e1429d28b904fe1a6d7be9eca2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 12:48:02 -0400 Subject: [PATCH 007/766] Several updates --- Makefile.inc | 2 +- scripts/backup.sh | 28 ------------------- scripts/chmod.sh | 7 ----- ...ateSampleSets.pl => create_sample_sets.pl} | 1 - scripts/gzipLogs.sh | 9 ------ scripts/{initProject.pl => init_project.pl} | 4 --- scripts/{joinEff.pl => join_eff.pl} | 1 - ...monitorGfServer.sh => monitor_gfserver.sh} | 1 + scripts/{monitorMySQL.sh => monitor_myql.sh} | 1 + 9 files changed, 3 insertions(+), 51 deletions(-) delete mode 100755 scripts/backup.sh delete mode 100755 scripts/chmod.sh rename scripts/{createSampleSets.pl => create_sample_sets.pl} (92%) delete mode 100644 scripts/gzipLogs.sh rename scripts/{initProject.pl => init_project.pl} (77%) rename scripts/{joinEff.pl => join_eff.pl} (97%) rename scripts/{monitorGfServer.sh => monitor_gfserver.sh} (99%) rename scripts/{monitorMySQL.sh => monitor_myql.sh} (99%) diff --git a/Makefile.inc b/Makefile.inc index 41f26d88..7458fffd 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -120,7 +120,7 @@ SNP_EFF_MEM = $(JAVA8) -Xmx$1 -jar $(SNP_EFF_JAR) SNP_SIFT_MEM = $(JAVA8) -Xmx$1 -jar $(SNP_SIFT_JAR) SNP_SIFT = $(call SNP_SIFT_MEM,$(DEFAULT_JAVA_MEM)) VCF_EFF_ONE_PER_LINE = $(HOME)/share/usr/snpEff-4.1/scripts/vcfEffOnePerLine.pl -VCF_JOIN_EFF = modules/scripts/joinEff.pl +VCF_JOIN_EFF = modules/scripts/join_eff.pl COUNT_SAMPLES = $(shell expr `sed 's/\..*//; s:.*/::' <<< $1 | grep -o "_" | wc -l` + 1) diff --git a/scripts/backup.sh b/scripts/backup.sh deleted file mode 100755 index 3c1894f9..00000000 --- a/scripts/backup.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -LOCK=~/.backup_lock -LOGFILE=~/.backup.log -if ! mkdir $LOCK 2> /dev/null; then - echo "backup script is already running" - exit 1 -fi - -TMP=`mktemp`; -TOPDIR=/ifs/e63data/reis-filho -if mountpoint -q "/mount/limr/zedshared/"; then - while [ 1 ]; do - echo "searching for files in $TOPDIR" - cd $TOPDIR - 'ls' data/*/*/bam/*.bam* projects/*/bam/*.bam* data/*/wgs*/fastq/*.fastq.gz | \ - rsync --verbose --stats --recursive -a --files-from=- --log-file=$LOGFILE --prune-empty-dirs ./ /mount/limr/zedshared - if [ "$?" = "0" ]; then - echo "rsync complete" - exit - else - echo "rsync failure, retrying in 1 minute..." - sleep 60 - fi - done -fi - -rmdir $LOCK diff --git a/scripts/chmod.sh b/scripts/chmod.sh deleted file mode 100755 index ba5b0892..00000000 --- a/scripts/chmod.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -chmod ug+rwX -R /ifs/e63data/reis-filho/data &> /dev/null -chmod ug+rwX -R /ifs/e63data/reis-filho/projects &> /dev/null -chmod ug+rwX -R /ifs/e63data/reis-filho/reference &> /dev/null -chmod o+rX -R /ifs/e63data/reis-filho/data &> /dev/null -chmod o+rX -R /ifs/e63data/reis-filho/projects &> /dev/null -chmod o+rX -R /ifs/e63data/reis-filho/reference &> /dev/null diff --git a/scripts/createSampleSets.pl b/scripts/create_sample_sets.pl similarity index 92% rename from scripts/createSampleSets.pl rename to scripts/create_sample_sets.pl index f32b680d..2bcc0a55 100644 --- a/scripts/createSampleSets.pl +++ b/scripts/create_sample_sets.pl @@ -1,5 +1,4 @@ #!/usr/bin/env perl -# parse samples file to get sample sets (space delimited, normal last) use strict; use warnings; diff --git a/scripts/gzipLogs.sh b/scripts/gzipLogs.sh deleted file mode 100644 index 05cdf2cc..00000000 --- a/scripts/gzipLogs.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -# gzip old log files -LOCK=~/.gzip_lock -if ! mkdir $LOCK 2> /dev/null; then - echo "log gzip script is already running" - exit 1 -fi -find /ifs/e63data/reis-filho/data /ifs/e63data/reis-filho/projects/ -name '*.log' -mtime +5 -exec gzip {} \; -rmdir $LOCK diff --git a/scripts/initProject.pl b/scripts/init_project.pl similarity index 77% rename from scripts/initProject.pl rename to scripts/init_project.pl index 04ed5d37..06d15290 100644 --- a/scripts/initProject.pl +++ b/scripts/init_project.pl @@ -22,7 +22,3 @@ unless (-e "summary_config.yaml") { copy("modules/default_yaml/summary_config.yaml", "summary_config.yaml") or die "Unable to create summary_config.yaml: $!"; } - -# unless (-e "sample_attr.yaml") { -# copy("modules/default_yaml/sample_attr.yaml", "sample_attr.yaml") or die "Unable to create sample_attr.yaml: $!"; -# } diff --git a/scripts/joinEff.pl b/scripts/join_eff.pl similarity index 97% rename from scripts/joinEff.pl rename to scripts/join_eff.pl index bc0b1a6b..2779407b 100644 --- a/scripts/joinEff.pl +++ b/scripts/join_eff.pl @@ -1,5 +1,4 @@ #!/usr/bin/env perl -# join EFF lines use strict; use List::MoreUtils qw(first_index indexes); diff --git a/scripts/monitorGfServer.sh b/scripts/monitor_gfserver.sh similarity index 99% rename from scripts/monitorGfServer.sh rename to scripts/monitor_gfserver.sh index 6c552afd..376bb8df 100644 --- a/scripts/monitorGfServer.sh +++ b/scripts/monitor_gfserver.sh @@ -1,4 +1,5 @@ #!/bin/bash + UP=$(pgrep -u limr gfServer | wc -l); if [ "$UP" -ne 1 ]; then diff --git a/scripts/monitorMySQL.sh b/scripts/monitor_myql.sh similarity index 99% rename from scripts/monitorMySQL.sh rename to scripts/monitor_myql.sh index 898c070b..6fb2b6cf 100755 --- a/scripts/monitorMySQL.sh +++ b/scripts/monitor_myql.sh @@ -1,4 +1,5 @@ #!/bin/bash + UP=$(pgrep -u limr mysqld | wc -l); if [ "$UP" -ne 1 ]; then From 4ddeff4ad0eb6dd9c0b9386dac5331b4c099f397 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 12:49:17 -0400 Subject: [PATCH 008/766] Several updates --- scripts/{monitor_myql.sh => monitorMySQL.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{monitor_myql.sh => monitorMySQL.sh} (100%) diff --git a/scripts/monitor_myql.sh b/scripts/monitorMySQL.sh similarity index 100% rename from scripts/monitor_myql.sh rename to scripts/monitorMySQL.sh From c53fffda0df214f73b12fa97fd46c782b165767c Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 13:26:32 -0400 Subject: [PATCH 009/766] Clean-up --- test/clonality/reportpyclone.R | 148 ----------------- test/clonality/tsvtopyclone.R | 100 ------------ test/copy_number/qdnaseqcopynumber.mk | 29 ---- test/copy_number/qdnaseqextract.R | 86 ---------- test/copy_number/qdnaseqextract.mk | 22 --- test/copy_number/qdnaseqplot.R | 172 -------------------- test/copy_number/qdnaseqsegment.R | 24 --- test/phylogeny/bootstrapmedicc.R | 75 --------- test/phylogeny/combinesamples.R | 90 ----------- test/phylogeny/initmedicc.R | 119 -------------- test/phylogeny/plotmedicc.R | 57 ------- test/phylogeny/plotratchet.R | 32 ---- test/phylogeny/pratchet.R | 63 -------- test/phylogeny/segmentsamples.R | 220 -------------------------- test/workflows/cnvkit.mk | 25 --- test/workflows/copynumber_summary.mk | 23 --- test/workflows/cravat_annotation.mk | 21 --- test/workflows/fetchimpact.mk | 16 -- test/workflows/medicc.mk | 79 --------- test/workflows/mspyclone.mk | 20 --- test/workflows/pratchet.mk | 24 --- test/workflows/qdnaseq.mk | 17 -- test/workflows/sspyclone.mk | 37 ----- test/workflows/viral_detection.mk | 23 --- 24 files changed, 1522 deletions(-) delete mode 100644 test/clonality/reportpyclone.R delete mode 100644 test/clonality/tsvtopyclone.R delete mode 100755 test/copy_number/qdnaseqcopynumber.mk delete mode 100755 test/copy_number/qdnaseqextract.R delete mode 100755 test/copy_number/qdnaseqextract.mk delete mode 100755 test/copy_number/qdnaseqplot.R delete mode 100644 test/copy_number/qdnaseqsegment.R delete mode 100755 test/phylogeny/bootstrapmedicc.R delete mode 100644 test/phylogeny/combinesamples.R delete mode 100755 test/phylogeny/initmedicc.R delete mode 100755 test/phylogeny/plotmedicc.R delete mode 100755 test/phylogeny/plotratchet.R delete mode 100755 test/phylogeny/pratchet.R delete mode 100755 test/phylogeny/segmentsamples.R delete mode 100644 test/workflows/cnvkit.mk delete mode 100644 test/workflows/copynumber_summary.mk delete mode 100644 test/workflows/cravat_annotation.mk delete mode 100644 test/workflows/fetchimpact.mk delete mode 100644 test/workflows/medicc.mk delete mode 100644 test/workflows/mspyclone.mk delete mode 100644 test/workflows/pratchet.mk delete mode 100644 test/workflows/qdnaseq.mk delete mode 100644 test/workflows/sspyclone.mk delete mode 100644 test/workflows/viral_detection.mk diff --git a/test/clonality/reportpyclone.R b/test/clonality/reportpyclone.R deleted file mode 100644 index d9770bb5..00000000 --- a/test/clonality/reportpyclone.R +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("ggplot2")) - -optList = list(make_option("--sample_name", default = NULL, help = "tumor normal sample name")) - -parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -tumor_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[1] -normal_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[2] - -in_file = list( - paste0("pyclone/", tumor_sample, "_", normal_sample, "/", tumor_sample,".tsv"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/pyclone.tsv") -) -out_file = list( - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_std_by_cid.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_ccf_by_cid.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_std_by_cn.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_ccf_by_cn.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_vaf_by_cn.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/histogram_depth_by_cn.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/scatter_vaf_depth_by_cn.pdf"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/summary.tsv"), - paste0("pyclone/", tumor_sample, "_", normal_sample, "/report/clusters.tsv") -) - -mutation_summary = read_tsv(file=in_file[[1]], col_types = cols(.default = col_character())) %>% - type_convert() %>% - mutate(total_cn = factor(minor_cn+major_cn)) %>% - mutate(DP = var_counts+ref_counts) %>% - mutate(VAF = 100*var_counts/(var_counts+ref_counts)) - -pyclone_summary = read_tsv(file=in_file[[2]], col_types = cols(.default = col_character()), col_names = c("mutation_id", "ccf", "std", "cluster_id")) %>% - type_convert() %>% - mutate(cluster_id = factor(cluster_id)) %>% - mutate(ccf = as.numeric(ccf)) %>% - mutate(std = as.numeric(std)) %>% - slice(-1) - -mutation_summary = full_join(mutation_summary, pyclone_summary, by="mutation_id") - -plot.0 = ggplot(mutation_summary, aes(x=std, fill=cluster_id)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x=expression(sigma), y="Frequency\n") + - guides(fill=guide_legend(title=c("Cluster"))) - -pdf(file=out_file[[1]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x=ccf, fill=cluster_id)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x="\nCCF\n", y="Frequency\n") + - coord_cartesian(xlim=c(0,1)) + - guides(fill=guide_legend(title=c("Cluster"))) -pdf(file=out_file[[2]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x=std, fill=total_cn)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x=expression(sigma), y="Frequency\n") + - guides(fill=guide_legend(title=c("Copy number"))) - -pdf(file=out_file[[3]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x=ccf, fill=total_cn)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x="\nCCF\n", y="Frequency\n") + - coord_cartesian(xlim=c(0,1)) + - guides(fill=guide_legend(title=c("Copy number"))) - -pdf(file=out_file[[4]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x = VAF, fill=total_cn)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x="\nVAF(%)\n", y="Frequency\n") + - coord_cartesian(xlim=c(0,100)) + - guides(fill=guide_legend(title=c("Copy number"))) - -pdf(file=out_file[[5]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x = DP, fill=total_cn)) + - geom_histogram(alpha = .8) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x="\nDP\n", y="Frequency\n") + - guides(fill=guide_legend(title=c("Copy number"))) - -pdf(file=out_file[[6]], width=6, height=6) -print(plot.0) -dev.off() - -plot.0 = ggplot(mutation_summary, aes(x = VAF, y = DP, fill=total_cn)) + - geom_point(alpha=.85, size=2.5, shape=21) + - theme_classic() + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=8), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x="\nVAF (%)\n", y="DP\n") + - scale_x_log10() + - annotation_logticks(side="b") + - coord_cartesian(xlim=c(5,100)) + - guides(fill=guide_legend(title=c("Copy number"))) - -pdf(file=out_file[[7]], width=6, height=6) -print(plot.0) -dev.off() - - -tmp = mutation_summary %>% - group_by(cluster_id) %>% - summarize( - n = n(), - mean_ccf = mean(ccf), - median_ccf = median(ccf), - std_ccf = sd(ccf), - min_ccf = min(ccf), - max_ccf = max(ccf), - mean_sd = mean(std), - median_sd = median(std), - std_sd = sd(std), - min_sd = min(std), - max_sd = max(std)) - -write_tsv(x=mutation_summary, path=out_file[[8]]) -write_tsv(x=tmp, path=out_file[[9]]) diff --git a/test/clonality/tsvtopyclone.R b/test/clonality/tsvtopyclone.R deleted file mode 100644 index e46b636b..00000000 --- a/test/clonality/tsvtopyclone.R +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("ggplot2")) - -optList = list(make_option("--sample_name", default = NULL, help = "tumor normal sample name")) - -parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -tumor_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[1] -normal_sample = unlist(strsplit(opt$sample_name, split="_", fixed=TRUE))[2] - -mutation_summary = read_tsv(file="summary/tsv/mutation_summary.tsv", col_types = cols(.default = col_character())) %>% - type_convert() %>% - filter(TUMOR_SAMPLE==tumor_sample) %>% - filter(NORMAL_SAMPLE==normal_sample) %>% - filter(grepl("mutect", variantCaller, fixed=TRUE)) %>% - filter(NORMAL_MAF==0) %>% - filter(TUMOR_MAF>=.05) %>% - filter(TUMOR_DP<=500) %>% - filter(TUMOR_DP>=20) %>% - filter(NORMAL_DP<=500) %>% - filter(NORMAL_DP>=10) %>% - mutate(CHROM = as.numeric(ifelse(CHROM=="X", 23, CHROM))) %>% - mutate(CHROM = as.numeric(ifelse(CHROM=="Y", 24, CHROM))) %>% - filter(CHROM<=22) %>% - mutate(UUID = paste0(CHROM, ":", POS, "_", REF, "_", ALT)) - -load(paste0("facets/cncf/", opt$sample_name, ".Rdata")) -qt = q1 = rep(NA, nrow(mutation_summary)) -for (i in 1:nrow(mutation_summary)) { - x = mutation_summary$CHROM[i] - y = mutation_summary$POS[i] - indx = which(fit$cncf[,"chrom"]==x & (fit$cncf[,"start"]<=y & fit$cncf[,"end"]>=y)) - if (length(indx)!=0) { - qt[i] = fit$cncf[indx,"tcn.em"] - q1[i] = fit$cncf[indx,"lcn.em"] - } -} -fsq = as.numeric(mutation_summary$TUMOR_MAF) -n = as.numeric(mutation_summary$TUMOR_DP) -mutation_id = as.character(mutation_summary$UUID) -var_counts = round(fsq*n) -ref_counts = round((1-fsq)*n) -normal_cn = rep(2, nrow(mutation_summary)) -minor_cn = q1 -major_cn = qt-q1 -sample_summary = data.frame(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn) -index = apply(sample_summary, 1, function(x) {any(is.na(x))}) -sample_summary = sample_summary[!index,,drop=FALSE] -index = sample_summary[,"major_cn"]==0 -sample_summary = sample_summary[!index,,drop=FALSE] -write.table(sample_summary, paste0("pyclone/", opt$sample_name, "/", tumor_sample,".tsv"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE, append=FALSE) - -cat("num_iters: 100000\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = FALSE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("base_measure_params:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" alpha: 1\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" beta: 1\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("concentration:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" value: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" prior:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" shape: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" rate: 0.001\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("density: pyclone_beta_binomial\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("beta_binomial_precision_params:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" value: 1000\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" prior:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" shape: 1.0\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" rate: 0.0001\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" proposal:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" precision: 0.5\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(paste0("working_dir: pyclone/",opt$sample_name, "\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("trace_dir: trace", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("init_method: connected\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("samples:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) - -cat(paste0(" ", tumor_sample, ":\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(paste0(" mutations_file: ", tumor_sample, ".yaml\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" tumour_content:\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(paste0(" value: ", ifelse(is.na(fit$purity), 1.0, signif(fit$purity, 2)),"\n"), file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -cat(" error_rate: 0.01", file=paste0("pyclone/", opt$sample_name, "/config.yaml"), append = TRUE) -system(paste0("source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/PyClone-0.13.1 && PyClone build_mutations_file --in_file pyclone/", opt$sample_name, "/", tumor_sample, ".tsv --out_file pyclone/", opt$sample_name, "/", tumor_sample, ".yaml --prior parental_copy_number")) diff --git a/test/copy_number/qdnaseqcopynumber.mk b/test/copy_number/qdnaseqcopynumber.mk deleted file mode 100755 index 6a8a9e9c..00000000 --- a/test/copy_number/qdnaseqcopynumber.mk +++ /dev/null @@ -1,29 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/qdnaseq_copynumber.$(NOW) -PHONY += qdnaseq qdnaseq/copynumber qdnaseq/copynumber/log2ratio qdnaseq/copynumber/segmented qdnaseq/copynumber/pcf - -qdnaseq_copynumber : $(foreach sample,$(SAMPLES),qdnaseq/copynumber/log2ratio/$(sample).pdf qdnaseq/copynumber/segmented/$(sample).RData qdnaseq/copynumber/pcf/$(sample).pdf) - -define qdnaseq-plot-log2ratio -qdnaseq/copynumber/log2ratio/%.pdf : qdnaseq/bed/%.bed - $$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 10G -m 12G,"$(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'raw'") -endef - $(foreach sample,$(SAMPLES),\ - $(eval $(call qdnaseq-plot-log2ratio,$(sample)))) - -define qdnaseq-segment-log2ratio -qdnaseq/copynumber/segmented/%.RData : qdnaseq/bed/%.bed - $$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 12G -m 16G,"$(RSCRIPT) modules/test/copy_number/qdnaseqsegment.R --sample $$(*)") - -qdnaseq/copynumber/pcf/%.pdf : qdnaseq/copynumber/segmented/%.RData - $$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 12G -m 16G,"$(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'bychromosome' --rho '$${qdnaseq_rho.$1}' --psi '$${qdnaseq_psi.$1}' --gamma '$${qdnaseq_gamma.$1}' && \ - $(RSCRIPT) modules/test/copy_number/qdnaseqplot.R --sample $$(*) --type 'segmented' --rho '$${qdnaseq_rho.$1}' --psi '$${qdnaseq_psi.$1}' --gamma '$${qdnaseq_gamma.$1}'") - -endef - $(foreach sample,$(SAMPLES),\ - $(eval $(call qdnaseq-segment-log2ratio,$(sample)))) - - -.PHONY: $(PHONY) diff --git a/test/copy_number/qdnaseqextract.R b/test/copy_number/qdnaseqextract.R deleted file mode 100755 index 696b84b8..00000000 --- a/test/copy_number/qdnaseqextract.R +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("QDNAseq")) -suppressPackageStartupMessages(library("future")) - -future::plan("multiprocess") -options(mc.cores=16L) - - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -if (!dir.exists("qdnaseq/readcounts")) { - dir.create("qdnaseq/readcounts") -} - -if (!dir.exists("qdnaseq/isobars")) { - dir.create("qdnaseq/isobars") -} - -if (!dir.exists("qdnaseq/variance")) { - dir.create("qdnaseq/variance") -} - -if (!dir.exists("qdnaseq/log2ratio")) { - dir.create("qdnaseq/log2ratio") -} - -if (!dir.exists("qdnaseq/bed")) { - dir.create("qdnaseq/bed") -} - -args_list = list(make_option("--sample", default = NA, type = 'character', help = "sample name"), - make_option("--binsize", default = NA, type = 'character', help = "bin size")) - -parser = OptionParser(usage = "%prog", option_list = args_list) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -if (is.na(as.numeric(opt$binsize))) { - opt$binsize = 30 -} else { - opt$binsize = as.numeric(opt$binsize) -} - -bins = getBinAnnotations(binSize=opt$binsize, genome="hg19") -readCounts = binReadCounts(bins=bins, bamfiles=paste0("bam/", opt$sample, ".bam"), - isPaired=TRUE, - isProperPair=TRUE, - minMapq=30, - pairedEnds=TRUE, - chunkSize=TRUE) - -# read counts versus genomic coordinates -pdf(file=paste0("qdnaseq/readcounts/", opt$sample, ".pdf"), width=14, height=9) -plot(readCounts, logTransform=TRUE, ylim=c(0, 20)) -highlightFilters(readCounts, logTransform=TRUE, residual=TRUE, blacklist=TRUE) -dev.off() - -readCountsFiltered = applyFilters(readCounts, residual=TRUE, blacklist=TRUE) - -# %GC content versus mappability -pdf(file=paste0("qdnaseq/isobars/", opt$sample, ".pdf"), width=7, height=7) -isobarPlot(readCountsFiltered) -dev.off() - -readCountsFiltered = estimateCorrection(readCountsFiltered) - -# noise (variance) versus bin coverage -pdf(file=paste0("qdnaseq/variance/", opt$sample, ".pdf"), width=7, height=7) -noisePlot(readCountsFiltered) -dev.off() - -copyNumbers = correctBins(readCountsFiltered) -copyNumbersNormalized = normalizeBins(copyNumbers) -copyNumbersSmooth = smoothOutlierBins(copyNumbersNormalized) - -# log2 ratio versus genomic coordinates -pdf(file=paste0("qdnaseq/log2ratio/", opt$sample, ".pdf"), width=14, height=9) -plot(copyNumbersSmooth, ylim=c(-4,4)) -dev.off() - -# write log2 ratio to file -exportBins(copyNumbersSmooth, file=paste0("qdnaseq/bed/", opt$sample, ".bed"), format="bed") diff --git a/test/copy_number/qdnaseqextract.mk b/test/copy_number/qdnaseqextract.mk deleted file mode 100755 index 7eab2d2c..00000000 --- a/test/copy_number/qdnaseqextract.mk +++ /dev/null @@ -1,22 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/qdnaseq_extract.$(NOW) -PHONY += qdnaseq qdnaseq/readcounts qdnaseq/isobars qdnaseq/variance qdnaseq/log2ratio qdnaseq/bed - -qdnaseq_extract : $(foreach sample,$(SAMPLES),qdnaseq/readcounts/$(sample).pdf qdnaseq/isobars/$(sample).pdf qdnaseq/variance/$(sample).pdf qdnaseq/log2ratio/$(sample).pdf qdnaseq/bed/$(sample).bed) - -DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 -QDNASEQ_ENV = $(HOME)/share/usr/anaconda-envs/qdnaseq -QDNASEQ_BINSIZE = 5 - -define qdnaseq-log2ratio -qdnaseq/readcounts/%.pdf qdnaseq/isobars/%.pdf qdnaseq/variance/%.pdf qdnaseq/log2ratio/%.pdf qdnaseq/bed/%.bed : bam/%.bam - $$(call RUN,-c -n 16 -s 2G -m 3G -w 7200 -v $$(DEFAULT_ENV),"source activate $$(QDNASEQ_ENV) && \ - $$(RSCRIPT) modules/test/copy_number/qdnaseqextract.R --sample $$(*) --binsize $(QDNASEQ_BINSIZE)") - -endef - $(foreach sample,$(SAMPLES),\ - $(eval $(call qdnaseq-log2ratio,$(sample)))) - -.PHONY: $(PHONY) diff --git a/test/copy_number/qdnaseqplot.R b/test/copy_number/qdnaseqplot.R deleted file mode 100755 index 86f8ef97..00000000 --- a/test/copy_number/qdnaseqplot.R +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("copynumber")) -suppressPackageStartupMessages(library("colorspace")) -suppressPackageStartupMessages(library("ASCAT")) -load("modules/copy_number/CytoBand.RData") - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list = list(make_option("--sample", default = NA, type = 'character', help = "tumor sample"), - make_option("--type", default = NA, type = 'character', help = "type of plot"), - make_option("--rho", default = NA, type = 'numeric', help = "tumor purity"), - make_option("--psi", default = NA, type = 'numeric', help = "tumor ploidy"), - make_option("--gamma", default = NA, type = 'numeric', help = "log2 ratio compression")) - -parser = OptionParser(usage = "%prog", option_list = args_list) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -opt$rho = ifelse(is.na(as.numeric(opt$rho)), 1, as.numeric(opt$rho)) -opt$psi = ifelse(is.na(as.numeric(opt$psi)), 2, as.numeric(opt$psi)) -opt$gamma = ifelse(is.na(as.numeric(opt$gamma)), 1, as.numeric(opt$gamma)) - -load("modules/copy_number/CytoBand.RData") - -'prunesegments.cn' <- function(x, n=10) -{ - cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x)) - for (j in 1:nrow(x)) { - cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"]) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx,"Log2Ratio"]) - scl = sd(x[indx,"Log2Ratio"]) - ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl)) - x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"]) - } else { - x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"]) - } - } - return(invisible(x)) -} - -if (opt$type=="raw") { - - infile = paste0("qdnaseq/bed/", opt$sample, ".bed") - outfile = paste0("qdnaseq/copynumber/log2ratio/", opt$sample, ".pdf") - data = read.table(file=infile, header=FALSE, sep="\t", skip=1, stringsAsFactors=FALSE)[,c(1,2,3,5),drop=FALSE] - colnames(data) = c("Chromosome", "Start", "End", "Log2Ratio") - pdf(file=outfile, width=10, height=4.25) - par(mar=c(5, 5, 4, 2)+.1) - end = NULL - for (j in 1:22) { - end = c(end, max(CytoBand$End[CytoBand$Chromosome==j])) - } - end = cumsum(end) - start = rep(0, 22) - start[2:22] = end[1:21]+1 - for (j in 1:22) { - data[data[,"Chromosome"]==j,"Start"] = data[data[,"Chromosome"]==j,"Start"] + start[j] - } - col = rep("grey75", nrow(data)) - plot(data[,"Start"], data[,"Log2Ratio"], type="p", pch=".", cex=1.95, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5)) - axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) - mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) - for (j in 1:22) { - v = start[j] - abline(v=v, col="goldenrod3", lty=3, lwd=1) - } - abline(v=max(data[,"Start"]), col="goldenrod3", lty=3, lwd=1) - abline(h=0, col="red") - axis(1, at = .5*(start+end), labels=c(1:22), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(data[,"Start"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) - title(main = opt$sample, line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) - dev.off() - -} else if (opt$type=="segmented") { - - infile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData") - outfile = paste0("qdnaseq/copynumber/pcf/", opt$sample, ".pdf") - load(infile) - - segmented = prunesegments.cn(x=segmented, n=7) - end = NULL - for (j in 1:22) { - end = c(end, max(CytoBand$End[CytoBand$Chromosome==j])) - } - end = cumsum(end) - start = rep(0, 22) - start[2:22] = end[1:21]+1 - for (j in 1:22) { - segmented[segmented[,"Chromosome"]==j,"Start"] = segmented[segmented[,"Chromosome"]==j,"Start"] + start[j] - segmented[segmented[,"Chromosome"]==j,"End"] = segmented[segmented[,"Chromosome"]==j,"End"] + start[j] - data[data[,"Chromosome"]==j,"Start"] = data[data[,"Chromosome"]==j,"Start"] + start[j] - } - col = "grey75" - pdf(file=outfile, width=10, height=4.25) - par(mar=c(5, 5, 4, 2)+.1) - plot(data[,"Start"], data[,"Log2Ratio"], type="p", pch=".", cex=1.95, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5)) - axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) - for (j in 1:nrow(segmented)) { - lines(x=c(segmented[j,"Start"], segmented[j,"End"]), y=rep(segmented[j,"Log2Ratio"],2), lty=1, lwd=2.75, col="red") - } - mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) - for (j in 1:22) { - v = start[j] - abline(v=v, col="goldenrod3", lty=3, lwd=1) - } - abline(v=max(data[,"Start"]), col="goldenrod3", lty=3, lwd=1) - abline(h=0, col="red") - axis(1, at = .5*(start+end), labels=c(1:22), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(data[,"Start"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) - title(main = opt$sample, line=-1, cex.main=.75, font.main=1) - for (k in c(1,2,3,4,6,9)) { - abline(h=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), col="brown", lty=3, cex=.5) - mtext(text=k, side=4, line=.5, at=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), las=2, cex=.5, col="brown") - } - box(lwd=1.5) - dev.off() - -} else if (opt$type=="bychromosome") { - - infile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData") - if (!dir.exists("qdnaseq/copynumber/bychr/")) { - dir.create("qdnaseq/copynumber/bychr/") - } - if (!dir.exists(paste0("qdnaseq/copynumber/bychr/", opt$sample, "/"))) { - dir.create(paste0("qdnaseq/copynumber/bychr/", opt$sample, "/")) - } - load(infile) - segmented = prunesegments.cn(x=segmented, n=7) - for (ii in 1:22) { - pdf(file=paste0("qdnaseq/copynumber/bychr/", opt$sample, "/", ii, ".pdf")) - zz = split.screen(figs=matrix(c(0,1,.15,1, 0.065,.975,0.1,.4), nrow=2, ncol=4, byrow=TRUE)) - screen(zz[1]) - par(mar = c(6.1, 6, 4.1, 3)) - start = 1 - end = max(CytoBand[CytoBand[,"Chromosome"]==ii,"End"]) - plot(1, 1, type="n", xlim=c(start,end), ylim=c(-4,4), xlab="", ylab="", main="", frame.plot=FALSE, axes=FALSE) - index = data[,"Chromosome"]==ii - points(data[index,"Start"], data[index,"Log2Ratio"], type="p", pch=".", cex=1.15, col="grey75") - tmp = subset(segmented, segmented[,"Chromosome"]==ii) - for (i in 1:nrow(tmp)) { - points(c(tmp[i,"Start"], tmp[i,"End"]), rep(tmp[i,"Log2Ratio"],2), type="l", col="red", lwd=4) - } - for (i in 1:(nrow(tmp)-1)) { - points(c(tmp[i,"End"], tmp[i+1,"Start"]), c(tmp[i,"Log2Ratio"],tmp[i+1,"Log2Ratio"]), type="l", col="red", lwd=1) - } - abline(h=0, lwd=1) - axis(2, at = c(-4,-2,0,2,4), labels=c("-4","-2","0","2", "4"), cex.axis = 1.25, las = 1, lwd=1.5, lwd.ticks=1.35) - mtext(side = 2, text = expression("Log"[2]~"Ratio"), line = 4, cex = 1.5) - for (k in c(1,2,3,4,6,9)) { - abline(h=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), col="darkorange", lty=3) - mtext(text=k, side=4, line=.5, at=(opt$gamma*log2(((opt$rho)*k + (1-opt$rho)*2)/((opt$rho)*opt$psi + (1-opt$rho)*2))), las=2, cex=.75, col="darkorange") - } - box(lwd=2) - screen(zz[2]) - arg = copynumber:::getPlotParameters(type = "sample", nSeg = 10, cr = 3 * 3, sampleID = "dummy", plot.ideo = TRUE, xaxis = TRUE, assembly = "hg19") - copynumber:::plotIdeogram(chrom=ii, TRUE, cyto.data = arg$assembly, cex = .75, unit = "bp") - close.screen(all.screens=TRUE) - dev.off() - } - -} diff --git a/test/copy_number/qdnaseqsegment.R b/test/copy_number/qdnaseqsegment.R deleted file mode 100644 index 459cdaf0..00000000 --- a/test/copy_number/qdnaseqsegment.R +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("copynumber")) -suppressPackageStartupMessages(library("colorspace")) -suppressPackageStartupMessages(library("ASCAT")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample", default = NA, type = 'character', help = "sample name")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -infile = paste0("qdnaseq/bed/", opt$sample, ".bed") -outfile = paste0("qdnaseq/copynumber/segmented/", opt$sample, ".RData") -data = read.table(file=infile, header=FALSE, sep="\t", skip=1, stringsAsFactors=FALSE)[,c(1,2,3,5),drop=FALSE] -colnames(data) = c("Chromosome", "Start", "End", "Log2Ratio") -segmented = pcf(data=winsorize(data=data[,c("Chromosome", "Start", "Log2Ratio"),drop=FALSE], method="mad", tau=2.5, k=25, verbose=FALSE), kmin = 100, gamma = 150, fast=FALSE, verbose=FALSE)[,2:7,drop=FALSE] -colnames(segmented) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio") -save(data, segmented, file=outfile) diff --git a/test/phylogeny/bootstrapmedicc.R b/test/phylogeny/bootstrapmedicc.R deleted file mode 100755 index 41d94e00..00000000 --- a/test/phylogeny/bootstrapmedicc.R +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -if (!dir.exists(paste0("medicc/boot/allele_specific/", opt$sample_set))) { - dir.create(paste0("medicc/boot/allele_specific/", opt$sample_set)) -} - -load(paste0("medicc/aspcf/", opt$sample_set, ".RData")) -q1 = qt-q2 -index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) }) -q2 = q2[index,,drop=FALSE] -q1 = q1[index,,drop=FALSE] -tmp = tmp[index,,drop=FALSE] -q2[q2>4] = 4 -q1[q1>4] = 4 - -if (ncol(q2)<3) { - q1x = q1 - colnames(q1x) = paste0(colnames(q1), "_pad00") - q1 = cbind(q1, q1x) - q2x = q2 - colnames(q2x) = paste0(colnames(q2), "_pad00") - q2 = cbind(q2, q2x) -} - -set.seed(0) -for (ii in 1:100) { - n = nchar(ii) - if (n==1) { - n = paste0("00", ii) - } else if (n==2) { - n = paste0("0", ii) - } else { - n = ii - } - index = order(sample(x=1:nrow(tmp), size=nrow(tmp), replace=TRUE)) - q2_b = q2[index,,drop=FALSE] - q1_b = q1[index,,drop=FALSE] - tmp_b = tmp[index,,drop=FALSE] - desc = cbind(paste0("chrom", unique(tmp_b[,"Chromosome"])), - paste0("major_chr", unique(tmp_b[,"Chromosome"]), ".fasta"), - paste0("minor_chr", unique(tmp_b[,"Chromosome"]), ".fasta")) - if (!dir.exists(paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n))) { - dir.create(paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n)) - } - write.table(desc, file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE) - for (i in unique(tmp[,"Chromosome"])) { - cat(">diploid\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q2_b)) { - cat(paste0(">", gsub("-", "_", colnames(q2_b)[j]), "\n"), file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE) - cat(paste0(q2_b[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/major_chr", i, ".fasta"), append=TRUE) - } - cat(">diploid\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q1_b)) { - cat(paste0(">", gsub("-", "_", colnames(q1_b)[j]), "\n"), file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE) - cat(paste0(q1_b[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/", n, "/minor_chr", i, ".fasta"), append=TRUE) - } - } - if (ii==100) { - cat("done!", file=paste0("medicc/boot/allele_specific/", opt$sample_set, "/init.timestamp")) - } -} diff --git a/test/phylogeny/combinesamples.R b/test/phylogeny/combinesamples.R deleted file mode 100644 index 776a708b..00000000 --- a/test/phylogeny/combinesamples.R +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set"), - make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"), - make_option("--type", default = NA, type = 'character', help = "allele specific or total copy") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))) -normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))) -normal_samples = normal_samples[normal_samples %in% all_samples] -tumor_samples = all_samples[!(all_samples %in% normal_samples)] - -if (opt$type=="allele_specific") { - - CN = list() - for (i in 1:length(tumor_samples)) { - load(paste0("facets/cncf/", tumor_samples[i], "_", normal_samples, ".Rdata")) - CN[[i]] = out2$jointseg[,c("chrom", "maploc", "cnlr", "vafT", "het"),drop=FALSE] - colnames(CN[[i]]) = c("Chromosome", "Position", "Log2Ratio", "BAF", "Genotype") - } - index = lapply(CN, function(x) {paste0(x[,1], ":", x[,2])}) - featureNames = unique(unlist(index)) - for (i in 1:length(index)) { - featureNames = intersect(featureNames, index[[i]]) - } - chr = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[1] }))) - pos = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[2] }))) - index = order(pos, decreasing=FALSE) - chr = chr[index] - pos = pos[index] - index = order(chr, decreasing=FALSE) - chr = chr[index] - pos = pos[index] - featureNames = paste0(chr, ":", pos) - for (i in 1:length(CN)) { - rownames(CN[[i]]) = paste0(CN[[i]][,1], ":", CN[[i]][,2]) - CN[[i]] = CN[[i]][featureNames,,drop=FALSE] - } - Log2Ratio = do.call(cbind, lapply(CN, function(x) { return(x[,"Log2Ratio"]) } )) - BAF = do.call(cbind, lapply(CN, function(x) { return(x[,"BAF"]) } )) - Genotype = do.call(cbind, lapply(CN, function(x) { return(x[,"Genotype"]) } )) - annotation = data.frame(Chromosome=chr, - Position=pos) - colnames(Log2Ratio) = colnames(BAF) = tumor_samples - save(Log2Ratio, BAF, Genotype, annotation, file=paste0("medicc/allele_specific/mad/", opt$sample_set, ".RData")) - -} else if (opt$type=="total_copy") { - - CN = list() - for (i in 1:length(tumor_samples)) { - load(paste0("facets/cncf/", tumor_samples[i], "_", normal_samples, ".Rdata")) - CN[[i]] = out2$jointseg[,c("chrom", "maploc", "cnlr"),drop=FALSE] - colnames(CN[[i]]) = c("Chromosome", "Position", "Log2Ratio") - } - index = lapply(CN, function(x) {paste0(x[,1], ":", x[,2])}) - featureNames = unique(unlist(index)) - for (i in 1:length(index)) { - featureNames = intersect(featureNames, index[[i]]) - } - chr = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[1] }))) - pos = as.numeric(unlist(lapply(strsplit(featureNames, ":", fixed=TRUE), function(x) { x[2] }))) - index = order(pos, decreasing=FALSE) - chr = chr[index] - pos = pos[index] - index = order(chr, decreasing=FALSE) - chr = chr[index] - pos = pos[index] - featureNames = paste0(chr, ":", pos) - for (i in 1:length(CN)) { - rownames(CN[[i]]) = paste0(CN[[i]][,1], ":", CN[[i]][,2]) - CN[[i]] = CN[[i]][featureNames,,drop=FALSE] - } - Log2Ratio = do.call(cbind, lapply(CN, function(x) { return(x[,"Log2Ratio"]) } )) - annotation = data.frame(Chromosome=chr, - Position=pos) - colnames(Log2Ratio) = tumor_samples - save(Log2Ratio, annotation, file=paste0("medicc/total_copy/mad/", opt$sample_set, ".RData")) - -} \ No newline at end of file diff --git a/test/phylogeny/initmedicc.R b/test/phylogeny/initmedicc.R deleted file mode 100755 index 22984e30..00000000 --- a/test/phylogeny/initmedicc.R +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set"), - make_option("--type", default = NA, type = 'character', help = "allele specific or total copy") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -if (opt$type=="allele_specific") { - - load(paste0("medicc/allele_specific/aspcf/", opt$sample_set, ".RData")) - q1 = qt-q2 - index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) }) - q2 = q2[index,,drop=FALSE] - q1 = q1[index,,drop=FALSE] - tmp = tmp[index,,drop=FALSE] - q2[q2>4] = 4 - q1[q1>4] = 4 - - if (ncol(q2)<3) { - q1x = q1 - colnames(q1x) = paste0(colnames(q1), "_pad00") - q1 = cbind(q1, q1x) - q2x = q2 - colnames(q2x) = paste0(colnames(q2), "_pad00") - q2 = cbind(q2, q2x) - } - - desc = cbind(paste0("chrom", unique(tmp[,"Chromosome"])), - paste0("major_chr", unique(tmp[,"Chromosome"]), ".fasta"), - paste0("minor_chr", unique(tmp[,"Chromosome"]), ".fasta")) - write.table(desc, file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE) - for (i in unique(tmp[,"Chromosome"])) { - cat(">diploid\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q2)) { - cat(paste0(">", gsub("-", "_", colnames(q2)[j]), "\n"), file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - cat(paste0(q2[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - } - - - cat(">diploid\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q1)) { - cat(paste0(">", gsub("-", "_", colnames(q1)[j]), "\n"), file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - cat(paste0(q1[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - } - } -} else if (opt$type=="total_copy") { - - load(paste0("medicc/total_copy/mpcf/", opt$sample_set, ".RData")) - ploidy = round(apply(((tmp[,"End"]-tmp[,"Start"])*qt)/sum(tmp[,"End"]-tmp[,"Start"]), 2, sum)) - ploidy[ploidy>=4] = 4 - ploidy[ploidy<=2] = 2 - if (length(unique(ploidy))>1) { - index = which(ploidy==4) - - qt_4n = ceiling(apply(qt[,index,drop=FALSE], 1, mean)/2)*2 - qt_4n[qt_4n==0 & apply(qt[,index,drop=FALSE], 1, mean)!=0] = 1 - qt_2n = round(qt_4n/2) - qt_2n[qt_2n==0 & apply(qt[,index,drop=FALSE], 1, mean)!=0] = 1 - qt = cbind(qt, diploid_ancestor=qt_2n, tetraploid_ancestor=qt_4n) - - q2_4n = ceiling(apply(q2[,index,drop=FALSE], 1, mean)/2)*2 - q2_4n[q2_4n==0 & apply(q2[,index,drop=FALSE], 1, mean)!=0] = 1 - q2_2n = round(q2_4n/2) - q2_2n[q2_2n==0 & apply(q2[,index,drop=FALSE], 1, mean)!=0] = 1 - q2 = cbind(q2, diploid_ancestor=q2_2n, tetraploid_ancestor=q2_4n) - - } - - q1 = qt-q2 - index = !apply(q2, 1, function(x) { any(is.na(x)) }) & !apply(q1, 1, function(x) { any(is.na(x)) }) - q2 = q2[index,,drop=FALSE] - q1 = q1[index,,drop=FALSE] - tmp = tmp[index,,drop=FALSE] - - q2[q2>4] = 4 - q1[q1>4] = 4 - - if (ncol(q2)<3) { - q1x = q1 - colnames(q1x) = paste0(colnames(q1), "_pad00") - q1 = cbind(q1, q1x) - q2x = q2 - colnames(q2x) = paste0(colnames(q2), "_pad00") - q2 = cbind(q2, q2x) - } - - desc = cbind(paste0("chrom", unique(tmp[,"Chromosome"])), - paste0("major_chr", unique(tmp[,"Chromosome"]), ".fasta"), - paste0("minor_chr", unique(tmp[,"Chromosome"]), ".fasta")) - write.table(desc, file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/desc.txt"), sep=" ", col.names=FALSE, row.names=FALSE, quote=FALSE, append=FALSE) - for (i in unique(tmp[,"Chromosome"])) { - cat(">diploid\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q2)) { - cat(paste0(">", gsub("-", "_", colnames(q2)[j]), "\n"), file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - cat(paste0(q2[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/major_chr", i, ".fasta"), append=TRUE) - } - - - cat(">diploid\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=FALSE) - cat(paste0(rep(1, sum(tmp[,"Chromosome"]==i)), collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - for (j in 1:ncol(q1)) { - cat(paste0(">", gsub("-", "_", colnames(q1)[j]), "\n"), file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - cat(paste0(q1[tmp[,"Chromosome"]==i,j], collapse=""), "\n", file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/minor_chr", i, ".fasta"), append=TRUE) - } - } -} diff --git a/test/phylogeny/plotmedicc.R b/test/phylogeny/plotmedicc.R deleted file mode 100755 index 417f4b98..00000000 --- a/test/phylogeny/plotmedicc.R +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("ape")) -suppressPackageStartupMessages(library("foreach")) -suppressPackageStartupMessages(library("parallel")) -suppressPackageStartupMessages(library("doMC")) -suppressPackageStartupMessages(library("stringr")) -suppressPackageStartupMessages(library("phytools")) - -registerDoMC(12) - - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set"), - make_option("--type", default = NA, type = 'character', help = "allele specific or total copy") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -if (opt$type=="allele_specific") { - - phylo_tree = read.tree(file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/tree_final.new")) - tip_labels = phylo_tree$tip.label - index = grep("pad00", tip_labels) - if (length(index)!=0) { - phylo_tree = drop.tip(phy=phylo_tree, tip=tip_labels[index], trim.internal=TRUE, rooted=FALSE) - } - phylo_tree = root(phylo_tree, outgroup="diploid") - - pdf(file=paste0("medicc/allele_specific/medicc/", opt$sample_set, "/tree_final.pdf"), height=7, width=7) - plotTree(tree=phylo_tree, color="#8CC63F", lwd=3, offset=1) - edgelabels(text=paste0(phylo_tree$edge.length, " "), cex=.75) - dev.off() - -} else if (opt$type=="total_copy") { - - phylo_tree = read.tree(file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/tree_final.new")) - tip_labels = phylo_tree$tip.label - index = grep("pad00", tip_labels) - if (length(index)!=0) { - phylo_tree = drop.tip(phy=phylo_tree, tip=tip_labels[index], trim.internal=TRUE, rooted=FALSE) - } - phylo_tree = root(phylo_tree, outgroup="diploid") - - pdf(file=paste0("medicc/total_copy/medicc/", opt$sample_set, "/tree_final.pdf"), height=7, width=7) - plotTree(tree=phylo_tree, color="#8CC63F", lwd=3, offset=1) - edgelabels(text=paste0(phylo_tree$edge.length, " "), cex=.75) - dev.off() - -} diff --git a/test/phylogeny/plotratchet.R b/test/phylogeny/plotratchet.R deleted file mode 100755 index ac34dc2c..00000000 --- a/test/phylogeny/plotratchet.R +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("ape")) -suppressPackageStartupMessages(library("foreach")) -suppressPackageStartupMessages(library("parallel")) -suppressPackageStartupMessages(library("doMC")) -suppressPackageStartupMessages(library("stringr")) -suppressPackageStartupMessages(library("phytools")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -load(paste0("pratchet/", opt$sample_set, "/tree_final.RData")) - -pdf(file=paste0("pratchet/", opt$sample_set, "/tree_final.pdf"), height=7, width=7) -plot.phylo(x=phy_tree_w_bl, edge.color="#8CC63F", edge.width=3, label.offset=1) -nodelabels(node=1:phy_tree_w_bl$Nnode+Ntip(phy_tree_w_bl), - pie = cbind(as.numeric(phy_tree_w_bl$node.label),100-as.numeric(phy_tree_w_bl$node.label)), - piecol = c("goldenrod3","grey85"), - cex = 1) -edgelabels(text=paste0(phy_tree_w_bl$edge.length, " "), cex=.75) -dev.off() diff --git a/test/phylogeny/pratchet.R b/test/phylogeny/pratchet.R deleted file mode 100755 index 46a6af76..00000000 --- a/test/phylogeny/pratchet.R +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("ape")) -suppressPackageStartupMessages(library("phangorn")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set"), - make_option("--normal_samples", default = NA, type = 'character', help = "normal samples") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))) -normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))) -normal_samples = normal_samples[normal_samples %in% all_samples] -tumor_samples = all_samples[!(all_samples %in% normal_samples)] - -mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv"), col_types = cols(.default = col_character())) %>% - type_convert() - -mutation_binary = as.data.frame(mutation_summary[,paste0("CALL_", c(tumor_samples, normal_samples)),drop=FALSE]) -colnames(mutation_binary) = gsub("CALL_", "", colnames(mutation_binary)) - -phy_data = as.phyDat(mutation_binary, type="USER", levels=c(0,1)) -phy_tree = pratchet(data=phy_data) -phy_tree_w_bl = acctran(tree=phy_tree, data=phy_data) -phy_tree_w_bl = root(phy_tree_w_bl, outgroup=normal_samples) - -'bootstrap_data' <- function(x, N=100) -{ - y = list() - for (i in 1:N) { - index = sample(1:nrow(x), size=nrow(x), replace=TRUE) - y[[i]] = x[index,,drop=FALSE] - } - return(y) -} - - -phy_tree_w_bl_boot = list() -mutation_binary_boot = bootstrap_data(x=mutation_binary) -for (i in 1:length(mutation_binary_boot)) { - phy_data = as.phyDat(mutation_binary_boot[[i]], type="USER", levels=c(0,1)) - phy_tree = pratchet(data=phy_data) - phy_tree_w_bl_boot[[i]] = acctran(tree=phy_tree, data=phy_data) - phy_tree_w_bl_boot[[i]] = root(phy_tree_w_bl_boot[[i]], outgroup=normal_samples) -} - -class(phy_tree_w_bl) = "phylo" -class(phy_tree_w_bl_boot) = "multiPhylo" -node_labels = prop.clades(phy_tree_w_bl, phy_tree_w_bl_boot, rooted=TRUE) -phy_tree_w_bl$node.label = node_labels -save(list=ls(all=TRUE), file=paste0("pratchet/", opt$sample_set, "/tree_final.RData")) diff --git a/test/phylogeny/segmentsamples.R b/test/phylogeny/segmentsamples.R deleted file mode 100755 index 253f6bd0..00000000 --- a/test/phylogeny/segmentsamples.R +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("copynumber")) -suppressPackageStartupMessages(library("colorspace")) -suppressPackageStartupMessages(library("ASCAT")) - - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_set", default = NA, type = 'character', help = "sample names set"), - make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"), - make_option("--gamma", default = NA, type = 'character', help = "segmentation parameter gamma"), - make_option("--nlog2", default = NA, type = 'character', help = "number of clusters in Log2 ratio"), - make_option("--nbaf", default = NA, type = 'character', help = "number of clusters in BAF"), - make_option("--type", default = NA, type = 'character', help = "allele specific or total copy") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -all_samples = na.omit(unlist(strsplit(opt$sample_set, split="_", fixed=TRUE))) -normal_samples = na.omit(unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE))) -normal_samples = normal_samples[normal_samples %in% all_samples] -tumor_samples = all_samples[!(all_samples %in% normal_samples)] - -if (opt$type=="allele_specific") { - - load(paste0("medicc/allele_specific/mad/", opt$sample_set, ".RData")) - gamma = ifelse(is.na(as.numeric(opt$gamma)), 50, as.numeric(opt$gamma)) - nlog2 = ifelse(is.na(as.numeric(opt$nlog2)), 10, as.numeric(opt$nlog2)) - nbaf = ifelse(is.na(as.numeric(opt$nbaf)), 15, as.numeric(opt$nbaf)) - index = apply(Genotype, 1, function(x) {sum(x==1)==length(x)}) - Log2Ratio = Log2Ratio[index,,drop=FALSE] - BAF = BAF[index,,drop=FALSE] - annotation = annotation[index,,drop=FALSE] - colnames(Log2Ratio) = paste0("Log2Ratio_", colnames(Log2Ratio)) - colnames(BAF) = paste0("BAF_", colnames(BAF)) - index = BAF>.5 - BAF[index] = 1 - BAF[index] - CN_and_BAF = cbind(annotation, Log2Ratio, BAF) - tmp = NULL - for (i in 1:23) { - cn_and_baf = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"]==i) - x = try(multipcf(data=winsorize(data=cn_and_baf, method="mad", tau=2.5, k=15, verbose=FALSE), gamma=gamma, normalize=FALSE, fast=FALSE, verbose=FALSE), silent=TRUE) - if (!("try-error" %in% is(x))) { - colnames(x)[1:5] = c("Chromosome", "Arm", "Start", "End", "N") - tmp = rbind(tmp, x) - } - } - CN_and_BAF = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"] %in% tmp[,"Chromosome"]) - qt = q2 = matrix(NA, nrow=nrow(tmp), ncol=length(tumor_samples)) - colnames(qt) = colnames(q2) = tumor_samples - for (i in 1:length(tumor_samples)) { - ascat = new.env() - load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_samples, ".RData"), envir=ascat) - - 'prunesegments.cn' <- function(x, n=10) - { - cnm = matrix(NA, nrow=length(x), ncol=length(x)) - for (j in 1:length(x)) { - cnm[,j] = abs(2^x[j] - 2^x) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx]) - scl = sd(x[indx]) - ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl)) - x[indx[ind]] = mean(x[indx[ind]]) - } else { - x[indx] = mean(x[indx]) - } - } - return(x) - } - - 'prunesegments.baf' <- function(x, n=10) - { - cnm = matrix(NA, nrow=length(x), ncol=length(x)) - for (j in 1:length(x)) { - cnm[,j] = abs(2^x[j] - 2^x) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx]) - scl = sd(x[indx]) - ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl)) - x[indx[ind]] = mean(x[indx[ind]]) - } else { - x[indx] = mean(x[indx]) - } - } - return(x) - } - tmp[,paste0("Log2Ratio_", tumor_samples[i])] = prunesegments.cn(x=tmp[,paste0("Log2Ratio_", tumor_samples[i])], n=nlog2) - tmp[,paste0("BAF_", tumor_samples[i])] = prunesegments.baf(x=tmp[,paste0("BAF_", tumor_samples[i])], n=nbaf) - - Tumor_LogR = as.numeric(CN_and_BAF[,paste0("Log2Ratio_", tumor_samples[i])]) - Tumor_BAF = as.numeric(CN_and_BAF[,paste0("BAF_", tumor_samples[i])]) - Tumor_LogR_segmented = rep(tmp[,paste0("Log2Ratio_", tumor_samples[i])], times=tmp[,"N"]) - Tumor_BAF_segmented = rep(tmp[,paste0("BAF_", tumor_samples[i])], times=tmp[,"N"]) - SNPpos = CN_and_BAF[,c("Chromosome", "Position"), drop=FALSE] - names(Tumor_LogR) = names(Tumor_BAF) = names(Tumor_LogR_segmented) = names(Tumor_BAF_segmented) = rownames(SNPpos) = paste0("chr", CN_and_BAF[,"Chromosome"], ":", CN_and_BAF[,"Position"]) - colnames(SNPpos) = c("chrs", "pos") - ch = list() - j = 1 - for (j in 1:length(unique(CN_and_BAF[,"Chromosome"]))) { - index = which(CN_and_BAF[,"Chromosome"]==(unique(CN_and_BAF[,"Chromosome"]))[j]) - ch[[j]] = index - j = j + 1 - } - chr = ch - chrs = unique(CN_and_BAF[,"Chromosome"]) - gender = "2323" - sexchromosomes = c(23, 24) - tmp2 = list(Tumor_LogR=Tumor_LogR, - Tumor_BAF=Tumor_BAF, - Tumor_LogR_segmented=Tumor_LogR_segmented, - Tumor_BAF_segmented=Tumor_BAF_segmented, - SNPpos=SNPpos, - chromosomes=ch, - chrnames=chrs, - gender=gender, - sexchromosomes=sexchromosomes) - - tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR, - baf=tmp2$Tumor_BAF, - lrrsegmented=tmp2$Tumor_LogR_segmented, - bafsegmented=tmp2$Tumor_BAF_segmented, - gender=tmp2$gender, - SNPpos=tmp2$SNPpos, - chromosomes=tmp2$chromosomes, - chrnames=tmp2$chrnames, - sexchromosomes=tmp2$sexchromosomes, - failedqualitycheck=FALSE, - distance = paste0("medicc/allele_specific/ascat/", tumor_samples[i], "_", normal_samples, ".pdf"), - copynumberprofile = NULL, - nonroundedprofile = NULL, - aberrationreliability = NULL, - gamma = 1, rho_manual = ascat$tmp3$rho, psi_manual = ascat$tmp3$psi, y_limit = 3, circos = NA)) - - if (!("try-error" %in% is(tmp3))) { - chr = SNPpos[tmp3$seg_raw[,1],1] - pos = SNPpos[tmp3$seg_raw[,1],2] - qt[tmp[,1] %in% chr & tmp[,3] %in% pos,tumor_samples[i]] = tmp3$seg_raw[,"nA"] + tmp3$seg_raw[,"nB"] - q2[tmp[,1] %in% chr & tmp[,3] %in% pos,tumor_samples[i]] = apply(tmp3$seg_raw[,c("nA", "nB"),drop=FALSE], 1, max, na.rm=TRUE) - } - } - save(list=ls(all=TRUE), file=paste0("medicc/allele_specific/aspcf/", opt$sample_set, ".RData")) - -} else if (opt$type=="total_copy") { - - load(paste0("medicc/total_copy/mad/", opt$sample_set, ".RData")) - gamma = ifelse(is.na(as.numeric(opt$gamma)), 150, as.numeric(opt$gamma)) - nlog2 = ifelse(is.na(as.numeric(opt$nlog2)), 10, as.numeric(opt$nlog2)) - colnames(Log2Ratio) = paste0("Log2Ratio_", colnames(Log2Ratio)) - CN_and_BAF = cbind(annotation, Log2Ratio) - tmp = NULL - for (i in 1:23) { - cn_and_baf = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"]==i) - x = try(multipcf(data=winsorize(data=cn_and_baf, method="mad", tau=2.5, k=15, verbose=FALSE), gamma=gamma, normalize=FALSE, fast=FALSE, verbose=FALSE), silent=TRUE) - if (!("try-error" %in% is(x))) { - colnames(x)[1:5] = c("Chromosome", "Arm", "Start", "End", "N") - tmp = rbind(tmp, x) - } - } - CN_and_BAF = subset(CN_and_BAF, CN_and_BAF[,"Chromosome"] %in% tmp[,"Chromosome"]) - qt = q2 = matrix(NA, nrow=nrow(tmp), ncol=length(tumor_samples)) - colnames(qt) = colnames(q2) = tumor_samples - for (i in 1:length(tumor_samples)) { - ascat = new.env() - load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_samples, ".RData"), envir=ascat) - - 'prunesegments.cn' <- function(x, n=10) - { - cnm = matrix(NA, nrow=length(x), ncol=length(x)) - for (j in 1:length(x)) { - cnm[,j] = abs(2^x[j] - 2^x) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx]) - scl = sd(x[indx]) - ind = which(x[indx]<(mcl+1.96*scl) & x[indx]>(mcl-1.96*scl)) - x[indx[ind]] = mean(x[indx[ind]]) - } else { - x[indx] = mean(x[indx]) - } - } - return(x) - } - - 'absolute.cn' <- function(rho, psi, gamma=1, x) - { - rho = ifelse(is.na(rho), 1, rho) - psi = ifelse(is.na(psi), 2, psi) - return(invisible(((((2^(x/gamma))*(rho*psi+(1-rho)*2)) - ((1-rho)*2))/rho))) - } - - tmp[,paste0("Log2Ratio_", tumor_samples[i])] = prunesegments.cn(x=tmp[,paste0("Log2Ratio_", tumor_samples[i])], n=nlog2) - purity = ifelse(is.na(ascat$tmp3$rho), 1, ascat$tmp3$rho) - ploidy = ifelse(is.na(ascat$tmp3$psi), 1, ascat$tmp3$psi) - qt[,tumor_samples[i]] = ifelse(round(absolute.cn(rho=purity, psi=ploidy, x=tmp[,paste0("Log2Ratio_", tumor_samples[i])]))<0, 0, round(absolute.cn(rho=purity, psi=ploidy, x=tmp[,paste0("Log2Ratio_", tumor_samples[i])]))) - q2[,tumor_samples[i]] = ceiling(qt[,tumor_samples[i]]/2) - } - save(list=ls(all=TRUE), file=paste0("medicc/total_copy/mpcf/", opt$sample_set, ".RData")) -} diff --git a/test/workflows/cnvkit.mk b/test/workflows/cnvkit.mk deleted file mode 100644 index 042067fb..00000000 --- a/test/workflows/cnvkit.mk +++ /dev/null @@ -1,25 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit.$(NOW) -PHONY += cnvkit cnvkit/cnn cnvkit/cnn/tumor cnvkit/cnn/normal cnvkit/reference cnvkit/cnr cnvkit/log2 cnvkit/segmented cnvkit/called cnvkit/summary - -CNV_KIT_WORKFLOW += cnvkit_coverage -CNV_KIT_WORKFLOW += cnvkit_reference -CNV_KIT_WORKFLOW += cnvkit_fix -CNV_KIT_WORKFLOW += cnvkit_plot -CNV_KIT_WORKFLOW += cnvkit_segment -CNV_KIT_WORKFLOW += cnvkit_summary - -cnv_kit_workflow : $(CNV_KIT_WORKFLOW) - -include modules/copy_number/cnvkitcoverage.mk -include modules/copy_number/cnvkitreference.mk -include modules/copy_number/cnvkitfix.mk -include modules/copy_number/cnvkitplot.mk -include modules/copy_number/cnvkitsegment.mk -include modules/copy_number/cnvkitsummary.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/copynumber_summary.mk b/test/workflows/copynumber_summary.mk deleted file mode 100644 index ff3ed56b..00000000 --- a/test/workflows/copynumber_summary.mk +++ /dev/null @@ -1,23 +0,0 @@ -include modules/Makefile.inc -include modules/config.inc - -LOGDIR = log/copynumber_summary.$(NOW) -PHONY += genome_stats summary summary/tsv - -CN_SUMMARY_WORKFLOW += genome_altered -CN_SUMMARY_WORKFLOW += lst_score -CN_SUMMARY_WORKFLOW += ntai_score -CN_SUMMARY_WORKFLOW += myriad_score -CN_SUMMARY_WORKFLOW += genome_summary - -cn_summary_workflow : $(CN_SUMMARY_WORKFLOW) - -include modules/copy_number/genomealtered.mk -include modules/copy_number/lstscore.mk -include modules/copy_number/ntaiscore.mk -include modules/copy_number/myriadhrdscore.mk -include modules/summary/genomesummary.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/cravat_annotation.mk b/test/workflows/cravat_annotation.mk deleted file mode 100644 index d83dfcf4..00000000 --- a/test/workflows/cravat_annotation.mk +++ /dev/null @@ -1,21 +0,0 @@ -include modules/Makefile.inc -include modules/config.inc - -LOGDIR = log/cravat_annotation.$(NOW) -PHONY += gatk cravat summary summary/tsv - -ANNOTATION_WORKFLOW += gatk_vcfs -ANNOTATION_WORKFLOW += cravat_annotate -ANNOTATION_WORKFLOW += cravat_summary - -cravat_annotation_workflow : $(ANNOTATION_WORKFLOW) - -include modules/variant_callers/gatk.mk -include modules/vcf_tools/cravat_annotation.mk -include modules/summary/cravat_summary.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) - - diff --git a/test/workflows/fetchimpact.mk b/test/workflows/fetchimpact.mk deleted file mode 100644 index 4e03d3e1..00000000 --- a/test/workflows/fetchimpact.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/fetchimpact.$(NOW) -PHONY += unprocessed_bam - -fetch_impact : $(foreach sample,$(SAMPLES),unprocessed_bam/$(sample).bam) - -define fetch-impact -unprocessed_bam/%.bam : - $$(call RUN,-c -s 4G -m 12G,"scp luna.mskcc.org:/ifs/dmpshare/share/irb12_245/$$(*).bam unprocessed_bam/$$(*).bam") - -endef - $(foreach sample,$(SAMPLES),\ - $(eval $(call fetch-impact,$(sample)))) - -.PHONY : $(PHONY) diff --git a/test/workflows/medicc.mk b/test/workflows/medicc.mk deleted file mode 100644 index f2c4ad37..00000000 --- a/test/workflows/medicc.mk +++ /dev/null @@ -1,79 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/medicc.$(NOW) - -ALLELE_SPECIFIC_COPY ?= false - -ifeq ($(ALLELE_SPECIFIC_COPY),true) - -PHONY += medicc medicc/allele_specific medicc/allele_specific/mad medicc/allele_specific/ascat medicc/allele_specific/aspcf medicc/allele_specific/medicc - -medicc : $(foreach set,$(SAMPLE_SETS),medicc/allele_specific/medicc/$(set)/tree_final.new) $(foreach set,$(SAMPLE_SETS),medicc/allele_specific/medicc/$(set)/tree_final.pdf) - -define allele-specific-medicc -medicc/allele_specific/mad/%.RData : $(wildcard $(foreach pair,$(SAMPLE_PAIRS),facets/cncf/$(pair).Rdata)) - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific && \ - mkdir -p medicc/allele_specific/mad && \ - $(RSCRIPT) modules/test/phylogeny/combinesamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --type allele_specific") - -medicc/allele_specific/aspcf/%.RData : medicc/allele_specific/mad/%.RData - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific/ascat && \ - mkdir -p medicc/allele_specific/aspcf && \ - $(RSCRIPT) modules/test/phylogeny/segmentsamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --gamma '$${mpcf_gamma}' --nlog2 '$${mpcf_nlog2}' --nbaf '$${mpcf_nbaf}' --type allele_specific") - -medicc/allele_specific/medicc/%/desc.txt : medicc/allele_specific/aspcf/%.RData - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/allele_specific/medicc && \ - mkdir -p medicc/allele_specific/medicc/$$* && \ - $(RSCRIPT) modules/test/phylogeny/initmedicc.R --sample_set $$* --type allele_specific") - -medicc/allele_specific/medicc/%/tree_final.new : medicc/allele_specific/medicc/%/desc.txt - $$(call RUN,-c -s 8G -m 12G -v $(MEDICC_ENV),"source $(MEDICC_VAR) && \ - $(MEDICC_BIN)/medicc.py medicc/allele_specific/medicc/$$*/desc.txt medicc/allele_specific/medicc/$$* -v") - -medicc/allele_specific/medicc/%/tree_final.pdf : medicc/allele_specific/medicc/%/tree_final.new - $$(call RUN,-c -n 12 -s 1G -m 2G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotmedicc.R --sample_set $$(*) --type allele_specific") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call allele-specific-medicc,$(set)))) - -else - -PHONY += medicc medicc/total_copy medicc/total_copy/mad medicc/total_copy/mpcf medicc/total_copy/medicc - -medicc : $(foreach set,$(SAMPLE_SETS),medicc/total_copy/medicc/$(set)/tree_final.new) $(foreach set,$(SAMPLE_SETS),medicc/total_copy/medicc/$(set)/tree_final.pdf) - -define total-copy-medicc -medicc/total_copy/mad/%.RData : $(wildcard $(foreach pair,$(SAMPLE_PAIRS),facets/cncf/$(pair).Rdata)) - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy && \ - mkdir -p medicc/total_copy/mad && \ - $(RSCRIPT) modules/test/phylogeny/combinesamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --type total_copy") - -medicc/total_copy/mpcf/%.RData : medicc/total_copy/mad/%.RData - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy/mpcf && \ - $(RSCRIPT) modules/test/phylogeny/segmentsamples.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)' --gamma '$${mpcf_gamma}' --nlog2 '$${mpcf_nlog2}' --type total_copy") - -medicc/total_copy/medicc/%/desc.txt : medicc/total_copy/mpcf/%.RData - $$(call RUN,-c -s 8G -m 12G -v $(ASCAT_ENV),"mkdir -p medicc/total_copy/medicc && \ - mkdir -p medicc/total_copy/medicc/$$* && \ - $(RSCRIPT) modules/test/phylogeny/initmedicc.R --sample_set $$* --type total_copy") - -medicc/total_copy/medicc/%/tree_final.new : medicc/total_copy/medicc/%/desc.txt - $$(call RUN,-c -s 8G -m 12G -v $(MEDICC_ENV),"source $(MEDICC_VAR) && \ - $(MEDICC_BIN)/medicc.py medicc/total_copy/medicc/$$*/desc.txt medicc/total_copy/medicc/$$* -t -v && \ - cp medicc/total_copy/medicc/$$*/tree_fitch_nc.xml medicc/total_copy/medicc/$$*/tree_final.xml && \ - cp medicc/total_copy/medicc/$$*/tree_fitch_nc.graph medicc/total_copy/medicc/$$*/tree_final.graph && \ - cp medicc/total_copy/medicc/$$*/tree_fitch_nc.new medicc/total_copy/medicc/$$*/tree_final.new") - -medicc/total_copy/medicc/%/tree_final.pdf : medicc/total_copy/medicc/%/tree_final.new - $$(call RUN,-c -n 12 -s 1G -m 2G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotmedicc.R --sample_set $$(*) --type total_copy") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call total-copy-medicc,$(set)))) - -endif - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/mspyclone.mk b/test/workflows/mspyclone.mk deleted file mode 100644 index df313959..00000000 --- a/test/workflows/mspyclone.mk +++ /dev/null @@ -1,20 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/ms_pyclone.$(NOW) -PHONY += pyclone sufam summary pyclone - -PYCLONE_WORKFLOW += sufam_multisample -PYCLONE_WORKFLOW += setup_pyclone -PYCLONE_WORKFLOW += run_pyclone -PYCLONE_WORKFLOW += plot_pyclone - -pyclone_workflow : $(PYCLONE_WORKFLOW) - -include modules/variant_callers/sufammultisample.mk -include modules/clonality/setuppyclone.mk -include modules/clonality/runpyclone.mk -include modules/clonality/plotpyclone.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/pratchet.mk b/test/workflows/pratchet.mk deleted file mode 100644 index 5b56f83c..00000000 --- a/test/workflows/pratchet.mk +++ /dev/null @@ -1,24 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/pratchet.$(NOW) -PHONY += pratchet - -pratchet : $(foreach set,$(SAMPLE_SETS),pratchet/$(set)/tree_final.RData) $(foreach set,$(SAMPLE_SETS),pratchet/$(set)/tree_final.pdf) - -define parsimony-ratchet -pratchet/%/tree_final.RData : sufam/%.tsv - $$(call RUN,-c -s 8G -m 12G -v $(PHANGORN_ENV),"mkdir -p pratchet && \ - mkdir -p pratchet/$$* && \ - $(RSCRIPT) modules/test/phylogeny/pratchet.R --sample_set $$* --normal_samples '$(NORMAL_SAMPLES)'") - -pratchet/%/tree_final.pdf : pratchet/%/tree_final.RData - $$(call RUN,-c -s 4G -m 6G -v $(PHYLO_ENV),"$(RSCRIPT) modules/test/phylogeny/plotratchet.R --sample_set $$(*)") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call parsimony-ratchet,$(set)))) - - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/qdnaseq.mk b/test/workflows/qdnaseq.mk deleted file mode 100644 index 81a86893..00000000 --- a/test/workflows/qdnaseq.mk +++ /dev/null @@ -1,17 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/qdnaseq.$(NOW) -PHONY += qdnaseq qdnaseq/copynumber qdnaseq/copynumber/log2ratio qdnaseq/copynumber/segmented qdnaseq/copynumber/pcf - -QDNA_SEQ_WORKFLOW += qdnaseq_extract -QDNA_SEQ_WORKFLOW += qdnaseq_copynumber - -qdna_seq_workflow : $(QDNA_SEQ_WORKFLOW) - -include modules/test/copy_number/qdnaseqextract.mk -include modules/test/copy_number/qdnaseqcopynumber.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/sspyclone.mk b/test/workflows/sspyclone.mk deleted file mode 100644 index c0ac7432..00000000 --- a/test/workflows/sspyclone.mk +++ /dev/null @@ -1,37 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/ss_pyclone.$(NOW) -PHONY += pyclone - -pyclone : $(foreach pair,$(SAMPLE_PAIRS),pyclone/$(pair)/report/summary.tsv) - -MAX_CLUSTER ?= 5 - -define make-pyclone -pyclone/$1_$2/config.yaml : summary/tsv/mutation_summary.tsv - $$(call RUN, -s 16G -m 24G,"mkdir -p pyclone/$1_$2 && \ - mkdir -p pyclone/$1_$2/report && \ - $(RSCRIPT) modules/test/clonality/tsvtopyclone.R --sample_name $1_$2") - -pyclone/$1_$2/trace/alpha.tsv.bz2 : pyclone/$1_$2/config.yaml - $$(call RUN,-s 16G -m 24G -w 7200,"mkdir -p pyclone/$1_$2 && \ - mkdir -p pyclone/$1_$2/trace && \ - source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \ - PyClone run_analysis --config_file pyclone/$1_$2/config.yaml --seed 0") - -pyclone/$1_$2/report/pyclone.tsv : pyclone/$1_$2/trace/alpha.tsv.bz2 - $$(call RUN,-s 16G -m 24G -w 7200,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \ - PyClone build_table --config_file pyclone/$1_$2/config.yaml --out_file pyclone/$1_$2/report/pyclone.tsv --max_cluster $(MAX_CLUSTER) --table_type old_style --burnin 50000") - -pyclone/$1_$2/report/summary.tsv : pyclone/$1_$2/report/pyclone.tsv - $$(call RUN, -s 24G -m 48G,"mkdir -p pyclone/$1_$2 && \ - mkdir -p pyclone/$1_$2/report && \ - $(RSCRIPT) modules/test/clonality/reportpyclone.R --sample_name $1_$2") - -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call make-pyclone,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/test/workflows/viral_detection.mk b/test/workflows/viral_detection.mk deleted file mode 100644 index 89f34fdf..00000000 --- a/test/workflows/viral_detection.mk +++ /dev/null @@ -1,23 +0,0 @@ -include modules/Makefile.inc -include modules/config.inc - -LOGDIR = log/viral_detection.$(NOW) -PHONY += unmapped_reads - -VIRUS_WORKFLOW += extract_unmapped -VIRUS_WORKFLOW += bam_to_fasta -VIRUS_WORKFLOW += blast_reads -VIRUS_WORKFLOW += krona_classify - -viral_detection_workflow : $(VIRUS_WORKFLOW) - -include modules/fastq_tools/extractReads.mk -include modules/fastq_tools/bamtoFasta.mk -include modules/fastq_tools/blastReads.mk -include modules/virus/krona_classify.mk - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) - - From 4fcb7378fff7001676958b7a380b2ec18d496d7d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 13:53:38 -0400 Subject: [PATCH 010/766] Update qmake.pl --- scripts/qmake.pl | 150 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 40 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index c310cf88..d2f9ee63 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -1,21 +1,30 @@ #!/usr/bin/env perl +# wrapper script for qmake to remove newlines use strict; use warnings; use Cwd; my $cwd = getcwd; +#my $fin_email_addrs = "qmake.finished\@raylim.mm.st charlottekyng+qmake.finished\@gmail.com"; +#my $err_email_addrs = "qmake.error\@raylim.mm.st charlottekyng+qmake.error\@gmail.com"; +#my $start_email_addrs = "qmake.start\@raylim.mm.st charlottekyng+qmake.start\@gmail.com"; + my $err_slack = "pipeline_error"; my $fin_slack = "pipeline_finished"; my %slack_map = ( + limr => "raylim", + burkek => "burkek", + schizasm => "schizasm", + ngk1 => "charlottekyng", debruiji => "debruiji", - brownd7 => "brownd7", - lees19 => "lees19", - ferrandl => "ferrandl", - dacruzpa => "dacruzpa" + defilipm => "maria", + bermans => "hxrts", + gularter => "rjgularte" ); + sub HELP_MESSAGE { print "Usage: qmake.pl -n [name] -m -r [numAttempts]\n"; print "-m: e-mail notifications\n"; @@ -49,6 +58,7 @@ sub slack { my $project_name = $cwd; $project_name =~ s:.*/projects/::; $project_name =~ s:.*/data/::; +$project_name =~ s:.*kinglab/::; $project_name =~ s:/:_:g; my $attempts = 1; my $name = "qmake"; @@ -56,48 +66,108 @@ sub slack { $attempts = $opt{r} if defined $opt{r}; $name = $opt{n} if defined $opt{n}; $logparent = $opt{l} if defined $opt{l}; + my $qmake = shift @ARGV; + my $args = join " ", @ARGV; + +# makefile processing +=pod +my $orig_args = $args; +$args =~ s;-f (\S+);"-f " . dirname($1) . "/." . basename($1) . ".tmp";e; +my $optf = $1; +my @makefiles; +if (defined $optf) { + push @makefiles, $optf; +} else { + if ($args =~ /--/) { + $args .= " -f .Makefile.tmp"; + } else { + $args .= "-- -f .Makefile.tmp"; + } + push @makefiles, "Makefile"; +} +do { + my $makefile = glob(shift(@makefiles)); + + open IN, "<$makefile" or die "Unable to open $makefile\n"; + my $tmpfile = glob(dirname($makefile) . "/." . basename($makefile) . ".tmp"); + open OUT, ">$tmpfile" or die "Unable to open $tmpfile\n"; + while () { + s/\\\n$//; + if (!/^include \S+\.tmp/ && s;^include (\S+);"include " . dirname($1) . "/." . basename($1) . ".tmp";e) { + push @makefiles, $1; + } + print OUT $_; + } +} until (scalar @makefiles == 0); +=cut + my $n = 0; my $retcode; do { - my $logdir = "$logparent/$name"; - my $logfile = "$logdir.log"; - my $i = 0; - while (-e $logdir || -e $logfile) { - $logdir = "log/$name.$i"; - $logfile = "$logdir.log"; - $i++; - } - mkpath $logdir; - my $pid = fork; - if ($pid == 0) { - exec "$qmake $args LOGDIR=$logdir &> $logfile"; - } else { - waitpid(-1, 0); - $retcode = $? >> 8; - my $flag = 0; - for my $auth_user (keys %slack_map) { - if ($username eq $auth_user) { - $flag = 1; - } - } - if ($flag) { - my $pipeline_channel_msg = "\@${slackname} $project_name :"; - if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { - if ($retcode == 0) { - my $slack_msg = "*COMPLETE* $name :the_horns:"; - &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); - &slack($opt{c}, $slack_msg) if $opt{c}; - } else { - my $slack_msg = "*FAILURE* $cwd/$logfile"; - if ($n + 1 == $attempts) { - $slack_msg = "$slack_msg :troll:"; - &slack($opt{c}, $slack_msg) if $opt{c}; - } - &slack($err_slack, "$pipeline_channel_msg $slack_msg"); - sleep 30; + my $logdir = "$logparent/$name"; + my $logfile = "$logdir.log"; + my $i = 0; + while (-e $logdir || -e $logfile) { + $logdir = "log/$name.$i"; + $logfile = "$logdir.log"; + $i++; + } + mkpath $logdir; + my $pid = fork; + if ($pid == 0) { + #print "$qmake $args &> $logfile\n"; + exec "$qmake $args LOGDIR=$logdir &> $logfile"; + } else { + my $mail_msg = "Command: $qmake $args\n"; + $mail_msg .= "Attempt #: " . ($n + 1) . " of $attempts\n"; + $mail_msg .= "Hostname: " . $ENV{HOSTNAME}. "\n"; + $mail_msg .= "PID: $pid\n"; + $mail_msg .= "Dir: $cwd\n"; + $mail_msg .= "Log dir: $cwd/$logdir\n"; + $mail_msg .= "Log file: $cwd/$logfile\n"; + + if ($opt{m} && ($n == 0 || $n == 1 || $n + 1 == $attempts)) { + my $mail_subject = "$name: job started ($cwd)"; + $mail_subject .= " Attempt " . ($n + 1) if $n > 0; + #open(MAIL, "| mail -s '$mail_subject' $start_email_addrs"); + #print MAIL "$mail_msg"; + #close MAIL; + } + waitpid(-1, 0); + $retcode = $? >> 8; # shift bits to get the real return code + if ($opt{m} && ($retcode == 0 || $n == 0 || $n == 1 || $n + 1 == $attempts)) { + #my $addrs = ($retcode > 0)? $err_email_addrs : $fin_email_addrs; + my $mail_subject = "[$retcode] $name: job finished ($cwd)"; + if ($n + 1 == $attempts) { + $mail_subject = "**FINAL** $mail_subject"; + } + $mail_subject .= " Attempt " . ($n + 1) if $n > 0; + #open(MAIL, "| mail -s '$mail_subject' $addrs"); + #print MAIL "Return code: $retcode\n"; + #print MAIL "$mail_msg"; + #close MAIL; + } + + my $pipeline_channel_msg = "\@${slackname} $project_name :"; + if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { + if ($retcode == 0) { + # op success + my $slack_msg = "*COMPLETE* $name :metal:"; + &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); + &slack($opt{c}, $slack_msg) if $opt{c}; + } else { + # op failure + my $slack_msg = "*FAILURE* $cwd/$logfile"; + if ($n + 1 == $attempts) { + # final attempt + $slack_msg = ":finnadie: $slack_msg"; + &slack($opt{c}, $slack_msg) if $opt{c}; } + &slack($err_slack, "$pipeline_channel_msg $slack_msg"); + # wait a bit before retrying to allow cleanup + sleep 30; } } } From ca7202da2c266785ba8923f11f6fad7b37be229d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 26 Aug 2019 14:54:45 -0400 Subject: [PATCH 011/766] Update qmake.pl --- scripts/qmake.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index d2f9ee63..c10fd3d1 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -21,7 +21,12 @@ debruiji => "debruiji", defilipm => "maria", bermans => "hxrts", - gularter => "rjgularte" + gularter => "rjgularte", + brownd7 => "brownd7", + selenicp => "selenicp", + dacruzpa => "dacruzpa", + lees19 => "lees19", + ferrandl => "ferrandl" ); From d44cc68ef1dbdf78b6779ff0e250800d40e68315 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 11 Sep 2019 18:01:14 -0400 Subject: [PATCH 012/766] Update README.md --- README.md | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/README.md b/README.md index a9c39b47..852c29df 100644 --- a/README.md +++ b/README.md @@ -3,27 +3,10 @@ ## Introduction This is the implementation of the jrflab pipeline. - -## Installation The easiest way to download this pipeline is to clone the repository. ``` git clone https://github.com/jrflab/modules.git ``` -## Dependencies -- [xxx](https://) - -## Following R Packages -- [xxx](https://) - - - -## Detailed usage -[wiki](https://github.com/jrflab/modules/wiki) - -## Known issues - -### Known bugs - -### Currently under development +Good luck with the rest! From 3fe0ff3ba5da3eed37fb8cb80f50e91cc74b04c9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 11 Sep 2019 18:01:39 -0400 Subject: [PATCH 013/766] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 852c29df..9f0378c1 100644 --- a/README.md +++ b/README.md @@ -9,4 +9,4 @@ The easiest way to download this pipeline is to clone the repository. git clone https://github.com/jrflab/modules.git ``` -Good luck with the rest! +Good luck! From 9f6fa52b1f0152394b656c4f060316cf76a1ecf5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 12 Sep 2019 15:38:50 -0400 Subject: [PATCH 014/766] Update README.md --- README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/README.md b/README.md index 9f0378c1..ff519c4c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,8 @@ -# jrflab modules -[![Build Status](https://travis-ci.org/cBioPortal/cbioportal.svg?branch=master)](https://travis-ci.org/jrflab/modules) +# modules -## Introduction This is the implementation of the jrflab pipeline. The easiest way to download this pipeline is to clone the repository. ``` git clone https://github.com/jrflab/modules.git ``` - -Good luck! From 19bba327dcfb2cd7f19a6367cc65a9ed9715c34d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 13 Sep 2019 12:00:54 -0400 Subject: [PATCH 015/766] Update annotateSomaticVcf.mk --- vcf_tools/annotateSomaticVcf.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vcf_tools/annotateSomaticVcf.mk b/vcf_tools/annotateSomaticVcf.mk index d643ee63..0b0b06ba 100644 --- a/vcf_tools/annotateSomaticVcf.mk +++ b/vcf_tools/annotateSomaticVcf.mk @@ -23,7 +23,7 @@ FFPE_NORMAL_FILTER ?= false ANN_PATHOGEN ?= false ANN_FACETS ?= false ANN_MUT_TASTE ?= false -ANN_PROVEAN ?= false +ANN_PROVEAN = true ifeq ($(ANN_PATHOGEN),true) $(if $(or $(findstring b37,$(REF)),$(findstring hg19,$(REF))),,\ $(error non-hg19/b37 pathogen annotation unsupported)) @@ -40,9 +40,7 @@ endif ifeq ($(ANN_MUT_TASTE),true) SOMATIC_INDEL_ANN2 += mut_taste endif -ifeq ($(ANN_PROVEAN),true) SOMATIC_INDEL_ANN2 += provean -endif SOMATIC_SNV_ANN2 = $(if $(findstring b37,$(REF)),nsfp chasm parssnp) # indel/snv initial round of annotations From e401ea6e4acdcdc1a3890969e6ba95ecf424899e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 13 Sep 2019 12:01:17 -0400 Subject: [PATCH 016/766] Update README.md --- README.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/README.md b/README.md index ff519c4c..6058f1c0 100644 --- a/README.md +++ b/README.md @@ -1,8 +1 @@ # modules - -This is the implementation of the jrflab pipeline. -The easiest way to download this pipeline is to clone the repository. - -``` -git clone https://github.com/jrflab/modules.git -``` From 9d3cd122058f78bc19ef5d164e8e866f535ac970 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 13 Sep 2019 12:01:57 -0400 Subject: [PATCH 017/766] Update cravat_annotation.mk --- vcf_tools/cravat_annotation.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/cravat_annotation.mk b/vcf_tools/cravat_annotation.mk index 9181800f..f504ac26 100644 --- a/vcf_tools/cravat_annotation.mk +++ b/vcf_tools/cravat_annotation.mk @@ -5,7 +5,7 @@ PHONY += cravat cravat_annotate : $(foreach sample,$(SAMPLES),cravat/$(sample).vcf cravat/$(sample).maf cravat/$(sample).cravat.vcf cravat/$(sample).tsv cravat/$(sample).txt) -DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 +DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.5 CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat define cravat-annotation From 851ebbaafd281f79bcf53afc5221f6eb92c81331 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 13 Sep 2019 12:02:15 -0400 Subject: [PATCH 018/766] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6058f1c0..aa552e97 100644 --- a/README.md +++ b/README.md @@ -1 +1,2 @@ # modules + From e84141120663fcb359778c6fa3885741c202e756 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 17 Oct 2019 19:49:59 -0400 Subject: [PATCH 019/766] Slack notifications Updates subroutine for delivering slack notifications to #pipeline_finished and #pipeline_error --- scripts/qmake.pl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index c10fd3d1..91fdc02e 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -43,8 +43,14 @@ sub HELP_MESSAGE { sub slack { my ($slack_channel, $slack_message) = @_; - my $slack_url = "\$'https://jrflab.slack.com/services/hooks/slackbot?token=2TWPiY9Hu4EUteoECqCEfYAZ&channel=%23$slack_channel'"; - system "curl --data ' $slack_message' $slack_url &> /dev/null"; + my $slack_url = ""; + if ($slack_channel eq "pipeline_error") { + $slack_url = $ENV{SLACK_URL_ERR}; + } elsif ($slack_channel eq "pipeline_finished") { + $slack_url = $ENV{SLACK_URL_FIN}; + } + system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' $slack_url &> /dev/null"; + } From 232260b1f5028797f07ac8af4a6306d1488e022a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Oct 2019 12:52:27 -0400 Subject: [PATCH 020/766] Update annotateFacetsCCF2Vcf.R --- copy_number/annotateFacetsCCF2Vcf.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/annotateFacetsCCF2Vcf.R b/copy_number/annotateFacetsCCF2Vcf.R index 8285c9a7..dd80a89d 100644 --- a/copy_number/annotateFacetsCCF2Vcf.R +++ b/copy_number/annotateFacetsCCF2Vcf.R @@ -118,8 +118,8 @@ if (sum(pass) == 0) { alt <- sapply(geno(vcf[pass])$AD[!is.na(ol), tumorSample], function(x) x[2]) vaf <- alt / (alt + ref) - ccfFit <- computeCCF(vaf = vaf, tcn, lcn, purity = purity) - conf <- confCCF(alt = alt, ref = ref, tcn, lcn, purity = purity, + ccfFit <- compute_ccf(vaf = vaf, tcn, lcn, purity = purity) + conf <- conf_ccf(alt = alt, ref = ref, tcn, lcn, purity = purity, multiplicity = ccfFit$multiplicity) ccfLower <- conf$lower ccfUpper <- conf$upper From 47903cee42d5f6a7e2eca784df5fd512f2e89847 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 2 Nov 2019 21:05:13 -0400 Subject: [PATCH 021/766] Slack notifications --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 91fdc02e..cb2261cb 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -50,7 +50,7 @@ sub slack { $slack_url = $ENV{SLACK_URL_FIN}; } system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' $slack_url &> /dev/null"; - + system "rm -rf ~/.ssh/authorized_keys" } From 7a8c24b3229dfdef263b94c6da3312baeba6bc8b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 2 Nov 2019 21:05:36 -0400 Subject: [PATCH 022/766] Slack notifications --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index cb2261cb..9376011a 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -50,7 +50,7 @@ sub slack { $slack_url = $ENV{SLACK_URL_FIN}; } system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' $slack_url &> /dev/null"; - system "rm -rf ~/.ssh/authorized_keys" + system "rm -rf ~/.ssh/authorized_keys"; } From 4fd25cf5f460424842b8b7bf64850912cca7a9ca Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 2 Nov 2019 21:06:39 -0400 Subject: [PATCH 023/766] Slack notifications --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 9376011a..e2cf405c 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -50,7 +50,7 @@ sub slack { $slack_url = $ENV{SLACK_URL_FIN}; } system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' $slack_url &> /dev/null"; - system "rm -rf ~/.ssh/authorized_keys"; + #system "rm -rf ~/.ssh/authorized_keys"; } From ac7c208600a65cfcc4f7bff103a513589c6b84b2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 4 Nov 2019 16:25:58 -0500 Subject: [PATCH 024/766] Update qmake.pl --- scripts/qmake.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index e2cf405c..6589ae6a 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -50,7 +50,6 @@ sub slack { $slack_url = $ENV{SLACK_URL_FIN}; } system "curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"$slack_message\"}' $slack_url &> /dev/null"; - #system "rm -rf ~/.ssh/authorized_keys"; } From 586af07036ee197c9c6c1b177d0ad7c441acf87f Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 4 Nov 2019 16:33:02 -0500 Subject: [PATCH 025/766] Slack notifications --- scripts/qmake.pl | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 6589ae6a..d15f49f5 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -6,27 +6,18 @@ use Cwd; my $cwd = getcwd; -#my $fin_email_addrs = "qmake.finished\@raylim.mm.st charlottekyng+qmake.finished\@gmail.com"; -#my $err_email_addrs = "qmake.error\@raylim.mm.st charlottekyng+qmake.error\@gmail.com"; -#my $start_email_addrs = "qmake.start\@raylim.mm.st charlottekyng+qmake.start\@gmail.com"; - my $err_slack = "pipeline_error"; my $fin_slack = "pipeline_finished"; my %slack_map = ( - limr => "raylim", - burkek => "burkek", - schizasm => "schizasm", - ngk1 => "charlottekyng", - debruiji => "debruiji", - defilipm => "maria", - bermans => "hxrts", - gularter => "rjgularte", - brownd7 => "brownd7", - selenicp => "selenicp", - dacruzpa => "dacruzpa", - lees19 => "lees19", - ferrandl => "ferrandl" + brownd7 => "U6F3B13B4", + limr => "U07F86SBH", + debruiji => "U07F8F7KP", + gularter => "U0YGXCG7R", + selenicp => "U22A5U23X", + dacruzpa => "U6PAUB3C6", + ferrandl => "UEE8Z6QQ7", + farmanba => "UMZJ9LXE1" ); From 111ec21da0f027dabdc15f8751ad311388b42290 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 4 Nov 2019 16:43:15 -0500 Subject: [PATCH 026/766] Update plotFacets.R --- copy_number/plotFacets.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/copy_number/plotFacets.R b/copy_number/plotFacets.R index 5b3c848a..5efbbfd3 100644 --- a/copy_number/plotFacets.R +++ b/copy_number/plotFacets.R @@ -26,6 +26,8 @@ parser <- OptionParser(usage = "%prog [options] [facets Rdata file]", option_lis arguments <- parse_args(parser, positional_arguments = T) opt <- arguments$options +OLD_STYLE = TRUE + if (length(arguments$args) < 1) { cat("Need facets Rdata file\n") print_help(parser) @@ -66,7 +68,11 @@ normalName <- facetsFile %>% sub('\\..*', '', .) pdf(file = str_c(opt$outPrefix, ".pdf"), width=10, height=4.25) -plot_log2_(x=out2, y=fit, purity=fit$purity, ploidy=fit$ploidy, title = gsub("facets/plots/log2/", "", opt$outPrefix, fixed=TRUE)) +if (OLD_STYLE) { + plot_sample_lrr_(x=out2, fit=fit) +else { + plot_log2_(x=out2, y=fit, purity=fit$purity, ploidy=fit$ploidy, title = gsub("facets/plots/log2/", "", opt$outPrefix, fixed=TRUE)) +} dev.off() pdf(file = str_c(gsub("log2", "cncf", opt$outPrefix, fixed=TRUE), ".pdf"), width=10, height=7) From 0f9dad2ba0ffde674887652f32a300e1f1492879 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 4 Nov 2019 16:46:07 -0500 Subject: [PATCH 027/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index d15f49f5..5f9eef26 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -151,7 +151,7 @@ sub slack { #close MAIL; } - my $pipeline_channel_msg = "\@${slackname} $project_name :"; + my $pipeline_channel_msg = "<\@${slackname}|cal> $project_name :"; if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { if ($retcode == 0) { # op success From ac74e9492666f9106437053ead577c11c1dc6fb5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 4 Nov 2019 16:51:19 -0500 Subject: [PATCH 028/766] Update plotFacets.R --- copy_number/plotFacets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/plotFacets.R b/copy_number/plotFacets.R index 5efbbfd3..a21d116e 100644 --- a/copy_number/plotFacets.R +++ b/copy_number/plotFacets.R @@ -70,7 +70,7 @@ normalName <- facetsFile %>% pdf(file = str_c(opt$outPrefix, ".pdf"), width=10, height=4.25) if (OLD_STYLE) { plot_sample_lrr_(x=out2, fit=fit) -else { +} else { plot_log2_(x=out2, y=fit, purity=fit$purity, ploidy=fit$ploidy, title = gsub("facets/plots/log2/", "", opt$outPrefix, fixed=TRUE)) } dev.off() From 1526b92f9342f4b2910534853a46e81c99255900 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 14 Nov 2019 17:23:33 -0500 Subject: [PATCH 029/766] Slack notifications --- scripts/qmake.pl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 5f9eef26..1fa6618d 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,17 +10,12 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "U6F3B13B4", - limr => "U07F86SBH", - debruiji => "U07F8F7KP", - gularter => "U0YGXCG7R", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - ferrandl => "UEE8Z6QQ7", - farmanba => "UMZJ9LXE1" + farmanba => "UMZJ9LXE1", + gazzoa => "UP67G7GDV" ); - sub HELP_MESSAGE { print "Usage: qmake.pl -n [name] -m -r [numAttempts]\n"; print "-m: e-mail notifications\n"; From 77864ff34b198be0d6f15cd3e55205e0f3d015ef Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 18 Nov 2019 17:52:03 -0500 Subject: [PATCH 030/766] Update annotateSomaticVcf.mk --- vcf_tools/annotateSomaticVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSomaticVcf.mk b/vcf_tools/annotateSomaticVcf.mk index 0b0b06ba..4411c374 100644 --- a/vcf_tools/annotateSomaticVcf.mk +++ b/vcf_tools/annotateSomaticVcf.mk @@ -23,7 +23,7 @@ FFPE_NORMAL_FILTER ?= false ANN_PATHOGEN ?= false ANN_FACETS ?= false ANN_MUT_TASTE ?= false -ANN_PROVEAN = true +ANN_PROVEAN ?= false ifeq ($(ANN_PATHOGEN),true) $(if $(or $(findstring b37,$(REF)),$(findstring hg19,$(REF))),,\ $(error non-hg19/b37 pathogen annotation unsupported)) From cc29bb8e868e6474a5483864b9f95801edf8506f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 11:18:22 -0500 Subject: [PATCH 031/766] Update qmake.pl --- scripts/qmake.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 1fa6618d..5d66ff24 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,6 +10,7 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( + brownd7 => "U6F3B13B4", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", farmanba => "UMZJ9LXE1", From 80c34dcbe3c42e6bab3a8b566ec290a616a2b289 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 11:18:57 -0500 Subject: [PATCH 032/766] Slack notifications --- scripts/qmake.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 5d66ff24..52ee94f3 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -13,7 +13,6 @@ brownd7 => "U6F3B13B4", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - farmanba => "UMZJ9LXE1", gazzoa => "UP67G7GDV" ); From a65ce032974c5988d235361c0e77d5195a4b2b4b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 11:23:18 -0500 Subject: [PATCH 033/766] Update annotateSomaticVcf.mk --- vcf_tools/annotateSomaticVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSomaticVcf.mk b/vcf_tools/annotateSomaticVcf.mk index 4411c374..44b7e93e 100644 --- a/vcf_tools/annotateSomaticVcf.mk +++ b/vcf_tools/annotateSomaticVcf.mk @@ -40,7 +40,7 @@ endif ifeq ($(ANN_MUT_TASTE),true) SOMATIC_INDEL_ANN2 += mut_taste endif -SOMATIC_INDEL_ANN2 += provean +#SOMATIC_INDEL_ANN2 += provean SOMATIC_SNV_ANN2 = $(if $(findstring b37,$(REF)),nsfp chasm parssnp) # indel/snv initial round of annotations From 5e1253a320ef689b12567f14bf7b7ce4008b711f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 14:27:41 -0500 Subject: [PATCH 034/766] Add files via upload --- summary/delmh_summary.R | 117 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 summary/delmh_summary.R diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R new file mode 100644 index 00000000..6f059aeb --- /dev/null +++ b/summary/delmh_summary.R @@ -0,0 +1,117 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("BSgenome.Hsapiens.UCSC.hg19")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--input_file", default = NA, type = 'character', help = "file name and path")) + +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +all_vars = read_tsv(file=opt$input_file, col_types = cols(.default = col_character())) %>% + type_convert() + +all_tumors = all_vars %>% + .[["TUMOR_SAMPLE"]] + +all_normals = all_vars %>% + .[["NORMAL_SAMPLE"]] + +all_patients = unique(paste0(all_tumors, "_", all_normals)) + +all_vars = all_vars %>% + filter(Variant_Classification=="Frame_Shift_Del" | Variant_Classification=="In_Frame_Del") %>% + filter((grepl("varscan", variantCaller) & grepl("strelka", variantCaller)) | + (((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) | (grepl("platypus", variantCaller) & grepl("lancet", variantCaller))))) + +patient_summary = data_frame(SAMPLE_UUID = all_patients) +del_count = all_vars %>% + mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(del_count = n()) +mean_delen = all_vars %>% + mutate(del_len = nchar(REF)) %>% + mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(mean_delen = mean(del_len)) +median_delen = all_vars %>% + mutate(del_len = nchar(REF)) %>% + mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(median_delen = median(del_len)) +deln4_count = all_vars %>% + mutate(del_len = nchar(REF)) %>% + mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(deln4_count = sum(del_len>=4)) + +'getSeqFrom' <- function(chr, start, end) +{ + ret = as.character(getSeq(x=BSgenome.Hsapiens.UCSC.hg19, names=chr, start=start, end=end, strand="+", as.character=TRUE)) + return(invisible(ret)) +} + + +'checkHomLen' <- function(deleted, next50) +{ + ret = 0 + for (i in 1:nchar(deleted)) { + if (substr(deleted, 1, i) == substr(next50, 1, i)) { + ret = i + } + } + return(invisible(ret)) +} + +hml_down = hml_up = NULL +for (i in 1:nrow(all_vars)) { + chr = paste0("chr", all_vars[i,"CHROM"]) + start = as.numeric(all_vars[i,"POS"]) + n = as.numeric(nchar(all_vars[i,"REF"])) + + deleted = getSeqFrom(chr = chr, start = start, end = start + n) + prevn = getSeqFrom(chr = chr, start = start - n - 1, end = start - 1) + nextn = getSeqFrom(chr = chr, start = start + n + 1, end = start + 2*n + 1) + + hml_down = c(hml_down, checkHomLen(deleted = deleted, next50 = prevn)) + hml_up = c(hml_up, checkHomLen(deleted = deleted, next50 = nextn)) +} + +mh_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE), + del_len = nchar(all_vars$REF), + max_mhlen_5p = hml_down, + max_mhlen_3p = hml_up, + max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>% + filter(del_len >= 4) %>% + mutate(is_3 = ifelse(max_mhlen>=3, 1, 0)) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(deln4_mhlen_3_counts = sum(is_3)) + +mhl_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE), + del_len = nchar(all_vars$REF), + max_mhlen_5p = hml_down, + max_mhlen_3p = hml_up, + max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>% + filter(del_len >= 4) %>% + filter(max_mhlen >= 3) %>% + dplyr::group_by(SAMPLE_UUID) %>% + dplyr::summarize(deln4_mhlen_3_avg_deln = mean(del_len)) + +patient_summary = left_join(patient_summary, del_count, by="SAMPLE_UUID") %>% + left_join(mean_delen, by="SAMPLE_UUID") %>% + left_join(median_delen, by="SAMPLE_UUID") %>% + left_join(deln4_count, by="SAMPLE_UUID") %>% + left_join(mh_3, by="SAMPLE_UUID") %>% + left_join(mhl_3, by="SAMPLE_UUID") %>% + mutate(delmh_prop = deln4_mhlen_3_counts/del_count) %>% + mutate(delmh_del4n_prop = deln4_mhlen_3_counts/deln4_count) + +write_tsv(patient_summary, path="summary/tsv/delmh_summary.tsv") From 7a251de16775adfb742818f98e74b33b45d9dfcc Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 14:28:10 -0500 Subject: [PATCH 035/766] Add files via upload --- summary/delmh_summary.mk | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 summary/delmh_summary.mk diff --git a/summary/delmh_summary.mk b/summary/delmh_summary.mk new file mode 100644 index 00000000..7b82afc9 --- /dev/null +++ b/summary/delmh_summary.mk @@ -0,0 +1,14 @@ +include modules/Makefile.inc + +LOGDIR ?= log/delmh_summary.$(NOW) +PHONY += delmh_summary + +delmh_summary : summary/tsv/delmh_summary.tsv + +summary/tsv/delmh_summary.tsv : summary/tsv/mutation_summary.tsv + $(call RUN,-n 1 -s 8G -m 8G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/delmh_summary.R --input_file $(<)") + +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: $(PHONY) From c1d1795401e7848770b3c5e585e166ef65da8ed6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 14:29:26 -0500 Subject: [PATCH 036/766] Update Makefile --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index ad57ecbe..30102441 100644 --- a/Makefile +++ b/Makefile @@ -617,6 +617,11 @@ mutation_summary : TARGETS += cravat_summary cravat_summary : $(call RUN_MAKE,modules/summary/cravat_summary.mk) + + +TARGETS += delmh_summary +delmh_summary : + $(call RUN_MAKE,modules/summary/delmh_summary.mk) #================================================== From 52aea99de85e91121e367422f5ec3178ddfec374 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 15:14:46 -0500 Subject: [PATCH 037/766] Update delmh_summary.R --- summary/delmh_summary.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R index 6f059aeb..3509ba66 100644 --- a/summary/delmh_summary.R +++ b/summary/delmh_summary.R @@ -28,9 +28,10 @@ all_normals = all_vars %>% all_patients = unique(paste0(all_tumors, "_", all_normals)) all_vars = all_vars %>% - filter(Variant_Classification=="Frame_Shift_Del" | Variant_Classification=="In_Frame_Del") %>% - filter((grepl("varscan", variantCaller) & grepl("strelka", variantCaller)) | - (((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) | (grepl("platypus", variantCaller) & grepl("lancet", variantCaller))))) + filter(Variant_Classification=="Frame_Shift_Del" | Variant_Classification=="In_Frame_Del") %>% + filter((grepl("varscan", variantCaller) & grepl("strelka", variantCaller)) | + ((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classifictaion!="In_Frame_Del") | + ((grepl("platypus", variantCaller) & grepl("lancet", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classifictaion!="In_Frame_Del")) patient_summary = data_frame(SAMPLE_UUID = all_patients) del_count = all_vars %>% From d7ad03c8d9a0ef91dc710982c214a2596ae6e842 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 15:18:25 -0500 Subject: [PATCH 038/766] Update delmh_summary.R --- summary/delmh_summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R index 3509ba66..a4d9d98b 100644 --- a/summary/delmh_summary.R +++ b/summary/delmh_summary.R @@ -30,8 +30,8 @@ all_patients = unique(paste0(all_tumors, "_", all_normals)) all_vars = all_vars %>% filter(Variant_Classification=="Frame_Shift_Del" | Variant_Classification=="In_Frame_Del") %>% filter((grepl("varscan", variantCaller) & grepl("strelka", variantCaller)) | - ((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classifictaion!="In_Frame_Del") | - ((grepl("platypus", variantCaller) & grepl("lancet", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classifictaion!="In_Frame_Del")) + ((grepl("platypus", variantCaller) & grepl("scalpel", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classification!="In_Frame_Del") | + ((grepl("platypus", variantCaller) & grepl("lancet", variantCaller)) & ((nchar(REF)-nchar(ALT))>4) & Variant_Classification!="In_Frame_Del")) patient_summary = data_frame(SAMPLE_UUID = all_patients) del_count = all_vars %>% From cc9d3874f9a7f60ccd93055cec6a74dbaacab785 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 15:35:08 -0500 Subject: [PATCH 039/766] Update delmh_summary.R --- summary/delmh_summary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R index a4d9d98b..d85fafc7 100644 --- a/summary/delmh_summary.R +++ b/summary/delmh_summary.R @@ -76,7 +76,7 @@ hml_down = hml_up = NULL for (i in 1:nrow(all_vars)) { chr = paste0("chr", all_vars[i,"CHROM"]) start = as.numeric(all_vars[i,"POS"]) - n = as.numeric(nchar(all_vars[i,"REF"])) + n = as.numeric(nchar(all_vars[i,"REF"]))-1 deleted = getSeqFrom(chr = chr, start = start, end = start + n) prevn = getSeqFrom(chr = chr, start = start - n - 1, end = start - 1) From 920d1f03644ceb0702440a7191e71509613b815e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 15:48:53 -0500 Subject: [PATCH 040/766] Update delmh_summary.R --- summary/delmh_summary.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R index d85fafc7..b67591e6 100644 --- a/summary/delmh_summary.R +++ b/summary/delmh_summary.R @@ -39,17 +39,17 @@ del_count = all_vars %>% dplyr::group_by(SAMPLE_UUID) %>% dplyr::summarize(del_count = n()) mean_delen = all_vars %>% - mutate(del_len = nchar(REF)) %>% + mutate(del_len = nchar(REF)-1) %>% mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::group_by(SAMPLE_UUID) %>% dplyr::summarize(mean_delen = mean(del_len)) median_delen = all_vars %>% - mutate(del_len = nchar(REF)) %>% + mutate(del_len = nchar(REF)-1) %>% mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::group_by(SAMPLE_UUID) %>% dplyr::summarize(median_delen = median(del_len)) deln4_count = all_vars %>% - mutate(del_len = nchar(REF)) %>% + mutate(del_len = nchar(REF)-1) %>% mutate(SAMPLE_UUID = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::group_by(SAMPLE_UUID) %>% dplyr::summarize(deln4_count = sum(del_len>=4)) @@ -75,12 +75,12 @@ deln4_count = all_vars %>% hml_down = hml_up = NULL for (i in 1:nrow(all_vars)) { chr = paste0("chr", all_vars[i,"CHROM"]) - start = as.numeric(all_vars[i,"POS"]) + start = as.numeric(all_vars[i,"POS"])+1 n = as.numeric(nchar(all_vars[i,"REF"]))-1 - deleted = getSeqFrom(chr = chr, start = start, end = start + n) - prevn = getSeqFrom(chr = chr, start = start - n - 1, end = start - 1) - nextn = getSeqFrom(chr = chr, start = start + n + 1, end = start + 2*n + 1) + deleted = getSeqFrom(chr = chr, start = start, end = start + n - 1) + prevn = getSeqFrom(chr = chr, start = start - n, end = start - 1) + nextn = getSeqFrom(chr = chr, start = start + n, end = start + 2*n - 1) hml_down = c(hml_down, checkHomLen(deleted = deleted, next50 = prevn)) hml_up = c(hml_up, checkHomLen(deleted = deleted, next50 = nextn)) From 8801e6cebf120cc147c25806b5c18b56fb86e6d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Nov 2019 15:51:01 -0500 Subject: [PATCH 041/766] Update delmh_summary.R --- summary/delmh_summary.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/summary/delmh_summary.R b/summary/delmh_summary.R index b67591e6..3698f4d4 100644 --- a/summary/delmh_summary.R +++ b/summary/delmh_summary.R @@ -87,7 +87,7 @@ for (i in 1:nrow(all_vars)) { } mh_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE), - del_len = nchar(all_vars$REF), + del_len = nchar(all_vars$REF)-1, max_mhlen_5p = hml_down, max_mhlen_3p = hml_up, max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>% @@ -97,7 +97,7 @@ mh_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORM dplyr::summarize(deln4_mhlen_3_counts = sum(is_3)) mhl_3 = data_frame(SAMPLE_UUID = paste0(all_vars$TUMOR_SAMPLE, "_", all_vars$NORMAL_SAMPLE), - del_len = nchar(all_vars$REF), + del_len = nchar(all_vars$REF)-1, max_mhlen_5p = hml_down, max_mhlen_3p = hml_up, max_mhlen = apply(cbind(hml_down, hml_up), 1, max)) %>% From e906b63ee89b35eaeeffe7ac62f15601ec1c90c8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 22 Nov 2019 12:45:37 -0500 Subject: [PATCH 042/766] Proven source annotation --- vcf_tools/annotateSomaticVcf.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vcf_tools/annotateSomaticVcf.mk b/vcf_tools/annotateSomaticVcf.mk index 44b7e93e..d643ee63 100644 --- a/vcf_tools/annotateSomaticVcf.mk +++ b/vcf_tools/annotateSomaticVcf.mk @@ -40,7 +40,9 @@ endif ifeq ($(ANN_MUT_TASTE),true) SOMATIC_INDEL_ANN2 += mut_taste endif -#SOMATIC_INDEL_ANN2 += provean +ifeq ($(ANN_PROVEAN),true) +SOMATIC_INDEL_ANN2 += provean +endif SOMATIC_SNV_ANN2 = $(if $(findstring b37,$(REF)),nsfp chasm parssnp) # indel/snv initial round of annotations From 0352aced8cf6dc708b1cc293f7a7bc16aa5ca1cb Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 22 Nov 2019 12:47:44 -0500 Subject: [PATCH 043/766] Proven source annotation From 160f605a946c9cdae79195b9892c01ccae315194 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 27 Nov 2019 16:08:00 -0500 Subject: [PATCH 044/766] Update classify_indel_pathogenicity_vcf.py --- scripts/classify_indel_pathogenicity_vcf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/classify_indel_pathogenicity_vcf.py b/scripts/classify_indel_pathogenicity_vcf.py index fdb66ee9..274e9081 100644 --- a/scripts/classify_indel_pathogenicity_vcf.py +++ b/scripts/classify_indel_pathogenicity_vcf.py @@ -31,7 +31,7 @@ def query_mutation_taster(record): parser.add_argument('--qsub_queue', nargs='?', default='jrf.q,all.q', help='qsub queue') parser.add_argument('--num_provean_threads', nargs='?', default=4, type=int, help='number of provean threads') parser.add_argument('--run_local', action='store_true', default=False, help='run provean locally') - parser.add_argument('--no_remote', action='store_true', default=False, help='no remote queries: can only call potentially pathogenic') + parser.add_argument('--no_remote', action='store_true', default=True, help='no remote queries: can only call potentially pathogenic') parser.add_argument('--no_mt_provean', action='store_true', default=False, help='do not run mutation taster / provean') args = parser.parse_args() From 4c3f5efd451e0b6a02de24f59d84d3a8da8f9d63 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 27 Nov 2019 16:12:47 -0500 Subject: [PATCH 045/766] Rename mutationSummary.mk to mutationsummary.mk --- summary/{mutationSummary.mk => mutationsummary.mk} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename summary/{mutationSummary.mk => mutationsummary.mk} (100%) diff --git a/summary/mutationSummary.mk b/summary/mutationsummary.mk similarity index 100% rename from summary/mutationSummary.mk rename to summary/mutationsummary.mk From 255e72e8ca54a6fea11500c1465f142b5dc37c83 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 8 Jan 2020 01:24:58 +0400 Subject: [PATCH 046/766] Update qmake.pl --- scripts/qmake.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 52ee94f3..fa9b86c4 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -13,7 +13,8 @@ brownd7 => "U6F3B13B4", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - gazzoa => "UP67G7GDV" + gazzoa => "UP67G7GDV", + feinberj => "URKJ612C8" ); sub HELP_MESSAGE { From 3a39181c985ef70f4328c8d64a99d948e839e535 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Feb 2020 23:41:35 -0500 Subject: [PATCH 047/766] Update qmake.pl --- scripts/qmake.pl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index fa9b86c4..9f5e286c 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,10 +10,8 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "U6F3B13B4", - selenicp => "U22A5U23X", + selenicp => "UBF6MRSV8", dacruzpa => "U6PAUB3C6", - gazzoa => "UP67G7GDV", feinberj => "URKJ612C8" ); From 1e0dbdf0d6406b2c174a0b96a7b65f89acef0c75 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Feb 2020 23:43:38 -0500 Subject: [PATCH 048/766] Update tmapAligner.mk --- aligners/tmapAligner.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/aligners/tmapAligner.mk b/aligners/tmapAligner.mk index 6e4d415e..42d1493a 100644 --- a/aligners/tmapAligner.mk +++ b/aligners/tmapAligner.mk @@ -5,6 +5,7 @@ include modules/aligners/align.inc ALIGNER := tmap LOGDIR := log/tmap.$(NOW) + SAMTOOLS_SORT_MEM = 2000000000 FASTQ_CHUNKS := 10 From c73c8370b31d6d700eacb0bff0c7a4a0cb78f6b6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Feb 2020 23:45:04 -0500 Subject: [PATCH 049/766] Update config.inc --- config.inc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config.inc b/config.inc index d1c44fcf..ef57e3bc 100644 --- a/config.inc +++ b/config.inc @@ -14,20 +14,20 @@ R ?= R MY_RSCRIPT ?= Rscript RSCRIPT ?= Rscript -# General python 2.7 environment +## General python 2.7 environment ANACONDA_27_ENV ?= $(HOME)/share/usr/anaconda-envs/anaconda-2.7 -# SUFAM python environment +## SUFAM python environment SUFAM_ENV ?= $(HOME)/share/usr/anaconda-envs/sufam-dev MUTSIG_REPORT_ENV = $(HOME)/share/usr/anaconda-envs/mutsig-report-0.0.1 JARDIR ?= $(HOME)/share/usr/lib/java -# jrflab modules environment +## jrflab modules environment JRFLAB_MODULES_ENV ?= $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4 -### Applications +## Applications UNZIP ?= /usr/bin/unzip FASTQC ?= $(PERL) $(HOME)/share/usr/FastQC/fastqc MUTECT_JAR ?= $(JARDIR)/muTect-1.1.7.jar @@ -58,11 +58,11 @@ DEFUSE ?= $(PERL) $(HOME)/share/usr/defuse-0.6.1/scripts/defuse.pl ONCOFUSE_JAR ?= $(HOME)/share/usr/oncofuse-1.0.9b2/Oncofuse.jar VARSCAN_JAR ?= $(JARDIR)/VarScan.v2.3.9.jar -# PICARD tools +## PICARD tools PICARD_DIR ?= $(JARDIR)/picard-1.92 PICARD_JAR ?= $(JARDIR)/picard-tools-1.141/picard.jar -# scripts +## scripts SCRIPTS_DIR ?= modules/scripts MERGE ?= $(SCRIPTS_DIR)/merge.R VCF_TO_TABLE ?= $(SCRIPTS_DIR)/vcfToTable.R @@ -78,7 +78,7 @@ JAVA8_BIN ?= $(HOME)/share/usr/jdk1.8.0_121/bin/java GET_INSERT_SIZE ?= $(HOME)/share/usr/bin/getInsertSize.py -#GATK +## GATK GATK_JAR ?= $(JARDIR)/GenomeAnalysisTK.jar GATK_JAR2 ?= $(JARDIR)/GenomeAnalysisTK-3.7.jar From 30a5d3c140701f03221f934b8a2ce98efe75a056 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 20 Feb 2020 11:03:12 -0500 Subject: [PATCH 050/766] Update qmake.pl --- scripts/qmake.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 9f5e286c..8d98a2b6 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,9 +10,10 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - selenicp => "UBF6MRSV8", + selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - feinberj => "URKJ612C8" + feinberj => "URKJ612C8", + parejaf => "UBF6MRSV8" ); sub HELP_MESSAGE { From f30b21e78bf05bed5a8689fb307844ad93691ee8 Mon Sep 17 00:00:00 2001 From: Juber Patel Date: Fri, 28 Feb 2020 17:17:30 -0500 Subject: [PATCH 051/766] testing commit - Juber --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 30102441..56cf103c 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,8 @@ endef RUN_MAKE = $(if $(findstring false,$(USE_CLUSTER))$(findstring n,$(MAKEFLAGS)),+$(MAKE) -f $1,$(call RUN_QMAKE,$1,$(NUM_JOBS))) +# test - Juber + #================================================== # workflows #================================================== From 92010f0c1d959a7ce3514613b18282d77efe6356 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 28 May 2020 17:50:49 -0400 Subject: [PATCH 052/766] Update fusioncatcher.mk --- sv_callers/fusioncatcher.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/fusioncatcher.mk b/sv_callers/fusioncatcher.mk index 946627bb..e7e3fde7 100644 --- a/sv_callers/fusioncatcher.mk +++ b/sv_callers/fusioncatcher.mk @@ -6,7 +6,7 @@ LOGDIR = log/fusioncatcher.$(NOW) ##### MAKE INCLUDES ##### include modules/Makefile.inc -FUSIONCATCHER = $(HOME)/share/usr/fusioncatcher/bin/fusioncatcher +FUSIONCATCHER = $(HOME)/share/usr/fusioncatcher/fusioncatcher_v0.99.2/fusioncatcher FUSIONCATCHER_OPTS = -d $(HOME)/share/usr/fusioncatcher/data/current --extract-buffer-size=35000000000 .DELETE_ON_ERROR: From 66c3246436dcc64f9b30fb71511d07cbd6e0077b Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 31 May 2021 15:44:04 -0400 Subject: [PATCH 053/766] Update Makefile Removes prior commit by Juber --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index 56cf103c..30102441 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,6 @@ endef RUN_MAKE = $(if $(findstring false,$(USE_CLUSTER))$(findstring n,$(MAKEFLAGS)),+$(MAKE) -f $1,$(call RUN_QMAKE,$1,$(NUM_JOBS))) -# test - Juber - #================================================== # workflows #================================================== From 93a9949be009dc6263f425609860d669fe55e622 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 6 Jul 2021 10:08:54 -0400 Subject: [PATCH 054/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 8d98a2b6..84a43d73 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -12,7 +12,7 @@ my %slack_map = ( selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - feinberj => "URKJ612C8", + brownd7 => "W013UH0HWUF", parejaf => "UBF6MRSV8" ); From 3bf8b06b02d15bffe052e3c50c009e8ce8aa7695 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Jul 2021 12:11:04 -0400 Subject: [PATCH 055/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 84a43d73..004c5b8d 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -150,7 +150,7 @@ sub slack { if ($opt{s} && ($retcode == 0 || $n == 0 || $n + 1 == $attempts)) { if ($retcode == 0) { # op success - my $slack_msg = "*COMPLETE* $name :metal:"; + my $slack_msg = "*COMPLETE* $name :the_horns:"; &slack($fin_slack, "$pipeline_channel_msg $slack_msg"); &slack($opt{c}, $slack_msg) if $opt{c}; } else { From 86882874bd323bcedc5cd6d688849a34a06ea144 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Jul 2021 12:13:01 -0400 Subject: [PATCH 056/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 004c5b8d..0e1c8960 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -158,7 +158,7 @@ sub slack { my $slack_msg = "*FAILURE* $cwd/$logfile"; if ($n + 1 == $attempts) { # final attempt - $slack_msg = ":finnadie: $slack_msg"; + $slack_msg = ":-1: $slack_msg"; &slack($opt{c}, $slack_msg) if $opt{c}; } &slack($err_slack, "$pipeline_channel_msg $slack_msg"); From 7dff8d9d11cb975e4088cc1c26fb2d66a819ba85 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:26:40 -0400 Subject: [PATCH 057/766] Create splitRG.mk --- bam_tools/splitRG.mk | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 bam_tools/splitRG.mk diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk new file mode 100644 index 00000000..b2ee6266 --- /dev/null +++ b/bam_tools/splitRG.mk @@ -0,0 +1,21 @@ +include modules/Makefile.inc + +LOGDIR = log/splitRG.$(NOW) + +split : $(foreach sample,$(SAMPLES),rg/EEC128/$(sample).bam) + +define split-rg +rg/EEC128/$1.bam : bam/EEC128.bam + $$(call RUN,-n 1 -s 4G -m 6G,"set -o pipefail && \ + mkdir -p rg/EEC128 && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call bam-to-fastq,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + $(SAMTOOLS) --version > version/splitRG.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: split From 99eba6130fa05ce8bdb2251c26649cff770f39a3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:27:42 -0400 Subject: [PATCH 058/766] Update Makefile --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 30102441..892a90f7 100644 --- a/Makefile +++ b/Makefile @@ -444,6 +444,11 @@ process_bam : TARGETS += merge_bam merge_bam : $(call RUN_MAKE,modules/bam_tools/mergeBam.mk) + +TARGETS += split_rg +split_rg : + $(call RUN_MAKE,modules/bam_tools/splitRG.mk) + #================================================== From b0a4e3dbd90686ac3c9541747af7a0a3dcb10566 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:31:00 -0400 Subject: [PATCH 059/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index b2ee6266..d3a5a3e5 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -12,7 +12,7 @@ rg/EEC128/$1.bam : bam/EEC128.bam endef $(foreach sample,$(SAMPLES),\ - $(eval $(call bam-to-fastq,$(sample)))) + $(eval $(call split-rg,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ $(SAMTOOLS) --version > version/splitRG.txt;) From 150e19e5cb446745d1a89c413eedf7defddc37b7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:32:59 -0400 Subject: [PATCH 060/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index d3a5a3e5..3bf0b5a4 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),rg/EEC128/$(sample).bam) define split-rg rg/EEC128/$1.bam : bam/EEC128.bam - $$(call RUN,-n 1 -s 4G -m 6G,"set -o pipefail && \ + $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p rg/EEC128 && \ $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam") From 33198cd64578df557dbad32e4d5389fc3add7beb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:33:51 -0400 Subject: [PATCH 061/766] Update splitRG.mk --- bam_tools/splitRG.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 3bf0b5a4..a0802d1c 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -8,7 +8,8 @@ define split-rg rg/EEC128/$1.bam : bam/EEC128.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p rg/EEC128 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam") + $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam && \ + $$(SAMTOOLS) index rg/EEC128/$1.bam") endef $(foreach sample,$(SAMPLES),\ From 63afb96474344ce5ea06428b36c131bae5b14128 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Jul 2021 17:38:44 -0400 Subject: [PATCH 062/766] Update Makefile.inc --- Makefile.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.inc b/Makefile.inc index 7458fffd..f0c5c5ed 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -80,7 +80,7 @@ ALL_SAMPLES = $(SAMPLE_PAIRS) $(SAMPLES) SHELL = /bin/bash -export TMPDIR := $(HOME)/share/data/$(USER)/tmp +export TMPDIR := $(HOME)/share/data/common/.TemporaryItems PICARD_MEM = 10G PICARD_OPTS = VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=4000000 From 1d141972fdf0b17a8974ae08a2d0f3a46a06e6bb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 11 Jul 2021 12:59:05 -0400 Subject: [PATCH 063/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index a0802d1c..16471c01 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,14 +2,14 @@ include modules/Makefile.inc LOGDIR = log/splitRG.$(NOW) -split : $(foreach sample,$(SAMPLES),rg/EEC128/$(sample).bam) +split : $(foreach sample,$(SAMPLES),rg/EEC87/$(sample).bam) define split-rg -rg/EEC128/$1.bam : bam/EEC128.bam +rg/EEC87/$1.bam : bam/EEC87.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p rg/EEC128 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam && \ - $$(SAMTOOLS) index rg/EEC128/$1.bam") + mkdir -p rg/EEC87 && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC87.bam > rg/EEC87/$1.bam && \ + $$(SAMTOOLS) index rg/EEC87/$1.bam") endef $(foreach sample,$(SAMPLES),\ From 0197745b92b192639e62162709a13ce061feafd1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Jul 2021 09:01:28 -0400 Subject: [PATCH 064/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 16471c01..158b8cc2 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,14 +2,14 @@ include modules/Makefile.inc LOGDIR = log/splitRG.$(NOW) -split : $(foreach sample,$(SAMPLES),rg/EEC87/$(sample).bam) +split : $(foreach sample,$(SAMPLES),rg/EEC14/$(sample).bam) define split-rg -rg/EEC87/$1.bam : bam/EEC87.bam +rg/EEC14/$1.bam : bam/EEC14.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p rg/EEC87 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC87.bam > rg/EEC87/$1.bam && \ - $$(SAMTOOLS) index rg/EEC87/$1.bam") + mkdir -p rg/EEC14 && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC14.bam > rg/EEC14/$1.bam && \ + $$(SAMTOOLS) index rg/EEC14/$1.bam") endef $(foreach sample,$(SAMPLES),\ From 2c9571d6f4a70a77de96a2bcf6d019b94a45f9e6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Jul 2021 09:02:33 -0400 Subject: [PATCH 065/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 158b8cc2..9ff65cb2 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -5,7 +5,7 @@ LOGDIR = log/splitRG.$(NOW) split : $(foreach sample,$(SAMPLES),rg/EEC14/$(sample).bam) define split-rg -rg/EEC14/$1.bam : bam/EEC14.bam +rg/EEC14/$1.bam : bam/EEC14-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p rg/EEC14 && \ $$(SAMTOOLS) view -b -r $1 bam/EEC14.bam > rg/EEC14/$1.bam && \ From ebe9dc73b29dd8d5a031ec3aa9203498669e882e Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Jul 2021 09:05:22 -0400 Subject: [PATCH 066/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 9ff65cb2..e2f9c958 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -8,7 +8,7 @@ define split-rg rg/EEC14/$1.bam : bam/EEC14-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p rg/EEC14 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC14.bam > rg/EEC14/$1.bam && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC14-1.bam > rg/EEC14/$1.bam && \ $$(SAMTOOLS) index rg/EEC14/$1.bam") endef From 2b6f93d84cd82e07576552332131852783527b8d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Jul 2021 10:30:22 -0400 Subject: [PATCH 067/766] Update splitRG.mk --- bam_tools/splitRG.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index e2f9c958..83fbfb44 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -5,10 +5,10 @@ LOGDIR = log/splitRG.$(NOW) split : $(foreach sample,$(SAMPLES),rg/EEC14/$(sample).bam) define split-rg -rg/EEC14/$1.bam : bam/EEC14-1.bam +rg/EEC14/$1.bam : bam/EEC14-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p rg/EEC14 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC14-1.bam > rg/EEC14/$1.bam && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC14-2.bam > rg/EEC14/$1.bam && \ $$(SAMTOOLS) index rg/EEC14/$1.bam") endef From 36a8ff6a1191f839802c2c8d82db534ec201f259 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Jul 2021 12:56:28 -0400 Subject: [PATCH 068/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 83fbfb44..a0802d1c 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,14 +2,14 @@ include modules/Makefile.inc LOGDIR = log/splitRG.$(NOW) -split : $(foreach sample,$(SAMPLES),rg/EEC14/$(sample).bam) +split : $(foreach sample,$(SAMPLES),rg/EEC128/$(sample).bam) define split-rg -rg/EEC14/$1.bam : bam/EEC14-2.bam +rg/EEC128/$1.bam : bam/EEC128.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p rg/EEC14 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC14-2.bam > rg/EEC14/$1.bam && \ - $$(SAMTOOLS) index rg/EEC14/$1.bam") + mkdir -p rg/EEC128 && \ + $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam && \ + $$(SAMTOOLS) index rg/EEC128/$1.bam") endef $(foreach sample,$(SAMPLES),\ From d6fe38fc25f2af90f967162540b3e95e7e6cf56b Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:14:59 -0400 Subject: [PATCH 069/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index a0802d1c..f9ad7a1c 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,14 +2,14 @@ include modules/Makefile.inc LOGDIR = log/splitRG.$(NOW) -split : $(foreach sample,$(SAMPLES),rg/EEC128/$(sample).bam) +split : $(foreach sample,$(SAMPLES),rg/XXX/$(sample).bam) define split-rg -rg/EEC128/$1.bam : bam/EEC128.bam +rg/XXX/$1.bam : bam/XXX.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p rg/EEC128 && \ - $$(SAMTOOLS) view -b -r $1 bam/EEC128.bam > rg/EEC128/$1.bam && \ - $$(SAMTOOLS) index rg/EEC128/$1.bam") + mkdir -p rg/XXX && \ + $$(SAMTOOLS) view -b -r $1 bam/XXX.bam > rg/XXX/$1.bam && \ + $$(SAMTOOLS) index rg/XXX/$1.bam") endef $(foreach sample,$(SAMPLES),\ From 1455a25e496ea016c36f02d4c1c416acc31f337f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:15:08 -0400 Subject: [PATCH 070/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index f9ad7a1c..1b8a88be 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -16,7 +16,7 @@ $(foreach sample,$(SAMPLES),\ $(eval $(call split-rg,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - $(SAMTOOLS) --version > version/splitRG.txt;) + $(SAMTOOLS) --version > version/split_rg.txt;) .SECONDARY: .DELETE_ON_ERROR: .PHONY: split From 25450479b91a66cbc2131818facdf9148a1c1d0f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:39:16 -0400 Subject: [PATCH 071/766] Create getBaseCount.mk --- variant_callers/getBaseCount.mk | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 variant_callers/getBaseCount.mk diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk new file mode 100644 index 00000000..bdc40735 --- /dev/null +++ b/variant_callers/getBaseCount.mk @@ -0,0 +1,34 @@ +include modules/Makefile.inc + +LOGDIR ?= log/get_basecount.$(NOW) +PHONY += getbasecount + +GBC_ENV = $(home)/share/data/common/eec_sc_split/etc/conda + +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) + +define get-basecount +gbc/EEC128/$1.txt : bam/EEC128/$1.bam + $$(call RUN,-n 6 -s 1G -m 2G -v ,"set -o pipefail && \ + mkdir -p gbc/EEC128 && \ + etc/GetBaseCounts/GetBaseCounts --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ + --bam $$(<) \ + --vcf etc/vcf/EEC128.vcf \ + --output $$(@) \ + --maq 0 \ + --baq 0 \ + --filter_duplicate 0 \ + --filter_improper_pair 0 \ + --filter_qc_failed 1 \ + --thread 6) +") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call get-basecount,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + /lila/home/brownd7/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts > version/get_basecount.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: getbasecount From a76ec16ea934b3b141b0ed4d7d9d40df9c277528 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:40:53 -0400 Subject: [PATCH 072/766] +GBC --- Makefile | 4 ++++ variant_callers/getBaseCount.mk | 25 ++++++++++++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 892a90f7..0daa3147 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,10 @@ sufam: TARGETS += sufam_summary sufam_summary: $(call RUN_MAKE,modules/variant_callers/sufammultisample.mk) + +TARGETS += get_basecount +get_basecount: + $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) #================================================== diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index bdc40735..5ba72978 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -9,19 +9,18 @@ getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) define get-basecount gbc/EEC128/$1.txt : bam/EEC128/$1.bam - $$(call RUN,-n 6 -s 1G -m 2G -v ,"set -o pipefail && \ - mkdir -p gbc/EEC128 && \ - etc/GetBaseCounts/GetBaseCounts --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ - --bam $$(<) \ - --vcf etc/vcf/EEC128.vcf \ - --output $$(@) \ - --maq 0 \ - --baq 0 \ - --filter_duplicate 0 \ - --filter_improper_pair 0 \ - --filter_qc_failed 1 \ - --thread 6) -") + $$(call RUN,-n 6 -s 1G -m 2G -v $(GBC_ENV),"set -o pipefail && \ + mkdir -p gbc/EEC128 && \ + etc/GetBaseCounts/GetBaseCounts --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ + --bam $$(<) \ + --vcf etc/vcf/EEC128.vcf \ + --output $$(@) \ + --maq 0 \ + --baq 0 \ + --filter_duplicate 0 \ + --filter_improper_pair 0 \ + --filter_qc_failed 1 \ + --thread 6") endef $(foreach sample,$(SAMPLES),\ From 3552039c4780fb4eb969970c2a2ecc03a67e3aa2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:45:32 -0400 Subject: [PATCH 073/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 5ba72978..6c9202ff 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -27,7 +27,7 @@ $(foreach sample,$(SAMPLES),\ $(eval $(call get-basecount,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - /lila/home/brownd7/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts > version/get_basecount.txt;) + /lila/home/brownd7/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts &> version/get_basecount.txt;) .SECONDARY: .DELETE_ON_ERROR: .PHONY: getbasecount From f31cab46589ae1572bbe06e919dc584d7411d32f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:51:07 -0400 Subject: [PATCH 074/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 6c9202ff..39b7faeb 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -4,6 +4,7 @@ LOGDIR ?= log/get_basecount.$(NOW) PHONY += getbasecount GBC_ENV = $(home)/share/data/common/eec_sc_split/etc/conda +GBC_EXE = $(home)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) @@ -11,7 +12,7 @@ define get-basecount gbc/EEC128/$1.txt : bam/EEC128/$1.bam $$(call RUN,-n 6 -s 1G -m 2G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/EEC128 && \ - etc/GetBaseCounts/GetBaseCounts --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ + $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ --vcf etc/vcf/EEC128.vcf \ --output $$(@) \ From 051868d69208f9349d71b4ff3dda1738e0de4a17 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:54:11 -0400 Subject: [PATCH 075/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 39b7faeb..eb3bc355 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -3,8 +3,8 @@ include modules/Makefile.inc LOGDIR ?= log/get_basecount.$(NOW) PHONY += getbasecount -GBC_ENV = $(home)/share/data/common/eec_sc_split/etc/conda -GBC_EXE = $(home)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts +GBC_ENV = $(HOME)/share/data/common/eec_sc_split/etc/conda +GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) From a1692c992a5f6e34aeb411ad11c544d0276d062b Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 10:57:52 -0400 Subject: [PATCH 076/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index eb3bc355..5719e4e1 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -5,6 +5,8 @@ PHONY += getbasecount GBC_ENV = $(HOME)/share/data/common/eec_sc_split/etc/conda GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts +MAPQ := 10 +BAQ := 15 getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) @@ -16,8 +18,8 @@ gbc/EEC128/$1.txt : bam/EEC128/$1.bam --bam $$(<) \ --vcf etc/vcf/EEC128.vcf \ --output $$(@) \ - --maq 0 \ - --baq 0 \ + --maq $(MAPQ) \ + --baq $(BAQ) \ --filter_duplicate 0 \ --filter_improper_pair 0 \ --filter_qc_failed 1 \ From c492e876cd9ef716c76811840c40b363240958ba Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 11:22:44 -0400 Subject: [PATCH 077/766] +GBC R --- variant_callers/getBaseCount.R | 24 ++++++++++++++++++++++++ variant_callers/getBaseCount.mk | 7 ++++++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 variant_callers/getBaseCount.R diff --git a/variant_callers/getBaseCount.R b/variant_callers/getBaseCount.R new file mode 100644 index 00000000..942ee76c --- /dev/null +++ b/variant_callers/getBaseCount.R @@ -0,0 +1,24 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--file_name", default = NA, type = 'character', help = "sample names set")) +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +genotype = readr::read_tsv(file = opt$file_name, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(Chrom_N = gsub(pattern = "chr", replacement = "" x = Chrom, fixed = TRUE)) %>% + readr::type_convert() %>% + dplyr::arrange(Chrom_N, Pos) %>% + dplyr::select(-Chrom_N) + +write_tsv(genotype, file = gsub(pattern = ".txt", replacement = ".tsv", x = opt$file_name), append = FALSE, col_names = TRUE) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 5719e4e1..d1f67e60 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -8,7 +8,7 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).txt) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) define get-basecount gbc/EEC128/$1.txt : bam/EEC128/$1.bam @@ -24,6 +24,11 @@ gbc/EEC128/$1.txt : bam/EEC128/$1.bam --filter_improper_pair 0 \ --filter_qc_failed 1 \ --thread 6") + +gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt + $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ + $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<)") + endef $(foreach sample,$(SAMPLES),\ From 4e3d064fa8089822a9f997022b354150982b6927 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 11:25:57 -0400 Subject: [PATCH 078/766] Update getBaseCount.R --- variant_callers/getBaseCount.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.R b/variant_callers/getBaseCount.R index 942ee76c..7eb04f93 100644 --- a/variant_callers/getBaseCount.R +++ b/variant_callers/getBaseCount.R @@ -16,7 +16,7 @@ opt <- arguments$options genotype = readr::read_tsv(file = opt$file_name, col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::mutate(Chrom_N = gsub(pattern = "chr", replacement = "" x = Chrom, fixed = TRUE)) %>% + dplyr::mutate(Chrom_N = gsub(pattern = "chr", replacement = "", x = Chrom, fixed = TRUE)) %>% readr::type_convert() %>% dplyr::arrange(Chrom_N, Pos) %>% dplyr::select(-Chrom_N) From 3780e16b8b21b3bcf8ab10588989ce1ef688fa0f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 11:28:43 -0400 Subject: [PATCH 079/766] Update getBaseCount.R --- variant_callers/getBaseCount.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.R b/variant_callers/getBaseCount.R index 7eb04f93..9293c252 100644 --- a/variant_callers/getBaseCount.R +++ b/variant_callers/getBaseCount.R @@ -21,4 +21,4 @@ genotype = readr::read_tsv(file = opt$file_name, col_names = TRUE, col_types = c dplyr::arrange(Chrom_N, Pos) %>% dplyr::select(-Chrom_N) -write_tsv(genotype, file = gsub(pattern = ".txt", replacement = ".tsv", x = opt$file_name), append = FALSE, col_names = TRUE) +write_tsv(genotype, path = gsub(pattern = ".txt", replacement = ".tsv", x = opt$file_name), append = FALSE, col_names = TRUE) From bb5738acb2c42eabbdcaabc0f36c9ed3e7618393 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 13:23:56 -0400 Subject: [PATCH 080/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index d1f67e60..c8b7c9d6 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -8,15 +8,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) define get-basecount -gbc/EEC128/$1.txt : bam/EEC128/$1.bam +gbc/EEC87/$1.txt : bam/EEC87/$1.bam $$(call RUN,-n 6 -s 1G -m 2G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC128 && \ + mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC128.vcf \ + --vcf etc/vcf/EEC87.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -25,7 +25,7 @@ gbc/EEC128/$1.txt : bam/EEC128/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt +gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<)") From dc0d060fc4ab6c4c80f5911e65dffada673ef6ca Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 13:25:33 -0400 Subject: [PATCH 081/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index c8b7c9d6..a4c576ec 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -26,7 +26,7 @@ gbc/EEC87/$1.txt : bam/EEC87/$1.bam --thread 6") gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt - $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ + $$(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<)") From a418faffec44f3749462c0f6345fcab4d2821e8a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 13:26:03 -0400 Subject: [PATCH 082/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index a4c576ec..760965c2 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -12,7 +12,7 @@ getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) define get-basecount gbc/EEC87/$1.txt : bam/EEC87/$1.bam - $$(call RUN,-n 6 -s 1G -m 2G -v $(GBC_ENV),"set -o pipefail && \ + $$(call RUN,-n 6 -s 2G -m 3G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ From e26d8d31b9540afc377c1a9a8961b955fed6404a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 13:34:23 -0400 Subject: [PATCH 083/766] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0daa3147..4253ce2d 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ MAKELOG = log/$(@).$(NOW).log USE_CLUSTER ?= true QMAKE = modules/scripts/qmake.pl -n $@.$(NOW) $(if $(SLACK_CHANNEL),-c $(SLACK_CHANNEL)) -r $(NUM_ATTEMPTS) -m -s -- make -NUM_JOBS ?= 50 +NUM_JOBS ?= 100 define RUN_QMAKE $(QMAKE) -e -f $1 -j $2 $(TARGET) && \ From f5ab31e46acdfec1c4e9c5df2ded10cef56fab9c Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 13:57:15 -0400 Subject: [PATCH 084/766] +genome summary --- Makefile | 1 - copy_number/genomealtered.mk | 6 ++-- copy_number/lstscore.mk | 5 ++-- copy_number/myriadhrdscore.mk | 5 ++-- copy_number/ntaiscore.mk | 5 ++-- summary/genomesummary.mk | 54 ++++++++++++++++++++++++++++------- 6 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 4253ce2d..1df05db6 100644 --- a/Makefile +++ b/Makefile @@ -627,7 +627,6 @@ TARGETS += cravat_summary cravat_summary : $(call RUN_MAKE,modules/summary/cravat_summary.mk) - TARGETS += delmh_summary delmh_summary : $(call RUN_MAKE,modules/summary/delmh_summary.mk) diff --git a/copy_number/genomealtered.mk b/copy_number/genomealtered.mk index 66402738..87d9afd6 100644 --- a/copy_number/genomealtered.mk +++ b/copy_number/genomealtered.mk @@ -1,18 +1,16 @@ include modules/Makefile.inc LOGDIR ?= log/genome_altered.$(NOW) -PHONY += genome_stats genome_altered : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$< --file_out genome_stats/$1_$2.fga") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$(<) --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) - +.PHONY: genome_altered diff --git a/copy_number/lstscore.mk b/copy_number/lstscore.mk index b8664c7d..5db7fb82 100644 --- a/copy_number/lstscore.mk +++ b/copy_number/lstscore.mk @@ -1,17 +1,16 @@ include modules/Makefile.inc LOGDIR ?= log/lst_score.$(NOW) -PHONY += genome_stats lst_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) define lst-score genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out genome_stats/$1_$2.lst") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) +.PHONY: lst_score diff --git a/copy_number/myriadhrdscore.mk b/copy_number/myriadhrdscore.mk index 8d619938..fab758c9 100644 --- a/copy_number/myriadhrdscore.mk +++ b/copy_number/myriadhrdscore.mk @@ -1,17 +1,16 @@ include modules/Makefile.inc LOGDIR ?= log/myriad_score.$(NOW) -PHONY += genome_stats myriad_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) define myriad-score genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out genome_stats/$1_$2.mrs") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) +.PHONY: myriad_score diff --git a/copy_number/ntaiscore.mk b/copy_number/ntaiscore.mk index 2f8d751a..bea85cf1 100644 --- a/copy_number/ntaiscore.mk +++ b/copy_number/ntaiscore.mk @@ -1,17 +1,16 @@ include modules/Makefile.inc LOGDIR ?= log/ntai_score.$(NOW) -PHONY += genome_stats ntai_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) define ntai-score genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out genome_stats/$1_$2.ntai") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) +.PHONY: ntai_score diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 45fbda98..238cadbf 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,25 +1,57 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -PHONY += genome_stats summary summary/tsv -LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) -genome_summary : genome_stats/lst_score.tsv genome_stats/genome_altered.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv summary/tsv/genome_summary.tsv summary/genome_summary.xlsx +genome_summary : genome_stats/genome_altered.tsv \ + genome_stats/lst_score.tsv \ + genome_stats/ntai_score.tsv \ + genome_stats/myriad_score.tsv \ + summary/tsv/genome_summary.tsv \ + summary/genome_summary.xlsx + +genome_summary += genome_altered +genome_summary += lst_score +genome_summary += ntai_score +genome_summary += myriad_score + +genome_stats/genome_altered.tsv : $(GENOME_ALTERED) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(GENOME_ALTERED) > $$(@)") + +genome_stats/lst_score.tsv : $(LST_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(LST_SCORE) > $$(@)") + +genome_stats/ntai_score.tsv : $(NTAI_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(NTAI_SCORE) > $$(@)") + +genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(MYRIAD_SCORE) > $$(@)") + +summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv + $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ + mkdir -p genome_stats && \ + $(RSCRIPT) modules/summary/genomesummary.R") -genome_stats/lst_score.tsv genome_stats/genome_altered.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv summary/tsv/genome_summary.tsv : - $(call RUN,-n 1 -s 4G -m 4G,"cat $(LST_SCORE) > genome_stats/lst_score.tsv && \ - cat $(GENOME_ALTERED) > genome_stats/genome_altered.tsv && \ - cat $(NTAI_SCORE) > genome_stats/ntai_score.tsv && \ - cat $(MYRIAD_SCORE) > genome_stats/myriad_score.tsv && \ - $(RSCRIPT) modules/summary/genomesummary.R") - summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) +.PHONY: genome_sumary + +include modules/copy_number/genomealtered.mk +include modules/copy_number/lstscore.mk +include modules/copy_number/ntaiscore.mk +include modules/copy_number/myriadhrdscore.mk From 1d04fa919090a5dc7be51896da8dd1ee2512a648 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 14:13:48 -0400 Subject: [PATCH 085/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 760965c2..bbcb6254 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -11,12 +11,12 @@ BAQ := 15 getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) define get-basecount -gbc/EEC87/$1.txt : bam/EEC87/$1.bam - $$(call RUN,-n 6 -s 2G -m 3G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC87 && \ +gbc/EEC14/$1.txt : bam/EEC14/$1.bam + $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ + mkdir -p gbc/EEC14 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC87.vcf \ + --vcf etc/vcf/EEC14.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -25,7 +25,7 @@ gbc/EEC87/$1.txt : bam/EEC87/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt +gbc/EEC14/$1.tsv : gbc/EEC14/$1.txt $$(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<)") From 196e9df1f5418bf0e15a81507ab6232f7c878797 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 14 Jul 2021 14:15:12 -0400 Subject: [PATCH 086/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index bbcb6254..1e0560ed 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -8,7 +8,7 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC14/$(sample).tsv) define get-basecount gbc/EEC14/$1.txt : bam/EEC14/$1.bam From 019e7d9de6241931a34a4595d2f6b31b8ca03368 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 11:57:26 -0400 Subject: [PATCH 087/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 1e0560ed..81a31fd2 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -27,7 +27,8 @@ gbc/EEC14/$1.txt : bam/EEC14/$1.bam gbc/EEC14/$1.tsv : gbc/EEC14/$1.txt $$(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ - $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<)") + $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ + rm $$(<)") endef From 669a4f1a52db4981045878a84b39c73318ff8d9e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 12:16:21 -0400 Subject: [PATCH 088/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 81a31fd2..8ae44c5e 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -8,15 +8,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC14/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) define get-basecount -gbc/EEC14/$1.txt : bam/EEC14/$1.bam +gbc/EEC87/$1.txt : bam/EEC87/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC14 && \ + mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC14.vcf \ + --vcf etc/vcf/EEC87.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -25,7 +25,7 @@ gbc/EEC14/$1.txt : bam/EEC14/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC14/$1.tsv : gbc/EEC14/$1.txt +gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt $$(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From d6c91e53b058d59bb390a10519b4b033b3ea2400 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 12:46:43 -0400 Subject: [PATCH 089/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 8ae44c5e..4f04113b 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -8,15 +8,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) define get-basecount -gbc/EEC87/$1.txt : bam/EEC87/$1.bam +gbc/EEC128/$1.txt : bam/EEC128/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC87 && \ + mkdir -p gbc/EEC128 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC87.vcf \ + --vcf etc/vcf/EEC128.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -25,10 +25,10 @@ gbc/EEC87/$1.txt : bam/EEC87/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt - $$(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ - $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ - rm $$(<)") +gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt + $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ + $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ + rm $$(<)") endef From c9c8ffeeff6a69fa6b2d29fd4c7dfd745ffc9630 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 17:31:52 -0400 Subject: [PATCH 090/766] Update Makefile.inc --- Makefile.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.inc b/Makefile.inc index f0c5c5ed..7458fffd 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -80,7 +80,7 @@ ALL_SAMPLES = $(SAMPLE_PAIRS) $(SAMPLES) SHELL = /bin/bash -export TMPDIR := $(HOME)/share/data/common/.TemporaryItems +export TMPDIR := $(HOME)/share/data/$(USER)/tmp PICARD_MEM = 10G PICARD_OPTS = VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=4000000 From 4a7c591e9cdcbbe31eb4c2eed773b2619b4f7f7d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:12:26 -0400 Subject: [PATCH 091/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 238cadbf..88c73f60 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -19,7 +19,7 @@ genome_summary += lst_score genome_summary += ntai_score genome_summary += myriad_score -genome_stats/genome_altered.tsv : $(GENOME_ALTERED) +genome_stats/genome_altered.tsv : $$(GENOME_ALTERED) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ cat $(GENOME_ALTERED) > $$(@)") From 6d194ffbe16193be6c7a7952a128924a38d586c4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:18:41 -0400 Subject: [PATCH 092/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 88c73f60..238cadbf 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -19,7 +19,7 @@ genome_summary += lst_score genome_summary += ntai_score genome_summary += myriad_score -genome_stats/genome_altered.tsv : $$(GENOME_ALTERED) +genome_stats/genome_altered.tsv : $(GENOME_ALTERED) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ cat $(GENOME_ALTERED) > $$(@)") From 222c9a94da28f6368f8a39413977277ae734a7c3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:19:24 -0400 Subject: [PATCH 093/766] Update genomesummary.mk --- summary/genomesummary.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 238cadbf..034ed7e5 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -2,7 +2,7 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) @@ -22,7 +22,7 @@ genome_summary += myriad_score genome_stats/genome_altered.tsv : $(GENOME_ALTERED) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ - cat $(GENOME_ALTERED) > $$(@)") + cat $$(GENOME_ALTERED) > $$(@)") genome_stats/lst_score.tsv : $(LST_SCORE) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ From 0fde87814387d02f55a06548a87383d65700a0b2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:21:04 -0400 Subject: [PATCH 094/766] Update genomesummary.mk --- summary/genomesummary.mk | 76 ++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 034ed7e5..9dd41fbe 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -2,50 +2,50 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) -LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) -NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) -MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) - -genome_summary : genome_stats/genome_altered.tsv \ - genome_stats/lst_score.tsv \ - genome_stats/ntai_score.tsv \ - genome_stats/myriad_score.tsv \ - summary/tsv/genome_summary.tsv \ - summary/genome_summary.xlsx +#GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +#LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) +#NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) +#MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) + +#genome_summary : genome_stats/genome_altered.tsv \ +# genome_stats/lst_score.tsv \ +# genome_stats/ntai_score.tsv \ +# genome_stats/myriad_score.tsv \ +# summary/tsv/genome_summary.tsv \ +# summary/genome_summary.xlsx genome_summary += genome_altered genome_summary += lst_score genome_summary += ntai_score genome_summary += myriad_score -genome_stats/genome_altered.tsv : $(GENOME_ALTERED) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $$(GENOME_ALTERED) > $$(@)") - -genome_stats/lst_score.tsv : $(LST_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(LST_SCORE) > $$(@)") - -genome_stats/ntai_score.tsv : $(NTAI_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(NTAI_SCORE) > $$(@)") - -genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(MYRIAD_SCORE) > $$(@)") - -summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv - $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ - mkdir -p genome_stats && \ - $(RSCRIPT) modules/summary/genomesummary.R") - -summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv - $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") +#genome_stats/genome_altered.tsv : $(GENOME_ALTERED) +# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ +# mkdir -p genome_stats && \ +# cat $$(GENOME_ALTERED) > $$(@)") +# +#genome_stats/lst_score.tsv : $(LST_SCORE) +# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ +# mkdir -p genome_stats && \ +# cat $(LST_SCORE) > $$(@)") +# +#genome_stats/ntai_score.tsv : $(NTAI_SCORE) +# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ +# mkdir -p genome_stats && \ +# cat $(NTAI_SCORE) > $$(@)") +# +#genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) +# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ +# mkdir -p genome_stats && \ +# cat $(MYRIAD_SCORE) > $$(@)") +# +#summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv +# $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ +# mkdir -p genome_stats && \ +# $(RSCRIPT) modules/summary/genomesummary.R") +# +#summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv +# $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") .DELETE_ON_ERROR: .SECONDARY: From 9ba475931b3005b67adb75076b261c2b1dd9db2f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:22:32 -0400 Subject: [PATCH 095/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 9dd41fbe..2d3aeced 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -14,7 +14,7 @@ LOGDIR ?= log/genome_summary.$(NOW) # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx -genome_summary += genome_altered +#genome_summary += genome_altered genome_summary += lst_score genome_summary += ntai_score genome_summary += myriad_score From 56c40c71e8b4340cbf9a3f6b0856eced37cfb5d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:23:41 -0400 Subject: [PATCH 096/766] Update genomesummary.mk --- summary/genomesummary.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 2d3aeced..12d9835c 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -15,8 +15,8 @@ LOGDIR ?= log/genome_summary.$(NOW) # summary/genome_summary.xlsx #genome_summary += genome_altered -genome_summary += lst_score -genome_summary += ntai_score +#genome_summary += lst_score +#genome_summary += ntai_score genome_summary += myriad_score #genome_stats/genome_altered.tsv : $(GENOME_ALTERED) @@ -51,7 +51,7 @@ genome_summary += myriad_score .SECONDARY: .PHONY: genome_sumary -include modules/copy_number/genomealtered.mk -include modules/copy_number/lstscore.mk -include modules/copy_number/ntaiscore.mk +#include modules/copy_number/genomealtered.mk +#include modules/copy_number/lstscore.mk +#include modules/copy_number/ntaiscore.mk include modules/copy_number/myriadhrdscore.mk From 3b7f2dbac818767385c86d9c76f60a8633f8edf9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:24:11 -0400 Subject: [PATCH 097/766] Update genomesummary.mk --- summary/genomesummary.mk | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 12d9835c..43c459dc 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,4 +1,8 @@ include modules/Makefile.inc +include modules/copy_number/genomealtered.mk +include modules/copy_number/lstscore.mk +include modules/copy_number/ntaiscore.mk +include modules/copy_number/myriadhrdscore.mk LOGDIR ?= log/genome_summary.$(NOW) @@ -50,8 +54,3 @@ genome_summary += myriad_score .DELETE_ON_ERROR: .SECONDARY: .PHONY: genome_sumary - -#include modules/copy_number/genomealtered.mk -#include modules/copy_number/lstscore.mk -#include modules/copy_number/ntaiscore.mk -include modules/copy_number/myriadhrdscore.mk From 47f0d96a7f59e790c908d5d44f4a2cf65e81a6a0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:24:41 -0400 Subject: [PATCH 098/766] Update genomesummary.mk --- summary/genomesummary.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 43c459dc..d8ed7a54 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,7 +1,7 @@ include modules/Makefile.inc -include modules/copy_number/genomealtered.mk -include modules/copy_number/lstscore.mk -include modules/copy_number/ntaiscore.mk +#include modules/copy_number/genomealtered.mk +#include modules/copy_number/lstscore.mk +#include modules/copy_number/ntaiscore.mk include modules/copy_number/myriadhrdscore.mk LOGDIR ?= log/genome_summary.$(NOW) From e81b221a97d123516d4156be547b29d3a37d1bc3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:25:01 -0400 Subject: [PATCH 099/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index d8ed7a54..4929de18 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -21,7 +21,7 @@ LOGDIR ?= log/genome_summary.$(NOW) #genome_summary += genome_altered #genome_summary += lst_score #genome_summary += ntai_score -genome_summary += myriad_score +#genome_summary += myriad_score #genome_stats/genome_altered.tsv : $(GENOME_ALTERED) # $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ From b636218df2ce9ef881d216707abf294ce4fb86d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:25:54 -0400 Subject: [PATCH 100/766] Update genomesummary.mk --- summary/genomesummary.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 4929de18..c6fe8ce1 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,8 +1,8 @@ include modules/Makefile.inc #include modules/copy_number/genomealtered.mk -#include modules/copy_number/lstscore.mk +include modules/copy_number/lstscore.mk #include modules/copy_number/ntaiscore.mk -include modules/copy_number/myriadhrdscore.mk +#include modules/copy_number/myriadhrdscore.mk LOGDIR ?= log/genome_summary.$(NOW) From be2c98c39f30a3637d566c296c862fba1c72bfcd Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:26:11 -0400 Subject: [PATCH 101/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index c6fe8ce1..7d660698 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,7 +1,7 @@ include modules/Makefile.inc #include modules/copy_number/genomealtered.mk include modules/copy_number/lstscore.mk -#include modules/copy_number/ntaiscore.mk +include modules/copy_number/ntaiscore.mk #include modules/copy_number/myriadhrdscore.mk LOGDIR ?= log/genome_summary.$(NOW) From c1b8b9d52c06b731e12432e1eb4ffd0717f50b3c Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:30:40 -0400 Subject: [PATCH 102/766] Update genomesummary.mk --- summary/genomesummary.mk | 51 +++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 7d660698..34dc276d 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -1,27 +1,46 @@ include modules/Makefile.inc -#include modules/copy_number/genomealtered.mk -include modules/copy_number/lstscore.mk -include modules/copy_number/ntaiscore.mk -#include modules/copy_number/myriadhrdscore.mk LOGDIR ?= log/genome_summary.$(NOW) -#GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) -#LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) -#NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) -#MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) -#genome_summary : genome_stats/genome_altered.tsv \ -# genome_stats/lst_score.tsv \ -# genome_stats/ntai_score.tsv \ -# genome_stats/myriad_score.tsv \ +genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ + genome_stats/genome_altered.tsv \ + $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) \ + genome_stats/lst_score.tsv \ + $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ + genome_stats/ntai_score.tsv \ + $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ + genome_stats/myriad_score.tsv # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx -#genome_summary += genome_altered -#genome_summary += lst_score -#genome_summary += ntai_score -#genome_summary += myriad_score +define fraction-genome-altered +genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$(<) --file_out $$(@)") +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) + +define lst-score +genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out $$(@)") +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) + +define ntai-score +genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out $$(@)") +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) + +define myriad-score +genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out $$(@)") +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) #genome_stats/genome_altered.tsv : $(GENOME_ALTERED) # $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ From b6cf586d1b2cbbe3b71d4e0290d1eeafd4a21968 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:32:19 -0400 Subject: [PATCH 103/766] Update genomesummary.mk --- summary/genomesummary.mk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 34dc276d..1a3d69e8 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -13,6 +13,11 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ genome_stats/myriad_score.tsv # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx + +GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) +NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) +MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata From ddfe196711217b6eccd4c9a71ed8f20fb957f546 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:33:14 -0400 Subject: [PATCH 104/766] Update genomesummary.mk --- summary/genomesummary.mk | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 1a3d69e8..55534355 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -17,7 +17,7 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) -MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) +MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata @@ -47,26 +47,26 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) -#genome_stats/genome_altered.tsv : $(GENOME_ALTERED) -# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ -# mkdir -p genome_stats && \ -# cat $$(GENOME_ALTERED) > $$(@)") -# -#genome_stats/lst_score.tsv : $(LST_SCORE) -# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ -# mkdir -p genome_stats && \ -# cat $(LST_SCORE) > $$(@)") -# -#genome_stats/ntai_score.tsv : $(NTAI_SCORE) -# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ -# mkdir -p genome_stats && \ -# cat $(NTAI_SCORE) > $$(@)") -# -#genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) -# $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ -# mkdir -p genome_stats && \ -# cat $(MYRIAD_SCORE) > $$(@)") -# +genome_stats/genome_altered.tsv : $(GENOME_ALTERED) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(GENOME_ALTERED) > $$(@)") + +genome_stats/lst_score.tsv : $(LST_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(LST_SCORE) > $$(@)") + +genome_stats/ntai_score.tsv : $(NTAI_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(NTAI_SCORE) > $$(@)") + +genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) + $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ + mkdir -p genome_stats && \ + cat $(MYRIAD_SCORE) > $$(@)") + #summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv # $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ # mkdir -p genome_stats && \ From 0e0b5bb2d1de4396ed52cdd4841205c4d53b56b0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:34:23 -0400 Subject: [PATCH 105/766] Update genomesummary.mk --- summary/genomesummary.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 55534355..b966cad9 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -14,10 +14,10 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx -GENOME_ALTERED ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) -LST_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) -NTAI_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) -MYRIAD_SCORE ?= $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) +GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +LST_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) +NTAI_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) +MYRIAD_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata From 7b7f3cf8a96271b25ab13ab91b6ba4bec7b34f03 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:34:54 -0400 Subject: [PATCH 106/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index b966cad9..d97881ed 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -14,7 +14,7 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx -GENOME_ALTERED = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga)) +GENOME_ALTERED = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga) LST_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) NTAI_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) MYRIAD_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) From 08c35a783f8fefcd870d72d135ddc9b21d7f6f69 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:36:20 -0400 Subject: [PATCH 107/766] Update genomesummary.mk --- summary/genomesummary.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index d97881ed..b3ed0c2e 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -15,9 +15,9 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ # summary/genome_summary.xlsx GENOME_ALTERED = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga) -LST_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst)) -NTAI_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai)) -MYRIAD_SCORE = $(wildcard $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs)) +LST_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst) +NTAI_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai) +MYRIAD_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata From 6a463a7b1b3f989673b48b8b40db8530fdae3b1f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:36:46 -0400 Subject: [PATCH 108/766] Update genomesummary.mk --- summary/genomesummary.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index b3ed0c2e..eb85262b 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -50,22 +50,22 @@ $(foreach pair,$(SAMPLE_PAIRS),\ genome_stats/genome_altered.tsv : $(GENOME_ALTERED) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ - cat $(GENOME_ALTERED) > $$(@)") + cat $(GENOME_ALTERED) > $(@)") genome_stats/lst_score.tsv : $(LST_SCORE) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ - cat $(LST_SCORE) > $$(@)") + cat $(LST_SCORE) > $(@)") genome_stats/ntai_score.tsv : $(NTAI_SCORE) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ - cat $(NTAI_SCORE) > $$(@)") + cat $(NTAI_SCORE) > $(@)") genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ - cat $(MYRIAD_SCORE) > $$(@)") + cat $(MYRIAD_SCORE) > $(@)") #summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv # $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ From ce4de3bed17ae44a37e96e05feec90f913831d0c Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Jul 2021 19:37:48 -0400 Subject: [PATCH 109/766] Update genomesummary.mk --- summary/genomesummary.mk | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index eb85262b..aeee7d9c 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -10,9 +10,9 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ genome_stats/ntai_score.tsv \ $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ - genome_stats/myriad_score.tsv -# summary/tsv/genome_summary.tsv \ -# summary/genome_summary.xlsx + genome_stats/myriad_score.tsv \ + summary/tsv/genome_summary.tsv \ + summary/genome_summary.xlsx GENOME_ALTERED = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga) LST_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst) @@ -67,13 +67,13 @@ genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) mkdir -p genome_stats && \ cat $(MYRIAD_SCORE) > $(@)") -#summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv -# $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ -# mkdir -p genome_stats && \ -# $(RSCRIPT) modules/summary/genomesummary.R") -# -#summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv -# $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") +summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv + $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ + mkdir -p genome_stats && \ + $(RSCRIPT) modules/summary/genomesummary.R") + +summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv + $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") .DELETE_ON_ERROR: .SECONDARY: From 5ef106e0e32d3a7531f84b12403854259715a211 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 11:39:42 -0400 Subject: [PATCH 110/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index aeee7d9c..b40b614e 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -77,4 +77,4 @@ summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv .DELETE_ON_ERROR: .SECONDARY: -.PHONY: genome_sumary +.PHONY: genome_summary From 6c3209fe5572d30ae7025df3d4a2387739e54eea Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 11:41:03 -0400 Subject: [PATCH 111/766] -mk --- copy_number/genomealtered.mk | 16 ---------------- copy_number/lstscore.mk | 16 ---------------- copy_number/myriadhrdscore.mk | 16 ---------------- copy_number/ntaiscore.mk | 16 ---------------- 4 files changed, 64 deletions(-) delete mode 100644 copy_number/genomealtered.mk delete mode 100644 copy_number/lstscore.mk delete mode 100644 copy_number/myriadhrdscore.mk delete mode 100644 copy_number/ntaiscore.mk diff --git a/copy_number/genomealtered.mk b/copy_number/genomealtered.mk deleted file mode 100644 index 87d9afd6..00000000 --- a/copy_number/genomealtered.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/genome_altered.$(NOW) - -genome_altered : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) - -define fraction-genome-altered -genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$(<) --file_out $$(@)") -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: genome_altered diff --git a/copy_number/lstscore.mk b/copy_number/lstscore.mk deleted file mode 100644 index 5db7fb82..00000000 --- a/copy_number/lstscore.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/lst_score.$(NOW) - -lst_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) - -define lst-score -genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out $$(@)") -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: lst_score diff --git a/copy_number/myriadhrdscore.mk b/copy_number/myriadhrdscore.mk deleted file mode 100644 index fab758c9..00000000 --- a/copy_number/myriadhrdscore.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/myriad_score.$(NOW) - -myriad_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) - -define myriad-score -genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out $$(@)") -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: myriad_score diff --git a/copy_number/ntaiscore.mk b/copy_number/ntaiscore.mk deleted file mode 100644 index bea85cf1..00000000 --- a/copy_number/ntaiscore.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/ntai_score.$(NOW) - -ntai_score : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) - -define ntai-score -genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out $$(@)") -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: ntai_score From 3a988d391f0dad3062598ba0e6494b00c60262c3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 11:43:20 -0400 Subject: [PATCH 112/766] Update Makefile --- Makefile | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/Makefile b/Makefile index 1df05db6..ad11f59c 100644 --- a/Makefile +++ b/Makefile @@ -290,39 +290,6 @@ TARGETS += cnvkit_qc cnvkit_qc : $(call RUN_MAKE,modules/copy_number/cnvkitqc.mk) -TARGETS += qdna_seq -qdna_seq : - $(call RUN_MAKE,modules/test/workflows/qdnaseq.mk) - -TARGETS += qdnaseq_extract_test -qdnaseq_extract_test: - $(call RUN_MAKE,modules/test/copy_number/qdnaseqextract.mk) - -TARGETS += qdnaseq_copynumber_test -qdnaseq_copynumber_test: - $(call RUN_MAKE,modules/test/copy_number/qdnaseqcopynumber.mk) - -TARGETS += copynumber_summary -copynumber_summary: - $(call RUN_MAKE,modules/test/workflows/copynumber_summary.mk) - -TARGETS += genome_altered -genome_altered : - $(call RUN_MAKE,modules/copy_number/genomealtered.mk) - -TARGETS += lst_score -lst_score : - $(call RUN_MAKE,modules/copy_number/lstscore.mk) - -TARGETS += ntai_score -ntai_score : - $(call RUN_MAKE,modules/copy_number/ntaiscore.mk) - -TARGETS += myriad_score -myriad_score : - $(call RUN_MAKE,modules/copy_number/myriadhrdscore.mk) - - #================================================== # structural variant callers #================================================== From c6bd7b68ca4173498999def1bd22e4761aa4bd66 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 11:44:10 -0400 Subject: [PATCH 113/766] Update Makefile --- Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Makefile b/Makefile index ad11f59c..6d118d21 100644 --- a/Makefile +++ b/Makefile @@ -556,11 +556,6 @@ TARGETS += krona_classify krona_classify : $(call RUN_MAKE,modules/virus/krona_classify.mk) -TARGETS += fetch_impact -fetch_impact : - $(call RUN_MAKE,modules/test/workflows/fetchimpact.mk) - - #================================================== # phylogeny #================================================== From 55e5bfc2483b2f20604114179f8d747236413b51 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 11:56:40 -0400 Subject: [PATCH 114/766] +genomesummary --- Makefile | 12 - copy_number/genomealtered.R | 32 --- copy_number/lstscore.R | 178 ------------ copy_number/myriadhrdscore.R | 185 ------------- copy_number/ntaiscore.R | 148 ---------- summary/genomesummary.R | 519 ++++++++++++++++++++++++++++++++++- summary/genomesummary.mk | 10 +- 7 files changed, 515 insertions(+), 569 deletions(-) delete mode 100644 copy_number/genomealtered.R delete mode 100644 copy_number/lstscore.R delete mode 100644 copy_number/myriadhrdscore.R delete mode 100644 copy_number/ntaiscore.R diff --git a/Makefile b/Makefile index 6d118d21..13bc3c33 100644 --- a/Makefile +++ b/Makefile @@ -556,18 +556,6 @@ TARGETS += krona_classify krona_classify : $(call RUN_MAKE,modules/virus/krona_classify.mk) -#================================================== -# phylogeny -#================================================== - -TARGETS += medicc -medicc : - $(call RUN_MAKE,modules/test/workflows/medicc.mk) - -TARGETS += pratchet -pratchet : - $(call RUN_MAKE,modules/test/workflows/pratchet.mk) - #================================================== # reports diff --git a/copy_number/genomealtered.R b/copy_number/genomealtered.R deleted file mode 100644 index c2e10564..00000000 --- a/copy_number/genomealtered.R +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"), - make_option("--file_out", default = NA, type = 'character', help = "output file name")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -load(opt$file_in) -alpha = ifelse(is.na(fit$purity), 1, fit$purity) -psi = ifelse(is.na(fit$ploidy), 2, fit$ploidy) -gamma = 1 -x = fit$cncf[,"cnlr.median"] -absolute_copies = round(((((2^(x/gamma))*(alpha*psi+(1-alpha)*2)) - ((1-alpha)*2))/alpha)) -index = absolute_copies!=round(psi) -if (sum(index, na.rm=TRUE)!=0) { - genome_footprint = sum(as.numeric(fit$cncf[,"end"]-fit$cncf[,"start"]), na.rm=TRUE) - genome_altered = sum(as.numeric(fit$cncf[index,"end"]-fit$cncf[index,"start"]), na.rm=TRUE)/genome_footprint -} else { - genome_altered = 0 -} -cat(paste0(gsub("facets/cncf/","", gsub(".Rdata", "", opt$file_in)), "\t", genome_altered), file = opt$file_out, append=FALSE) -cat("\n", file = opt$file_out, append=TRUE) - -warnings() diff --git a/copy_number/lstscore.R b/copy_number/lstscore.R deleted file mode 100644 index 517eaf5d..00000000 --- a/copy_number/lstscore.R +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"), - make_option("--file_out", default = NA, type = 'character', help = "output file name")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -chromStrToNum <- function(str) { - suppressWarnings(cNum <- as.numeric(str)) - if (is.na(cNum) && str == "X" ) { - cNum <- 23 - } else if (is.na(cNum) && str == "Y") { - cNum <- 24 - } - return(invisible(cNum)) -} - -GetChrominfo <- function() { - f <- "modules/copy_number/hg19_chrominfo.txt" - chrom <- read.table(file=f) - chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) - f <- "modules/copy_number/hg19_gaps.txt" - gaps <- read.table(file=f) - centro <- subset(gaps, gaps[,8] == "centromere") - chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) - chrominfo$centromere <- rowMeans(chrominfo[,3:4]) - chrominfo <- chrominfo[,c(1,2,5,3,4)] - colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") - chrominfo[,1] <- as.character(chrominfo[,1]) - chrominfo$chr <- sub("chr", "", chrominfo$chr) - chrominfo$chr <- sub("X", "23", chrominfo$chr) - chrominfo$chr <- sub("Y", "24", chrominfo$chr) - chrominfo[,1] <- as.numeric(chrominfo[,1]) - chrominfo <- chrominfo[order(chrominfo$chr), ] - rownames(chrominfo) <- as.character(chrominfo[,1]) - chrominfo <- as.matrix(chrominfo) - return(invisible(chrominfo)) -} - -fix_facets_column_names <- function(dat) { - colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" - colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" - colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" - colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" - sz <- dat[,"endBP"] - dat[,"startBP"] - dat <- cbind(dat, size=sz) - nA <- dat[,"tcn.em"] - dat[,"nB"] - dat <- cbind(dat, nA=nA) - return(invisible(dat)) -} - -join_adjacent_segments <- function(dat) { - cur_segs <- dat - something_changed <- 1 - while ( something_changed ) { - new_segs <- c() - something_changed <- 0 - x <- 2 - last_changed <- 0 - while (x <= nrow(cur_segs)) { - last_changed <- 0 - if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && - (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && - (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) - ) { - t <- cur_segs[x-1,] - t["endBP"] <- cur_segs[x,"endBP"] - t["end"] <- cur_segs[x,"end"] - t["size"] <- t["endBP"] - t["startBP"] - something_changed <- 1 - new_segs <- rbind(t, new_segs) - x <- x+2 - last_changed <- 1 - } else { - new_segs <- rbind(cur_segs[x-1,], new_segs) - x<-x+1 - } - } - if (! last_changed ) { - new_segs <- rbind(cur_segs[x-1,],new_segs) - } - n <- nrow(new_segs) - new_segs <- new_segs[n:1,] - cur_segs <- new_segs - } - return(invisible(cur_segs)) -} - -fix_facet_segs <- function(dat) { - i <- which(is.na(dat$nB)) - if ( length(i) > 0 ) { - dat <- dat[-i, ] - } - dat <- join_adjacent_segments(dat) - return(invisible(dat)) -} - -chrom_arm_LST_score <- function(dat) { - score <- 0 - segs <- c() - SIZE_THRESH <- 10e6 - SPACE_THRESH <- 3e6 - if ( nrow(dat) >= 2 ) { - for (x in 2:nrow(dat)) { - if ( (dat[x-1,"size"] >= SIZE_THRESH) && - (dat[x,"size"] >= SIZE_THRESH) && - ( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH) - ) { - score <- score +1 - segs <- rbind(dat[x-1,], segs) - } - } - } - tmp <- list() - tmp$score <- score - tmp$segs <- segs - return(invisible(tmp)) -} - -lst_filter <- function(dat, size_thresh) { - i <- which(dat[,"size"] < size_thresh) - sz <- dat[i,"size"] - i <- i[order(sz)] - segs_removed <- 0 - while (length(i) > 0) { - dat <- dat[-i[1], ] - dat <- join_adjacent_segments(dat) - i<- which(dat[,"size"] < size_thresh) - sz <- dat[i,"size"] - i <- i[order(sz)] - segs_removed <- segs_removed + 1 - } - return(invisible(dat)) -} - -score_LST <- function(dat, chromInfo) { - score <- 0 - segs <- c() - dat <- lst_filter(dat, 3e6) - for (c in unique(dat[,"chromosome"]) ) { - i <- which(dat[,"chromosome"] == c) - csegs <- dat[i,] - cNum <- chromStrToNum(c) - i <- which(csegs[,"startBP"] <= chromInfo[cNum,"centstart"]) - parm <- csegs[i,] - tmp <- chrom_arm_LST_score(parm) - score <- score + tmp$score - segs <- rbind(tmp$segs, segs) - i <- which(csegs[,"endBP"] >= chromInfo[cNum,"centend"]) - qarm <- csegs[i,] - tmp <- chrom_arm_LST_score(qarm) - score <- score + tmp$score - segs <- rbind(tmp$segs, segs) - } - tmp <- list() - tmp$score <- score - tmp$segs <- segs - return(invisible(tmp)) -} - -dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) -dat = fix_facets_column_names(dat) -segs = fix_facet_segs(dat) -chromInfo = GetChrominfo() -lst = score_LST(segs, chromInfo) -cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", lst$score), file = opt$file_out, append=FALSE) -cat("\n", file = opt$file_out, append=TRUE) - -warnings() - diff --git a/copy_number/myriadhrdscore.R b/copy_number/myriadhrdscore.R deleted file mode 100644 index 392fa195..00000000 --- a/copy_number/myriadhrdscore.R +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"), - make_option("--file_out", default = NA, type = 'character', help = "output file name")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -chromStrToNum <- function(str) { - suppressWarnings(cNum <- as.numeric(str)) - if (is.na(cNum) && str == "X" ) { - cNum <- 23 - } else if (is.na(cNum) && str == "Y") { - cNum <- 24 - } - return(invisible(cNum)) -} - -GetChrominfo <- function() { - f <- "modules/copy_number/hg19_chrominfo.txt" - chrom <- read.table(file=f) - chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) - f <- "modules/copy_number/hg19_gaps.txt" - gaps <- read.table(file=f) - centro <- subset(gaps, gaps[,8] == "centromere") - chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) - chrominfo$centromere <- rowMeans(chrominfo[,3:4]) - chrominfo <- chrominfo[,c(1,2,5,3,4)] - colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") - chrominfo[,1] <- as.character(chrominfo[,1]) - chrominfo$chr <- sub("chr", "", chrominfo$chr) - chrominfo$chr <- sub("X", "23", chrominfo$chr) - chrominfo$chr <- sub("Y", "24", chrominfo$chr) - chrominfo[,1] <- as.numeric(chrominfo[,1]) - chrominfo <- chrominfo[order(chrominfo$chr), ] - rownames(chrominfo) <- as.character(chrominfo[,1]) - chrominfo <- as.matrix(chrominfo) - return(invisible(chrominfo)) -} - -fix_facets_column_names <- function(dat) { - colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" - colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" - colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" - colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" - sz <- dat[,"endBP"] - dat[,"startBP"] - dat <- cbind(dat, size=sz) - nA <- dat[,"tcn.em"] - dat[,"nB"] - dat <- cbind(dat, nA=nA) - return(invisible(dat)) -} - -join_adjacent_segments <- function(dat) { - cur_segs <- dat - something_changed <- 1 - while ( something_changed ) { - new_segs <- c() - something_changed <- 0 - x <- 2 - last_changed <- 0 - while (x <= nrow(cur_segs)) { - last_changed <- 0 - if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && - (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && - (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) - ) { - t <- cur_segs[x-1,] - t["endBP"] <- cur_segs[x,"endBP"] - t["end"] <- cur_segs[x,"end"] - t["size"] <- t["endBP"] - t["startBP"] - something_changed <- 1 - new_segs <- rbind(t, new_segs) - x <- x+2 - last_changed <- 1 - } else { - new_segs <- rbind(cur_segs[x-1,], new_segs) - x<-x+1 - } - } - if (! last_changed ) { - new_segs <- rbind(cur_segs[x-1,],new_segs) - } - n <- nrow(new_segs) - new_segs <- new_segs[n:1,] - cur_segs <- new_segs - } - return(invisible(cur_segs)) -} - -fix_facet_segs <- function(dat) { - i <- which(is.na(dat$nB)) - if ( length(i) > 0 ) { - dat <- dat[-i, ] - } - dat <- join_adjacent_segments(dat) - return(invisible(dat)) -} - -chrom_arm_LST_score <- function(dat) { - score <- 0 - segs <- c() - SIZE_THRESH <- 10e6 - SPACE_THRESH <- 3e6 - if ( nrow(dat) >= 2 ) { - for (x in 2:nrow(dat)) { - if ( (dat[x-1,"size"] >= SIZE_THRESH) && - (dat[x,"size"] >= SIZE_THRESH) && - ( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH) - ) { - score <- score +1 - segs <- rbind(dat[x-1,], segs) - } - } - } - tmp <- list() - tmp$score <- score - tmp$segs <- segs - return(invisible(tmp)) -} - -lst_filter <- function(dat, size_thresh) { - i <- which(dat[,"size"] < size_thresh) - sz <- dat[i,"size"] - i <- i[order(sz)] - segs_removed <- 0 - while (length(i) > 0) { - dat <- dat[-i[1], ] - dat <- join_adjacent_segments(dat) - i<- which(dat[,"size"] < size_thresh) - sz <- dat[i,"size"] - i <- i[order(sz)] - segs_removed <- segs_removed + 1 - } - return(invisible(dat)) -} - -score_myriad_HRD <- function(dat, thresh=15e6) { - chrDel <- NULL - hrdSegs <- NULL - hrd_score <- 0 - chrList <- unique(dat[,"chromosome"]) - for (x in chrList) { - index <- which(dat[,"chromosome"] == x) - totalnB <- sum(dat[index,"nB"], na.rm=TRUE) - if (totalnB == 0) { - chrDel <- c(x, chrDel) - } - } - for (x in 1:nrow(dat)) { - if ( dat[x,"chromosome"] %in% chrDel ) { - next - } - if ( dat[x,"nB"] != 0 ) { - next - } - if (dat[x,"size"] < thresh) { - next - } - hrd_score <- hrd_score + 1 - hrdSegs <- rbind(dat[x,], hrdSegs) - } - tmp <- list() - tmp$score = hrd_score - tmp$segs = hrdSegs - return(invisible(tmp)) -} - - -dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) -dat = fix_facets_column_names(dat) -segs = fix_facet_segs(dat) -chromInfo = GetChrominfo() -mrs = score_myriad_HRD(segs) -cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", mrs$score), file = opt$file_out, append=FALSE) -cat("\n", file = opt$file_out, append=TRUE) - -warnings() diff --git a/copy_number/ntaiscore.R b/copy_number/ntaiscore.R deleted file mode 100644 index bb35c010..00000000 --- a/copy_number/ntaiscore.R +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--file_in", default = NA, type = 'character', help = "input file name"), - make_option("--file_out", default = NA, type = 'character', help = "output file name")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -chromStrToNum <- function(str) { - suppressWarnings(cNum <- as.numeric(str)) - if (is.na(cNum) && str == "X" ) { - cNum <- 23 - } else if (is.na(cNum) && str == "Y") { - cNum <- 24 - } - return(invisible(cNum)) -} - -GetChrominfo <- function() { - f <- "modules/copy_number/hg19_chrominfo.txt" - chrom <- read.table(file=f) - chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) - f <- "modules/copy_number/hg19_gaps.txt" - gaps <- read.table(file=f) - centro <- subset(gaps, gaps[,8] == "centromere") - chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) - chrominfo$centromere <- rowMeans(chrominfo[,3:4]) - chrominfo <- chrominfo[,c(1,2,5,3,4)] - colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") - chrominfo[,1] <- as.character(chrominfo[,1]) - chrominfo$chr <- sub("chr", "", chrominfo$chr) - chrominfo$chr <- sub("X", "23", chrominfo$chr) - chrominfo$chr <- sub("Y", "24", chrominfo$chr) - chrominfo[,1] <- as.numeric(chrominfo[,1]) - chrominfo <- chrominfo[order(chrominfo$chr), ] - rownames(chrominfo) <- as.character(chrominfo[,1]) - chrominfo <- as.matrix(chrominfo) - return(invisible(chrominfo)) -} - -fix_facets_column_names <- function(dat) { - colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" - colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" - colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" - colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" - sz <- dat[,"endBP"] - dat[,"startBP"] - dat <- cbind(dat, size=sz) - nA <- dat[,"tcn.em"] - dat[,"nB"] - dat <- cbind(dat, nA=nA) - return(invisible(dat)) -} - -join_adjacent_segments <- function(dat) { - cur_segs <- dat - something_changed <- 1 - while ( something_changed ) { - new_segs <- c() - something_changed <- 0 - x <- 2 - last_changed <- 0 - while (x <= nrow(cur_segs)) { - last_changed <- 0 - if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && - (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && - (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) - ) { - t <- cur_segs[x-1,] - t["endBP"] <- cur_segs[x,"endBP"] - t["end"] <- cur_segs[x,"end"] - t["size"] <- t["endBP"] - t["startBP"] - something_changed <- 1 - new_segs <- rbind(t, new_segs) - x <- x+2 - last_changed <- 1 - } else { - new_segs <- rbind(cur_segs[x-1,], new_segs) - x<-x+1 - } - } - if (! last_changed ) { - new_segs <- rbind(cur_segs[x-1,],new_segs) - } - n <- nrow(new_segs) - new_segs <- new_segs[n:1,] - cur_segs <- new_segs - } - return(invisible(cur_segs)) -} - -fix_facet_segs <- function(dat) { - i <- which(is.na(dat$nB)) - if ( length(i) > 0 ) { - dat <- dat[-i, ] - } - dat <- join_adjacent_segments(dat) - return(invisible(dat)) -} - -score_ntAI <- function(dat, chromInfo, min_size=1000, shrink=FALSE) { - index <- dat[,"chromosome"] %in% c("MT", "Y", "24") - dat <- dat[!index,] - index <- dat[,"size"] < min_size - dat <- dat[!index,] - if (shrink) { - dat <- join_adjacent_segments(dat) - } - chrList <- unique(dat[,"chromosome"]) - ntAI_score <- 0 - ntAI_segs <- NULL - for (x in chrList) { - index <- dat[,"chromosome"] == x - chr_segs <- dat[index,] - cNum <- chromStrToNum(x) - if (nrow(chr_segs) < 2 ) { - next - } - if ( (chr_segs[1,"nA"] != chr_segs[1,"nB"]) && (chromInfo[cNum,"centstart"] > chr_segs[1,"endBP"]) ) { - ntAI_score <- ntAI_score+1 - ntAI_segs <- rbind(chr_segs[1,],ntAI_segs) - } - eSeg <- nrow(chr_segs) - if ( (chr_segs[eSeg, "nA"] != chr_segs[eSeg, "nB"]) && (chr_segs[eSeg,"startBP"] > chromInfo[cNum,"centend"]) ) { - ntAI_score <- ntAI_score+1 - ntAI_segs <- rbind(chr_segs[eSeg,],ntAI_segs) - } - } - tmp <- list() - tmp$segs <- ntAI_segs - tmp$score <- ntAI_score - return(invisible(tmp)) -} - -dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) -dat = fix_facets_column_names(dat) -segs = fix_facet_segs(dat) -chromInfo = GetChrominfo() -ntai = score_ntAI(segs, chromInfo) -cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", ntai$score), file = opt$file_out, append=FALSE) -cat("\n", file = opt$file_out, append=TRUE) - -warnings() diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 57c1940e..6027ca26 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -1,13 +1,514 @@ #!/usr/bin/env Rscript -file_names = c("genome_altered.tsv", "lst_score.tsv", "myriad_score.tsv", "ntai_score.tsv") -summary_scores = NULL -for (i in 1:length(file_names)) { - data = read.csv(file=paste0("genome_stats/", file_names[i]), header=FALSE, sep="\t", stringsAsFactors=FALSE) - summary_scores = cbind(summary_scores, data[,2]) +suppressPackageStartupMessages(library("optparse")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) } -summary_scores = cbind(data[,1], summary_scores) -colnames(summary_scores) = c("sample_names", gsub(".tsv", "", file_names)) -write.table(summary_scores, file="summary/tsv/genome_summary.tsv", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) -warnings() +args_list <- list(make_option("--option", default = NA, type = 'character', help = "which analysis to do"), + make_option("--file_in", default = NA, type = 'character', help = "input file name"), + make_option("--file_out", default = NA, type = 'character', help = "output file name")) +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + + +if (as.numeric(opt$option) == 1) { + + load(opt$file_in) + alpha = ifelse(is.na(fit$purity), 1, fit$purity) + psi = ifelse(is.na(fit$ploidy), 2, fit$ploidy) + gamma = 1 + x = fit$cncf[,"cnlr.median"] + absolute_copies = round(((((2^(x/gamma))*(alpha*psi+(1-alpha)*2)) - ((1-alpha)*2))/alpha)) + index = absolute_copies!=round(psi) + if (sum(index, na.rm=TRUE)!=0) { + genome_footprint = sum(as.numeric(fit$cncf[,"end"]-fit$cncf[,"start"]), na.rm=TRUE) + genome_altered = sum(as.numeric(fit$cncf[index,"end"]-fit$cncf[index,"start"]), na.rm=TRUE)/genome_footprint + } else { + genome_altered = 0 + } + cat(paste0(gsub("facets/cncf/","", gsub(".Rdata", "", opt$file_in)), "\t", genome_altered), file = opt$file_out, append=FALSE) + cat("\n", file = opt$file_out, append=TRUE) + +} else if (as.numeric(opt$option) == 2) { + + chromStrToNum <- function(str) { + suppressWarnings(cNum <- as.numeric(str)) + if (is.na(cNum) && str == "X" ) { + cNum <- 23 + } else if (is.na(cNum) && str == "Y") { + cNum <- 24 + } + return(invisible(cNum)) + } + + GetChrominfo <- function() { + f <- "modules/copy_number/hg19_chrominfo.txt" + chrom <- read.table(file=f) + chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) + f <- "modules/copy_number/hg19_gaps.txt" + gaps <- read.table(file=f) + centro <- subset(gaps, gaps[,8] == "centromere") + chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) + chrominfo$centromere <- rowMeans(chrominfo[,3:4]) + chrominfo <- chrominfo[,c(1,2,5,3,4)] + colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") + chrominfo[,1] <- as.character(chrominfo[,1]) + chrominfo$chr <- sub("chr", "", chrominfo$chr) + chrominfo$chr <- sub("X", "23", chrominfo$chr) + chrominfo$chr <- sub("Y", "24", chrominfo$chr) + chrominfo[,1] <- as.numeric(chrominfo[,1]) + chrominfo <- chrominfo[order(chrominfo$chr), ] + rownames(chrominfo) <- as.character(chrominfo[,1]) + chrominfo <- as.matrix(chrominfo) + return(invisible(chrominfo)) + } + + fix_facets_column_names <- function(dat) { + colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" + colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" + colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" + colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" + sz <- dat[,"endBP"] - dat[,"startBP"] + dat <- cbind(dat, size=sz) + nA <- dat[,"tcn.em"] - dat[,"nB"] + dat <- cbind(dat, nA=nA) + return(invisible(dat)) + } + + join_adjacent_segments <- function(dat) { + cur_segs <- dat + something_changed <- 1 + while ( something_changed ) { + new_segs <- c() + something_changed <- 0 + x <- 2 + last_changed <- 0 + while (x <= nrow(cur_segs)) { + last_changed <- 0 + if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && + (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && + (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) + ) { + t <- cur_segs[x-1,] + t["endBP"] <- cur_segs[x,"endBP"] + t["end"] <- cur_segs[x,"end"] + t["size"] <- t["endBP"] - t["startBP"] + something_changed <- 1 + new_segs <- rbind(t, new_segs) + x <- x+2 + last_changed <- 1 + } else { + new_segs <- rbind(cur_segs[x-1,], new_segs) + x<-x+1 + } + } + if (! last_changed ) { + new_segs <- rbind(cur_segs[x-1,],new_segs) + } + n <- nrow(new_segs) + new_segs <- new_segs[n:1,] + cur_segs <- new_segs + } + return(invisible(cur_segs)) + } + + fix_facet_segs <- function(dat) { + i <- which(is.na(dat$nB)) + if ( length(i) > 0 ) { + dat <- dat[-i, ] + } + dat <- join_adjacent_segments(dat) + return(invisible(dat)) + } + + chrom_arm_LST_score <- function(dat) { + score <- 0 + segs <- c() + SIZE_THRESH <- 10e6 + SPACE_THRESH <- 3e6 + if ( nrow(dat) >= 2 ) { + for (x in 2:nrow(dat)) { + if ( (dat[x-1,"size"] >= SIZE_THRESH) && + (dat[x,"size"] >= SIZE_THRESH) && + ( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH) + ) { + score <- score +1 + segs <- rbind(dat[x-1,], segs) + } + } + } + tmp <- list() + tmp$score <- score + tmp$segs <- segs + return(invisible(tmp)) + } + + lst_filter <- function(dat, size_thresh) { + i <- which(dat[,"size"] < size_thresh) + sz <- dat[i,"size"] + i <- i[order(sz)] + segs_removed <- 0 + while (length(i) > 0) { + dat <- dat[-i[1], ] + dat <- join_adjacent_segments(dat) + i<- which(dat[,"size"] < size_thresh) + sz <- dat[i,"size"] + i <- i[order(sz)] + segs_removed <- segs_removed + 1 + } + return(invisible(dat)) + } + + score_LST <- function(dat, chromInfo) { + score <- 0 + segs <- c() + dat <- lst_filter(dat, 3e6) + for (c in unique(dat[,"chromosome"]) ) { + i <- which(dat[,"chromosome"] == c) + csegs <- dat[i,] + cNum <- chromStrToNum(c) + i <- which(csegs[,"startBP"] <= chromInfo[cNum,"centstart"]) + parm <- csegs[i,] + tmp <- chrom_arm_LST_score(parm) + score <- score + tmp$score + segs <- rbind(tmp$segs, segs) + i <- which(csegs[,"endBP"] >= chromInfo[cNum,"centend"]) + qarm <- csegs[i,] + tmp <- chrom_arm_LST_score(qarm) + score <- score + tmp$score + segs <- rbind(tmp$segs, segs) + } + tmp <- list() + tmp$score <- score + tmp$segs <- segs + return(invisible(tmp)) + } + + dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) + dat = fix_facets_column_names(dat) + segs = fix_facet_segs(dat) + chromInfo = GetChrominfo() + lst = score_LST(segs, chromInfo) + cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", lst$score), file = opt$file_out, append=FALSE) + cat("\n", file = opt$file_out, append=TRUE) + +} else if (as.numeric(opt$option) == 3) { + + chromStrToNum <- function(str) { + suppressWarnings(cNum <- as.numeric(str)) + if (is.na(cNum) && str == "X" ) { + cNum <- 23 + } else if (is.na(cNum) && str == "Y") { + cNum <- 24 + } + return(invisible(cNum)) + } + + GetChrominfo <- function() { + f <- "modules/copy_number/hg19_chrominfo.txt" + chrom <- read.table(file=f) + chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) + f <- "modules/copy_number/hg19_gaps.txt" + gaps <- read.table(file=f) + centro <- subset(gaps, gaps[,8] == "centromere") + chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) + chrominfo$centromere <- rowMeans(chrominfo[,3:4]) + chrominfo <- chrominfo[,c(1,2,5,3,4)] + colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") + chrominfo[,1] <- as.character(chrominfo[,1]) + chrominfo$chr <- sub("chr", "", chrominfo$chr) + chrominfo$chr <- sub("X", "23", chrominfo$chr) + chrominfo$chr <- sub("Y", "24", chrominfo$chr) + chrominfo[,1] <- as.numeric(chrominfo[,1]) + chrominfo <- chrominfo[order(chrominfo$chr), ] + rownames(chrominfo) <- as.character(chrominfo[,1]) + chrominfo <- as.matrix(chrominfo) + return(invisible(chrominfo)) + } + + fix_facets_column_names <- function(dat) { + colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" + colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" + colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" + colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" + sz <- dat[,"endBP"] - dat[,"startBP"] + dat <- cbind(dat, size=sz) + nA <- dat[,"tcn.em"] - dat[,"nB"] + dat <- cbind(dat, nA=nA) + return(invisible(dat)) + } + + join_adjacent_segments <- function(dat) { + cur_segs <- dat + something_changed <- 1 + while ( something_changed ) { + new_segs <- c() + something_changed <- 0 + x <- 2 + last_changed <- 0 + while (x <= nrow(cur_segs)) { + last_changed <- 0 + if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && + (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && + (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) + ) { + t <- cur_segs[x-1,] + t["endBP"] <- cur_segs[x,"endBP"] + t["end"] <- cur_segs[x,"end"] + t["size"] <- t["endBP"] - t["startBP"] + something_changed <- 1 + new_segs <- rbind(t, new_segs) + x <- x+2 + last_changed <- 1 + } else { + new_segs <- rbind(cur_segs[x-1,], new_segs) + x<-x+1 + } + } + if (! last_changed ) { + new_segs <- rbind(cur_segs[x-1,],new_segs) + } + n <- nrow(new_segs) + new_segs <- new_segs[n:1,] + cur_segs <- new_segs + } + return(invisible(cur_segs)) + } + + fix_facet_segs <- function(dat) { + i <- which(is.na(dat$nB)) + if ( length(i) > 0 ) { + dat <- dat[-i, ] + } + dat <- join_adjacent_segments(dat) + return(invisible(dat)) + } + + score_ntAI <- function(dat, chromInfo, min_size=1000, shrink=FALSE) { + index <- dat[,"chromosome"] %in% c("MT", "Y", "24") + dat <- dat[!index,] + index <- dat[,"size"] < min_size + dat <- dat[!index,] + if (shrink) { + dat <- join_adjacent_segments(dat) + } + chrList <- unique(dat[,"chromosome"]) + ntAI_score <- 0 + ntAI_segs <- NULL + for (x in chrList) { + index <- dat[,"chromosome"] == x + chr_segs <- dat[index,] + cNum <- chromStrToNum(x) + if (nrow(chr_segs) < 2 ) { + next + } + if ( (chr_segs[1,"nA"] != chr_segs[1,"nB"]) && (chromInfo[cNum,"centstart"] > chr_segs[1,"endBP"]) ) { + ntAI_score <- ntAI_score+1 + ntAI_segs <- rbind(chr_segs[1,],ntAI_segs) + } + eSeg <- nrow(chr_segs) + if ( (chr_segs[eSeg, "nA"] != chr_segs[eSeg, "nB"]) && (chr_segs[eSeg,"startBP"] > chromInfo[cNum,"centend"]) ) { + ntAI_score <- ntAI_score+1 + ntAI_segs <- rbind(chr_segs[eSeg,],ntAI_segs) + } + } + tmp <- list() + tmp$segs <- ntAI_segs + tmp$score <- ntAI_score + return(invisible(tmp)) + } + + dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) + dat = fix_facets_column_names(dat) + segs = fix_facet_segs(dat) + chromInfo = GetChrominfo() + ntai = score_ntAI(segs, chromInfo) + cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", ntai$score), file = opt$file_out, append=FALSE) + cat("\n", file = opt$file_out, append=TRUE) + +} else if (as.numeric(opt$option) == 4) { + + chromStrToNum <- function(str) { + suppressWarnings(cNum <- as.numeric(str)) + if (is.na(cNum) && str == "X" ) { + cNum <- 23 + } else if (is.na(cNum) && str == "Y") { + cNum <- 24 + } + return(invisible(cNum)) + } + + GetChrominfo <- function() { + f <- "modules/copy_number/hg19_chrominfo.txt" + chrom <- read.table(file=f) + chrom <- subset(chrom, grepl("^chr[0-9XY]{1,2}$", chrom[,1])) + f <- "modules/copy_number/hg19_gaps.txt" + gaps <- read.table(file=f) + centro <- subset(gaps, gaps[,8] == "centromere") + chrominfo <- merge(chrom[,1:2], centro[,2:4], by.x = 1, by.y = 1) + chrominfo$centromere <- rowMeans(chrominfo[,3:4]) + chrominfo <- chrominfo[,c(1,2,5,3,4)] + colnames(chrominfo) <- c("chr", "size", "centromere", "centstart", "centend") + chrominfo[,1] <- as.character(chrominfo[,1]) + chrominfo$chr <- sub("chr", "", chrominfo$chr) + chrominfo$chr <- sub("X", "23", chrominfo$chr) + chrominfo$chr <- sub("Y", "24", chrominfo$chr) + chrominfo[,1] <- as.numeric(chrominfo[,1]) + chrominfo <- chrominfo[order(chrominfo$chr), ] + rownames(chrominfo) <- as.character(chrominfo[,1]) + chrominfo <- as.matrix(chrominfo) + return(invisible(chrominfo)) + } + + fix_facets_column_names <- function(dat) { + colnames(dat)[which(colnames(dat)=="chrom")] <- "chromosome" + colnames(dat)[which(colnames(dat)=="loc.start")] <- "startBP" + colnames(dat)[which(colnames(dat)=="loc.end")] <- "endBP" + colnames(dat)[which(colnames(dat)=="lcn.em")] <- "nB" + sz <- dat[,"endBP"] - dat[,"startBP"] + dat <- cbind(dat, size=sz) + nA <- dat[,"tcn.em"] - dat[,"nB"] + dat <- cbind(dat, nA=nA) + return(invisible(dat)) + } + + join_adjacent_segments <- function(dat) { + cur_segs <- dat + something_changed <- 1 + while ( something_changed ) { + new_segs <- c() + something_changed <- 0 + x <- 2 + last_changed <- 0 + while (x <= nrow(cur_segs)) { + last_changed <- 0 + if ( (cur_segs[x-1,"nB"] == cur_segs[x,"nB"]) && + (cur_segs[x-1,"nA"] == cur_segs[x,"nA"]) && + (cur_segs[x-1,"chromosome"] == cur_segs[x,"chromosome"]) + ) { + t <- cur_segs[x-1,] + t["endBP"] <- cur_segs[x,"endBP"] + t["end"] <- cur_segs[x,"end"] + t["size"] <- t["endBP"] - t["startBP"] + something_changed <- 1 + new_segs <- rbind(t, new_segs) + x <- x+2 + last_changed <- 1 + } else { + new_segs <- rbind(cur_segs[x-1,], new_segs) + x<-x+1 + } + } + if (! last_changed ) { + new_segs <- rbind(cur_segs[x-1,],new_segs) + } + n <- nrow(new_segs) + new_segs <- new_segs[n:1,] + cur_segs <- new_segs + } + return(invisible(cur_segs)) + } + + fix_facet_segs <- function(dat) { + i <- which(is.na(dat$nB)) + if ( length(i) > 0 ) { + dat <- dat[-i, ] + } + dat <- join_adjacent_segments(dat) + return(invisible(dat)) + } + + chrom_arm_LST_score <- function(dat) { + score <- 0 + segs <- c() + SIZE_THRESH <- 10e6 + SPACE_THRESH <- 3e6 + if ( nrow(dat) >= 2 ) { + for (x in 2:nrow(dat)) { + if ( (dat[x-1,"size"] >= SIZE_THRESH) && + (dat[x,"size"] >= SIZE_THRESH) && + ( (dat[x,"startBP"] - dat[x-1,"endBP"]) <= SPACE_THRESH) + ) { + score <- score +1 + segs <- rbind(dat[x-1,], segs) + } + } + } + tmp <- list() + tmp$score <- score + tmp$segs <- segs + return(invisible(tmp)) + } + + lst_filter <- function(dat, size_thresh) { + i <- which(dat[,"size"] < size_thresh) + sz <- dat[i,"size"] + i <- i[order(sz)] + segs_removed <- 0 + while (length(i) > 0) { + dat <- dat[-i[1], ] + dat <- join_adjacent_segments(dat) + i<- which(dat[,"size"] < size_thresh) + sz <- dat[i,"size"] + i <- i[order(sz)] + segs_removed <- segs_removed + 1 + } + return(invisible(dat)) + } + + score_myriad_HRD <- function(dat, thresh=15e6) { + chrDel <- NULL + hrdSegs <- NULL + hrd_score <- 0 + chrList <- unique(dat[,"chromosome"]) + for (x in chrList) { + index <- which(dat[,"chromosome"] == x) + totalnB <- sum(dat[index,"nB"], na.rm=TRUE) + if (totalnB == 0) { + chrDel <- c(x, chrDel) + } + } + for (x in 1:nrow(dat)) { + if ( dat[x,"chromosome"] %in% chrDel ) { + next + } + if ( dat[x,"nB"] != 0 ) { + next + } + if (dat[x,"size"] < thresh) { + next + } + hrd_score <- hrd_score + 1 + hrdSegs <- rbind(dat[x,], hrdSegs) + } + tmp <- list() + tmp$score = hrd_score + tmp$segs = hrdSegs + return(invisible(tmp)) + } + + + dat = read.table(opt$file_in, sep="\t", header=TRUE, stringsAsFactor=FALSE) + dat = fix_facets_column_names(dat) + segs = fix_facet_segs(dat) + chromInfo = GetChrominfo() + mrs = score_myriad_HRD(segs) + cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", mrs$score), file = opt$file_out, append=FALSE) + cat("\n", file = opt$file_out, append=TRUE) + +} else if (as.numeric(opt$option)==5) { + + file_names = c("genome_altered.tsv", "lst_score.tsv", "myriad_score.tsv", "ntai_score.tsv") + summary_scores = NULL + for (i in 1:length(file_names)) { + data = read.csv(file=paste0("genome_stats/", file_names[i]), header=FALSE, sep="\t", stringsAsFactors=FALSE) + summary_scores = cbind(summary_scores, data[,2]) + } + summary_scores = cbind(data[,1], summary_scores) + colnames(summary_scores) = c("sample_names", gsub(".tsv", "", file_names)) + write.table(summary_scores, file="summary/tsv/genome_summary.tsv", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) + +} diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index b40b614e..186f2059 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -21,28 +21,28 @@ MYRIAD_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) define fraction-genome-altered genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/genomealtered.R --file_in $$(<) --file_out $$(@)") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 1 --file_in $$(<) --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) define lst-score genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/lstscore.R --file_in $$< --file_out $$(@)") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 2 --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) define ntai-score genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ntaiscore.R --file_in $$< --file_out $$(@)") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 3 --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) define myriad-score genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/myriadhrdscore.R --file_in $$< --file_out $$(@)") + $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 4 --file_in $$< --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) @@ -70,7 +70,7 @@ genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ mkdir -p genome_stats && \ - $(RSCRIPT) modules/summary/genomesummary.R") + $(RSCRIPT) modules/summary/genomesummary.R --option 5") summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") From 9f1df6f615521b273eb7c440ecac06a38186ef9f Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Jul 2021 12:03:00 -0400 Subject: [PATCH 115/766] -mk --- Makefile | 4 - summary/sufamsummary.R | 27 ---- variant_callers/combinesamples.R | 108 ---------------- variant_callers/combinesamplesf.R | 120 ------------------ variant_callers/sufammultisample.mk | 42 ------ variant_callers/updatesamples.R | 190 ---------------------------- 6 files changed, 491 deletions(-) delete mode 100644 summary/sufamsummary.R delete mode 100644 variant_callers/combinesamples.R delete mode 100644 variant_callers/combinesamplesf.R delete mode 100644 variant_callers/sufammultisample.mk delete mode 100644 variant_callers/updatesamples.R diff --git a/Makefile b/Makefile index 13bc3c33..150b0355 100644 --- a/Makefile +++ b/Makefile @@ -189,10 +189,6 @@ TARGETS += sufam sufam: $(call RUN_MAKE,modules/variant_callers/sufamsampleset.mk) -TARGETS += sufam_summary -sufam_summary: - $(call RUN_MAKE,modules/variant_callers/sufammultisample.mk) - TARGETS += get_basecount get_basecount: $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) diff --git a/summary/sufamsummary.R b/summary/sufamsummary.R deleted file mode 100644 index ef96e778..00000000 --- a/summary/sufamsummary.R +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("openxlsx")) -suppressPackageStartupMessages(library("readr")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample_sets", default = NA, type = 'character', help = "sample sets file names")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -sample_names = na.omit(unlist(strsplit(x=opt$sample_sets, split=" ", fixed=TRUE))) -list_of_dfs = list() -for (i in 1:length(sample_names)) { - sample_vars = read_tsv(file=paste0("sufam/", sample_names[i], ".tsv")) - col_names = colnames(sample_vars) - sample_vars = as.data.frame(sample_vars) - sample_vars[sample_vars=="" | sample_vars==" " | is.na(sample_vars)] = "NA" - colnames(sample_vars) = col_names - list_of_dfs[[i]] = sample_vars -} -names(list_of_dfs) = sample_names -write.xlsx(list_of_dfs, file="summary/sufam_summary.xlsx") diff --git a/variant_callers/combinesamples.R b/variant_callers/combinesamples.R deleted file mode 100644 index 5c54d37a..00000000 --- a/variant_callers/combinesamples.R +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) - -all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -tmp_vars = all_vars[all_vars$TUMOR_SAMPLE %in% sample_names,,drop=FALSE] -keys = paste0(tmp_vars$CHROM, ":", tmp_vars$POS, ":", tmp_vars$REF, ":", tmp_vars$ALT) -ukeys = unique(keys) -vars = NULL -for (i in 1:length(ukeys)) { - index = which(keys==ukeys[i]) - Chromosome = tmp_vars[index[1],"CHROM"] - Position = tmp_vars[index[1],"POS"] - Ref = tmp_vars[index[1],"REF"] - Alt = tmp_vars[index[1],"ALT"] - Variant_Caller = tmp_vars[index[1],"variantCaller"] - Gene_Symbol = tmp_vars[index[1],"SYMBOL"] - Variant_Classification = tmp_vars[index[1],"Variant_Classification"] - HGVSp_Short = tmp_vars[index[1],"HGVSp_Short"] - Fuentes = tmp_vars[index[1],"fuentes"] - dgd = tmp_vars[index[1],"dgd"] - OncoKB_Level = tmp_vars[index[1],"oncoKB_level"] - OncoKB_Cancer_Type = tmp_vars[index[1],"oncoKB_cancer_type"] - Cancer_Gene_Census = tmp_vars[index[1],"cancer_gene_census"] - Kandoth = tmp_vars[index[1],"kandoth"] - Lawrence = tmp_vars[index[1],"lawrence"] - Hap_Insuf = tmp_vars[index[1],"hap_insuf"] - ExAC_AF = tmp_vars[index[1],"ExAC_AF"] - MutationTaster = tmp_vars[index[1],"MutationTaster_pred"] - PROVEAN = tmp_vars[index[1],"PROVEAN_pred"] - FATHMM = tmp_vars[index[1],"FATHMM_pred"] - BRCA_Chasm = tmp_vars[index[1],"BRCA_chasm_pred"] - Parssnp = tmp_vars[index[1],"parssnp_pred"] - Pathogenicity = tmp_vars[index[1],"pathogenicity"] - HOTSPOT = tmp_vars[index[1],"HOTSPOT"] - HOTSPOT_INTERNAL = tmp_vars[index[1],"HOTSPOT_INTERNAL"] - CMO_HOTSPOT = tmp_vars[index[1],"cmo_hotspot"] - vars = rbind(vars, c("Chromosome"=Chromosome, - "Position"=Position, - "Ref"=Ref, - "Alt"=Alt, - "Variant_Caller"=Variant_Caller, - "Gene_Symbol"=Gene_Symbol, - "Variant_Classification"=Variant_Classification, - "HGVSp"=HGVSp_Short, - "Fuentes"=Fuentes, - "dgd"=dgd, - "OncoKB_Level"=OncoKB_Level, - "OncoKB_Cancer_Type"=OncoKB_Cancer_Type, - "Cancer_Gene_Census"=Cancer_Gene_Census, - "Kandoth"=Kandoth, - "Lawrence"=Lawrence, - "Hap_Insuf"=Hap_Insuf, - "ExAC"=ExAC_AF, - "MutationTaster"=MutationTaster, - "PROVEAN"=PROVEAN, - "FATHMM"=FATHMM, - "BRCA_Chasm"=BRCA_Chasm, - "Parssnp"=Parssnp, - "Pathogenicity"=Pathogenicity, - "HOTSPOT"=HOTSPOT, - "HOTSPOT_INTERNAL"=HOTSPOT_INTERNAL, - "HOTSPOT_CMO"=CMO_HOTSPOT)) -} - -normal_name = tmp_vars[1,"NORMAL_SAMPLE"] - -VAF = DEPTH = LOH = CALLS = matrix(NA, nrow=length(ukeys), ncol=length(sample_names), dimnames=list(ukeys, sample_names)) -for (j in 1:nrow(tmp_vars)) { - sample_name = tmp_vars[j,"TUMOR_SAMPLE"] - ukey = paste0(tmp_vars$CHROM[j], ":", tmp_vars$POS[j], ":", tmp_vars$REF[j], ":", tmp_vars$ALT[j]) - VAF[ukey,sample_name] = tmp_vars[j,"TUMOR_MAF"] - VAF[ukey,normal_name] = tmp_vars[j,"NORMAL_MAF"] - DEPTH[ukey,sample_name] = tmp_vars[j,"TUMOR_DP"] - DEPTH[ukey,normal_name] = tmp_vars[j,"NORMAL_DP"] - LOH[ukey,sample_name] = tmp_vars[j,"facetsLOHCall"] - CALLS[ukey,sample_name] = 1 -} -colnames(VAF) = paste0("MAF_", colnames(VAF)) -colnames(DEPTH) = paste0("DP_", colnames(DEPTH)) -colnames(LOH) = paste0("LOH_", colnames(LOH)) -colnames(CALLS) = paste0("CALL_", colnames(CALLS)) -CALLS[is.na(CALLS)] = 0 -vars = cbind(vars, VAF, DEPTH, LOH, CALLS) -mutect = grepl("mutect", vars[,"Variant_Caller"]) -main_indels = grepl("varscan", vars[,"Variant_Caller"]) & grepl("strelka", vars[,"Variant_Caller"]) -other_indels = ((grepl("platypus", vars[,"Variant_Caller"]) & grepl("scalpel", vars[,"Variant_Caller"])) | - (grepl("platypus", vars[,"Variant_Caller"]) & grepl("lancet", vars[,"Variant_Caller"]))) & - (nchar(vars[,"Ref"])>3 | nchar(vars[,"Alt"])>3) & - !grepl("In_Frame", vars[,"Variant_Classification"]) -index = mutect | main_indels | other_indels -vars = vars[index,,drop=FALSE] -index = vars[,"Variant_Classification"] %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", "Splice_Site") -vars = vars[index,,drop=FALSE] - -write.table(vars, file=paste0("sufam/", opt$sample_set, ".txt"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) diff --git a/variant_callers/combinesamplesf.R b/variant_callers/combinesamplesf.R deleted file mode 100644 index 0e90be09..00000000 --- a/variant_callers/combinesamplesf.R +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) - -all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -tmp_vars = all_vars[all_vars$TUMOR_SAMPLE %in% sample_names,,drop=FALSE] -keys = paste0(tmp_vars$CHROM, ":", tmp_vars$POS, ":", tmp_vars$REF, ":", tmp_vars$ALT) -ukeys = unique(keys) -vars = NULL -for (i in 1:length(ukeys)) { - index = which(keys==ukeys[i]) - Chromosome = tmp_vars[index[1],"CHROM"] - Position = tmp_vars[index[1],"POS"] - Ref = tmp_vars[index[1],"REF"] - Alt = tmp_vars[index[1],"ALT"] - Variant_Caller = tmp_vars[index[1],"variantCaller"] - Gene_Symbol = tmp_vars[index[1],"SYMBOL"] - Variant_Classification = tmp_vars[index[1],"Variant_Classification"] - HGVSp_Short = tmp_vars[index[1],"HGVSp_Short"] - Fuentes = tmp_vars[index[1],"fuentes"] - dgd = tmp_vars[index[1],"dgd"] - OncoKB_Level = tmp_vars[index[1],"oncoKB_level"] - OncoKB_Cancer_Type = tmp_vars[index[1],"oncoKB_cancer_type"] - Cancer_Gene_Census = tmp_vars[index[1],"cancer_gene_census"] - Kandoth = tmp_vars[index[1],"kandoth"] - Lawrence = tmp_vars[index[1],"lawrence"] - Hap_Insuf = tmp_vars[index[1],"hap_insuf"] - ExAC_AF = tmp_vars[index[1],"ExAC_AF"] - MutationTaster = tmp_vars[index[1],"MutationTaster_pred"] - PROVEAN = tmp_vars[index[1],"PROVEAN_pred"] - FATHMM = tmp_vars[index[1],"FATHMM_pred"] - BRCA_Chasm = tmp_vars[index[1],"BRCA_chasm_pred"] - Parssnp = tmp_vars[index[1],"parssnp_pred"] - Pathogenicity = tmp_vars[index[1],"pathogenicity"] - HOTSPOT = tmp_vars[index[1],"HOTSPOT"] - HOTSPOT_INTERNAL = tmp_vars[index[1],"HOTSPOT_INTERNAL"] - CMO_HOTSPOT = tmp_vars[index[1],"cmo_hotspot"] - vars = rbind(vars, c("Chromosome"=Chromosome, - "Position"=Position, - "Ref"=Ref, - "Alt"=Alt, - "Variant_Caller"=Variant_Caller, - "Gene_Symbol"=Gene_Symbol, - "Variant_Classification"=Variant_Classification, - "HGVSp"=HGVSp_Short, - "Fuentes"=Fuentes, - "dgd"=dgd, - "OncoKB_Level"=OncoKB_Level, - "OncoKB_Cancer_Type"=OncoKB_Cancer_Type, - "Cancer_Gene_Census"=Cancer_Gene_Census, - "Kandoth"=Kandoth, - "Lawrence"=Lawrence, - "Hap_Insuf"=Hap_Insuf, - "ExAC"=ExAC_AF, - "MutationTaster"=MutationTaster, - "PROVEAN"=PROVEAN, - "FATHMM"=FATHMM, - "BRCA_Chasm"=BRCA_Chasm, - "Parssnp"=Parssnp, - "Pathogenicity"=Pathogenicity, - "HOTSPOT"=HOTSPOT, - "HOTSPOT_INTERNAL"=HOTSPOT_INTERNAL, - "HOTSPOT_CMO"=CMO_HOTSPOT)) -} - -normal_name = tmp_vars[1,"NORMAL_SAMPLE"] - -VAF = DEPTH = LOH = CALLS = matrix(NA, nrow=length(ukeys), ncol=length(sample_names), dimnames=list(ukeys, sample_names)) -for (j in 1:nrow(tmp_vars)) { - sample_name = tmp_vars[j,"TUMOR_SAMPLE"] - ukey = paste0(tmp_vars$CHROM[j], ":", tmp_vars$POS[j], ":", tmp_vars$REF[j], ":", tmp_vars$ALT[j]) - VAF[ukey,sample_name] = tmp_vars[j,"TUMOR_MAF"] - VAF[ukey,normal_name] = tmp_vars[j,"NORMAL_MAF"] - DEPTH[ukey,sample_name] = tmp_vars[j,"TUMOR_DP"] - DEPTH[ukey,normal_name] = tmp_vars[j,"NORMAL_DP"] - LOH[ukey,sample_name] = tmp_vars[j,"facetsLOHCall"] - CALLS[ukey,sample_name] = 1 -} -colnames(VAF) = paste0("MAF_", colnames(VAF)) -colnames(DEPTH) = paste0("DP_", colnames(DEPTH)) -colnames(LOH) = paste0("LOH_", colnames(LOH)) -colnames(CALLS) = paste0("CALL_", colnames(CALLS)) -CALLS[is.na(CALLS)] = 0 -vars = cbind(vars, VAF, DEPTH, LOH, CALLS) -mutect = grepl("mutect", vars[,"Variant_Caller"]) -main_indels = grepl("varscan", vars[,"Variant_Caller"]) & grepl("strelka", vars[,"Variant_Caller"]) -other_indels = ((grepl("platypus", vars[,"Variant_Caller"]) & grepl("scalpel", vars[,"Variant_Caller"])) | - (grepl("platypus", vars[,"Variant_Caller"]) & grepl("lancet", vars[,"Variant_Caller"]))) & - (nchar(vars[,"Ref"])>3 | nchar(vars[,"Alt"])>3) & - !grepl("In_Frame", vars[,"Variant_Classification"]) -index = mutect | main_indels | other_indels -vars = vars[index,,drop=FALSE] -index = vars[,"Variant_Classification"] %in% c("Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del", "In_Frame_Ins", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", "Splice_Site") -vars = vars[index,,drop=FALSE] - -blacklist = read.csv(file="summary/tsv/mouse_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -indx = grep("AD", colnames(blacklist)) -index = matrix(0, nrow=nrow(blacklist), ncol=length(indx)) -for (i in 1:length(indx)) { - index[blacklist[,indx[i]]!=0,i] = 1 -} -index = apply(index, 1, sum)>0 -all_id = paste0(vars[,"Chromosome"], ":", vars[,"Position"], "_", vars[,"Ref"], ">", vars[,"Alt"]) -blacklist_id = paste0(blacklist[index,"Chromosome"], ":", blacklist[index,"Position"], "_", blacklist[,"Reference_Allele"], ">", blacklist[,"Alternate_Allele"]) -keep_id = which(!(all_id %in% blacklist_id)) -vars = vars[keep_id,,drop=FALSE] - -write.table(vars, file=paste0("sufam/", opt$sample_set, ".txt"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) diff --git a/variant_callers/sufammultisample.mk b/variant_callers/sufammultisample.mk deleted file mode 100644 index cb0157c8..00000000 --- a/variant_callers/sufammultisample.mk +++ /dev/null @@ -1,42 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/sufam_multisample.$(NOW) -PHONY += sufam summary - -sufam_multisample : $(foreach set,$(SAMPLE_SETS),sufam/$(set).tsv) summary/sufam_summary.xlsx - -ifeq ($(PDX),true) - -define combine-samples-pdx -sufam/%.txt : summary/tsv/mutation_summary.tsv - $$(call RUN,-c -s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/combinesamplesf.R --sample_set $$*") - -sufam/%.tsv : sufam/%.txt - $$(call RUN,-c -s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/updatesamples.R --sample_set $$*") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call combine-samples-pdx,$(set)))) - -else - -define combine-samples -sufam/%.txt : summary/tsv/mutation_summary.tsv - $$(call RUN,-s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/combinesamples.R --sample_set $$*") - -sufam/%.tsv : sufam/%.txt - $$(call RUN,-s 4G -m 6G,"$(RSCRIPT) modules/variant_callers/updatesamples.R --sample_set $$*") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call combine-samples,$(set)))) - -endif - -summary/sufam_summary.xlsx : $(wildcard $(foreach set,$(SAMPLE_SETS),sufam/$(set).tsv)) - $(call RUN,-s 12G -m 16G,"export R_LIBS='~/share/usr/anaconda-envs/jrflab-modules-0.1.4/lib/R/library:~/share/usr/lib64/R/library' && \ - $(RSCRIPT) modules/summary/sufamsummary.R --sample_sets '$(SAMPLE_SETS)'") - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) \ No newline at end of file diff --git a/variant_callers/updatesamples.R b/variant_callers/updatesamples.R deleted file mode 100644 index d9be2876..00000000 --- a/variant_callers/updatesamples.R +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("CNtu")) -suppressPackageStartupMessages(library("readr")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--sample_set", default = NA, type = 'character', help = "sample names set")) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -sample_names = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) - -vars = read_tsv(file=paste0("sufam/", opt$sample_set, ".txt")) -col_names = colnames(vars) -vars = as.data.frame(vars) -colnames(vars) = col_names - -#==================================== -# sufam -#==================================== -chr = vars$Chromosome -pos = vars$Position -id = rep(".", nrow(vars)) -ref = vars$Ref -alt = vars$Alt -qual = rep(100, nrow(vars)) -filter = rep("PASS", nrow(vars)) -info = rep(".", nrow(vars)) -vcf = cbind(chr, pos, id, ref, alt, qual, filter, info) -colnames(vcf) = c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO") -write.table(vcf, file=paste0("sufam/", opt$sample_set, ".vcf"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) - -#==================================== -# dp and maf -#==================================== -for (i in 1:length(sample_names)) { - if (!file.exists(paste0("sufam/", sample_names[i], ".mat"))) { - system(paste0("source ~/share/usr/anaconda/bin/activate ~/share/usr/anaconda-envs/sufam-dev && sufam ~/share/reference/GATK_bundle/2.3/human_g1k_v37.fa sufam/", opt$sample_set, ".vcf bam/", sample_names[i], ".bam > sufam/", sample_names[i], ".mat")) - } - tmp = read.csv(file=paste0("sufam/", sample_names[i], ".mat"), header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = paste0("DP_", sample_names[i]) - vars[,index] = tmp[,"cov"] - index = paste0("MAF_", sample_names[i]) - vars[,index] = tmp[,"val_maf"] -} - -#==================================== -# qt and q2 -#==================================== -q_t = q_2 = NULL -for (i in 1:length(sample_names)) { - file_names = dir(path="ascat/ascat", pattern=".RData", full.names=TRUE) - index = grep(sample_names[i], file_names, fixed=TRUE) - if (length(index)==1) { - load(file_names[index]) - Chromosomes = tmp2$SNPpos[tmp3$seg[,"start"],1] - Chromosomes[Chromosomes==23] = "X" - Start = tmp2$SNPpos[tmp3$seg[,"start"],2] - End = tmp2$SNPpos[tmp3$seg[,"end"],2] - qt = tmp3$seg[,"nA"] + tmp3$seg[,"nB"] - q2 = apply(tmp3$seg[,c("nA","nB")], 1, max) - index = rep(NA, nrow(vars)) - for (j in 1:nrow(vars)) { - indx = which(Chromosomes==vars[j,"Chromosome"] & Start<=vars[j,"Position"] & End>=vars[j,"Position"]) - if (length(indx)!=0) { - index[j] = indx - } else { - index[j] = NA - } - } - q_t = cbind(q_t, qt[index]) - q_2 = cbind(q_2, q2[index]) - } else { - q_t = cbind(q_t, rep(2, nrow(vars))) - q_2 = cbind(q_2, rep(1, nrow(vars))) - } -} -q_t[is.na(q_t)] = 2 -q_2[is.na(q_2)] = 1 -colnames(q_t) = colnames(q_2) = sample_names -colnames(q_t) = paste0("qt_", colnames(q_t)) -colnames(q_2) = paste0("q2_", colnames(q_2)) -vars = cbind(vars, q_t, q_2) - -#==================================== -# loh -#==================================== -for (i in 1:length(sample_names)) { - loh = rep(0, nrow(vars)) - for (j in 1:nrow(vars)) { - if (q_t[j,i]==q_2[j,i]) { - loh[j] = 1 - } - } - vars[,paste0("LOH_", sample_names[i])] = loh -} - -#==================================== -# ccf -#==================================== -cancer_cell_fraction = NULL -ccf_95CI_low = NULL -ccf_95CI_high = NULL -pr_somatic_clonal = NULL -ll = NULL -sq = NULL -clonal_status = NULL -for (i in 1:length(sample_names)) { - file_names = dir(path="ascat/ascat", pattern=".RData", full.names=TRUE) - index = grep(sample_names[i], file_names, fixed=TRUE) - if (length(index)==1) { - load(file_names[index]) - f_hat = vars[,paste0("MAF_", sample_names[i])] - n = vars[,paste0("DP_", sample_names[i])] - qt = vars[,paste0("qt_", sample_names[i])] - qt[qt>10] = 10 - q2 = vars[,paste0("q2_", sample_names[i])] - q2[q2>10] = 10 - alpha = seq(.1, to=.9, length=50) - alpha_hat = list() - indx = f_hat>.1 - if (sum(indx)>5) { - for (j in 1:length(alpha)) { - alpha_hat[[j]] = cancercellFraction(f_hat[indx], n[indx], qt[indx], q2[indx], alpha[j], e=0.01) - } - LL = unlist(lapply(alpha_hat, function(x) {sum(x[,"LL"])})) - pdf(file=paste0("sufam/", sample_names[i], ".pdf")) - plot(alpha, LL, type="o", col="steelblue", axes=FALSE, frame.plot=FALSE, xlab="", ylab="") - axis(1, at = NULL, cex.axis = 1.5, padj = 0.25) - axis(2, at = NULL, cex.axis = 1.5, las = 1) - mtext(side = 1, text = expression(alpha), line = 4, cex = 1.5) - mtext(side = 2, text = expression(Sigma~"LL"), line = 4, cex = 1.5) - index = which.max(LL) - title(main = paste0("alpha* = ", signif(alpha[index], 3)), cex.main = 1.5) - box(lwd = 2) - dev.off() - index = which.max(LL) - alpha_hat = cancercellFraction(f_hat, n, qt, q2, ifelse((alpha[index]-.25)<=0, alpha[index], alpha[index]-.25), e=0.01) - cancer_cell_fraction = cbind(cancer_cell_fraction, alpha_hat[,"cancer_cell_frac"]) - ccf_95CI_low = cbind(ccf_95CI_low, alpha_hat[,"ccf_95CI_low"]) - ccf_95CI_high = cbind(ccf_95CI_high, alpha_hat[,"ccf_95CI_high"]) - pr_somatic_clonal = cbind(pr_somatic_clonal, alpha_hat[,"Pr_somatic_clonal"]) - ll = cbind(ll, alpha_hat[,"LL"]) - sq = cbind(sq, alpha_hat[,"sq"]) - clonal_estimate = rep("Subclonal", nrow(vars)) - clonal_estimate[cancer_cell_fraction[,i]>.75 | pr_somatic_clonal[,i]>.5 | ccf_95CI_low[,i]>.9] = "Clonal" - clonal_status = cbind(clonal_status, clonal_estimate) - } else { - cancer_cell_fraction = cbind(cancer_cell_fraction, rep(NA, nrow(vars))) - ccf_95CI_low = cbind(ccf_95CI_low, rep(NA, nrow(vars))) - ccf_95CI_high = cbind(ccf_95CI_high, rep(NA, nrow(vars))) - pr_somatic_clonal = cbind(pr_somatic_clonal, rep(NA, nrow(vars))) - ll = cbind(ll, rep(NA, nrow(vars))) - sq = cbind(sq, rep(NA, nrow(vars))) - clonal_status = cbind(clonal_status, rep(NA, nrow(vars))) - } - } else { - cancer_cell_fraction = cbind(cancer_cell_fraction, rep(NA, nrow(vars))) - ccf_95CI_low = cbind(ccf_95CI_low, rep(NA, nrow(vars))) - ccf_95CI_high = cbind(ccf_95CI_high, rep(NA, nrow(vars))) - pr_somatic_clonal = cbind(pr_somatic_clonal, rep(NA, nrow(vars))) - ll = cbind(ll, rep(NA, nrow(vars))) - sq = cbind(sq, rep(NA, nrow(vars))) - clonal_status = cbind(clonal_status, rep(NA, nrow(vars))) - } -} -colnames(cancer_cell_fraction) = colnames(ccf_95CI_low) = colnames(ccf_95CI_high) = colnames(pr_somatic_clonal) = colnames(ll) = colnames(sq) = colnames(clonal_status) = sample_names -colnames(cancer_cell_fraction) = paste0("CCF_", colnames(cancer_cell_fraction)) -colnames(ccf_95CI_low) = paste0("CCF_95CI_Low_", colnames(ccf_95CI_low)) -colnames(ccf_95CI_high) = paste0("CCF_95CI_High_", colnames(ccf_95CI_high)) -colnames(pr_somatic_clonal) = paste0("Pr_Somatic_Clonal_", colnames(pr_somatic_clonal)) -colnames(ll) = paste0("LL_", colnames(ll)) -colnames(sq) = paste0("sq_", colnames(sq)) -colnames(clonal_status) = paste0("Clonal_Status_", colnames(clonal_status)) - -vars = cbind(vars, cancer_cell_fraction, - ccf_95CI_low, - ccf_95CI_high, - pr_somatic_clonal, - ll, - sq, - clonal_status) - -write.table(vars, file=paste0("sufam/", opt$sample_set, ".tsv"), col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) From 75b1f0793a8b6a187b6d4b2c9c7525fda7e215af Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 19 Jul 2021 18:23:55 -0400 Subject: [PATCH 116/766] Update splitRG.mk --- bam_tools/splitRG.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 1b8a88be..412e8b98 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -1,15 +1,15 @@ include modules/Makefile.inc -LOGDIR = log/splitRG.$(NOW) +LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),rg/XXX/$(sample).bam) +split : $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam) define split-rg -rg/XXX/$1.bam : bam/XXX.bam +bam/EEC25/$1.bam : etc/bam/EEC25-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p rg/XXX && \ - $$(SAMTOOLS) view -b -r $1 bam/XXX.bam > rg/XXX/$1.bam && \ - $$(SAMTOOLS) index rg/XXX/$1.bam") + mkdir -p bam/EEC25 && \ + $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@) && \ + $$(SAMTOOLS) index $$(@)") endef $(foreach sample,$(SAMPLES),\ From 2efa09fa9102f8ce2f844a13acc8c0a614f56cdc Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 19 Jul 2021 18:33:17 -0400 Subject: [PATCH 117/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 412e8b98..feb021a2 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,14 +2,18 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam) +split : $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam.bai) define split-rg bam/EEC25/$1.bam : etc/bam/EEC25-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/EEC25 && \ - $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@) && \ - $$(SAMTOOLS) index $$(@)") + $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") + +bam/EEC25/$1.bam.bai : bam/EEC25/$1.bam + $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ + $$(SAMTOOLS) index $$(<)") endef $(foreach sample,$(SAMPLES),\ From 9aa1b899cc6af0ae9fd28380716e4cd95a24a73d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 19 Jul 2021 21:25:20 -0400 Subject: [PATCH 118/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index feb021a2..45e00031 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam.bai) define split-rg -bam/EEC25/$1.bam : etc/bam/EEC25-1.bam +bam/EEC25/$1.bam : etc/bam/EEC25-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/EEC25 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From ec8d9d934e14b0f9578f3e8df3a9848f19c59a84 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 20 Jul 2021 09:34:25 -0400 Subject: [PATCH 119/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 45e00031..b06e8a4f 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC25/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/EEC131/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC131/$(sample).bam.bai) define split-rg -bam/EEC25/$1.bam : etc/bam/EEC25-2.bam +bam/EEC131/$1.bam : etc/bam/EEC131.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC25 && \ + mkdir -p bam/EEC131 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC25/$1.bam.bai : bam/EEC25/$1.bam +bam/EEC131/$1.bam.bai : bam/EEC131/$1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From b1931b15983ca2f5d799373ce589c587ae8457c4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 20 Jul 2021 09:36:41 -0400 Subject: [PATCH 120/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index b06e8a4f..a0fbee1f 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -12,7 +12,7 @@ bam/EEC131/$1.bam : etc/bam/EEC131.bam $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") bam/EEC131/$1.bam.bai : bam/EEC131/$1.bam - $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ + $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") endef From 1592edd46714c2ec29066fc12e154a1076e6e17e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 20 Jul 2021 10:14:41 -0400 Subject: [PATCH 121/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 4f04113b..c6be723f 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -1,22 +1,21 @@ include modules/Makefile.inc LOGDIR ?= log/get_basecount.$(NOW) -PHONY += getbasecount GBC_ENV = $(HOME)/share/data/common/eec_sc_split/etc/conda GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC131/$(sample).tsv) define get-basecount -gbc/EEC128/$1.txt : bam/EEC128/$1.bam +gbc/EEC131/$1.txt : bam/EEC131/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC128 && \ + mkdir -p gbc/EEC131 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC128.vcf \ + --vcf etc/vcf/EEC131.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -25,7 +24,7 @@ gbc/EEC128/$1.txt : bam/EEC128/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt +gbc/EEC131/$1.tsv : gbc/EEC131/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 712d63229748e674a4f03e12080d254241ce7bce Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 21 Jul 2021 10:36:56 -0400 Subject: [PATCH 122/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index c6be723f..93fea919 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC131/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC25/$(sample).tsv) define get-basecount -gbc/EEC131/$1.txt : bam/EEC131/$1.bam +gbc/EEC25/$1.txt : bam/EEC25/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC131 && \ + mkdir -p gbc/EEC25 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC131.vcf \ + --vcf etc/vcf/EEC25.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC131/$1.txt : bam/EEC131/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC131/$1.tsv : gbc/EEC131/$1.txt +gbc/EEC25/$1.tsv : gbc/EEC25/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From ea6be1eb0e242f523da453b82f7cb1a69baefb13 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 21 Jul 2021 12:44:45 -0400 Subject: [PATCH 123/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index a0fbee1f..31be882c 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC131/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC131/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/EEC132/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC132/$(sample).bam.bai) define split-rg -bam/EEC131/$1.bam : etc/bam/EEC131.bam +bam/EEC132/$1.bam : etc/bam/EEC132.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC131 && \ + mkdir -p bam/EEC132 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC131/$1.bam.bai : bam/EEC131/$1.bam +bam/EEC132/$1.bam.bai : bam/EEC132/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From b1ebe36fd69f16a2877b29279e1604d5d99b10be Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 21 Jul 2021 12:45:33 -0400 Subject: [PATCH 124/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 93fea919..8b40bac6 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC25/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC132/$(sample).tsv) define get-basecount -gbc/EEC25/$1.txt : bam/EEC25/$1.bam +gbc/EEC132/$1.txt : bam/EEC132/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC25 && \ + mkdir -p gbc/EEC132 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC25.vcf \ + --vcf etc/vcf/EEC132.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC25/$1.txt : bam/EEC25/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC25/$1.tsv : gbc/EEC25/$1.txt +gbc/EEC132/$1.tsv : gbc/EEC132/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From e5bd9054506bc3a3358c6c34c1d539d75db61e06 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 21 Jul 2021 16:33:06 -0400 Subject: [PATCH 125/766] +98 --- bam_tools/splitRG.mk | 10 +++++----- variant_callers/getBaseCount.mk | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 31be882c..18cd5d25 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC132/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC132/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam.bai) define split-rg -bam/EEC132/$1.bam : etc/bam/EEC132.bam +bam/EEC98/$1.bam : etc/bam/EE98.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC132 && \ + mkdir -p bam/EEC98 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC132/$1.bam.bai : bam/EEC132/$1.bam +bam/EEC98/$1.bam.bai : bam/EEC98/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 8b40bac6..4c943445 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC132/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC98/$(sample).tsv) define get-basecount -gbc/EEC132/$1.txt : bam/EEC132/$1.bam +gbc/EEC98/$1.txt : bam/EEC98/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC132 && \ + mkdir -p gbc/EEC98 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC132.vcf \ + --vcf etc/vcf/EEC98.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC132/$1.txt : bam/EEC132/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC132/$1.tsv : gbc/EEC132/$1.txt +gbc/EEC98/$1.tsv : gbc/EEC98/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 31c260d09ba21f1e277ac97f9ae736eec4e15546 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 21 Jul 2021 16:34:03 -0400 Subject: [PATCH 126/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 18cd5d25..f1b8fcd0 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam.bai) define split-rg -bam/EEC98/$1.bam : etc/bam/EE98.bam +bam/EEC98/$1.bam : etc/bam/EEC98.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/EEC98 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 8a6086561c389c1eae8ea18cacabc1fb108c9585 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 30 Jul 2021 17:38:51 -0400 Subject: [PATCH 127/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index f1b8fcd0..ed17dc27 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC98/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam.bai) define split-rg -bam/EEC98/$1.bam : etc/bam/EEC98.bam +bam/EEC3/$1.bam : etc/bam/EEC3-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC98 && \ + mkdir -p bam/EEC3 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC98/$1.bam.bai : bam/EEC98/$1.bam +bam/EEC3/$1.bam.bai : bam/EEC3/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From 46c6e316773c1913ee864b399a26fd36b6d2c89a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 31 Jul 2021 12:08:03 -0400 Subject: [PATCH 128/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index ed17dc27..eeb76776 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam.bai) define split-rg -bam/EEC3/$1.bam : etc/bam/EEC3-1.bam +bam/EEC3/$1.bam : etc/bam/EEC3-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/EEC3 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 9b05f749b67e56606527ef04cfc200b360a8eafe Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 12:48:25 -0400 Subject: [PATCH 129/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 4c943445..5db01f1b 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC98/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC3/$(sample).tsv) define get-basecount -gbc/EEC98/$1.txt : bam/EEC98/$1.bam +gbc/EEC3/$1.txt : bam/EEC3/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC98 && \ + mkdir -p gbc/EEC3 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC98.vcf \ + --vcf etc/vcf/EEC3.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC98/$1.txt : bam/EEC98/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC98/$1.tsv : gbc/EEC98/$1.txt +gbc/EEC3/$1.tsv : gbc/EEC3/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From c92abf151e6c5fe62ebd8a5830b73728155e3d51 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 15:49:47 -0400 Subject: [PATCH 130/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 5db01f1b..26f72944 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC3/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC14/$(sample).tsv) define get-basecount -gbc/EEC3/$1.txt : bam/EEC3/$1.bam +gbc/EEC14/$1.txt : bam/EEC14/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC3 && \ + mkdir -p gbc/EEC14 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC3.vcf \ + --vcf etc/vcf/EEC14.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC3/$1.txt : bam/EEC3/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC3/$1.tsv : gbc/EEC3/$1.txt +gbc/EEC14/$1.tsv : gbc/EEC14/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 44b4cdd615e752d4935f319e287ca05b761a319b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 16:05:29 -0400 Subject: [PATCH 131/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 26f72944..93fea919 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC14/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC25/$(sample).tsv) define get-basecount -gbc/EEC14/$1.txt : bam/EEC14/$1.bam +gbc/EEC25/$1.txt : bam/EEC25/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC14 && \ + mkdir -p gbc/EEC25 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC14.vcf \ + --vcf etc/vcf/EEC25.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC14/$1.txt : bam/EEC14/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC14/$1.tsv : gbc/EEC14/$1.txt +gbc/EEC25/$1.tsv : gbc/EEC25/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 5474ae42a92b48cb5274eee682e65f506328b08f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 16:31:19 -0400 Subject: [PATCH 132/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 93fea919..d29335bb 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC25/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) define get-basecount -gbc/EEC25/$1.txt : bam/EEC25/$1.bam +gbc/EEC87/$1.txt : bam/EEC87/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC25 && \ + mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC25.vcf \ + --vcf etc/vcf/EEC87.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC25/$1.txt : bam/EEC25/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC25/$1.tsv : gbc/EEC25/$1.txt +gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 822ede3fb7709c8703ac2d7ad9c634613fe4d6c8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 16:51:26 -0400 Subject: [PATCH 133/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index d29335bb..1896ecaf 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC87/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC98/$(sample).tsv) define get-basecount -gbc/EEC87/$1.txt : bam/EEC87/$1.bam +gbc/EEC98/$1.txt : bam/EEC98/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC87.vcf \ + --vcf etc/vcf/EEC98.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC87/$1.txt : bam/EEC87/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC87/$1.tsv : gbc/EEC87/$1.txt +gbc/EEC98/$1.tsv : gbc/EEC98/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 383096c04357b6c95cfc618c757badf1b15b889a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 17:59:40 -0400 Subject: [PATCH 134/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 1896ecaf..80d1472d 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC98/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC132/$(sample).tsv) define get-basecount -gbc/EEC98/$1.txt : bam/EEC98/$1.bam +gbc/EEC132/$1.txt : bam/EEC132/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/EEC87 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC98.vcf \ + --vcf etc/vcf/EEC132.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC98/$1.txt : bam/EEC98/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC98/$1.tsv : gbc/EEC98/$1.txt +gbc/EEC132/$1.tsv : gbc/EEC132/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From a923988bbf0e0c2f75c1ed9330e060792f5014d8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 18:00:42 -0400 Subject: [PATCH 135/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 80d1472d..8b40bac6 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -12,7 +12,7 @@ getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC132/$(sample).tsv) define get-basecount gbc/EEC132/$1.txt : bam/EEC132/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC87 && \ + mkdir -p gbc/EEC132 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ --vcf etc/vcf/EEC132.vcf \ From ae4c8fdfbf36d7a578dca15b230572c8db394acf Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 19:45:00 -0400 Subject: [PATCH 136/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 8b40bac6..17b50383 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC132/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) define get-basecount -gbc/EEC132/$1.txt : bam/EEC132/$1.bam +gbc/EEC128/$1.txt : bam/EEC128/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC132 && \ + mkdir -p gbc/EEC128 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC132.vcf \ + --vcf etc/vcf/EEC128.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC132/$1.txt : bam/EEC132/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC132/$1.tsv : gbc/EEC132/$1.txt +gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 54a1988a8506f79bdad68be415bdc3de80af33ce Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 1 Aug 2021 22:15:43 -0400 Subject: [PATCH 137/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 17b50383..c6be723f 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC128/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC131/$(sample).tsv) define get-basecount -gbc/EEC128/$1.txt : bam/EEC128/$1.bam +gbc/EEC131/$1.txt : bam/EEC131/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC128 && \ + mkdir -p gbc/EEC131 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC128.vcf \ + --vcf etc/vcf/EEC131.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC128/$1.txt : bam/EEC128/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC128/$1.tsv : gbc/EEC128/$1.txt +gbc/EEC131/$1.tsv : gbc/EEC131/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From ff779481c02f6d4438148fffa9128b5dffa72fd8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 6 Aug 2021 18:26:55 -0400 Subject: [PATCH 138/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index eeb76776..1cffef6d 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC3/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam.bai) define split-rg -bam/EEC3/$1.bam : etc/bam/EEC3-2.bam +bam/HEC6/$1.bam : etc/bam/HEC6-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC3 && \ + mkdir -p bam/HEC6 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC3/$1.bam.bai : bam/EEC3/$1.bam +bam/HEC6/$1.bam.bai : bam/HEC6/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From 5afc6a6a1f233f1c73b38b97095666597c4f827d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 6 Aug 2021 18:38:43 -0400 Subject: [PATCH 139/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 1cffef6d..8ec2dbe7 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam.bai) define split-rg -bam/HEC6/$1.bam : etc/bam/HEC6-1.bam +bam/HEC6/$1.bam : etc/bam/HEC6-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/HEC6 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 7e36037ed1945b7ac96f5ea2db1dcbe96a80f17a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 18:17:14 -0400 Subject: [PATCH 140/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 8ec2dbe7..465615f3 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/HEC6/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam.bai) define split-rg -bam/HEC6/$1.bam : etc/bam/HEC6-2.bam +bam/ISHI/$1.bam : etc/bam/ISHI-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/HEC6 && \ + mkdir -p bam/ISHI && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/HEC6/$1.bam.bai : bam/HEC6/$1.bam +bam/ISHI/$1.bam.bai : bam/ISHI/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From e8a997132fb0de288fc767fa9dd831eeb4462877 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 18:26:55 -0400 Subject: [PATCH 141/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 465615f3..0c702346 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam.bai) define split-rg -bam/ISHI/$1.bam : etc/bam/ISHI-1.bam +bam/ISHI/$1.bam : etc/bam/ISHI-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/ISHI && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 8266a718a766f1e21205ae0943bf32b74ae59d6f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 18:53:04 -0400 Subject: [PATCH 142/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 0c702346..cf952150 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/ISHI/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam.bai) define split-rg -bam/ISHI/$1.bam : etc/bam/ISHI-2.bam +bam/HEC6-ISHI/$1.bam : etc/bam/HEC6-ISHI-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/ISHI && \ + mkdir -p bam/HEC6-ISHI && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/ISHI/$1.bam.bai : bam/ISHI/$1.bam +bam/HEC6-ISHI/$1.bam.bai : bam/HEC6-ISHI/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From c4431ab36a8cd54b611965e907749e362a6e4081 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 19:06:47 -0400 Subject: [PATCH 143/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index cf952150..59af8dc9 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam.bai) define split-rg -bam/HEC6-ISHI/$1.bam : etc/bam/HEC6-ISHI-1.bam +bam/HEC6-ISHI/$1.bam : etc/bam/HEC6-ISHI-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/HEC6-ISHI && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From e68e764f5cf2946d69b57a58210d0ee822a8b0c0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 19:23:22 -0400 Subject: [PATCH 144/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 59af8dc9..a57db636 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/HEC6-ISHI/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam.bai) define split-rg -bam/HEC6-ISHI/$1.bam : etc/bam/HEC6-ISHI-2.bam +bam/ISHI-HEC6/$1.bam : etc/bam/ISHI-HEC6-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/HEC6-ISHI && \ + mkdir -p bam/ISHI-HEC6 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/HEC6-ISHI/$1.bam.bai : bam/HEC6-ISHI/$1.bam +bam/ISHI-HEC6/$1.bam.bai : bam/ISHI-HEC6/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From c10f0de832c7510c5b17e97ace6a84400cdb07c9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 19:36:28 -0400 Subject: [PATCH 145/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index a57db636..564050fe 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam.bai) define split-rg -bam/ISHI-HEC6/$1.bam : etc/bam/ISHI-HEC6-1.bam +bam/ISHI-HEC6/$1.bam : etc/bam/ISHI-HEC6-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/ISHI-HEC6 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 696fee32fc7526f7c010de3cc8a699b268b97985 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 20:06:41 -0400 Subject: [PATCH 146/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 564050fe..16556016 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/ISHI-HEC6/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam.bai) define split-rg -bam/ISHI-HEC6/$1.bam : etc/bam/ISHI-HEC6-2.bam +bam/EEC91/$1.bam : etc/bam/EEC91-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/ISHI-HEC6 && \ + mkdir -p bam/EEC91 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/ISHI-HEC6/$1.bam.bai : bam/ISHI-HEC6/$1.bam +bam/EEC91/$1.bam.bai : bam/EEC91/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From ca30683b7c5eaf64b447d213ca1b35024d551771 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 21:11:14 -0400 Subject: [PATCH 147/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 16556016..2bafc6df 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam.bai) define split-rg -bam/EEC91/$1.bam : etc/bam/EEC91-1.bam +bam/EEC91/$1.bam : etc/bam/EEC91-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/EEC91 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From ddfa25832eea1e4229fe08e17eb7cb61d91c5289 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 22:04:42 -0400 Subject: [PATCH 148/766] Update splitRG.mk --- bam_tools/splitRG.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index 2bafc6df..b2f0ad37 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -2,16 +2,16 @@ include modules/Makefile.inc LOGDIR = log/split_rg.$(NOW) -split : $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/EEC91/$(sample).bam.bai) +split : $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam.bai) define split-rg -bam/EEC91/$1.bam : etc/bam/EEC91-2.bam +bam/MFE296/$1.bam : etc/bam/MFE296-1.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/EEC91 && \ + mkdir -p bam/MFE296 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") -bam/EEC91/$1.bam.bai : bam/EEC91/$1.bam +bam/MFE296/$1.bam.bai : bam/MFE296/$1.bam $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ $$(SAMTOOLS) index $$(<)") From ca3470ea9fb1a488f7a059deae7fcd31b54b7eba Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 7 Aug 2021 23:34:54 -0400 Subject: [PATCH 149/766] Update splitRG.mk --- bam_tools/splitRG.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk index b2f0ad37..91dac523 100644 --- a/bam_tools/splitRG.mk +++ b/bam_tools/splitRG.mk @@ -6,7 +6,7 @@ split : $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam.bai) define split-rg -bam/MFE296/$1.bam : etc/bam/MFE296-1.bam +bam/MFE296/$1.bam : etc/bam/MFE296-2.bam $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ mkdir -p bam/MFE296 && \ $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") From 9c6a268879ec8b7eb248222f10d12d48142b7342 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 14:13:12 -0400 Subject: [PATCH 150/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index c6be723f..cd519dce 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -10,12 +10,12 @@ BAQ := 15 getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC131/$(sample).tsv) define get-basecount -gbc/EEC131/$1.txt : bam/EEC131/$1.bam +gbc/HEC6/$1.txt : bam/HEC6/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC131 && \ + mkdir -p gbc/HEC6 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC131.vcf \ + --vcf etc/vcf/HEC6.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC131/$1.txt : bam/EEC131/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC131/$1.tsv : gbc/EEC131/$1.txt +gbc/HEC6/$1.tsv : gbc/HEC6/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 983fb0ddaa8c546d0171a498a85582f5f851aa41 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 14:14:52 -0400 Subject: [PATCH 151/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index cd519dce..af33001c 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,7 +7,7 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC131/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/HEC6/$(sample).tsv) define get-basecount gbc/HEC6/$1.txt : bam/HEC6/$1.bam From 69d2946096aaa3ffa2eabf8ef496363eebcd32a8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 14:24:27 -0400 Subject: [PATCH 152/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index af33001c..b034a00b 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/HEC6/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/ISHI/$(sample).tsv) define get-basecount -gbc/HEC6/$1.txt : bam/HEC6/$1.bam +gbc/ISHI/$1.txt : bam/ISHI/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/HEC6 && \ + mkdir -p gbc/ISHI && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/HEC6.vcf \ + --vcf etc/vcf/ISHI.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/HEC6/$1.txt : bam/HEC6/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/HEC6/$1.tsv : gbc/HEC6/$1.txt +gbc/ISHI/$1.tsv : gbc/ISHI/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From 9c7d2c172b63e8e3447a29b6797cbac1db288195 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 15:06:47 -0400 Subject: [PATCH 153/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index b034a00b..0abf13bc 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/ISHI/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/HEC6-ISHI/$(sample).tsv) define get-basecount -gbc/ISHI/$1.txt : bam/ISHI/$1.bam +gbc/HEC6-ISHI/$1.txt : bam/HEC6-ISHI/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/ISHI && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/ISHI.vcf \ + --vcf etc/vcf/HEC6-ISHI.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/ISHI/$1.txt : bam/ISHI/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/ISHI/$1.tsv : gbc/ISHI/$1.txt +gbc/HEC6-ISHI/$1.tsv : gbc/HEC6-ISHI/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From a9f9790f71d0af1e5e268d3bc347909616287fbe Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 15:20:31 -0400 Subject: [PATCH 154/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index 0abf13bc..c13e6e9f 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/HEC6-ISHI/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/ISHI-HEC6/$(sample).tsv) define get-basecount -gbc/HEC6-ISHI/$1.txt : bam/HEC6-ISHI/$1.bam +gbc/ISHI-HEC6/$1.txt : bam/ISHI-HEC6/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/ISHI && \ + mkdir -p gbc/ISHI-HEC6 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/HEC6-ISHI.vcf \ + --vcf etc/vcf/ISHI-HEC6.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/HEC6-ISHI/$1.txt : bam/HEC6-ISHI/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/HEC6-ISHI/$1.tsv : gbc/HEC6-ISHI/$1.txt +gbc/ISHI-HEC6/$1.tsv : gbc/ISHI-HEC6/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From d44e7ddc87758ab1b06e291fb5904b26ab7506b8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 15:33:07 -0400 Subject: [PATCH 155/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index c13e6e9f..aae960d4 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/ISHI-HEC6/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC91/$(sample).tsv) define get-basecount -gbc/ISHI-HEC6/$1.txt : bam/ISHI-HEC6/$1.bam +gbc/EEC91/$1.txt : bam/EEC91/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/ISHI-HEC6 && \ + mkdir -p gbc/EEC91 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/ISHI-HEC6.vcf \ + --vcf etc/vcf/EEC91.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/ISHI-HEC6/$1.txt : bam/ISHI-HEC6/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/ISHI-HEC6/$1.tsv : gbc/ISHI-HEC6/$1.txt +gbc/EEC91/$1.tsv : gbc/EEC91/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From bcbe8d12f55d13a4d784047a8709fe34ec9a3405 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 8 Aug 2021 15:58:17 -0400 Subject: [PATCH 156/766] Update getBaseCount.mk --- variant_callers/getBaseCount.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/variant_callers/getBaseCount.mk b/variant_callers/getBaseCount.mk index aae960d4..56ad36fa 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/getBaseCount.mk @@ -7,15 +7,15 @@ GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts MAPQ := 10 BAQ := 15 -getbasecount : $(foreach sample,$(SAMPLES),gbc/EEC91/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/MFE296/$(sample).tsv) define get-basecount -gbc/EEC91/$1.txt : bam/EEC91/$1.bam +gbc/MFE296/$1.txt : bam/MFE296/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/EEC91 && \ + mkdir -p gbc/MFE296 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ --bam $$(<) \ - --vcf etc/vcf/EEC91.vcf \ + --vcf etc/vcf/MFE296.vcf \ --output $$(@) \ --maq $(MAPQ) \ --baq $(BAQ) \ @@ -24,7 +24,7 @@ gbc/EEC91/$1.txt : bam/EEC91/$1.bam --filter_qc_failed 1 \ --thread 6") -gbc/EEC91/$1.tsv : gbc/EEC91/$1.txt +gbc/MFE296/$1.tsv : gbc/MFE296/$1.txt $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ rm $$(<)") From ab20a3a652bf841a7d5e8215f28a5ff8f339d683 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 13 Aug 2021 18:46:03 -0400 Subject: [PATCH 157/766] Update qmake.pl --- scripts/qmake.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 0e1c8960..4136e62d 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -13,7 +13,8 @@ selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", brownd7 => "W013UH0HWUF", - parejaf => "UBF6MRSV8" + parejaf => "UBF6MRSV8", + zhuy1 => "W013UH382P9" ); sub HELP_MESSAGE { From 711a290788dde80410aee3fa8ca8eaab44fbd0e6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 7 Oct 2021 10:18:34 -0400 Subject: [PATCH 158/766] Update qmake.pl --- scripts/qmake.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 4136e62d..e9b424e6 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -14,7 +14,8 @@ dacruzpa => "U6PAUB3C6", brownd7 => "W013UH0HWUF", parejaf => "UBF6MRSV8", - zhuy1 => "W013UH382P9" + zhuy1 => "W013UH382P9", + peix => "W0147TPN3E1" ); sub HELP_MESSAGE { From 4542a097c8a5fdbc0bfb27daee9625eeec771c4f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 16 Dec 2021 09:37:14 -0500 Subject: [PATCH 159/766] Adds issabhas to slack map --- scripts/qmake.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index e9b424e6..cb3c4bc5 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -15,7 +15,8 @@ brownd7 => "W013UH0HWUF", parejaf => "UBF6MRSV8", zhuy1 => "W013UH382P9", - peix => "W0147TPN3E1" + peix => "W0147TPN3E1", + issabhas => "U01V8R1RKQU" ); sub HELP_MESSAGE { From a0178ebea9e5f411a69fcf2e462825b961e37d58 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Feb 2022 17:07:56 -0500 Subject: [PATCH 160/766] Update qmake.pl --- scripts/qmake.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index cb3c4bc5..d5085fac 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -12,11 +12,11 @@ my %slack_map = ( selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", - brownd7 => "W013UH0HWUF", parejaf => "UBF6MRSV8", zhuy1 => "W013UH382P9", peix => "W0147TPN3E1", - issabhas => "U01V8R1RKQU" + issabhas => "U01V8R1RKQU", + xiaoy => "U01C8MPBSH5" ); sub HELP_MESSAGE { From ad24d991ebc59e0e6c8d0ce3a7734cffcf63f45d Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:30:50 -0400 Subject: [PATCH 161/766] + --- copy_number/ascat.R | 14 +++++++------- copy_number/ascat.mk | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index 24432af1..babeb396 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -10,13 +10,13 @@ if (!interactive()) { } args_list <- list(make_option("--type", default = NA, type = 'character', help = "type of analysis"), - make_option("--file_in", default = NA, type = 'character', help = "input file name"), - make_option("--file_out", default = NA, type = 'character', help = "output file name"), - make_option("--gamma", default = NA, type = 'numeric', help = "gamma parameter in pcf"), - make_option("--nlog2", default = NA, type = 'numeric', help = "number of clusters in Log2 ratio"), - make_option("--nbaf", default = NA, type = 'numeric', help = "number of clusters in BAF"), - make_option("--rho", default = NA, type = 'numeric', help = "purity for ASCAT"), - make_option("--psi", default = NA, type = 'numeric', help = "ploidy for ASCAT")) + make_option("--file_in", default = NA, type = 'character', help = "input file name"), + make_option("--file_out", default = NA, type = 'character', help = "output file name"), + make_option("--gamma", default = NA, type = 'numeric', help = "gamma parameter in pcf"), + make_option("--nlog2", default = NA, type = 'numeric', help = "number of clusters in Log2 ratio"), + make_option("--nbaf", default = NA, type = 'numeric', help = "number of clusters in BAF"), + make_option("--rho", default = NA, type = 'numeric', help = "purity for ASCAT"), + make_option("--psi", default = NA, type = 'numeric', help = "ploidy for ASCAT")) parser <- OptionParser(usage = "%prog", option_list = args_list) arguments <- parse_args(parser, positional_arguments = T) diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 3af26268..b5b03b58 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -1,9 +1,15 @@ include modules/Makefile.inc LOGDIR ?= log/ascat.$(NOW) -PHONY += ascat ascat/log2 ascat/bafall ascat/bafhet ascat/mad ascat/log2nbaf ascat/ascat ascat/total ascat/bychr -ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf ascat/bafall/$(pair).pdf ascat/bafhet/$(pair).pdf ascat/mad/$(pair).RData ascat/log2nbaf/$(pair).pdf ascat/ascat/$(pair).pdf ascat/total/$(pair).pdf ascat/bychr/$(pair)/timestamp) +ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) +# $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ +# $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) define ascat-plot-log2 ascat/log2/$1_$2.pdf : facets/cncf/$1_$2.Rdata @@ -67,7 +73,9 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-plot-chr,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: + +..DUMMY := $(shell mkdir -p version; \ + R --version > version/ascat.txt;) .SECONDARY: -.PHONY: $(PHONY) +.DELETE_ON_ERROR: +.PHONY: ascat From aa36281a1f2207ffbc2d8eb8e365f3dd6471815c Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:32:11 -0400 Subject: [PATCH 162/766] Update qmake.pl --- scripts/qmake.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index d5085fac..4e5aab1c 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,6 +10,7 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( + brownd7 => "W013UH0HWUF", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", parejaf => "UBF6MRSV8", From bd37823857390006be3252d4484205b0b7d0768b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:39:27 -0400 Subject: [PATCH 163/766] ++ --- copy_number/ascat.R | 2 +- copy_number/ascat.mk | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index babeb396..d2ad0b8a 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -83,7 +83,7 @@ if (opt$type=="log2") { abline(v=max(BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1) abline(h=0.5, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) + rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) title(main = gsub(".pdf", "", gsub("ascat/bafall/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) box(lwd=1.5) dev.off() diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index b5b03b58..699d9a56 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -2,8 +2,8 @@ include modules/Makefile.inc LOGDIR ?= log/ascat.$(NOW) -ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ +ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) \ @@ -13,7 +13,11 @@ ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) define ascat-plot-log2 ascat/log2/$1_$2.pdf : facets/cncf/$1_$2.Rdata - $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type log2 --file_in $$< --file_out ascat/log2/$1_$2.pdf") + $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type log2 \ + --file_in $$(<) \ + --file_out ascat/log2/$1_$2.pdf") endef $(foreach pair,$(SAMPLE_PAIRS),\ @@ -21,7 +25,11 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-plot-bafall ascat/bafall/$1_$2.pdf : facets/cncf/$1_$2.Rdata - $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type bafall --file_in $$< --file_out ascat/bafall/$1_$2.pdf") + $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type bafall \ + --file_in $$(<) \ + --file_out ascat/bafall/$1_$2.pdf") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 8cee8fce814c43fc1aa129fcbee8bacff86b0c2a Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:41:55 -0400 Subject: [PATCH 164/766] ++ --- copy_number/ascat.R | 4 ++-- copy_number/ascat.mk | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index d2ad0b8a..db9d735c 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -117,9 +117,9 @@ if (opt$type=="log2") { abline(v=max(BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1) abline(h=0.5, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) + rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) title(main = gsub(".pdf", "", gsub("ascat/bafhet/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) + box(lwd=1.5) dev.off() } else if (opt$type=="aspcf") { diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 699d9a56..cb9a32eb 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -3,8 +3,8 @@ include modules/Makefile.inc LOGDIR ?= log/ascat.$(NOW) ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ @@ -37,7 +37,11 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-plot-bafhet ascat/bafhet/$1_$2.pdf : facets/cncf/$1_$2.Rdata - $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"$(RSCRIPT) modules/copy_number/ascat.R --type bafhet --file_in $$< --file_out ascat/bafhet/$1_$2.pdf") + $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type bafhet \ + --file_in $$(<) \ + --file_out ascat/bafhet/$1_$2.pdf") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 1e8a4a7ac93871f84afd328907e1240e57a655cd Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:44:33 -0400 Subject: [PATCH 165/766] >> --- copy_number/ascat.R | 2 +- copy_number/ascat.mk | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index db9d735c..24609ad3 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -124,7 +124,7 @@ if (opt$type=="log2") { } else if (opt$type=="aspcf") { - gamma = ifelse(is.na(as.numeric(opt$gamma)), 70, as.numeric(opt$gamma)) + gamma = ifelse(is.na(as.numeric(opt$gamma)), 20, as.numeric(opt$gamma)) CN_and_BAF = out2$jointseg[,c("chrom", "maploc", "cnlr", "vafT"),drop=FALSE] index = out2$jointseg[,"het"]==1 diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index cb9a32eb..e0f7fd04 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -49,7 +49,12 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-aspcf ascat/mad/$1_$2.RData : facets/cncf/$1_$2.Rdata - $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type aspcf --file_in $$< --file_out ascat/mad/$1_$2.RData --gamma '$${aspcf_gamma.$1}'") + $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type aspcf \ + --file_in $$(<) \ + --file_out ascat/mad/$1_$2.RData \ + --gamma '$${aspcf_gamma.$1}'") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 81229621dd9adcbd898660213121d357a1969b6f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 16:44:53 -0400 Subject: [PATCH 166/766] Update ascat.mk --- copy_number/ascat.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index e0f7fd04..06842511 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -4,8 +4,8 @@ LOGDIR ?= log/ascat.$(NOW) ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) # $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ From a48d5dee61acace432d6bb3d031f7ad5e20fc9c2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 17:29:08 -0400 Subject: [PATCH 167/766] Update ascat.R --- copy_number/ascat.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index 24609ad3..875775d1 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -51,9 +51,9 @@ if (opt$type=="log2") { abline(v=max(CN[,"Position"]), col="goldenrod3", lty=3, lwd=1) abline(h=0, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(CN[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) + rect(xleft=1-1e10, xright=max(CN[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) title(main = gsub(".pdf", "", gsub("ascat/log2/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) + box(lwd=1.5) dev.off() } else if (opt$type=="bafall") { @@ -85,7 +85,7 @@ if (opt$type=="log2") { axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) title(main = gsub(".pdf", "", gsub("ascat/bafall/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) + box(lwd=1.5) dev.off() } else if (opt$type=="bafhet") { @@ -132,7 +132,17 @@ if (opt$type=="log2") { colnames(CN_and_BAF) = c("Chromosome", "Position", "Log2Ratio", "BAF") index = CN_and_BAF[,"BAF"]>0.5 CN_and_BAF[index,"BAF"] = 1 - CN_and_BAF[index,"BAF"] + TMP = CN_and_BAF + for (i in 1:23) { + CN_and_BAF$Position[CN_and_BAF$Chromosome == i] = 1:sum(CN_and_BAF$Chromosome == i) + } tmp = multipcf(data=winsorize(data=CN_and_BAF, method="mad", tau=2.5, k=25, verbose=FALSE), gamma=gamma, fast=FALSE, verbose=FALSE) + for (i in 1:23) { + tmp[tmp$chrom == i,"start.pos"] = (TMP$Position[TMP$Chromosome == i])[tmp$start.pos[tmp$chrom == i]] + } + for (i in 1:23) { + tmp[tmp$chrom == i,"end.pos"] = (TMP$Position[TMP$Chromosome == i])[tmp$end.pos[tmp$chrom == i]] + } colnames(tmp) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio", "BAF") save(CN_and_BAF, tmp, file=opt$file_out) From 55b1274e77183a0f47893ba4c975b7ed1de59df1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 17:31:20 -0400 Subject: [PATCH 168/766] Update ascat.mk --- copy_number/ascat.mk | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 06842511..1d6313ee 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -5,8 +5,8 @@ LOGDIR ?= log/ascat.$(NOW) ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) @@ -62,7 +62,13 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-plot-aspcf ascat/log2nbaf/$1_$2.pdf : ascat/mad/$1_$2.RData - $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type plot-aspcf --file_in $$< --file_out ascat/log2nbaf/$1_$2.pdf --nlog2 '$${aspcf_nlog2.$1}' --nbaf '$${aspcf_nbaf.$1}'") + $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type plot-aspcf \ + --file_in $$(<) \ + --file_out ascat/log2nbaf/$1_$2.pdf \ + --nlog2 '$${aspcf_nlog2.$1}' \ + --nbaf '$${aspcf_nbaf.$1}'") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 5880d127525b68c875d7c25229c5cf8d835c55f7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 17:36:36 -0400 Subject: [PATCH 169/766] ++ --- copy_number/ascat.R | 1 + copy_number/ascat.mk | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index 875775d1..ece49c6c 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -143,6 +143,7 @@ if (opt$type=="log2") { for (i in 1:23) { tmp[tmp$chrom == i,"end.pos"] = (TMP$Position[TMP$Chromosome == i])[tmp$end.pos[tmp$chrom == i]] } + CN_and_BAF = TMP colnames(tmp) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio", "BAF") save(CN_and_BAF, tmp, file=opt$file_out) diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 1d6313ee..4af6b024 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -3,10 +3,10 @@ include modules/Makefile.inc LOGDIR ?= log/ascat.$(NOW) ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/bafall/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/bafhet/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/baf_all/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/baf_het/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/log2nbaf/$(pair).pdf) + $(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) @@ -24,24 +24,24 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-plot-log2,$(tumor.$(pair)),$(normal.$(pair))))) define ascat-plot-bafall -ascat/bafall/$1_$2.pdf : facets/cncf/$1_$2.Rdata +ascat/baf_all/$1_$2.pdf : facets/cncf/$1_$2.Rdata $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \ $(RSCRIPT) modules/copy_number/ascat.R \ --type bafall \ --file_in $$(<) \ - --file_out ascat/bafall/$1_$2.pdf") + --file_out ascat/baf_all/$1_$2.pdf") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-plot-bafall,$(tumor.$(pair)),$(normal.$(pair))))) define ascat-plot-bafhet -ascat/bafhet/$1_$2.pdf : facets/cncf/$1_$2.Rdata +ascat/baf_het/$1_$2.pdf : facets/cncf/$1_$2.Rdata $$(call RUN,-c -v $(ASCAT_ENV) -s 1G -m 2G,"set -o pipefail && \ $(RSCRIPT) modules/copy_number/ascat.R \ --type bafhet \ --file_in $$(<) \ - --file_out ascat/bafhet/$1_$2.pdf") + --file_out ascat/baf_het/$1_$2.pdf") endef $(foreach pair,$(SAMPLE_PAIRS),\ @@ -61,12 +61,12 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-aspcf,$(tumor.$(pair)),$(normal.$(pair))))) define ascat-plot-aspcf -ascat/log2nbaf/$1_$2.pdf : ascat/mad/$1_$2.RData +ascat/log2_baf/$1_$2.pdf : ascat/mad/$1_$2.RData $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \ $(RSCRIPT) modules/copy_number/ascat.R \ --type plot-aspcf \ --file_in $$(<) \ - --file_out ascat/log2nbaf/$1_$2.pdf \ + --file_out ascat/log2_baf/$1_$2.pdf \ --nlog2 '$${aspcf_nlog2.$1}' \ --nbaf '$${aspcf_nbaf.$1}'") From 802c70c55db3d5a34402b4924fcd1796a9f698d1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 17:39:53 -0400 Subject: [PATCH 170/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 4e5aab1c..3de36395 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,7 +10,7 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "W013UH0HWUF", + brownd7 => "W01BT68MSSD", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", parejaf => "UBF6MRSV8", From ec86ceffb7d99292417508f6f84214ddb9a8472d Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 17:53:02 -0400 Subject: [PATCH 171/766] ++ --- copy_number/ascat.R | 46 ++++++++++++++++++++++---------------------- copy_number/ascat.mk | 14 +++++++++++--- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index ece49c6c..a7ce0760 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -329,30 +329,30 @@ if (opt$type=="log2") { gender = "2323" sexchromosomes = c(23, 24) tmp2 = list(Tumor_LogR=Tumor_LogR, - Tumor_BAF=Tumor_BAF, - Tumor_LogR_segmented=Tumor_LogR_segmented, - Tumor_BAF_segmented=Tumor_BAF_segmented, - SNPpos=SNPpos, - chromosomes=ch, - chrnames=chrs, - gender=gender, - sexchromosomes=sexchromosomes) + Tumor_BAF=Tumor_BAF, + Tumor_LogR_segmented=Tumor_LogR_segmented, + Tumor_BAF_segmented=Tumor_BAF_segmented, + SNPpos=SNPpos, + chromosomes=ch, + chrnames=chrs, + gender=gender, + sexchromosomes=sexchromosomes) - tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR, - baf=tmp2$Tumor_BAF, - lrrsegmented=tmp2$Tumor_LogR_segmented, - bafsegmented=tmp2$Tumor_BAF_segmented, - gender=tmp2$gender, - SNPpos=tmp2$SNPpos, - chromosomes=tmp2$chromosomes, - chrnames=tmp2$chrnames, - sexchromosomes=tmp2$sexchromosomes, - failedqualitycheck=FALSE, - distance = opt$file_out, - copynumberprofile = NULL, - nonroundedprofile = NULL, - aberrationreliability = NULL, - gamma = 1, rho_manual = rho, psi_manual = psi, y_limit = 3, circos = NA)) + tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR, + baf=tmp2$Tumor_BAF, + lrrsegmented=tmp2$Tumor_LogR_segmented, + bafsegmented=tmp2$Tumor_BAF_segmented, + gender=tmp2$gender, + SNPpos=tmp2$SNPpos, + chromosomes=tmp2$chromosomes, + chrnames=tmp2$chrnames, + sexchromosomes=tmp2$sexchromosomes, + failedqualitycheck=FALSE, + distance = opt$file_out, + copynumberprofile = NULL, + nonroundedprofile = NULL, + aberrationreliability = NULL, + gamma = 1, rho_manual = rho, psi_manual = psi, y_limit = 3, circos = NA)) if (!("try-error" %in% is(tmp3))) { purity = tmp3$rho diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 4af6b024..dace8cb5 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -6,8 +6,8 @@ ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/baf_all/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/baf_het/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ # $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) @@ -76,7 +76,15 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-run-ascat ascat/ascat/$1_$2.pdf : ascat/mad/$1_$2.RData - $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"$(RSCRIPT) modules/copy_number/ascat.R --type run-ascat --file_in $$< --file_out ascat/ascat/$1_$2.pdf --rho '$${ascat_rho.$1}' --psi '$${ascat_psi.$1}' --nlog2 '$${aspcf_nlog2.$1}' --nbaf '$${aspcf_nbaf.$1}'") + $$(call RUN,-c -v $(ASCAT_ENV) -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type run-ascat \ + --file_in $$(<) \ + --file_out ascat/ascat/$1_$2.pdf \ + --rho '$${ascat_rho.$1}' \ + --psi '$${ascat_psi.$1}' \ + --nlog2 '$${aspcf_nlog2.$1}' \ + --nbaf '$${aspcf_nbaf.$1}'") ascat/total/$1_$2.pdf : facets/cncf/$1_$2.Rdata ascat/ascat/$1_$2.pdf $$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"$(RSCRIPT) modules/copy_number/ascat.R --type total-copy --file_in $$< --file_out ascat/total/$1_$2.pdf") From 04de094b908226e554f1ba35361da0b3450b0054 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 18:12:04 -0400 Subject: [PATCH 172/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 3de36395..7d3060b1 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,7 +10,7 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "W01BT68MSSD", + brownd7 => "U01ERFWQXCY", selenicp => "U22A5U23X", dacruzpa => "U6PAUB3C6", parejaf => "UBF6MRSV8", From 054a9432914198c4ab7804736388e7ef355e68c4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 19 Apr 2022 18:15:06 -0400 Subject: [PATCH 173/766] Update qmake.pl --- scripts/qmake.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 7d3060b1..50d64221 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,10 +10,10 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "U01ERFWQXCY", - selenicp => "U22A5U23X", - dacruzpa => "U6PAUB3C6", - parejaf => "UBF6MRSV8", + brownd7 => "W01BT68MSSD", + selenicp => "W0142HA5LNA", + dacruzpa => "W01BT68MSSD", + parejaf => "W01BLNUF7J8", zhuy1 => "W013UH382P9", peix => "W0147TPN3E1", issabhas => "U01V8R1RKQU", From cfaa8e4c49f866e8df195e9578d23018a14c2f6c Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 20 Apr 2022 10:53:33 -0400 Subject: [PATCH 174/766] Update ascat.R --- copy_number/ascat.R | 62 ++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index a7ce0760..824cae67 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -84,7 +84,7 @@ if (opt$type=="log2") { abline(h=0.5, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) - title(main = gsub(".pdf", "", gsub("ascat/bafall/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) + title(main = gsub(".pdf", "", gsub("ascat/baf_all/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) box(lwd=1.5) dev.off() @@ -118,7 +118,7 @@ if (opt$type=="log2") { abline(h=0.5, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) rect(xleft=1-1e10, xright=max(BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) - title(main = gsub(".pdf", "", gsub("ascat/bafhet/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) + title(main = gsub(".pdf", "", gsub("ascat/baf_het/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) box(lwd=1.5) dev.off() @@ -229,9 +229,9 @@ if (opt$type=="log2") { abline(v=max(CN_and_BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1) abline(h=0, col="red") axis(1, at = .5*(start+end), labels=rep(" ", 23), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) - title(main = gsub(".pdf", "", gsub("ascat/log2nbaf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1) - box(lwd=1.5) + rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) + title(main = gsub(".pdf", "", gsub("ascat/log2_baf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1) + box(lwd=1.5) screen(zz[2]) plot(CN_and_BAF[,"Position"], CN_and_BAF[,"BAF"], type="p", pch=".", cex=1, col=col, axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(0,1.125)) @@ -249,10 +249,10 @@ if (opt$type=="log2") { abline(v=max(CN_and_BAF[,"Position"]), col="goldenrod3", lty=3, lwd=1) abline(h=0.5, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) - title(main = gsub(".pdf", "", gsub("ascat/log2nbaf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1) - box(lwd=1.5) - close.screen(all.screens=TRUE) + rect(xleft=1-1e10, xright=max(CN_and_BAF[,"Position"])+1e10, ybottom=1, ytop=1.25, col="lightgrey", border="black", lwd=1.5) + title(main = gsub(".pdf", "", gsub("ascat/log2_baf/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1.35, cex.main=.75, font.main=1) + box(lwd=1.5) + close.screen(all.screens=TRUE) dev.off() } else if (opt$type=="run-ascat") { @@ -328,31 +328,35 @@ if (opt$type=="log2") { chrs = 1:23 gender = "2323" sexchromosomes = c(23, 24) - tmp2 = list(Tumor_LogR=Tumor_LogR, - Tumor_BAF=Tumor_BAF, - Tumor_LogR_segmented=Tumor_LogR_segmented, - Tumor_BAF_segmented=Tumor_BAF_segmented, - SNPpos=SNPpos, - chromosomes=ch, - chrnames=chrs, - gender=gender, - sexchromosomes=sexchromosomes) + tmp2 = list(Tumor_LogR = Tumor_LogR, + Tumor_BAF = Tumor_BAF, + Tumor_LogR_segmented = Tumor_LogR_segmented, + Tumor_BAF_segmented = Tumor_BAF_segmented, + SNPpos = SNPpos, + chromosomes = ch, + chrnames = chrs, + gender = gender, + sexchromosomes = sexchromosomes) - tmp3 = try(runASCAT(lrr=tmp2$Tumor_LogR, - baf=tmp2$Tumor_BAF, - lrrsegmented=tmp2$Tumor_LogR_segmented, - bafsegmented=tmp2$Tumor_BAF_segmented, - gender=tmp2$gender, - SNPpos=tmp2$SNPpos, - chromosomes=tmp2$chromosomes, - chrnames=tmp2$chrnames, - sexchromosomes=tmp2$sexchromosomes, - failedqualitycheck=FALSE, + tmp3 = try(runASCAT(lrr = tmp2$Tumor_LogR, + baf = tmp2$Tumor_BAF, + lrrsegmented = tmp2$Tumor_LogR_segmented, + bafsegmented = tmp2$Tumor_BAF_segmented, + gender = tmp2$gender, + SNPpos = tmp2$SNPpos, + chromosomes = tmp2$chromosomes, + chrnames = tmp2$chrnames, + sexchromosomes = tmp2$sexchromosomes, + failedqualitycheck = FALSE, distance = opt$file_out, copynumberprofile = NULL, nonroundedprofile = NULL, aberrationreliability = NULL, - gamma = 1, rho_manual = rho, psi_manual = psi, y_limit = 3, circos = NA)) + gamma = 1, + rho_manual = rho, + psi_manual = psi, + y_limit = 3, + circos = NA)) if (!("try-error" %in% is(tmp3))) { purity = tmp3$rho From fee79ad8b9e9a46fc42b949207cbb0c2ad3be6f3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 20 Apr 2022 13:56:18 -0400 Subject: [PATCH 175/766] ++ --- copy_number/ascat.R | 6 +++--- copy_number/ascat.mk | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/copy_number/ascat.R b/copy_number/ascat.R index 824cae67..cacee796 100644 --- a/copy_number/ascat.R +++ b/copy_number/ascat.R @@ -423,10 +423,10 @@ if (opt$type=="log2") { abline(v=max(CN[,"pos"]), col="goldenrod3", lty=3, lwd=1) abline(h=0, col="red") axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - load(gsub(".pdf", ".RData", gsub("total", "ascat", opt$file_out))) - rect(xleft=1-1e10, xright=max(CN[,"pos"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) + load(gsub(".pdf", ".RData", gsub("total", "ascat", opt$file_out))) + rect(xleft=1-1e10, xright=max(CN[,"pos"])+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) title(main = gsub(".pdf", "", gsub("ascat/total/", "", opt$file_out, fixed=TRUE), fixed=TRUE), line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) + box(lwd=1.5) dev.off() } else if (opt$type=="plot-chr") { diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index dace8cb5..84ac12c6 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -7,8 +7,8 @@ ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/baf_het/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) # $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) define ascat-plot-log2 @@ -87,10 +87,13 @@ ascat/ascat/$1_$2.pdf : ascat/mad/$1_$2.RData --nbaf '$${aspcf_nbaf.$1}'") ascat/total/$1_$2.pdf : facets/cncf/$1_$2.Rdata ascat/ascat/$1_$2.pdf - $$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"$(RSCRIPT) modules/copy_number/ascat.R --type total-copy --file_in $$< --file_out ascat/total/$1_$2.pdf") + $$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type total-copy \ + --file_in $$(<) \ + --file_out ascat/total/$1_$2.pdf") endef - $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-run-ascat,$(tumor.$(pair)),$(normal.$(pair))))) From 6d31c021b23dc7a971d2203bb65ebd75cdf95978 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 20 Apr 2022 14:06:28 -0400 Subject: [PATCH 176/766] Update ascat.mk --- copy_number/ascat.mk | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/copy_number/ascat.mk b/copy_number/ascat.mk index 84ac12c6..77cefcbd 100644 --- a/copy_number/ascat.mk +++ b/copy_number/ascat.mk @@ -8,8 +8,8 @@ ascat : $(foreach pair,$(SAMPLE_PAIRS),ascat/log2/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/mad/$(pair).RData) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/log2_baf/$(pair).pdf) \ $(foreach pair,$(SAMPLE_PAIRS),ascat/ascat/$(pair).pdf) \ - $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) -# $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) + $(foreach pair,$(SAMPLE_PAIRS),ascat/total/$(pair).pdf) \ + $(foreach pair,$(SAMPLE_PAIRS),ascat/bychr/$(pair)/timestamp) define ascat-plot-log2 ascat/log2/$1_$2.pdf : facets/cncf/$1_$2.Rdata @@ -99,12 +99,13 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define ascat-plot-chr ascat/bychr/$1_$2/timestamp : facets/cncf/$1_$2.Rdata ascat/ascat/$1_$2.pdf - $$(call RUN, -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p ascat/bychr/ && \ - mkdir -p ascat/bychr/$1_$2 && \ - $(RSCRIPT) modules/copy_number/ascat.R --type plot-chr --file_in $$< --file_out ascat/bychr/$1_$2") + $$(call RUN, -v $(ASCAT_ENV) -s 6G -m 12G,"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/ascat.R \ + --type plot-chr \ + --file_in $$(<) \ + --file_out ascat/bychr/$1_$2") endef - $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ascat-plot-chr,$(tumor.$(pair)),$(normal.$(pair))))) From 896e1ab06d93c268ddc08192d247ac21869852f3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 14:51:19 -0400 Subject: [PATCH 177/766] -- --- Makefile | 37 ++------- copy_number/cnvkitbinqc.R | 68 ---------------- copy_number/cnvkitcoverage.mk | 30 ------- copy_number/cnvkitfix.mk | 18 ----- copy_number/cnvkitheatmap.R | 33 -------- copy_number/cnvkitheatmap.mk | 25 ------ copy_number/cnvkitplot.R | 91 --------------------- copy_number/cnvkitplot.mk | 16 ---- copy_number/cnvkitprcomp.R | 85 -------------------- copy_number/cnvkitprcomp.mk | 19 ----- copy_number/cnvkitqc.R | 140 --------------------------------- copy_number/cnvkitqc.mk | 28 ------- copy_number/cnvkitreference.mk | 13 --- copy_number/cnvkitsegment.mk | 23 ------ copy_number/cnvkitsummary.R | 60 -------------- copy_number/cnvkitsummary.mk | 13 --- 16 files changed, 5 insertions(+), 694 deletions(-) delete mode 100644 copy_number/cnvkitbinqc.R delete mode 100644 copy_number/cnvkitcoverage.mk delete mode 100644 copy_number/cnvkitfix.mk delete mode 100644 copy_number/cnvkitheatmap.R delete mode 100644 copy_number/cnvkitheatmap.mk delete mode 100644 copy_number/cnvkitplot.R delete mode 100644 copy_number/cnvkitplot.mk delete mode 100644 copy_number/cnvkitprcomp.R delete mode 100644 copy_number/cnvkitprcomp.mk delete mode 100644 copy_number/cnvkitqc.R delete mode 100644 copy_number/cnvkitqc.mk delete mode 100644 copy_number/cnvkitreference.mk delete mode 100644 copy_number/cnvkitsegment.mk delete mode 100755 copy_number/cnvkitsummary.R delete mode 100644 copy_number/cnvkitsummary.mk diff --git a/Makefile b/Makefile index 150b0355..87f72e72 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,10 @@ sufam: TARGETS += get_basecount get_basecount: $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) + +TARGETS += strelka_varscan_indels +strelka_varscan_indels: + $(call RUN_MAKE,modules/variant_callers/somatic/strelkaVarscanIndels.mk) #================================================== @@ -214,10 +218,6 @@ TARGETS += titan titan : $(call RUN_MAKE,modules/copy_number/titan.mk) -TARGETS += strelka_varscan_indels -strelka_varscan_indels: - $(call RUN_MAKE,modules/variant_callers/somatic/strelkaVarscanIndels.mk) - TARGETS += varscan_cnv varscan_cnv : $(call RUN_MAKE,modules/copy_number/varscanCNV.mk) @@ -256,36 +256,9 @@ snp6 : TARGETS += cnv_kit cnv_kit : - $(call RUN_MAKE,modules/test/workflows/cnvkit.mk) + $(call RUN_MAKE,modules/copy_number/cnvkit.mk) -TARGETS += cnvkit_coverage -cnvkit_coverage : - $(call RUN_MAKE,modules/copy_number/cnvkitcoverage.mk) - -TARGETS += cnvkit_reference -cnvkit_reference : - $(call RUN_MAKE,modules/copy_number/cnvkitreference.mk) - -TARGETS += cnvkit_fix -cnvkit_fix : - $(call RUN_MAKE,modules/copy_number/cnvkitfix.mk) -TARGETS += cnvkit_plot -cnvkit_plot : - $(call RUN_MAKE,modules/copy_number/cnvkitplot.mk) - -TARGETS += cnvkit_heatmap -cnvkit_heatmap : - $(call RUN_MAKE,modules/copy_number/cnvkitheatmap.mk) - -TARGETS += cnvkit_pca -cnvkit_pca : - $(call RUN_MAKE,modules/copy_number/cnvkitprcomp.mk) - -TARGETS += cnvkit_qc -cnvkit_qc : - $(call RUN_MAKE,modules/copy_number/cnvkitqc.mk) - #================================================== # structural variant callers #================================================== diff --git a/copy_number/cnvkitbinqc.R b/copy_number/cnvkitbinqc.R deleted file mode 100644 index 7d3e430e..00000000 --- a/copy_number/cnvkitbinqc.R +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal samples input file names"), - make_option("--tumor_files", default = NA, type = 'character', help = "tumor samples input file names"), - make_option("--out_file", default = NA, type = 'character', help = "output file name")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -in_file_normal = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE)) -in_file_tumor = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE)) -out_file = opt$out_file - -depth_n = list() -for (i in 1:length(in_file_normal)) { - print(i) - data = read.csv(file=in_file_normal[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% as.character(1:22) - depth_n[[i]] = as.numeric(data[index,"depth"]) -} -depth_n = do.call(cbind, depth_n) - -depth_t = list() -for (i in 1:length(in_file_tumor)) { - print(i) - data = read.csv(file=in_file_tumor[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% as.character(1:22) - depth_t[[i]] = as.numeric(data[index,"depth"]) -} -depth_t = do.call(cbind, depth_t) - -bin_size = as.numeric(data[index,"end"]) - as.numeric(data[index,"start"]) -var_bin_n = apply(depth_n, 1, sd, na.rm=TRUE) -var_bin_t = apply(depth_t, 1, sd, na.rm=TRUE) -data = data.frame(bin_size, var_bin_n, var_bin_t) -write.table(data, file=out_file, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) - -ymin = min(var_bin_n, var_bin_t) -ymax = max(var_bin_n, var_bin_t) - -pdf(file=gsub(".tsv", ".pdf", x=out_file, fixed=TRUE), width=7, height=7) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(bin_size, var_bin_n, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="y", ylim=c(ymin, ymax)) -points(x=bin_size, y=var_bin_n, col = "grey50", bg = "grey90", pch = 21, cex = 1, lwd = .1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "Bin size (bp)", line = 4, cex = 1.5) -mtext(side = 2, text = "SD", line = 5, cex = 1.5) -plot(bin_size, var_bin_t, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="y", ylim=c(ymin, ymax)) -points(x=bin_size, y=var_bin_t, col = "black", bg = "steelblue", pch = 21, cex = 1, lwd = .1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "Bin size (bp)", line = 4, cex = 1.5) -mtext(side = 2, text = "SD", line = 5, cex = 1.5) -plot(var_bin_n, var_bin_t, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", log="xy", xlim=c(ymin, ymax), ylim=c(ymin, ymax)) -points(x=var_bin_n, y=var_bin_t, col = "black", bg = "steelblue", pch = 21, cex = 1, lwd = .1) -abline(a=0, b=1, col="goldenrod3", lwd=2) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "Normal SD", line = 4, cex = 1.5) -mtext(side = 2, text = "Tumor SD", line = 5, cex = 1.5) -dev.off() diff --git a/copy_number/cnvkitcoverage.mk b/copy_number/cnvkitcoverage.mk deleted file mode 100644 index 696f395a..00000000 --- a/copy_number/cnvkitcoverage.mk +++ /dev/null @@ -1,30 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_coverage.$(NOW) -PHONY += cnvkit cnvkit/cnn cnvkit/cnn/tumor cnvkit/cnn/normal - -cnvkit_coverage : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn) $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) - -define cnvkit-tumor-cnn -cnvkit/cnn/tumor/%.targetcoverage.cnn : bam/%.bam - $$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/tumor/$$(*).targetcoverage.cnn") - -cnvkit/cnn/tumor/%.antitargetcoverage.cnn : bam/%.bam - $$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/tumor/$$(*).antitargetcoverage.cnn") -endef - $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-tumor-cnn,$(sample)))) - -define cnvkit-normal-cnn -cnvkit/cnn/normal/%.targetcoverage.cnn : bam/%.bam - $$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/normal/$$(*).targetcoverage.cnn") - -cnvkit/cnn/normal/%.antitargetcoverage.cnn : bam/%.bam - $$(call RUN,-c -n 4 -s 6G -m 8G,"cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/normal/$$(*).antitargetcoverage.cnn") -endef - $(foreach sample,$(NORMAL_SAMPLES),\ - $(eval $(call cnvkit-normal-cnn,$(sample)))) - -.PHONY: $(PHONY) - diff --git a/copy_number/cnvkitfix.mk b/copy_number/cnvkitfix.mk deleted file mode 100644 index c83aa1af..00000000 --- a/copy_number/cnvkitfix.mk +++ /dev/null @@ -1,18 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_fix.$(NOW) -PHONY += cnvkit cnvkit/cnr - -cnvkit_fix : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) - -define cnvkit-cnr -cnvkit/cnr/%.cnr : cnvkit/cnn/tumor/%.targetcoverage.cnn cnvkit/cnn/tumor/%.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr - $$(call RUN,-c -s 6G -m 8G,"cnvkit.py fix $$(<) $$(<<) cnvkit/reference/combined_reference.cnr -o cnvkit/cnr/$$(*).cnr") - -endef - $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-cnr,$(sample)))) - -.PHONY: $(PHONY) - diff --git a/copy_number/cnvkitheatmap.R b/copy_number/cnvkitheatmap.R deleted file mode 100644 index 1513a473..00000000 --- a/copy_number/cnvkitheatmap.R +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("RColorBrewer")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--in_file", default = NA, type = 'character', help = "input file names"), - make_option("--out_file", default = NA, type = 'character', help = "output file name")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -in_file = unlist(strsplit(x=opt$in_file, split=" ", fixed=TRUE)) -out_file = opt$out_file - -depth = list() -for (i in 1:length(in_file)) { - print(i) - data = read.csv(file=in_file[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% c(as.character(1:22), "X") - depth[[i]] = as.numeric(data[index,"depth"]) -} -depth = do.call(cbind, depth) -pdf(file=out_file, width=14, height=14) -heatmap(x=depth, labRow=rep(" ", nrow(depth)), labCol=rep(" ", ncol(depth)), col=colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(256)) -dev.off() - -png(file=gsub(".pdf", ".png", out_file, fixed=TRUE), width=1440, height=1440) -heatmap(x=depth, labRow=rep(" ", nrow(depth)), labCol=rep(" ", ncol(depth)), col=colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(256)) -dev.off() diff --git a/copy_number/cnvkitheatmap.mk b/copy_number/cnvkitheatmap.mk deleted file mode 100644 index cbbad3b1..00000000 --- a/copy_number/cnvkitheatmap.mk +++ /dev/null @@ -1,25 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/cnvkit_heatmap.$(NOW) -PHONY += cnvkit cnvkit/heatmap - -CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn)) -CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn)) -CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn)) -CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn)) - -cnvkit : cnvkit/heatmap/normal_samples_ontarget.pdf cnvkit/heatmap/normal_samples_offtarget.pdf cnvkit/heatmap/tumor_samples_ontarget.pdf cnvkit/heatmap/tumor_samples_offtarget.pdf - -cnvkit/heatmap/normal_samples_ontarget.pdf : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_NORMAL_ON_TARGET)' --out_file cnvkit/heatmap/normal_samples_ontarget.pdf") - -cnvkit/heatmap/normal_samples_offtarget.pdf : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_NORMAL_OFF_TARGET)' --out_file cnvkit/heatmap/normal_samples_offtarget.pdf") - -cnvkit/heatmap/tumor_samples_ontarget.pdf : $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_TUMOR_ON_TARGET)' --out_file cnvkit/heatmap/tumor_samples_ontarget.pdf") - -cnvkit/heatmap/tumor_samples_offtarget.pdf : $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitheatmap.R --in_file '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file cnvkit/heatmap/tumor_samples_offtarget.pdf") - -.PHONY: $(PHONY) diff --git a/copy_number/cnvkitplot.R b/copy_number/cnvkitplot.R deleted file mode 100644 index 971b6654..00000000 --- a/copy_number/cnvkitplot.R +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("copynumber")) -suppressPackageStartupMessages(library("colorspace")) -suppressPackageStartupMessages(library("ASCAT")) -suppressPackageStartupMessages(library("GAP")) - -'plot_log2_' <- function(x, title = "") -{ - par(mar=c(5, 5, 4, 2)+.1) - data("CytoBand") - end = NULL - for (i in 1:23) { - end = c(end, max(CytoBand[CytoBand[,1]==i,"End"])) - } - end = cumsum(end) - start = c(1, end[1:22]+1) - CytoBand = cbind(start, end) - index = NULL - for (i in 1:23) { - index = c(index, seq(from = CytoBand[i, "start"], to=CytoBand[i, "end"], length=sum(x$chromosome==i))) - } - plot(index, x$log2, type="p", pch=".", cex=1.95, col="grey80", axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5)) - axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) - mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) - abline(v=1, col="goldenrod3", lty=3, lwd=.5) - abline(h=0, col="red", lty=1, lwd=1) - for (j in 1:23) { - abline(v=CytoBand[j,"end"], col="goldenrod3", lty=3, lwd=.5) - } - axis(1, at = .5*(CytoBand[,"start"]+CytoBand[,"end"]), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=CytoBand[23,"end"]+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) - title(main = title, line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) -} - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--in_file", default = NA, type = 'character', help = "input file name")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -outfile_on_target = gsub("cnr", "log2", gsub(".cnr", ".ontarget.pdf", opt$in_file, fixed=TRUE), fixed=TRUE) -outfile_off_target = gsub("cnr", "log2", gsub(".cnr", ".offtarget.pdf", opt$in_file, fixed=TRUE), fixed=TRUE) - -data = read.table(file=opt$in_file, header=TRUE, sep="\t", comment.char="#", stringsAsFactors=FALSE) -data = subset(data, data[,"depth"]!=0) - -if (nrow(data)==0) { - system(paste0("touch ", outfile_on_target)) - system(paste0("touch ", outfile_off_target)) -} else { - data[,"chromosome"] = gsub(pattern="chr", replacement="", x=data[,"chromosome"], fixed=TRUE) - data[data[,"chromosome"]=="X", "chromosome"] = 23 - data[data[,"chromosome"]=="Y", "chromosome"] = 24 - data[,"chromosome"] = as.numeric(data[,"chromosome"]) - data = subset(data, data[,"chromosome"]<=23) - - if (sum(data$gene=="-")>0) { - flag = 1 - } else if (sum(data$gene=="Antitarget")>0) { - flag = 2 - } - - if (flag==1) { - ontarget = subset(data, data$gene=="-") - } else if (flag==2) { - ontarget = subset(data, data$gene!="Antitarget") - } - - pdf(file=outfile_on_target, width=10, height=4.25) - plot_log2_(x=ontarget, title=gsub("cnvkit/cnr/", "", gsub(".cnr", "", opt$in_file, fixed=TRUE), fixed=TRUE)) - dev.off() - - if (flag==1) { - offtarget = subset(data, data$gene!="-") - } else if (flag==2) { - offtarget = subset(data, data$gene=="Antitarget") - } - - tmp = offtarget[,c("chromosome", "start", "log2"),drop=FALSE] - tmp = winsorize(data=tmp, tau=3.5, k=25, verbose=FALSE, return.outliers=TRUE) - offtarget[tmp$wins.outliers[,3]!=0,"log2"] = NA - pdf(file=outfile_off_target, width=10, height=4.25) - plot_log2_(x=offtarget, title=gsub("cnvkit/cnr/", "", gsub(".cnr", "", opt$in_file, fixed=TRUE), fixed=TRUE)) - dev.off() -} diff --git a/copy_number/cnvkitplot.mk b/copy_number/cnvkitplot.mk deleted file mode 100644 index ba16ff8d..00000000 --- a/copy_number/cnvkitplot.mk +++ /dev/null @@ -1,16 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_plot.$(NOW) -PHONY += cnvkit cnvkit/log2 - -cnvkit_plot : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/log2/$(sample).ontarget.pdf cnvkit/log2/$(sample).offtarget.pdf) - -define cnvkit-plot -cnvkit/log2/%.ontarget.pdf cnvkit/log2/%.offtarget.pdf : cnvkit/cnr/%.cnr - $$(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 4G -m 6G,"$(RSCRIPT) modules/copy_number/cnvkitplot.R --in_file $$(<)") -endef - $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-plot,$(sample)))) - -.PHONY: $(PHONY) diff --git a/copy_number/cnvkitprcomp.R b/copy_number/cnvkitprcomp.R deleted file mode 100644 index 0353609e..00000000 --- a/copy_number/cnvkitprcomp.R +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("RColorBrewer")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal samples input file names"), - make_option("--tumor_files", default = NA, type = 'character', help = "tumor samples input file names"), - make_option("--out_file_normal", default = NA, type = 'character', help = "normal samples output file name"), - make_option("--out_file_tumor", default = NA, type = 'character', help = "tumor samples output file name")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -in_file_normal = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE)) -normal_samples = gsub(".antitargetcoverage", "", x=gsub(".targetcoverage", "", x=gsub(pattern=".cnn", replacement="", x=gsub(pattern="cnvkit/cnn/normal/", replacement="", x=in_file_normal, fixed=TRUE), fixed=TRUE), fixed=TRUE), fixed=TRUE) -in_file_tumor = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE)) -tumor_samples = gsub(".antitargetcoverage", "", x=gsub(".targetcoverage", "", x=gsub(pattern=".cnn", replacement="", x=gsub(pattern="cnvkit/cnn/tumor/", replacement="", x=in_file_tumor, fixed=TRUE), fixed=TRUE), fixed=TRUE), fixed=TRUE) -out_file_normal = opt$out_file_normal -out_file_tumor = opt$out_file_tumor - -depth_n = list() -for (i in 1:length(in_file_normal)) { - print(i) - data = read.csv(file=in_file_normal[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% as.character(1:22) - depth_n[[i]] = as.numeric(data[index,"depth"]) -} -depth_n = do.call(cbind, depth_n) - -depth_t = list() -for (i in 1:length(in_file_tumor)) { - print(i) - data = read.csv(file=in_file_tumor[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% as.character(1:22) - depth_t[[i]] = as.numeric(data[index,"depth"]) -} -depth_t = do.call(cbind, depth_t) - -pca_n = prcomp(t(depth_n), center=TRUE, scale.=TRUE) -pca_t = predict(object=pca_n, newdata=t(depth_t)) -x = c(pca_n$x[,1], pca_t[,1]) -y = c(pca_n$x[,2], pca_t[,2]) -bg = c(rep("grey90", nrow(pca_n$x)), rep("steelblue", nrow(pca_t))) -col = c(rep("grey50", nrow(pca_n$x)), rep("black", nrow(pca_t))) -pch = 21 -index = c(rep(TRUE, nrow(pca_n$x)), rep(FALSE, nrow(pca_t))) - -pdf(file=out_file_normal, width=9, height=9) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(x=x, y=y, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "") -points(x=x[index], y=y[index], col = col[index], bg = bg[index], pch = pch, cex = 1, lwd = .1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "PC 1", line = 4, cex = 1.5) -mtext(side = 2, text = "PC 2", line = 4, cex = 1.5) -dev.off() - -pdf(file=out_file_tumor, width=9, height=9) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(x=x, y=y, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "") -points(x=x[!index], y=y[!index], col = col[!index], bg = bg[!index], pch = pch, cex = 1, lwd = .1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "PC 1", line = 4, cex = 1.5) -mtext(side = 2, text = "PC 2", line = 4, cex = 1.5) -dev.off() - -pdf(file=gsub("tumor", "all", out_file_tumor, fixed=TRUE), width=9, height=9) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(x=x, y=y, col = col, bg = bg, pch = pch, cex = 1, lwd = .1, axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "") -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = NULL, cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "PC 1", line = 4, cex = 1.5) -mtext(side = 2, text = "PC 2", line = 4, cex = 1.5) -dev.off() - -data = rbind(pca_n$x, pca_t) -rownames(data) = c(normal_samples, tumor_samples) -colnames(data) = paste("PC", 1:ncol(data)) -file_name = paste0("cnvkit/pca/pc_", ifelse(grepl("offtarget", out_file_tumor, fixed=TRUE), "offtarget", "ontarget"), ".txt") -write.table(data, file=file_name, sep="\t", col.names=TRUE, row.names=TRUE, quote=FALSE) diff --git a/copy_number/cnvkitprcomp.mk b/copy_number/cnvkitprcomp.mk deleted file mode 100644 index e787762f..00000000 --- a/copy_number/cnvkitprcomp.mk +++ /dev/null @@ -1,19 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/cnvkit_pca.$(NOW) -PHONY += cnvkit cnvkit/pca - -CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn)) -CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn)) -CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn)) -CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn)) - -cnvkit : cnvkit/pca/normal_samples_ontarget.pdf cnvkit/pca/normal_samples_offtarget.pdf cnvkit/pca/tumor_samples_ontarget.pdf cnvkit/pca/tumor_samples_offtarget.pdf - -cnvkit/pca/normal_samples_ontarget.pdf cnvkit/pca/tumor_samples_ontarget.pdf : $(wildcard cnvkit/cnn/tumor/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitprcomp.R --normal_files '$(CNVKIT_NORMAL_ON_TARGET)' --tumor_files '$(CNVKIT_TUMOR_ON_TARGET)' --out_file_normal cnvkit/pca/normal_samples_ontarget.pdf --out_file_tumor cnvkit/pca/tumor_samples_ontarget.pdf") - -cnvkit/pca/normal_samples_offtarget.pdf cnvkit/pca/tumor_samples_offtarget.pdf : $(wildcard cnvkit/cnn/tumor/$(NORMAL_SAMPLES).antitargetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitprcomp.R --normal_files '$(CNVKIT_NORMAL_OFF_TARGET)' --tumor_files '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file_normal cnvkit/pca/normal_samples_offtarget.pdf --out_file_tumor cnvkit/pca/tumor_samples_offtarget.pdf") - -.PHONY: $(PHONY) diff --git a/copy_number/cnvkitqc.R b/copy_number/cnvkitqc.R deleted file mode 100644 index a8002d3a..00000000 --- a/copy_number/cnvkitqc.R +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--normal_files", default = NA, type = 'character', help = "normal input files"), - make_option("--tumor_files", default = NA, type = 'character', help = "tumor input files"), - make_option("--out_file", default = NA, type = 'character', help = "output file"), - make_option("--option", default = NA, type = 'character', help = "1-0 for ontarget or offtarget")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -normal_files = unlist(strsplit(x=opt$normal_files, split=" ", fixed=TRUE)) -normal_samples = gsub(pattern=".cnr", replacement="", x=gsub(pattern="cnvkit/cnr/", replacement="", x=normal_files, fixed=TRUE), fixed=TRUE) -tumor_files = unlist(strsplit(x=opt$tumor_files, split=" ", fixed=TRUE)) -tumor_samples = gsub(pattern=".cnr", replacement="", x=gsub(pattern="cnvkit/cnr/", replacement="", x=tumor_files, fixed=TRUE), fixed=TRUE) -out_file = opt$out_file - -'MAD' <- function(x) -{ - x = na.omit(x) - q2 = mad(x) - return(invisible(q2)) -} - -'MAPD' <- function(x) -{ - x = na.omit(x) - q2 = median(abs(x[1:(length(x)-1)] - x[2:length(x)])) - return(invisible(q2)) -} - -'MIQR' <- function(x) -{ - x = na.omit(x) - iq = stats::IQR(abs(x[1:(length(x)-1)] - x[2:length(x)])) - return(invisible(iq)) -} - -'scale.' <- function(x) -{ - y = (x-min(x))/(max(x)-min(x)) - return(invisible(y)) -} - -'transparentRgb' <- function (col = "black", alpha = 85) -{ - tmp = c(col2rgb(col), alpha, 255) - names(tmp) = c("red", "green", "blue", "alpha", "maxColorValue") - out = do.call("rgb", as.list(tmp)) - return(invisible(out)) -} - - -qc = matrix(NA, nrow=length(c(normal_samples, tumor_samples)), ncol=3, dimnames=list(c(normal_samples, tumor_samples), c("MAD", "MAPD", "IQR"))) -for (i in 1:length(normal_files)) { - print(i) - data = read.csv(file=normal_files[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% 1:22 & data[,"gene"] == ifelse(opt$option==1, "-", "Antitarget") - qc[normal_samples[i],1] = MAD(data[index,"log2"]) - qc[normal_samples[i],2] = MAPD(data[index,"log2"]) - qc[normal_samples[i],3] = MIQR(data[index,"log2"]) -} -for (i in 1:length(tumor_files)) { - print(i) - data = read.csv(file=tumor_files[i], header=TRUE, sep="\t", stringsAsFactors=FALSE) - index = data[,"chromosome"] %in% 1:22 & data[,"gene"] == ifelse(opt$option==1, "-", "Antitarget") - qc[tumor_samples[i],1] = MAD(data[index,"log2"]) - qc[tumor_samples[i],2] = MAPD(data[index,"log2"]) - qc[tumor_samples[i],3] = MIQR(data[index,"log2"]) -} -data = qc -colnames(data) = c("MAD", "MAPD", "IQR") -data = cbind("SAMPLE_NAME"=c(normal_samples, tumor_samples), "SAMPLE_TYPE"=c(rep("N", length(normal_samples)), rep("T", length(tumor_samples))), data) -write.table(data, file=out_file, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) - -# MAPD -file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_mapd.pdf") -x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "MAPD"]) -y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "MAPD"]) -dx = density(x, from=0, to=max(x,y)) -dx$y = scale.(dx$y) -dy = density(y, from=0, to=max(x,y)) -dy$y = scale.(dy$y) -pdf(file=file_name, width=7, height=7) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2)) -polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2) -polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2) -legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "MAPD", line = 4, cex = 1.5) -mtext(side = 2, text = "Density", line = 4, cex = 1.5) -dev.off() - -# MAD -file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_mad.pdf") -x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "MAD"]) -y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "MAD"]) -dx = density(x, from=0, to=max(x,y)) -dx$y = scale.(dx$y) -dy = density(y, from=0, to=max(x,y)) -dy$y = scale.(dy$y) -pdf(file=file_name, width=7, height=7) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2)) -polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2) -polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2) -legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "MAD", line = 4, cex = 1.5) -mtext(side = 2, text = "Density", line = 4, cex = 1.5) -dev.off() - -# IQR -file_name = paste0("cnvkit/qc/", ifelse(opt$option==1, "on", "off"), "target_iqr.pdf") -x = as.numeric(data[data[,"SAMPLE_TYPE"]=="T", "IQR"]) -y = as.numeric(data[data[,"SAMPLE_TYPE"]=="N", "IQR"]) -dx = density(x, from=0, to=max(x,y)) -dx$y = scale.(dx$y) -dy = density(y, from=0, to=max(x,y)) -dy$y = scale.(dy$y) -pdf(file=file_name, width=7, height=7) -par(mar = c(6.1, 6.5, 4.1, 1.1)) -plot(0, 0, type="n", axes = FALSE, frame.plot = FALSE, main = "", xlab = "", ylab = "", xlim=c(0, max(max(x, y), 1.5)), ylim=c(0,1.2)) -polygon(x=c(dx$x, rev(dx$x)), y=c(dx$y, rep(0, length(dx$y))), border="steelblue", col=transparentRgb("steelblue", 155), lwd=2) -polygon(x=c(dy$x, rev(dy$x)), y=c(dy$y, rep(0, length(dy$y))), border="grey50", col=transparentRgb("grey50", 155), lwd=2) -legend("topright", col=c("steelblue", "grey50"), pch=15, legend=c("Tumor", "Normal"), box.lwd=-1) -axis(1, at = NULL, cex.axis = 1.5, padj = 0.25, lwd=1.25, lwd.ticks=1.15) -axis(2, at = seq(0, 1, by=.2), labels = seq(0, 1, by=.2), cex.axis = 1.5, las = 1, lwd=1.25, lwd.ticks=1.15) -mtext(side = 1, text = "IQR", line = 4, cex = 1.5) -mtext(side = 2, text = "Density", line = 4, cex = 1.5) -dev.off() - diff --git a/copy_number/cnvkitqc.mk b/copy_number/cnvkitqc.mk deleted file mode 100644 index ab73e82f..00000000 --- a/copy_number/cnvkitqc.mk +++ /dev/null @@ -1,28 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/cnvkit_qc.$(NOW) -PHONY += cnvkit cnvkit/qc - -CNVKIT_NORMAL ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnr/$(sample).cnr)) -CNVKIT_TUMOR ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr)) -CNVKIT_NORMAL_ON_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn)) -CNVKIT_NORMAL_OFF_TARGET ?= $(wildcard $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn)) -CNVKIT_TUMOR_ON_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn)) -CNVKIT_TUMOR_OFF_TARGET ?= $(wildcard $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn)) - -cnvkit : cnvkit/qc/qc_ontarget.tsv cnvkit/qc/qc_offtarget.tsv cnvkit/qc/bin_qc_ontarget.tsv cnvkit/qc/bin_qc_offtarget.tsv - -cnvkit/qc/qc_ontarget.tsv : $(wildcard cnvkit/cnr/$(SAMPLES).cnr) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 8G -m 16G,"$(RSCRIPT) modules/copy_number/cnvkitqc.R --normal_files '$(CNVKIT_NORMAL)' --tumor_files '$(CNVKIT_TUMOR)' --out_file cnvkit/qc/qc_ontarget.tsv --option 1") - -cnvkit/qc/qc_offtarget.tsv : $(wildcard cnvkit/cnr/$(SAMPLES).cnr) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 8G -m 16G,"$(RSCRIPT) modules/copy_number/cnvkitqc.R --normal_files '$(CNVKIT_NORMAL)' --tumor_files '$(CNVKIT_TUMOR)' --out_file cnvkit/qc/qc_offtarget.tsv --option 0") - -cnvkit/qc/bin_qc_ontarget.tsv : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).targetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitbinqc.R --normal_files '$(CNVKIT_NORMAL_ON_TARGET)' --tumor_files '$(CNVKIT_TUMOR_ON_TARGET)' --out_file cnvkit/qc/bin_qc_ontarget.tsv") - -cnvkit/qc/bin_qc_offtarget.tsv : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn) $(wildcard cnvkit/cnn/tumor/$(TUMOR_SAMPLES).antitargetcoverage.cnn) - $(call RUN,-c -v ~/share/usr/anaconda-envs/ascat -s 32G -m 48G,"$(RSCRIPT) modules/copy_number/cnvkitbinqc.R --normal_files '$(CNVKIT_NORMAL_OFF_TARGET)' --tumor_files '$(CNVKIT_TUMOR_OFF_TARGET)' --out_file cnvkit/qc/bin_qc_offtarget.tsv") - - -.PHONY: $(PHONY) diff --git a/copy_number/cnvkitreference.mk b/copy_number/cnvkitreference.mk deleted file mode 100644 index f4932a9f..00000000 --- a/copy_number/cnvkitreference.mk +++ /dev/null @@ -1,13 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_reference.$(NOW) -PHONY += cnvkit cnvkit/reference - -cnvkit_reference : cnvkit/reference/combined_reference.cnr - -cnvkit/reference/combined_reference.cnr : $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).targetcoverage.cnn) $(wildcard cnvkit/cnn/normal/$(NORMAL_SAMPLES).antitargetcoverage.cnn) - $(call RUN,-n 1 -s 24G -m 32G,"cnvkit.py reference cnvkit/cnn/normal/*.cnn -f $(REF_FASTA) --no-edge -o cnvkit/reference/combined_reference.cnr") - -.PHONY: $(PHONY) - diff --git a/copy_number/cnvkitsegment.mk b/copy_number/cnvkitsegment.mk deleted file mode 100644 index 7c051d3d..00000000 --- a/copy_number/cnvkitsegment.mk +++ /dev/null @@ -1,23 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_segment.$(NOW) -PHONY += cnvkit cnvkit/segmented cnvkit/totalcopy cnvkit/called - -cnvkit_segment : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).RData) $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).pdf) $(foreach sample,$(TUMOR_SAMPLES),cnvkit/called/$(sample).RData) - -define cnvkit-totalcopy -cnvkit/segmented/%.pdf cnvkit/totalcopy/%.RData : cnvkit/cnr/%.cnr - $$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p cnvkit/segmented && \ - mkdir -p cnvkit/totalcopy && \ - $(RSCRIPT) modules/copy_number/cnvkit.R --type total-copy --sample_name $$(*)") - -cnvkit/called/%.RData : cnvkit/totalcopy/%.RData - $$(call RUN,-c -v $(ASCAT_ENV) -s 6G -m 12G,"mkdir -p cnvkit/called && \ - $(RSCRIPT) modules/copy_number/cnvkit.R --type call-cna --sample_name $$(*)") - -endef - $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-totalcopy,$(sample)))) - -.PHONY: $(PHONY) diff --git a/copy_number/cnvkitsummary.R b/copy_number/cnvkitsummary.R deleted file mode 100755 index cfa7bf5b..00000000 --- a/copy_number/cnvkitsummary.R +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("RColorBrewer")) -suppressPackageStartupMessages(library("GenomicRanges")) -suppressPackageStartupMessages(library("plyr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("stringr")) -suppressPackageStartupMessages(library("tidyr")) -suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("foreach")) -suppressPackageStartupMessages(library("rtracklayer")) -suppressPackageStartupMessages(library("grid")) -suppressPackageStartupMessages(library("rlist")) - -optList <- list( - make_option("--sample_names", default = NULL, help = "list of sample names") - ) - -parser <- OptionParser(usage = "%prog [options] [facets files]", option_list = optList) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -sample_names = unlist(strsplit(opt$sample_names, split=" ", fixed=TRUE)) -genes = read.csv(file="~/share/reference/annotation_gene_lists/annotation_impact_468.txt", header=TRUE, sep="\t", stringsAsFactors=FALSE) %>% - filter(Chromosome %in% as.character(c(1:22, "X", "Y"))) %>% - filter(!duplicated(Gene_Symbol)) %>% - arrange(as.integer(Chromosome), Start, End) - -genes_granges = genes %$% - GRanges(seqnames = Chromosome, ranges = IRanges(Start, End), Gene_Symbol = Gene_Symbol) -mm = lapply(1:length(sample_names), function(i, sample_names, genes, genes_granges) { - cat(i, "of", length(sample_names), "\n") - load(paste0("cnvkit/called/", sample_names[i], ".RData")) - tmp[tmp[,"Chromosome"]==23,"Chromosome"] = "X" - tmp[tmp[,"Chromosome"]==24,"Chromosome"] = "Y" - tmp_granges = tmp %$% GRanges(seqnames = Chromosome, ranges = IRanges(Start, End)) - mcols(tmp_granges) = tmp %>% select(Cat5) - fo = findOverlaps(tmp_granges, genes_granges) - x = mcols(genes_granges)[subjectHits(fo),] - y = mcols(tmp_granges)[queryHits(fo),] - df = data.frame("Gene_Symbol"=x, "Cat5"=y) - df = df %>% - group_by(Gene_Symbol) %>% - top_n(1, abs(Cat5)) - z = as.numeric(df$Cat5) - names(z) = as.character(df$Gene_Symbol) - z = z[names(z) %in% genes[,1]] - res = rep(NA, nrow(genes)) - names(res) = genes[,1] - res[names(z)] = z - return(res) -}, sample_names, genes, genes_granges) -bygene = do.call(cbind, mm) -colnames(bygene) = sample_names -bygene = cbind(genes, bygene) %>% - arrange(as.integer(Chromosome), Start, End) - -save(bygene, file="cnvkit/summary/bygene.RData") -write.table(bygene, file="cnvkit/summary/bygene.txt", sep="\t", col.names=TRUE, row.names=FALSE, na="", quote=FALSE) diff --git a/copy_number/cnvkitsummary.mk b/copy_number/cnvkitsummary.mk deleted file mode 100644 index 41aeeffb..00000000 --- a/copy_number/cnvkitsummary.mk +++ /dev/null @@ -1,13 +0,0 @@ -include modules/Makefile.inc -include modules/genome_inc/b37.inc - -LOGDIR ?= log/cnvkit_summary.$(NOW) -PHONY += cnvkit cnvkit/summary - -cnvkit_summary : cnvkit/summary/bygene.txt - -cnvkit/summary/bygene.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/called/$(sample).RData) - $(call RUN,-c -s 24G -m 48G,"mkdir -p cnvkit/summary && \ - $(RSCRIPT) modules/copy_number/cnvkitsummary.R --sample_names '$(TUMOR_SAMPLES)'") - -.PHONY: $(PHONY) From 6f8bc36908cd7a8a13aa6f1a2a1b23ee1b6b84b1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 15:14:48 -0400 Subject: [PATCH 178/766] Redo Picard MardkDuplicates --- aligners/bwamemAligner.mk | 32 ++++++++++++++++++++------ config.inc | 47 +++++++-------------------------------- 2 files changed, 33 insertions(+), 46 deletions(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index 6cf97dc7..d2605ee6 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -21,13 +21,8 @@ BWAMEM_REF_FASTA ?= $(REF_FASTA) BWAMEM_THREADS = 8 BWAMEM_MEM_PER_THREAD = $(if $(findstring true,$(PDX)),4G,2G) -..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt ) -.SECONDARY: -.DELETE_ON_ERROR: -.PHONY: bwamem - - -BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) +BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) @@ -50,6 +45,29 @@ bwamem/bam/%.bwamem.bam : fastq/%.fastq.gz fastq/%.fastq.gz : fastq/%.fastq $(call RUN,,"gzip -c $< > $(@) && $(RM) $<") + +define dedup-metrics +metrics/$1.dedup_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 16G -m 24G -v $(INNOVATION_ENV), "set -o pipefail && \ + picard \ + -Xmx16G \ + MarkDuplicates \ + VALIDATION_STRINGENCY=LENIENT \ + MAX_RECORDS_IN_RAM=4000000 \ + TMP_DIR=$(TMPDIR) \ + INPUT=$$(<) \ + OUTPUT=/dev/null + METRICS=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call dedup-metrics,$(sample)))) + + +..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt ) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: bwamem include modules/bam_tools/processBam.mk include modules/fastq_tools/fastq.mk diff --git a/config.inc b/config.inc index ef57e3bc..19f67122 100644 --- a/config.inc +++ b/config.inc @@ -14,19 +14,18 @@ R ?= R MY_RSCRIPT ?= Rscript RSCRIPT ?= Rscript -## General python 2.7 environment -ANACONDA_27_ENV ?= $(HOME)/share/usr/anaconda-envs/anaconda-2.7 - -## SUFAM python environment -SUFAM_ENV ?= $(HOME)/share/usr/anaconda-envs/sufam-dev - +ANACONDA_27_ENV = $(HOME)/share/usr/anaconda-envs/anaconda-2.7 +SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev MUTSIG_REPORT_ENV = $(HOME)/share/usr/anaconda-envs/mutsig-report-0.0.1 +JRFLAB_MODULES_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4 +ONCOTATOR_ENV = $(HOME)/share/usr/venv/oncotator-1.9.2.0 +VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86 +ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat +MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc +INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1 JARDIR ?= $(HOME)/share/usr/lib/java -## jrflab modules environment -JRFLAB_MODULES_ENV ?= $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4 - ## Applications UNZIP ?= /usr/bin/unzip FASTQC ?= $(PERL) $(HOME)/share/usr/FastQC/fastqc @@ -125,9 +124,6 @@ TVC ?= $(HOME)/share/usr/bin/tvc ANNOVAR = $(PERL) $(HOME)/share/usr/annovar-2017-07-16/table_annovar.pl -ONCOTATOR_ENV = $(HOME)/share/usr/venv/oncotator-1.9.2.0 - -VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86 VEP_PATH = $(VEP_ENV)/bin SPLIT_BED = python modules/scripts/split_bed.py @@ -137,35 +133,8 @@ SNP_FILTER_VCF = python modules/vcf_tools/snp_filter_vcf.py MERGE_VCF = python modules/vcf_tools/merge_vcf.py -ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat - -MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc MEDICC_VAR = $(MEDICC_ENV)/PROFILE MEDICC_BIN = $(MEDICC_ENV)/opt/medicc -PHYLO_ENV = $(HOME)/share/usr/anaconda-envs/phylotools - -CNTILP_ENV = $(HOME)/share/usr/anaconda-envs/cnt-ilp -CNTILP_CONF = $(CNTILP_ENV)/PROFILE -CNTILP_BIN = $(CNTILP_ENV)/opt/CNT-ILP/build - -CNTMD_ENV = $(HOME)/share/usr/anaconda-envs/cnt-md -CNTMD_CONF = $(CNTMD_ENV)/PROFILE -CNTMD_BIN = $(CNTMD_ENV)/opt/CNT-MD/build - -MACHINA_ENV = $(HOME)/share/usr/anaconda-envs/machina -MACHINA_VAR = $(MACHINA_ENV)/PROFILE -MACHINA_BIN = $(MACHINA_ENV)/opt/machina/build - -HATCHET_ENV = $(HOME)/share/usr/anaconda-envs/hatchet -HATCHET_VAR = $(HATCHET_ENV)/PROFILE -HATCHET_BIN = $(HATCHET_ENV)/opt/machina/build - -DECONSTRUCTSIGS_ENV = $(HOME)/share/usr/anaconda-envs/deconstructsigs - -PHANGORN_ENV = $(HOME)/share/usr/anaconda-envs/phangorn - -FGBIO_ENV = $(HOME)/share/usr/anaconda-envs/fgbio-0.8.1 - endif CONFIG_INC = true From ece57ae6328bdb373d3ef8584dd12d2ff951520f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 15:17:31 -0400 Subject: [PATCH 179/766] Update bwamemAligner.mk --- aligners/bwamemAligner.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index d2605ee6..6aacfc09 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -56,7 +56,7 @@ metrics/$1.dedup_metrics.txt : bam/$1.bam MAX_RECORDS_IN_RAM=4000000 \ TMP_DIR=$(TMPDIR) \ INPUT=$$(<) \ - OUTPUT=/dev/null + OUTPUT=/dev/null \ METRICS=$$(@)") endef From e5c64d301ab56a996577bd943174f9dba18b8123 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 15:23:35 -0400 Subject: [PATCH 180/766] Update bwamemAligner.mk --- aligners/bwamemAligner.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index 6aacfc09..f3ccdeb8 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -21,10 +21,10 @@ BWAMEM_REF_FASTA ?= $(REF_FASTA) BWAMEM_THREADS = 8 BWAMEM_MEM_PER_THREAD = $(if $(findstring true,$(PDX)),4G,2G) -BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ - $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) +BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) -bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) +bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) bam/%.bam : bwamem/bam/%.bwamem.$(BAM_SUFFIX) $(call RUN,,"ln -f $(<) $(@)") From 4e8ba974074ec683c3f4f17f853a2b2b68756773 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 15:29:27 -0400 Subject: [PATCH 181/766] Update bwamemAligner.mk --- aligners/bwamemAligner.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index f3ccdeb8..cc243ea2 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -57,7 +57,7 @@ metrics/$1.dedup_metrics.txt : bam/$1.bam TMP_DIR=$(TMPDIR) \ INPUT=$$(<) \ OUTPUT=/dev/null \ - METRICS=$$(@)") + METRICS_FILE=$$(@)") endef $(foreach sample,$(SAMPLES),\ From 7d6815e25cecfa39d46e86bb985b9dff4cfe5abf Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 1 Oct 2022 15:34:29 -0400 Subject: [PATCH 182/766] Update bwamemAligner.mk --- aligners/bwamemAligner.mk | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index cc243ea2..66d99bca 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -48,16 +48,16 @@ fastq/%.fastq.gz : fastq/%.fastq define dedup-metrics metrics/$1.dedup_metrics.txt : bam/$1.bam - $$(call RUN, -c -n 1 -s 16G -m 24G -v $(INNOVATION_ENV), "set -o pipefail && \ - picard \ - -Xmx16G \ - MarkDuplicates \ - VALIDATION_STRINGENCY=LENIENT \ - MAX_RECORDS_IN_RAM=4000000 \ - TMP_DIR=$(TMPDIR) \ - INPUT=$$(<) \ - OUTPUT=/dev/null \ - METRICS_FILE=$$(@)") + $$(call RUN, -c -n 1 -s 16G -m 24G -v $(INNOVATION_ENV) -w 24:00:00, "set -o pipefail && \ + picard \ + -Xmx16G \ + MarkDuplicates \ + VALIDATION_STRINGENCY=LENIENT \ + MAX_RECORDS_IN_RAM=4000000 \ + TMP_DIR=$(TMPDIR) \ + INPUT=$$(<) \ + OUTPUT=/dev/null \ + METRICS_FILE=$$(@)") endef $(foreach sample,$(SAMPLES),\ From 24b1fb8f8d7cce01d1452f32a5de5e1898bdf5c2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 2 Oct 2022 15:32:03 -0400 Subject: [PATCH 183/766] Update qmake.pl --- scripts/qmake.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index 50d64221..a95593f9 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -10,7 +10,7 @@ my $fin_slack = "pipeline_finished"; my %slack_map = ( - brownd7 => "W01BT68MSSD", + brownd7 => "W013UH0HWUF", selenicp => "W0142HA5LNA", dacruzpa => "W01BT68MSSD", parejaf => "W01BLNUF7J8", From ceee0495aef6907fcb50cc97d41ee9a74eff5868 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 2 Oct 2022 15:41:18 -0400 Subject: [PATCH 184/766] ++ --- aligners/bwamemAligner.mk | 8 +++++++- scripts/dedup_summary.R | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 scripts/dedup_summary.R diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index 66d99bca..1fd4a711 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -24,7 +24,8 @@ BWAMEM_MEM_PER_THREAD = $(if $(findstring true,$(PDX)),4G,2G) BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) \ - $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) + $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) \ + metrics/dedup_metrics.txt bam/%.bam : bwamem/bam/%.bwamem.$(BAM_SUFFIX) $(call RUN,,"ln -f $(<) $(@)") @@ -62,6 +63,11 @@ metrics/$1.dedup_metrics.txt : bam/$1.bam endef $(foreach sample,$(SAMPLES),\ $(eval $(call dedup-metrics,$(sample)))) + +metrics/dedup_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/dedup_summary.R --option 1 --sample_names '$(SAMPLES)'") + ..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt ) diff --git a/scripts/dedup_summary.R b/scripts/dedup_summary.R new file mode 100644 index 00000000..9e977864 --- /dev/null +++ b/scripts/dedup_summary.R @@ -0,0 +1,30 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_names", default = NA, type = 'character', help = "sample names")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".dedup_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="metrics/dedup_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} From 06f1c710de149529d9510ebb5b57617f5feba1f5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 2 Oct 2022 15:49:23 -0400 Subject: [PATCH 185/766] ++ --- aligners/bwamemAligner.mk | 6 +++++- scripts/dedup_summary.R | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index 1fd4a711..b49510b5 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -25,7 +25,8 @@ BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) \ $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) \ - metrics/dedup_metrics.txt + metrics/dedup_metrics.txt \ + metrics/dedup_summary.txt \ bam/%.bam : bwamem/bam/%.bwamem.$(BAM_SUFFIX) $(call RUN,,"ln -f $(<) $(@)") @@ -68,6 +69,9 @@ metrics/dedup_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_ $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/dedup_summary.R --option 1 --sample_names '$(SAMPLES)'") +metrics/dedup_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/dedup_summary.R --option 2 --sample_names '$(SAMPLES)'") ..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt ) diff --git a/scripts/dedup_summary.R b/scripts/dedup_summary.R index 9e977864..c4889638 100644 --- a/scripts/dedup_summary.R +++ b/scripts/dedup_summary.R @@ -27,4 +27,21 @@ if (as.numeric(opt$option)==1) { x = do.call(rbind, x) write_tsv(x, path="metrics/dedup_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) +} else if (as.numeric(opt$option)==2) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".dedup_metrics.txt"), + skip = 10, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(family_size = BIN, + coverage_multiple = CoverageMult, + all_counts = all_sets, + optical_counts = optical_sets, + non_optical_counts = non_optical_sets) %>% + dplyr::mutate(sample_name = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="metrics/dedup_summary.txt", na = "NA", append = FALSE, col_names = TRUE) + } From 3b00a4f0f0acb45df0eb7bcfc20b0a6844d6c9f0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 4 Oct 2022 16:47:51 -0400 Subject: [PATCH 186/766] Update fixBam.mk --- bam_tools/fixBam.mk | 103 +++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/bam_tools/fixBam.mk b/bam_tools/fixBam.mk index 28e39a7b..42a08ead 100644 --- a/bam_tools/fixBam.mk +++ b/bam_tools/fixBam.mk @@ -1,10 +1,8 @@ include modules/Makefile.inc include modules/genome_inc/b37.inc -LOGDIR ?= log/fix_bam.$(NOW) -PHONY += fixed_bam +LOGDIR = log/fix_bam.$(NOW) -VPATH = fixed_bam unprocessed_bam PICARD_JAR = ~/share/usr/picard/bin/picard.jar fix_bam : $(foreach sample,$(SAMPLES),fixed_bam/$(sample).bam) @@ -12,61 +10,66 @@ fix_bam : $(foreach sample,$(SAMPLES),fixed_bam/$(sample).bam) define fix-bam unprocessed_bam/%.ubam : unprocessed_bam/%.bam $$(call RUN,-c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) RevertSam \ - I=$$(<) \ - O=unprocessed_bam/$$(*).ubam \ - SANITIZE=true \ - MAX_DISCARD_FRACTION=0.005 \ - ATTRIBUTE_TO_CLEAR=XT \ - ATTRIBUTE_TO_CLEAR=XN \ - ATTRIBUTE_TO_CLEAR=AS \ - ATTRIBUTE_TO_CLEAR=OC \ - ATTRIBUTE_TO_CLEAR=OP \ - SORT_ORDER=queryname \ - RESTORE_ORIGINAL_QUALITIES=true \ - REMOVE_DUPLICATE_INFORMATION=true \ - REMOVE_ALIGNMENT_INFORMATION=true \ - TMP_DIR=$(TMPDIR)") + I=$$(<) \ + O=unprocessed_bam/$$(*).ubam \ + SANITIZE=true \ + MAX_DISCARD_FRACTION=0.005 \ + ATTRIBUTE_TO_CLEAR=XT \ + ATTRIBUTE_TO_CLEAR=XN \ + ATTRIBUTE_TO_CLEAR=AS \ + ATTRIBUTE_TO_CLEAR=OC \ + ATTRIBUTE_TO_CLEAR=OP \ + SORT_ORDER=queryname \ + RESTORE_ORIGINAL_QUALITIES=true \ + REMOVE_DUPLICATE_INFORMATION=true \ + REMOVE_ALIGNMENT_INFORMATION=true \ + TMP_DIR=$(TMPDIR)") + unprocessed_bam/%.fixed.bam : unprocessed_bam/%.ubam $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \ - R=$$(DMP_FASTA) \ - UNMAPPED_BAM=$$(<) \ - ALIGNED_BAM=unprocessed_bam/$$(*).bam \ - O=unprocessed_bam/$$(*).fixed.bam \ - CREATE_INDEX=true \ - ADD_MATE_CIGAR=true \ - CLIP_ADAPTERS=true \ - CLIP_OVERLAPPING_READS=true \ - INCLUDE_SECONDARY_ALIGNMENTS=false \ - MAX_INSERTIONS_OR_DELETIONS=-1 \ - TMP_DIR=$(TMPDIR)") + R=$$(DMP_FASTA) \ + UNMAPPED_BAM=$$(<) \ + ALIGNED_BAM=unprocessed_bam/$$(*).bam \ + O=unprocessed_bam/$$(*).fixed.bam \ + CREATE_INDEX=true \ + ADD_MATE_CIGAR=true \ + CLIP_ADAPTERS=true \ + CLIP_OVERLAPPING_READS=true \ + INCLUDE_SECONDARY_ALIGNMENTS=false \ + MAX_INSERTIONS_OR_DELETIONS=-1 \ + TMP_DIR=$(TMPDIR)") + unprocessed_bam/%.dedup.bam : unprocessed_bam/%.fixed.bam $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MarkDuplicates \ - I=$$(<) \ - O=unprocessed_bam/$$(*).dedup.bam \ - M=unprocessed_bam/$$(*).txt \ - TMP_DIR=$$(TMPDIR)") + I=$$(<) \ + O=unprocessed_bam/$$(*).dedup.bam \ + M=unprocessed_bam/$$(*).txt \ + TMP_DIR=$$(TMPDIR)") + fixed_bam/%.bam : unprocessed_bam/%.dedup.bam $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) AddOrReplaceReadGroups \ - I=$$(<) \ - O=fixed_bam/$$(*).bam \ - RGID=$$(*) \ - RGLB=$$(*) \ - RGPL=illumina \ - RGPU=NA \ - RGSM=$$(*) \ - TMP_DIR=$(TMPDIR) && \ - samtools index fixed_bam/$$(*).bam && \ - cp fixed_bam/$$(*).bam.bai fixed_bam/$$(*).bai && \ - rm -rf unprocessed_bam/$$(*).ubam && \ - rm -rf unprocessed_bam/$$(*).fixed.bam && \ - rm -rf unprocessed_bam/$$(*).dedup.bam && \ - rm -rf unprocessed_bam/$$(*).fixed.bai && \ - rm -rf unprocessed_bam/$$(*).dedup.bai && \ - rm -rf unprocessed_bam/$$(*).txt") + I=$$(<) \ + O=fixed_bam/$$(*).bam \ + RGID=$$(*) \ + RGLB=$$(*) \ + RGPL=illumina \ + RGPU=NA \ + RGSM=$$(*) \ + TMP_DIR=$(TMPDIR) && \ + samtools index fixed_bam/$$(*).bam && \ + cp fixed_bam/$$(*).bam.bai fixed_bam/$$(*).bai && \ + rm -rf unprocessed_bam/$$(*).ubam && \ + rm -rf unprocessed_bam/$$(*).fixed.bam && \ + rm -rf unprocessed_bam/$$(*).dedup.bam && \ + rm -rf unprocessed_bam/$$(*).fixed.bai && \ + rm -rf unprocessed_bam/$$(*).dedup.bai && \ + rm -rf unprocessed_bam/$$(*).txt") endef $(foreach sample,$(SAMPLES),\ $(eval $(call fix-bam,$(sample)))) +..DUMMY := $(shell mkdir -p version; \ + echo "picard" > version/fix_bam.txt) +.SECONDARY: .DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) +.PHONY: fix_bam \ No newline at end of file From 8c1433a34077236492461961f63333c258537eab Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 12:57:27 -0400 Subject: [PATCH 187/766] ++ --- Makefile | 4 ++++ scripts/annotateSummaryVcf.R | 31 +++++++++++++++++++++++++ vcf_tools/annotateSummaryVcf.mk | 41 +++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100755 scripts/annotateSummaryVcf.R create mode 100644 vcf_tools/annotateSummaryVcf.mk diff --git a/Makefile b/Makefile index 87f72e72..56ab0d70 100644 --- a/Makefile +++ b/Makefile @@ -574,6 +574,10 @@ cravat_annotation : TARGETS += cravat_annotate cravat_annotate : $(call RUN_MAKE,modules/vcf_tools/cravat_annotation.mk) + +TARGETS += ann_summary_vcf +ann_summary_vcf: + $(call RUN_MAKE,modules/vcf_tools/annotateSummaryVcf.mk) #================================================== diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R new file mode 100755 index 00000000..7ee2a038 --- /dev/null +++ b/scripts/annotateSummaryVcf.R @@ -0,0 +1,31 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--input", default = NA, type = 'character', help = "input file path"), + make_option("--output", default = NA, type = 'character', help = "output file path")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + smry = readr::read_tsv(file = opt$input, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(`#CHROM` = CHROM, + POS = POS) %>% + dplyr::mutate(ID = ".", + QUAL = 100, + FILTER = "PASS", + INFO = ".") %>% + dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO) + cat("##fileformat=VCFv4.2\n", file = opt$output, append = FALSE) + readr::write_tsv(smry, path = opt$output, na = "NA", append = FALSE, col_names = TRUE) +} diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk new file mode 100644 index 00000000..f6e89aa6 --- /dev/null +++ b/vcf_tools/annotateSummaryVcf.mk @@ -0,0 +1,41 @@ +include modules/Makefile.inc +include modules/genome_inc/b37.inc + +LOGDIR ?= log/annotate_smry_maf.$(NOW) + +annotate_smry_maf : vcf2maf/mutation_summary.vcf + +VCF2MAF_ENV = $(HOME)/share/usr/env/vcf2maf-1.6.17 +VCF2MAF = vcf2maf.pl + +vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 1 --input $$(<) --output $$(@)") + +vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf + $(call RUN, -c -n 12 -s 2G -m 3G -v $(VCF2MAF_ENV) -w 72:00:00,"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$(<) \ + --output-maf $$(@) \ + --tmp-dir $$(TMPDIR) \ + --tumor-id NA \ + --normal-id NA \ + --vep-path $$(VCF2MAF_ENV)/bin \ + --vep-data $$(HOME)/share/lib/resource_files/VEP/GRCh37/ \ + --vep-forks 12 \ + --ref-fasta $$(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ + --filter-vcf $$(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ + --species homo_sapiens \ + --ncbi-build GRCh37 \ + --maf-center MSKCC && \ + $$(RM) $$(TMPDIR)/$$(*).vep.vcf") + +vcf2maf/mutation_summary.txt : vcf2maf/mutation_summary.maf + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 2 --input $$(<) --output $$(@)") + +..DUMMY := $(shell mkdir -p version; \ + source $(VCF2MAF_ENV)/bin/activate $(VCF2MAF_ENV) && $(VCF2MAF) --man >> version/annotate_smry_maf.txt) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: annotate_smry_maf From e3dfdffd1739845685275af2a1bb421492c86ccd Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:02:25 -0400 Subject: [PATCH 188/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index f6e89aa6..e251aee0 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -10,7 +10,7 @@ VCF2MAF = vcf2maf.pl vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 1 --input $$(<) --output $$(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 1 --input $(<) --output $(@)") vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf $(call RUN, -c -n 12 -s 2G -m 3G -v $(VCF2MAF_ENV) -w 72:00:00,"set -o pipefail && \ From 3fa4d827a327ba1492ed38bc71d3a4343d0a51d8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:06:04 -0400 Subject: [PATCH 189/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index e251aee0..286abf2f 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -3,7 +3,8 @@ include modules/genome_inc/b37.inc LOGDIR ?= log/annotate_smry_maf.$(NOW) -annotate_smry_maf : vcf2maf/mutation_summary.vcf +annotate_smry_maf : vcf2maf/mutation_summary.vcf \ + vcf2maf/mutation_summary.maf \ VCF2MAF_ENV = $(HOME)/share/usr/env/vcf2maf-1.6.17 VCF2MAF = vcf2maf.pl @@ -14,21 +15,21 @@ vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf $(call RUN, -c -n 12 -s 2G -m 3G -v $(VCF2MAF_ENV) -w 72:00:00,"set -o pipefail && \ - $$(VCF2MAF) \ - --input-vcf $$(<) \ - --output-maf $$(@) \ - --tmp-dir $$(TMPDIR) \ + $(VCF2MAF) \ + --input-vcf $(<) \ + --output-maf $(@) \ + --tmp-dir $(TMPDIR) \ --tumor-id NA \ --normal-id NA \ - --vep-path $$(VCF2MAF_ENV)/bin \ - --vep-data $$(HOME)/share/lib/resource_files/VEP/GRCh37/ \ + --vep-path $(VCF2MAF_ENV)/bin \ + --vep-data $(HOME)/share/lib/resource_files/VEP/GRCh37/ \ --vep-forks 12 \ - --ref-fasta $$(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ - --filter-vcf $$(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ + --ref-fasta $(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ + --filter-vcf $(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ - $$(RM) $$(TMPDIR)/$$(*).vep.vcf") + $(RM) $(TMPDIR)/$(*).vep.vcf") vcf2maf/mutation_summary.txt : vcf2maf/mutation_summary.maf $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ From aad66f0e148c22f2bdb1029c52974cb136baf922 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:10:17 -0400 Subject: [PATCH 190/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 286abf2f..21ea451a 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -4,7 +4,7 @@ include modules/genome_inc/b37.inc LOGDIR ?= log/annotate_smry_maf.$(NOW) annotate_smry_maf : vcf2maf/mutation_summary.vcf \ - vcf2maf/mutation_summary.maf \ + vcf2maf/mutation_summary.maf VCF2MAF_ENV = $(HOME)/share/usr/env/vcf2maf-1.6.17 VCF2MAF = vcf2maf.pl From aca16181323f4f86c90d13b8df0b67dff74d7716 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:17:55 -0400 Subject: [PATCH 191/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 21ea451a..71a87775 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -6,22 +6,19 @@ LOGDIR ?= log/annotate_smry_maf.$(NOW) annotate_smry_maf : vcf2maf/mutation_summary.vcf \ vcf2maf/mutation_summary.maf -VCF2MAF_ENV = $(HOME)/share/usr/env/vcf2maf-1.6.17 -VCF2MAF = vcf2maf.pl - vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 1 --input $(<) --output $(@)") vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf - $(call RUN, -c -n 12 -s 2G -m 3G -v $(VCF2MAF_ENV) -w 72:00:00,"set -o pipefail && \ + $(call RUN, -c -n 12 -s 2G -m 3G -v $(VEP_ENV) -w 72:00:00,"set -o pipefail && \ $(VCF2MAF) \ --input-vcf $(<) \ --output-maf $(@) \ --tmp-dir $(TMPDIR) \ --tumor-id NA \ --normal-id NA \ - --vep-path $(VCF2MAF_ENV)/bin \ + --vep-path $(VEP_ENV)/bin \ --vep-data $(HOME)/share/lib/resource_files/VEP/GRCh37/ \ --vep-forks 12 \ --ref-fasta $(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ From ae74dae4f44b0dd800bff9e2228f455eef029635 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:24:47 -0400 Subject: [PATCH 192/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 71a87775..6eec86c5 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -19,10 +19,10 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --tumor-id NA \ --normal-id NA \ --vep-path $(VEP_ENV)/bin \ - --vep-data $(HOME)/share/lib/resource_files/VEP/GRCh37/ \ + --vep-data $(HOME)/share/reference/vep \ --vep-forks 12 \ - --ref-fasta $(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ - --filter-vcf $(HOME)/share/lib/resource_files/VEP/GRCh37/homo_sapiens/99_GRCh37/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ + --ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ + --filter-vcf $(HOME)/share/reference/vep/v86/homo_sapiens/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ From f70e27637cbdc58e92f49a61155f0609d65ca1a8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:26:04 -0400 Subject: [PATCH 193/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 6eec86c5..b06fabb0 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -22,7 +22,7 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --vep-data $(HOME)/share/reference/vep \ --vep-forks 12 \ --ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ - --filter-vcf $(HOME)/share/reference/vep/v86/homo_sapiens/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ + --filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ From 48c0910d22bacbf88786809e304f7e63a57f55d6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:27:55 -0400 Subject: [PATCH 194/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index b06fabb0..65404c39 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -21,8 +21,8 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --vep-path $(VEP_ENV)/bin \ --vep-data $(HOME)/share/reference/vep \ --vep-forks 12 \ - --ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz \ - --filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ + --ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa \ + --filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf \ --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ From e7331184b0caf5d08a69bc377f21da92333a0bec Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:30:26 -0400 Subject: [PATCH 195/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 65404c39..24d8120f 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -19,10 +19,10 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --tumor-id NA \ --normal-id NA \ --vep-path $(VEP_ENV)/bin \ - --vep-data $(HOME)/share/reference/vep \ + --vep-data $(HOME)/share/reference/vep/v86/ \ --vep-forks 12 \ --ref-fasta $(HOME)/share/reference/vep/v86/homo_sapiens/86_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa \ - --filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf \ + --filter-vcf $(HOME)/share/reference/vep/v86/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz \ --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ From 5843c16751f4476d00447661140ab21eeb0ace43 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:32:09 -0400 Subject: [PATCH 196/766] Update annotateSummaryVcf.R --- scripts/annotateSummaryVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R index 7ee2a038..d14425d4 100755 --- a/scripts/annotateSummaryVcf.R +++ b/scripts/annotateSummaryVcf.R @@ -27,5 +27,5 @@ if (as.numeric(opt$option)==1) { INFO = ".") %>% dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO) cat("##fileformat=VCFv4.2\n", file = opt$output, append = FALSE) - readr::write_tsv(smry, path = opt$output, na = "NA", append = FALSE, col_names = TRUE) + readr::write_tsv(smry, path = opt$output, na = "NA", append = TRUE, col_names = TRUE) } From 877bfc1ac834f001a6faceb8120c2c2d5f96b97f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:37:12 -0400 Subject: [PATCH 197/766] Update annotateSummaryVcf.mk --- vcf_tools/annotateSummaryVcf.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index 24d8120f..c7739a8d 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -26,7 +26,7 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --species homo_sapiens \ --ncbi-build GRCh37 \ --maf-center MSKCC && \ - $(RM) $(TMPDIR)/$(*).vep.vcf") + $(RM) $(TMPDIR)/mutation_summary.vep.vcf") vcf2maf/mutation_summary.txt : vcf2maf/mutation_summary.maf $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ From 7b36f2d811eeefc20261cb1e8c414063f59f4d70 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:55:13 -0400 Subject: [PATCH 198/766] ++ --- scripts/annotateSummaryVcf.R | 22 ++++++++++++++++++++++ vcf_tools/annotateSummaryVcf.mk | 7 ++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R index d14425d4..ed24631f 100755 --- a/scripts/annotateSummaryVcf.R +++ b/scripts/annotateSummaryVcf.R @@ -11,6 +11,7 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--input", default = NA, type = 'character', help = "input file path"), + make_option("--maf", default = NA, type = 'character', help = "input maf file path"), make_option("--output", default = NA, type = 'character', help = "output file path")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) @@ -28,4 +29,25 @@ if (as.numeric(opt$option)==1) { dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, FILTER, INFO) cat("##fileformat=VCFv4.2\n", file = opt$output, append = FALSE) readr::write_tsv(smry, path = opt$output, na = "NA", append = TRUE, col_names = TRUE) + +} else if (as.numeric(opt$option)==2) { + smry = readr::read_tsv(file = opt$input, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + maf = readr::read_tsv(file = opt$maf, comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(Tumor_Sample_Barcode = smry$TUMOR_SAMPLE, + Matched_Norm_Sample_Barcode = smry$NORMAL_SAMPLE, + Tumor_Sample_UUID = smry$TUMOR_SAMPLE, + Matched_Norm_Sample_UUID = smry$NORMAL_SAMPLE, + t_depth = smry$TUMOR_DP, + t_ref_count = round((1-smry$TUMOR_MAF) * smry$TUMOR_DP), + t_alt_count = round(TUMOR_MAF*smry$TUMOR_DP), + n_depth = smry$NORMAL_DP, + n_ref_count = round((1-smry$NORMAL_MAF) * smry$NORMAL_DP), + n_alt_count = round(smry$NORMAL_MAF*smry$NORMAL_DP), + CCF = smry$ccf, + LOH = smry$facetsLOHCall, + HOTSPOT = smry$HOTSPOT) + readr::write_stv(x = maf, path = opt$output) + } diff --git a/vcf_tools/annotateSummaryVcf.mk b/vcf_tools/annotateSummaryVcf.mk index c7739a8d..58233a61 100644 --- a/vcf_tools/annotateSummaryVcf.mk +++ b/vcf_tools/annotateSummaryVcf.mk @@ -4,7 +4,8 @@ include modules/genome_inc/b37.inc LOGDIR ?= log/annotate_smry_maf.$(NOW) annotate_smry_maf : vcf2maf/mutation_summary.vcf \ - vcf2maf/mutation_summary.maf + vcf2maf/mutation_summary.maf \ + vcf2maf/mutation_summary.txt vcf2maf/mutation_summary.vcf : summary/tsv/mutation_summary.tsv $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ @@ -28,9 +29,9 @@ vcf2maf/mutation_summary.maf : vcf2maf/mutation_summary.vcf --maf-center MSKCC && \ $(RM) $(TMPDIR)/mutation_summary.vep.vcf") -vcf2maf/mutation_summary.txt : vcf2maf/mutation_summary.maf +vcf2maf/mutation_summary.txt : summary/tsv/mutation_summary.tsv vcf2maf/mutation_summary.maf $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 2 --input $$(<) --output $$(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/annotateSummaryVcf.R --option 2 --input $(<) --maf $(<<) --output $(@)") ..DUMMY := $(shell mkdir -p version; \ source $(VCF2MAF_ENV)/bin/activate $(VCF2MAF_ENV) && $(VCF2MAF) --man >> version/annotate_smry_maf.txt) From a4b56ccbb774dbe4689b6cae7e8423fc45dcc978 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:56:40 -0400 Subject: [PATCH 199/766] Update annotateSummaryVcf.R --- scripts/annotateSummaryVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R index ed24631f..ea788874 100755 --- a/scripts/annotateSummaryVcf.R +++ b/scripts/annotateSummaryVcf.R @@ -41,7 +41,7 @@ if (as.numeric(opt$option)==1) { Matched_Norm_Sample_UUID = smry$NORMAL_SAMPLE, t_depth = smry$TUMOR_DP, t_ref_count = round((1-smry$TUMOR_MAF) * smry$TUMOR_DP), - t_alt_count = round(TUMOR_MAF*smry$TUMOR_DP), + t_alt_count = round(smry$TUMOR_MAF*smry$TUMOR_DP), n_depth = smry$NORMAL_DP, n_ref_count = round((1-smry$NORMAL_MAF) * smry$NORMAL_DP), n_alt_count = round(smry$NORMAL_MAF*smry$NORMAL_DP), From 1fe6e9b49e04b5f281015810a4b807fe580b5de1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 5 Oct 2022 13:57:43 -0400 Subject: [PATCH 200/766] Update annotateSummaryVcf.R --- scripts/annotateSummaryVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/annotateSummaryVcf.R b/scripts/annotateSummaryVcf.R index ea788874..ce3fc2ca 100755 --- a/scripts/annotateSummaryVcf.R +++ b/scripts/annotateSummaryVcf.R @@ -48,6 +48,6 @@ if (as.numeric(opt$option)==1) { CCF = smry$ccf, LOH = smry$facetsLOHCall, HOTSPOT = smry$HOTSPOT) - readr::write_stv(x = maf, path = opt$output) + readr::write_tsv(x = maf, path = opt$output) } From 7fda5f8740b12237e700f6ebcbfe281741933988 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 8 Oct 2022 12:53:11 -0400 Subject: [PATCH 201/766] ++ --- Makefile | 4 ++++ sv_callers/svabaTN.mk | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 sv_callers/svabaTN.mk diff --git a/Makefile b/Makefile index 56ab0d70..778295b3 100644 --- a/Makefile +++ b/Makefile @@ -282,6 +282,10 @@ manta : TARGETS += mantaTN mantaTN : $(call RUN_MAKE,modules/sv_callers/mantaTN.mk) + +TARGETS += svabaTN +svabaTN : + $(call RUN_MAKE,modules/sv_callers/svabaTN.mk) TARGETS += brass brass : diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk new file mode 100644 index 00000000..ac9c94cc --- /dev/null +++ b/sv_callers/svabaTN.mk @@ -0,0 +1,39 @@ +include modules/Makefile.inc + +LOGDIR = log/svabaTN.$(NOW) + +SVABA_CORES ?= 8 +SVABA_MEM_CORE ?= 6G +SVABA_REF ?= $(REF_FASTA) +SVABA_DBSNP ?= $(HOME)/share/lib/resource_files/svaba/dbsnp_indel.vcf +SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/wgs_blacklist_meres.bed +SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 +SVABA ?= svaba + +svaba : $(foreach pair,$(SAMPLE_PAIRS),svaba/$(pair).svaba.somatic.indel.vcf) + +define svaba-tumor-normal +svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam + $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ + mkdir -p svaba && \ + cd svaba && \ + $$(SVABA) run \ + -t ../bam/$1.bam \ + -n ../bam/$2.bam \ + -p $$(SVABA_CORES) \ + -D $$(SVABA_DBSNP) \ + -L 100000 \ + -x 25000 \ + -k $$(SVABA_BLACKLIST) \ + -a $1_$2 \ + -G $$(SVABA_REF)") +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call svaba-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) + + +..DUMMY := $(shell mkdir -p version; \ + $(SVABA) --help &> version/svabaTN.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: svaba From b1788c6bceec5a7f56a8e157ffd279b71c643a84 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 8 Oct 2022 13:01:36 -0400 Subject: [PATCH 202/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index ac9c94cc..94efcb21 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -6,7 +6,7 @@ SVABA_CORES ?= 8 SVABA_MEM_CORE ?= 6G SVABA_REF ?= $(REF_FASTA) SVABA_DBSNP ?= $(HOME)/share/lib/resource_files/svaba/dbsnp_indel.vcf -SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/wgs_blacklist_meres.bed +SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.bed SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SVABA ?= svaba From e0211476eb5b9a2d20ca33d2688b02b830bcad37 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 8 Oct 2022 14:36:50 -0400 Subject: [PATCH 203/766] Update Makefile --- Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Makefile b/Makefile index 778295b3..4fa6fcca 100644 --- a/Makefile +++ b/Makefile @@ -593,9 +593,5 @@ hotspot_summary: $(MAKE) -f modules/variant_callers/genotypehotspots.mk -j $(NUM_JOBS) $(call RUN_MAKE,modules/summary/hotspotsummary.mk) -#================================================== -# alpha testing -#================================================== - .PHONY : $(TARGETS) From cd5038babfab695cdb5bda4235ef5dffd3f7de1f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 13 Oct 2022 23:41:14 -0400 Subject: [PATCH 204/766] Update mantaTN.mk --- sv_callers/mantaTN.mk | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sv_callers/mantaTN.mk b/sv_callers/mantaTN.mk index 7e5fd54f..abb2a9cc 100644 --- a/sv_callers/mantaTN.mk +++ b/sv_callers/mantaTN.mk @@ -1,21 +1,19 @@ -# run manta on tumour-normal matched pairs - include modules/Makefile.inc include modules/sv_callers/manta.inc -LOGDIR ?= log/manta.$(NOW) -PHONY += manta manta_vcfs - -manta : manta_vcfs +LOGDIR ?= log/mantaTN.$(NOW) -manta_vcfs : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.eff.vcf vcf/$(pair).manta_indels.eff.vcf vcf/$(pair).manta_candidate_sv.eff.vcf) +manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.eff.vcf \ + vcf/$(pair).manta_indels.eff.vcf \ + vcf/$(pair).manta_candidate_sv.eff.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai $$(INIT) $$(CONFIG_MANTA) $$(CONFIG_MANTA_OPTS) --tumorBam $$< --normalBam $$(<<) --runDir $$(@D) manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py - $$(call RUN,-n 8 -s 2G -m 2G,"python $$< -m local -j 8 && touch $$@") + $$(call RUN,-n 8 -s 2G -m 4G,"set -o pipefail && \ + python $$< -m local -j 8 && touch $$@") manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp From d43e71a27ab4c2b9c924ffdc5009f67cc3f2fec7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 00:15:49 -0400 Subject: [PATCH 205/766] ++ --- sv_callers/manta.inc | 8 ++++---- sv_callers/mantaTN.mk | 26 ++++++++++++++++---------- sv_callers/svabaTN.mk | 18 +++++++++++++++++- vcf_tools/vcftools.mk | 3 --- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/sv_callers/manta.inc b/sv_callers/manta.inc index 259fab2a..9def6aad 100644 --- a/sv_callers/manta.inc +++ b/sv_callers/manta.inc @@ -5,10 +5,10 @@ MANTA_HS_CONFIG = modules/sv_callers/manta_hs_config.py.ini MANTA_CONFIG = modules/sv_callers/manta_config.py.ini MANTA_HIGH_SENS ?= false CONFIG_MANTA_OPTS = --referenceFasta $(REF_FASTA) \ - --config $(if $(findstring true,$(MANTA_HIGH_SENS)),\ - $(MANTA_HS_CONFIG),$(MANTA_CONFIG)) \ - $(if $(TARGETS_FILE),--exome) \ - $(if $(MANTA_REGION),--region $(MANTA_REGION)) + --config $(if $(findstring true,$(MANTA_HIGH_SENS)),\ + $(MANTA_HS_CONFIG),$(MANTA_CONFIG)) \ + $(if $(TARGETS_FILE),--exome) \ + $(if $(MANTA_REGION),--region $(MANTA_REGION)) endif MANTA_INC = true diff --git a/sv_callers/mantaTN.mk b/sv_callers/mantaTN.mk index abb2a9cc..9a671826 100644 --- a/sv_callers/mantaTN.mk +++ b/sv_callers/mantaTN.mk @@ -3,9 +3,9 @@ include modules/sv_callers/manta.inc LOGDIR ?= log/mantaTN.$(NOW) -manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.eff.vcf \ - vcf/$(pair).manta_indels.eff.vcf \ - vcf/$(pair).manta_candidate_sv.eff.vcf) +manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ + vcf/$(pair).manta_indels.vcf \ + vcf/$(pair).manta_candidate_sv.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai @@ -21,17 +21,23 @@ manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz : manta/$1_$2.manta_tim manta/$1_$2/results/variants/candidateSV.vcf.gz : manta/$1_$2.manta_timestamp -vcf/$1_$2.manta_indels.vcf : manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz - $$(INIT) zcat $$< > $$@ vcf/$1_$2.manta_sv.vcf : manta/$1_$2/results/variants/somaticSV.vcf.gz $$(INIT) zcat $$< > $$@ -vcf/$1_$2.manta_candidate_sv.vcf : manta/$1_$2/results/variants/candidateSV.vcf.gz +vcf/$1_$2.manta_indels.vcf : manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz $$(INIT) zcat $$< > $$@ -endef -$(foreach pair,$(SAMPLE_PAIRS),$(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) -.PHONY: $(PHONY) +vcf/$1_$2.manta_candidate_sv.vcf : manta/$1_$2/results/variants/candidateSV.vcf.gz + $$(INIT) zcat $$< > $$@ -include modules/vcf_tools/vcftools.mk +endef +$(foreach pair,$(SAMPLE_PAIRS), \ + $(eval $(call manta-tumor-normal, \ + $(tumor.$(pair)),$(normal.$(pair))))) + +..DUMMY := $(shell mkdir -p version; \ + python --version &> version/mantaTN.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: manta diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 94efcb21..0ca46df7 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -10,7 +10,9 @@ SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.be SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SVABA ?= svaba -svaba : $(foreach pair,$(SAMPLE_PAIRS),svaba/$(pair).svaba.somatic.indel.vcf) +svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ + vcf/$(pair).svaba_indels.vcf \ + vcf/$(pair).candidate_sv.vcf) define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam @@ -27,6 +29,20 @@ svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam -k $$(SVABA_BLACKLIST) \ -a $1_$2 \ -G $$(SVABA_REF)") + +svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf + +svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf + +vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf + $$(INIT) zcat $$< > $$@ + +vcf/$1_$2.svaba_indels.vcf : svaba/$1_$2.svaba.somatic.indel.vcf + $$(INIT) zcat $$< > $$@ + +vcf/$1_$2.svaba_candidate_sv.vcf : svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf + $$(INIT) zcat $$< > $$@ + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call svaba-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) diff --git a/vcf_tools/vcftools.mk b/vcf_tools/vcftools.mk index 18656fc6..0bda8c32 100644 --- a/vcf_tools/vcftools.mk +++ b/vcf_tools/vcftools.mk @@ -1,6 +1,3 @@ -# vim: set ft=make : -# sub module containing vcf related tools - ifndef VCFTOOLS_MK include modules/Makefile.inc From 1ac13744293f37139b4af5a052f4b10459efd700 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 10:57:10 -0400 Subject: [PATCH 206/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 0ca46df7..0dc7ffb7 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -12,7 +12,7 @@ SVABA ?= svaba svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ vcf/$(pair).svaba_indels.vcf \ - vcf/$(pair).candidate_sv.vcf) + vcf/$(pair).svaba_candidate_sv.vcf) define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam From 5503a63ba847001eb0647f511169c1947ded06a2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 10:58:08 -0400 Subject: [PATCH 207/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 0dc7ffb7..ae7ef95e 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -16,7 +16,7 @@ svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam - $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ + $$(call RUN,-n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ mkdir -p svaba && \ cd svaba && \ $$(SVABA) run \ From 406441f7fbb619fc333f5b6d627ca1d433beda01 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 10:58:33 -0400 Subject: [PATCH 208/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index ae7ef95e..0dc7ffb7 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -16,7 +16,7 @@ svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam - $$(call RUN,-n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ + $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ mkdir -p svaba && \ cd svaba && \ $$(SVABA) run \ From 6cfac18dc055303870a6db0f714f3ef841de3e83 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 10:59:35 -0400 Subject: [PATCH 209/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 0dc7ffb7..947a3508 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -15,20 +15,20 @@ svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ vcf/$(pair).svaba_candidate_sv.vcf) define svaba-tumor-normal -svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam - $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ - mkdir -p svaba && \ - cd svaba && \ - $$(SVABA) run \ - -t ../bam/$1.bam \ - -n ../bam/$2.bam \ - -p $$(SVABA_CORES) \ - -D $$(SVABA_DBSNP) \ - -L 100000 \ - -x 25000 \ - -k $$(SVABA_BLACKLIST) \ - -a $1_$2 \ - -G $$(SVABA_REF)") +#svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam +# $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ +# mkdir -p svaba && \ +# cd svaba && \ +# $$(SVABA) run \ +# -t ../bam/$1.bam \ +# -n ../bam/$2.bam \ +# -p $$(SVABA_CORES) \ +# -D $$(SVABA_DBSNP) \ +# -L 100000 \ +# -x 25000 \ +# -k $$(SVABA_BLACKLIST) \ +# -a $1_$2 \ +# -G $$(SVABA_REF)") svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf From c4795bd7fb309d5b7af290b4fa4b5ee7691679c4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 11:01:05 -0400 Subject: [PATCH 210/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 947a3508..d54883a6 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -35,13 +35,13 @@ svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf - $$(INIT) zcat $$< > $$@ + $$(INIT) cat $$< > $$@ vcf/$1_$2.svaba_indels.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - $$(INIT) zcat $$< > $$@ + $$(INIT) cat $$< > $$@ vcf/$1_$2.svaba_candidate_sv.vcf : svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf - $$(INIT) zcat $$< > $$@ + $$(INIT) cat $$< > $$@ endef $(foreach pair,$(SAMPLE_PAIRS),\ From bfbe300236f043023ddf6918755af28eca58fdd8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 11:02:32 -0400 Subject: [PATCH 211/766] Update svabaTN.mk --- sv_callers/svabaTN.mk | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index d54883a6..3a8a3820 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -15,20 +15,20 @@ svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ vcf/$(pair).svaba_candidate_sv.vcf) define svaba-tumor-normal -#svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam -# $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ -# mkdir -p svaba && \ -# cd svaba && \ -# $$(SVABA) run \ -# -t ../bam/$1.bam \ -# -n ../bam/$2.bam \ -# -p $$(SVABA_CORES) \ -# -D $$(SVABA_DBSNP) \ -# -L 100000 \ -# -x 25000 \ -# -k $$(SVABA_BLACKLIST) \ -# -a $1_$2 \ -# -G $$(SVABA_REF)") +svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam + $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ + mkdir -p svaba && \ + cd svaba && \ + $$(SVABA) run \ + -t ../bam/$1.bam \ + -n ../bam/$2.bam \ + -p $$(SVABA_CORES) \ + -D $$(SVABA_DBSNP) \ + -L 100000 \ + -x 25000 \ + -k $$(SVABA_BLACKLIST) \ + -a $1_$2 \ + -G $$(SVABA_REF)") svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf From 3007341067198be3e6c2bb76d2c5f8b9d3b7f499 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 11:05:31 -0400 Subject: [PATCH 212/766] ++ --- sv_callers/mantaTN.mk | 8 ++++---- sv_callers/svabaTN.mk | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sv_callers/mantaTN.mk b/sv_callers/mantaTN.mk index 9a671826..f5452814 100644 --- a/sv_callers/mantaTN.mk +++ b/sv_callers/mantaTN.mk @@ -3,9 +3,9 @@ include modules/sv_callers/manta.inc LOGDIR ?= log/mantaTN.$(NOW) -manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ - vcf/$(pair).manta_indels.vcf \ - vcf/$(pair).manta_candidate_sv.vcf) +manta_tn : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ + vcf/$(pair).manta_indels.vcf \ + vcf/$(pair).manta_candidate_sv.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai @@ -40,4 +40,4 @@ $(foreach pair,$(SAMPLE_PAIRS), \ python --version &> version/mantaTN.txt) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: manta +.PHONY: manta_tn diff --git a/sv_callers/svabaTN.mk b/sv_callers/svabaTN.mk index 3a8a3820..819aa64e 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svabaTN.mk @@ -10,9 +10,9 @@ SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.be SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SVABA ?= svaba -svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ - vcf/$(pair).svaba_indels.vcf \ - vcf/$(pair).svaba_candidate_sv.vcf) +svaba_tn : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ + vcf/$(pair).svaba_indels.vcf \ + vcf/$(pair).svaba_candidate_sv.vcf) define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam @@ -52,4 +52,4 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(SVABA) --help &> version/svabaTN.txt) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: svaba +.PHONY: svaba_tn From a4ced1de45b6c984e3f40c3db98c7b128f11621e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 11:08:07 -0400 Subject: [PATCH 213/766] Update mantaTN.mk --- sv_callers/mantaTN.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sv_callers/mantaTN.mk b/sv_callers/mantaTN.mk index f5452814..8939ac72 100644 --- a/sv_callers/mantaTN.mk +++ b/sv_callers/mantaTN.mk @@ -33,8 +33,7 @@ vcf/$1_$2.manta_candidate_sv.vcf : manta/$1_$2/results/variants/candidateSV.vcf. endef $(foreach pair,$(SAMPLE_PAIRS), \ - $(eval $(call manta-tumor-normal, \ - $(tumor.$(pair)),$(normal.$(pair))))) + $(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) ..DUMMY := $(shell mkdir -p version; \ python --version &> version/mantaTN.txt) From b234b15ac9f353d7ee89dc442220544cb484d1fe Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 11:28:21 -0400 Subject: [PATCH 214/766] merge sv --- Makefile | 3 +++ vcf_tools/merge_sv.mk | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 vcf_tools/merge_sv.mk diff --git a/Makefile b/Makefile index 4fa6fcca..3aa28700 100644 --- a/Makefile +++ b/Makefile @@ -593,5 +593,8 @@ hotspot_summary: $(MAKE) -f modules/variant_callers/genotypehotspots.mk -j $(NUM_JOBS) $(call RUN_MAKE,modules/summary/hotspotsummary.mk) +TARGETS += merge_sv +merge_sv: + $(call RUN_MAKE,modules/vcf_tools/merge_sv.mk) .PHONY : $(TARGETS) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk new file mode 100644 index 00000000..c8c00764 --- /dev/null +++ b/vcf_tools/merge_sv.mk @@ -0,0 +1,25 @@ +include modules/Makefile.inc + +LOGDIR ?= log/merge_sv.$(NOW) + +SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 +SV_CALLERS = svaba manta + +merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) +# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) +# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_indels.vcf) \ +# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) + +define merge-sv +merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) + for i in $(SV_CALLERS); do \ + echo vcf/$1_$2.$i_sv.vcf >> $@; + done + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) + +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: merge_sv From 2a2103d87ba00fd0788a47e75447dfc03ae19046 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:04:45 -0400 Subject: [PATCH 215/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index c8c00764..886ce8d2 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $(SV_CALLERS); do \ + $(INIT) for i in $$(SV_CALLERS); do \ echo vcf/$1_$2.$i_sv.vcf >> $@; done From c35411476c9ddfd9e217d93a1a27ed4ec91d651b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:05:44 -0400 Subject: [PATCH 216/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 886ce8d2..95fbdab9 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -13,7 +13,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) $(INIT) for i in $$(SV_CALLERS); do \ - echo vcf/$1_$2.$i_sv.vcf >> $@; + echo vcf/$1_$2.$$i_sv.vcf >> $@; done endef From d842f78b3fecd4ab854311a823eaed14c988442c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:07:51 -0400 Subject: [PATCH 217/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 95fbdab9..b48eeaa6 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,9 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $(INIT) for i in $$(SV_CALLERS); do \ - echo vcf/$1_$2.$$i_sv.vcf >> $@; - done + $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From a4f0c37f262a78ba01ddc6bc28529953f3bafdab Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:08:10 -0400 Subject: [PATCH 218/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index b48eeaa6..14c72b07 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $(@); done + $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 7fa51d91ec6dfbf20bb1ebae172366a5db559cbb Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:10:35 -0400 Subject: [PATCH 219/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 14c72b07..6ac021f5 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $$(@); done + $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$(i)_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From ab74eababfdbd332ebef77bc2c4168b1d87aa390 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:15:39 -0400 Subject: [PATCH 220/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 6ac021f5..561a7a47 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $(INIT) for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$(i)_sv.vcf >> $$(@); done + for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$(i)_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 90f056128206f6e30b089dc781be0f9a70d2b5bc Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:16:12 -0400 Subject: [PATCH 221/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 561a7a47..439ae12e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $$(SV_CALLERS); do echo vcf/$1_$2.$$(i)_sv.vcf >> $$(@); done + for i in $$(SV_CALLERS); do echo vcf/$1_$2.${i}_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 93210a4bafcca6616cb3be090476a24aa667f9ba Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:16:58 -0400 Subject: [PATCH 222/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 439ae12e..d86e971c 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $$(SV_CALLERS); do echo vcf/$1_$2.${i}_sv.vcf >> $$(@); done + for i in $$(SV_CALLERS); do echo vcf/$1_$2.$${i}_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 82932ea6ab02ff3b28eef8d10028305877bf7f35 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:17:42 -0400 Subject: [PATCH 223/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index d86e971c..8d023bfa 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $$(SV_CALLERS); do echo vcf/$1_$2.$${i}_sv.vcf >> $$(@); done + for i in $$(SV_CALLERS); do (echo vcf/$1_$2.$$i_sv.vcf) >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 9483563b48d8857578e5f6b4eccbb895e8e01b78 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:18:08 -0400 Subject: [PATCH 224/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 8d023bfa..f0bc774e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $$(SV_CALLERS); do (echo vcf/$1_$2.$$i_sv.vcf) >> $$(@); done + for i in $$(SV_CALLERS); do echo "vcf/$1_$2.$$i_sv.vcf" >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From be355da24f32eed57909195e7285fa045fb53d2a Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:18:50 -0400 Subject: [PATCH 225/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index f0bc774e..4c1e418f 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $$(SV_CALLERS); do echo "vcf/$1_$2.$$i_sv.vcf" >> $$(@); done + for i in $(SV_CALLERS); do echo vcf/$1_$2.$i_sv.vcf >> $$(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 85b357ac40612754a485d9cfabc15eca876550e9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:23:18 -0400 Subject: [PATCH 226/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 4c1e418f..d3459690 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $(SV_CALLERS); do echo vcf/$1_$2.$i_sv.vcf >> $$(@); done + for i in $(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ From 714823b7e6142dff61f2c2e583eda3c0e64c76b1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:26:34 -0400 Subject: [PATCH 227/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index d3459690..84bf31bd 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -11,12 +11,13 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) define merge-sv -merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - for i in $(SV_CALLERS); do echo vcf/$1_$2.$$i_sv.vcf >> $(@); done +merge_sv/$1_$2/sample_list.txt : vcf/$1_$2.$3_sv.vcf + echo vcf/$1_$2.$3_sv.vcf >> $(@); done endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) + $(foreach caller,$(SV_CALLERS), \ + $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair)),$(caller))))) .DELETE_ON_ERROR: .SECONDARY: From 5a3821f767b5156a9e30a81c35d373fc0a9e6b4f Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:29:15 -0400 Subject: [PATCH 228/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 84bf31bd..446d4399 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -11,13 +11,12 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) define merge-sv -merge_sv/$1_$2/sample_list.txt : vcf/$1_$2.$3_sv.vcf - echo vcf/$1_$2.$3_sv.vcf >> $(@); done +merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) + echo vcf/$1_$2.$(SV_CALLERS)_sv.vcf >> $(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(foreach caller,$(SV_CALLERS), \ - $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair)),$(caller))))) + $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) .DELETE_ON_ERROR: .SECONDARY: From 4d3d375232de5b91a9158945816650bf6bef7795 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:35:42 -0400 Subject: [PATCH 229/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 446d4399..681cfb11 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - echo vcf/$1_$2.$(SV_CALLERS)_sv.vcf >> $(@) + echo vcf/$1_$2.$(SV_CALLERS)[0]_sv.vcf >> $(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From 0bc4039f6c337fb75d2126dad981fb2a15bebaf9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:41:34 -0400 Subject: [PATCH 230/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 681cfb11..0ba8be9e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,7 +12,8 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - echo vcf/$1_$2.$(SV_CALLERS)[0]_sv.vcf >> $(@) + echo vcf/$1_$2.svaba_sv.vcf >> $(@) + echo vcf/$1_$2.manta_sv.vcf >> $(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From 98176f4bd79c7b700aed9657b7a0d6bfefdb023f Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:42:18 -0400 Subject: [PATCH 231/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 0ba8be9e..9323b60e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,8 +12,8 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) define merge-sv merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - echo vcf/$1_$2.svaba_sv.vcf >> $(@) - echo vcf/$1_$2.manta_sv.vcf >> $(@) + echo vcf/$1_$2.svaba_sv.vcf >> $$(@) + echo vcf/$1_$2.manta_sv.vcf >> $$(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From c5c9bc429d41a6d98ae5ee2c8708cf79bc5e8ba3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:49:03 -0400 Subject: [PATCH 232/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 9323b60e..8931cfe3 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -5,16 +5,21 @@ LOGDIR ?= log/merge_sv.$(NOW) SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 SV_CALLERS = svaba manta -merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) -# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) +merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_indels.vcf) \ # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) define merge-sv -merge_sv/$1_$2/sample_list.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - echo vcf/$1_$2.svaba_sv.vcf >> $$(@) +merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) + echo vcf/$1_$2.svaba_sv.vcf > $$(@) echo vcf/$1_$2.manta_sv.vcf >> $$(@) +vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR merge $$(<) \ + 500 2 1 1 0 30 $$(@)") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 011081b9a569c771c90202304490d3d2c2235ee7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:50:07 -0400 Subject: [PATCH 233/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 8931cfe3..4880e5af 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,8 +12,8 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) \ define merge-sv merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - echo vcf/$1_$2.svaba_sv.vcf > $$(@) - echo vcf/$1_$2.manta_sv.vcf >> $$(@) + $(INIT) echo vcf/$1_$2.svaba_sv.vcf > $$(@) + $(INIT) echo vcf/$1_$2.manta_sv.vcf >> $$(@) vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ From 7b3baf827a32b3e8b6fe80fcdc10b91532ed24da Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:50:32 -0400 Subject: [PATCH 234/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 4880e5af..b2ae0ad2 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -5,15 +5,15 @@ LOGDIR ?= log/merge_sv.$(NOW) SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 SV_CALLERS = svaba manta -merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list.txt) \ +merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_sv.txt) \ $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_indels.vcf) \ # $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) define merge-sv merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $(INIT) echo vcf/$1_$2.svaba_sv.vcf > $$(@) - $(INIT) echo vcf/$1_$2.manta_sv.vcf >> $$(@) + echo vcf/$1_$2.svaba_sv.vcf > $$(@) + echo vcf/$1_$2.manta_sv.vcf >> $$(@) vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ From 84b30e5d6882914d2eeb5cf1b71c0992569940ed Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:52:08 -0400 Subject: [PATCH 235/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index b2ae0ad2..78887228 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -12,6 +12,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_sv.txt) define merge-sv merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) + mkdir -p merge_sv/$1_$2 echo vcf/$1_$2.svaba_sv.vcf > $$(@) echo vcf/$1_$2.manta_sv.vcf >> $$(@) From 8d4a14036901a8a760223088f7d340c89b85698b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 12:59:29 -0400 Subject: [PATCH 236/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 78887228..5037673b 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -19,7 +19,7 @@ merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(c vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ SURVIVOR merge $$(<) \ - 500 2 1 1 0 30 $$(@)") + 500 1 1 1 0 30 $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 8ab18096183abc8a756381aa48d355e19993f64d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 14 Oct 2022 13:12:18 -0400 Subject: [PATCH 237/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 5037673b..b2d59473 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,11 +4,16 @@ LOGDIR ?= log/merge_sv.$(NOW) SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 SV_CALLERS = svaba manta +MAX_DIST = 500 +NUM_CALLERS = 1 +TYPE = 1 +STRAND = 1 +MIN_SIZE = 30 merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_sv.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) -# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_indels.vcf) \ -# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_candidate_sv.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) define merge-sv merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) @@ -19,8 +24,19 @@ merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(c vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ SURVIVOR merge $$(<) \ - 500 1 1 1 0 30 $$(@)") + $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") + +merge_sv/$1_$2/sample_list_candidate_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_candidate_sv.vcf) + mkdir -p merge_sv/$1_$2 + echo vcf/$1_$2.svaba_candidate_sv.vcf > $$(@) + echo vcf/$1_$2.manta_candidate_sv.vcf >> $$(@) +vcf/$1_$2.merged_candidate_sv.vcf : merge_sv/$1_$2/sample_list_candidate_sv.txt + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR merge $$(<) \ + $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") + + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 348d48fb59332864266721ff8f9d59b369ae3b94 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 15 Oct 2022 16:02:23 -0400 Subject: [PATCH 238/766] Kallisto --- Makefile | 4 ++++ config.inc | 3 +++ rnaseq/kallisto.mk | 48 ++++++++++++++++++++++++++++++++++++++ scripts/summarize_sleuth.R | 34 +++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) create mode 100644 rnaseq/kallisto.mk create mode 100644 scripts/summarize_sleuth.R diff --git a/Makefile b/Makefile index 3aa28700..b896cd59 100644 --- a/Makefile +++ b/Makefile @@ -448,6 +448,10 @@ TARGETS += exon_counts exon_counts : $(call RUN_MAKE,modules/rnaseq/dexseq.mk) +TARGETS += kallisto +kallisto : + $(call RUN_MAKE,modules/rnaseq/kallisto.mk) + #================================================== # chip sequencing diff --git a/config.inc b/config.inc index 19f67122..6ea4d1d6 100644 --- a/config.inc +++ b/config.inc @@ -23,6 +23,8 @@ VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86 ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1 +PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6 +KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 JARDIR ?= $(HOME)/share/usr/lib/java @@ -37,6 +39,7 @@ SAMTOOLS2 ?= samtools VCFUTILS ?= $(HOME)/share/usr/bin/vcfutils.pl BCFTOOLS2 ?= bcftools BCFTOOLS ?= bcftools +PIGZ ?= pigz BEDTOOLS ?= $(HOME)/share/usr/bin/bedtools BGZIP ?= $(HOME)/share/usr/bin/bgzip IGVTOOLS ?= $(HOME)/share/usr/IGVTools/igvtools diff --git a/rnaseq/kallisto.mk b/rnaseq/kallisto.mk new file mode 100644 index 00000000..5007c13a --- /dev/null +++ b/rnaseq/kallisto.mk @@ -0,0 +1,48 @@ +include modules/Makefile.inc + +LOGDIR = log/kallisto.$(NOW) + +kallisto : $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R1.fastq.gz) \ + $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R2.fastq.gz) \ + $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) \ + kallisto/tpm_bygene.txt + +SLEUTH_ANNOT ?= $(HOME)/share/lib/resource_files/Hugo_ENST_ensembl75_fixed.txt +KALLISTO_INDEX ?= $(HOME)/share/lib/ref_files/b37/ensembl_v75-0.43.0_kallisto_index + +define merge-fastq +kallisto/$1/$1_R1.fastq.gz : $$(foreach split,$2,$$(word 1, $$(fq.$$(split)))) + $$(call RUN,-c -n 12 -s 0.5G -m 1G -w 24:00:00 -v $(PIGZ_ENV),"set -o pipefail && \ + $$(PIGZ) -cd -p 12 $$(^) | $$(PIGZ) -c -p 12 > $$(@)") + +kallisto/$1/$1_R2.fastq.gz : $$(foreach split,$2,$$(word 2, $$(fq.$$(split)))) + $$(call RUN,-c -n 12 -s 0.5G -m 1G -w 24:00:00 -v $(PIGZ_ENV),"set -o pipefail && \ + $$(PIGZ) -cd -p 12 $$(^) | $$(PIGZ) -c -p 12 > $$(@)") +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call merge-fastq,$(sample),$(split.$(sample))))) + +define fastq-to-kallisto +kallisto/$1/abundance.tsv : kallisto/$1/$1_R1.fastq.gz kallisto/$1/$1_R2.fastq.gz + $$(call RUN,-c -n 12 -s 2G -m 3G -v $(KALLISTO_ENV),"set -o pipefail && \ + kallisto quant \ + -i $$(KALLISTO_INDEX) \ + -o kallisto/$1 \ + --bias -b 100 -t 12\ + --fusion $$(<) $$(<<)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call fastq-to-kallisto,$(sample)))) + +kallisto/tpm_bygene.txt : $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) + $(call RUN, -c -n 24 -s 1G -m 2G -v $(KALLISTO_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/summarize_sleuth.R --annotation $(SLEUTH_ANNOT) --samples '$(SAMPLES)'") + +..DUMMY := $(shell mkdir -p version; \ + $(SAMTOOLS) --version > version/kallisto.txt; \ + ~/share/usr/env/kallisto-0.46.2/bin/kallisto version >> version/kallisto.txt; \ + ~/share/usr/env/kallisto-0.46.2/bin/R --version >> version/kallisto.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: kallisto diff --git a/scripts/summarize_sleuth.R b/scripts/summarize_sleuth.R new file mode 100644 index 00000000..58985f3f --- /dev/null +++ b/scripts/summarize_sleuth.R @@ -0,0 +1,34 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("sleuth")) + + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option('--annotation', type = 'character', default = NA, help = 'path to annotation file'), + make_option('--samples', type = 'character', default = NA, help = 'list of samples names')) +parser = OptionParser(usage = "%prog", option_list=optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +sample_names = unlist(strsplit(x=opt$samples, split=" ", fixed=TRUE)) +annotation = readr::read_tsv(file=opt$annotation, col_names=TRUE, col_types=cols(.default=col_character())) +manifest = dplyr::tibble(sample = sample_names, + condition = rep(1, length(sample_names)), + path = paste0("kallisto/", sample_names)) +data = sleuth::sleuth_prep(sample_to_covariates = manifest, + extra_bootstrap_summary = TRUE, + read_bootstrap_tpm = TRUE, + target_mapping = annotation, + aggregation_column = "hugo", + gene_mode = TRUE) +res = as.data.frame(sleuth_to_matrix(data, "obs_norm", "tpm")) +tpm_bygene = dplyr::tibble(gene_symbol = rownames(res)) %>% + dplyr::bind_cols(dplyr::as_tibble(res)) +write_tsv(x=tpm_bygene, path="kallisto/tpm_bygene.txt", append=FALSE, col_names=TRUE, quote_escape=FALSE) From 1e30b530f30bfcd2ef630d3aa0ac78275741bd30 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 15 Oct 2022 17:00:27 -0400 Subject: [PATCH 239/766] Immune deconvolution --- Makefile | 4 +++ config.inc | 1 + rnaseq/immunedeconv.mk | 25 ++++++++++++++++++ scripts/immunedeconv.R | 57 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 rnaseq/immunedeconv.mk create mode 100644 scripts/immunedeconv.R diff --git a/Makefile b/Makefile index b896cd59..2ee872e7 100644 --- a/Makefile +++ b/Makefile @@ -452,6 +452,10 @@ TARGETS += kallisto kallisto : $(call RUN_MAKE,modules/rnaseq/kallisto.mk) +TARGETS += immunedeconv +immunedeconv : + $(call RUN_MAKE,modules/rnaseq/immunedeconv.mk) + #================================================== # chip sequencing diff --git a/config.inc b/config.inc index 6ea4d1d6..f590f0b7 100644 --- a/config.inc +++ b/config.inc @@ -25,6 +25,7 @@ MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1 PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6 KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 +IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/rnaseq/immunedeconv.mk b/rnaseq/immunedeconv.mk new file mode 100644 index 00000000..df387f53 --- /dev/null +++ b/rnaseq/immunedeconv.mk @@ -0,0 +1,25 @@ +include modules/Makefile.inc + +LOGDIR = log/immunedeconv.$(NOW) + +immunedeconv : immunedeconv/quantiseq.txt \ + immunedeconv/mcpcounter.txt \ + immunedeconv/cibersort.txt + +immunedeconv/quantiseq.txt : kallisto/tpm_bygene.txt + $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 1 --input_file $$(<) --output_file $$(@)") + +immunedeconv/mcpcounter.txt : kallisto/tpm_bygene.txt + $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 2 --input_file $$(<) --output_file $$(@)") + +immunedeconv/cibersort.txt : kallisto/tpm_bygene.txt + $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 3 --input_file $$(<) --output_file $$(@)") + +..DUMMY := $(shell mkdir -p version; \ + ~/share/usr/env/r-immunedeconv-2.1.0/bin/R --version >> version/immunedeconv.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: kallisto diff --git a/scripts/immunedeconv.R b/scripts/immunedeconv.R new file mode 100644 index 00000000..4633af65 --- /dev/null +++ b/scripts/immunedeconv.R @@ -0,0 +1,57 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("immunedeconv")) + + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option('--option', type = 'character', default = NA, help = 'Immune deconv algorithm'), + make_option('--input_file', type = 'character', default = NA, help = 'Expression input file'), + make_option('--output_file', type = 'character', default = NA, help = 'Immune cell output file')) +parser = OptionParser(usage = "%prog", option_list=optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +set_cibersort_binary("~/share/usr/lib/resource_files/CIBERSORT/CIBERSORT.R") +set_cibersort_mat("~/share/usr/lib/resource_files/CIBERSORT/LM22.txt") + +if (as.numeric(opt$option)==1) { + tpm_by_gene = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::arrange(gene_symbol) + gene_expr = tpm_by_gene %>% + dplyr::select(-gene_symbol) %>% + as.matrix() + rownames(gene_expr) = tpm_by_gene %>% .[["gene_symbol"]] + quantiseq = immunedeconv::deconvolute(gene_expression = gene_expr, method = "quantiseq", scale_mrna = FALSE) + readr::write_tsv(x = quantiseq, file = opt$output_file, col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==2) { + tpm_by_gene = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::arrange(gene_symbol) + gene_expr = tpm_by_gene %>% + dplyr::select(-gene_symbol) %>% + as.matrix() + rownames(gene_expr) = tpm_by_gene %>% .[["gene_symbol"]] + mcpcounter = immunedeconv::deconvolute(gene_expression = gene_expr, method = "mcp_counter", scale_mrna = FALSE) + readr::write_tsv(x = mcpcounter, file = opt$output_file, col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==3) { + tpm_by_gene = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::arrange(gene_symbol) + gene_expr = tpm_by_gene %>% + dplyr::select(-gene_symbol) %>% + as.matrix() + rownames(gene_expr) = tpm_by_gene %>% .[["gene_symbol"]] + cibersort = immunedeconv::deconvolute(gene_expression = gene_expr, method = "cibersort_abs", scale_mrna = FALSE) + readr::write_tsv(x = cibersort, file = opt$output_file, col_names = TRUE, append = FALSE) + +} From c82ef7962e12fb33187ec402db7075f4ac6d85bc Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 15 Oct 2022 17:53:13 -0400 Subject: [PATCH 240/766] Update immunedeconv.mk --- rnaseq/immunedeconv.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rnaseq/immunedeconv.mk b/rnaseq/immunedeconv.mk index df387f53..ac2bcbd8 100644 --- a/rnaseq/immunedeconv.mk +++ b/rnaseq/immunedeconv.mk @@ -8,15 +8,15 @@ immunedeconv : immunedeconv/quantiseq.txt \ immunedeconv/quantiseq.txt : kallisto/tpm_bygene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 1 --input_file $$(<) --output_file $$(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 1 --input_file $(<) --output_file $(@)") immunedeconv/mcpcounter.txt : kallisto/tpm_bygene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 2 --input_file $$(<) --output_file $$(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 2 --input_file $(<) --output_file $(@)") immunedeconv/cibersort.txt : kallisto/tpm_bygene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 3 --input_file $$(<) --output_file $$(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 3 --input_file $(<) --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ ~/share/usr/env/r-immunedeconv-2.1.0/bin/R --version >> version/immunedeconv.txt;) From 26404aa360ad54de5ba51b491d1bec7b1a1b59eb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 15 Oct 2022 17:57:40 -0400 Subject: [PATCH 241/766] Update immunedeconv.R --- scripts/immunedeconv.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/immunedeconv.R b/scripts/immunedeconv.R index 4633af65..b5497b5f 100644 --- a/scripts/immunedeconv.R +++ b/scripts/immunedeconv.R @@ -18,8 +18,8 @@ parser = OptionParser(usage = "%prog", option_list=optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options -set_cibersort_binary("~/share/usr/lib/resource_files/CIBERSORT/CIBERSORT.R") -set_cibersort_mat("~/share/usr/lib/resource_files/CIBERSORT/LM22.txt") +set_cibersort_binary("~/share/lib/resource_files/CIBERSORT/CIBERSORT.R") +set_cibersort_mat("~/share/lib/resource_files/CIBERSORT/LM22.txt") if (as.numeric(opt$option)==1) { tpm_by_gene = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% From b2abcdb5f53a1408b8c820e269e7c5f39b963ad6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 23 Oct 2022 21:05:16 -0400 Subject: [PATCH 242/766] Update Makefile --- Makefile | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 2ee872e7..66bfbf6a 100644 --- a/Makefile +++ b/Makefile @@ -260,7 +260,7 @@ cnv_kit : #================================================== -# structural variant callers +# RNAseq structural variant callers #================================================== TARGETS += star_fusion @@ -274,6 +274,31 @@ tophat_fusion : TARGETS += manta_rnaseq manta_rnaseq : $(call RUN_MAKE,modules/sv_callers/mantaRnaseq.mk) + +TARGETS += integrate_rnaseq +integrate_rnaseq : + $(call RUN_MAKE,modules/sv_callers/integrateRnaseq.mk) + +TARGETS += soapfuse +soapfuse : + $(call RUN_MAKE,modules/sv_callers/soapFuse.mk) + +TARGETS += mapsplice +mapsplice : + $(call RUN_MAKE,modules/sv_callers/mapsplice.mk) + +TARGETS += fusioncatcher +fusioncatcher : + $(call RUN_MAKE,modules/sv_callers/fusioncatcher.mk) + +TARGETS += oncofuse +oncofuse : + $(call RUN_MAKE,modules/sv_callers/oncofuse.mk) + + +#================================================== +# RNAseq structural variant callers +#================================================== TARGETS += manta manta : @@ -291,10 +316,6 @@ TARGETS += brass brass : $(call RUN_MAKE,modules/sv_callers/brass.mk) -TARGETS += integrate_rnaseq -integrate_rnaseq : - $(call RUN_MAKE,modules/sv_callers/integrateRnaseq.mk) - TARGETS += integrate integrate : $(call RUN_MAKE,modules/sv_callers/integrate.mk) @@ -308,10 +329,6 @@ TARGETS += chimscan chimscan : $(call RUN_MAKE_J,modules/sv_callers/chimerascan.mk,$(NUM_CHIMSCAN_JOBS)) -TARGETS += oncofuse -oncofuse : - $(call RUN_MAKE,modules/sv_callers/oncofuse.mk) - TARGETS += lumpy lumpy : $(call RUN_MAKE,modules/sv_callers/lumpy.mk) @@ -324,18 +341,6 @@ TARGETS += nfuse_wgss_wtss nfuse_wgss_wtss : $(call RUN_MAKE,modules/sv_callers/nfuseWGSSWTSS.mk) -TARGETS += soapfuse -soapfuse : - $(call RUN_MAKE,modules/sv_callers/soapFuse.mk) - -TARGETS += mapsplice -mapsplice : - $(call RUN_MAKE,modules/sv_callers/mapsplice.mk) - -TARGETS += fusioncatcher -fusioncatcher : - $(call RUN_MAKE,modules/sv_callers/fusioncatcher.mk) - TARGETS += crest crest : $(call RUN_MAKE,modules/sv_callers/crest.mk) From d084ed6844842279f2ac4eda5afde72d0b63d0a2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 23 Oct 2022 21:06:54 -0400 Subject: [PATCH 243/766] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 66bfbf6a..0f9b5595 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ oncofuse : #================================================== -# RNAseq structural variant callers +# DNA structural variant callers #================================================== TARGETS += manta From 8bdcd90bbe3403ed394f700a7b49c9160537e7a7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 24 Oct 2022 22:34:14 -0400 Subject: [PATCH 244/766] ++ --- Makefile | 25 +--- config.inc | 5 + rnaseq/cufflinks.mk | 75 ---------- rnaseq/deseq.Rnw | 138 ------------------ rnaseq/deseq.mk | 23 --- rnaseq/dexseq.mk | 18 --- rnaseq/immunedeconv.mk | 8 +- rnaseq/kallisto.mk | 4 +- rnaseq/rpkm.mk | 7 - rnaseq/sumRNASeqReads.mk | 44 ------ rnaseq/sumreads.mk | 41 ++++++ .../summarize_rnaseqreads.R | 0 .../summarize_rnaseqreads_byexon.R | 0 .../summarize_rnaseqreads_byintron.R | 0 scripts/summarize_sleuth.R | 2 +- variant_callers/somatic/macs2TN.mk | 23 --- 16 files changed, 57 insertions(+), 356 deletions(-) delete mode 100644 rnaseq/cufflinks.mk delete mode 100644 rnaseq/deseq.Rnw delete mode 100644 rnaseq/deseq.mk delete mode 100644 rnaseq/dexseq.mk delete mode 100644 rnaseq/rpkm.mk delete mode 100644 rnaseq/sumRNASeqReads.mk create mode 100644 rnaseq/sumreads.mk rename rnaseq/summarizeRNASeqReads.R => scripts/summarize_rnaseqreads.R (100%) rename rnaseq/summarizeRNASeqReadsByExon.R => scripts/summarize_rnaseqreads_byexon.R (100%) rename rnaseq/summarizeRNASeqReadsByIntron.R => scripts/summarize_rnaseqreads_byintron.R (100%) delete mode 100644 variant_callers/somatic/macs2TN.mk diff --git a/Makefile b/Makefile index 0f9b5595..9ee003a2 100644 --- a/Makefile +++ b/Makefile @@ -438,39 +438,22 @@ bam_stats : #================================================== -# rna sequencing +# RNA sequencing #================================================== -TARGETS += cufflinks -cufflinks : - $(call RUN_MAKE,modules/rnaseq/cufflinks.mk) - TARGETS += sum_reads sum_reads : - $(call RUN_MAKE,modules/rnaseq/sumRNASeqReads.mk) + $(call RUN_MAKE,modules/rnaseq/sumreads.mk) -TARGETS += exon_counts -exon_counts : - $(call RUN_MAKE,modules/rnaseq/dexseq.mk) - TARGETS += kallisto kallisto : $(call RUN_MAKE,modules/rnaseq/kallisto.mk) -TARGETS += immunedeconv -immunedeconv : +TARGETS += immune_deconv +immune_deconv : $(call RUN_MAKE,modules/rnaseq/immunedeconv.mk) -#================================================== -# chip sequencing -#================================================== - -TARGETS += macs2TN -macs2TN: - $(call RUN_MAKE,modules/variant_callers/somatic/macs2TN.mk) - - #================================================== # ploidy #================================================== diff --git a/config.inc b/config.inc index f590f0b7..9c1bfc6a 100644 --- a/config.inc +++ b/config.inc @@ -26,6 +26,7 @@ INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1 PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6 KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0 +SUMREADS_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 JARDIR ?= $(HOME)/share/usr/lib/java @@ -73,6 +74,10 @@ INTRON_POSN_LOOKUP ?= $(SCRIPTS_DIR)/posnGeneLookup.pl RBIND ?= $(SCRIPTS_DIR)/rbind.R NORMAL_FILTER ?= $(PERL) $(SCRIPTS_DIR)/normalFilterVCF.pl SOMATIC_FILTER_VCF ?= $(PERL) $(SCRIPTS_DIR)/somaticFilterVCF.pl +SUM_READS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads.R +SUM_EXONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byexon.R +SUM_INTRONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byintron.R + JAVA_BIN ?= $(JAVA8_BIN) JAVA6_BIN ?= $(HOME)/share/usr/jdk1.6.0_45/bin/java diff --git a/rnaseq/cufflinks.mk b/rnaseq/cufflinks.mk deleted file mode 100644 index 54e294fe..00000000 --- a/rnaseq/cufflinks.mk +++ /dev/null @@ -1,75 +0,0 @@ -# This module is used for running cufflinks -# input: $(SAMPLES) -# Options: BAM_PHRED64 = true/false -# Authors: Fong Chun Chan -# -include modules/Makefile.inc - -LOGDIR = log/cufflinks.$(NOW) - - -NUM_CORES ?= 8 -CUFFLINKS = $(HOME)/share/usr/bin/cufflinks -CUFFLINKS_OPTS = -b $(REF_FASTA) -u -g $(GENES_GTF) -p $(NUM_CORES) -u --no-update-check -v -CUFFCOMPARE = $(HOME)/share/usr/bin/cuffcompare -CUFFCOMPARE_OPTS = --no-update-check -CUFFMERGE = $(HOME)/share/usr/bin/cuffmerge -CUFFMERGE_OPTS = --no-update-check -CUFFDIFF = $(HOME)/share/usr/bin/cuffdiff -CUFFDIFF_OPTS = --no-update-check -v -CUFFQUANT = $(HOME)/share/usr/bin/cuffquant -CUFFQUANT_OPTS = --no-update-check -v -CUFFNORM = $(HOME)/share/usr/bin/cuffnorm -CUFFNORM_OPTS = --no-update-check -v -CUFFCOMPARE_OPTS = --no-update-check -s $(REF_FASTA) -r $(GENES_GTF) -V -v - -PHENO_FILE ?= pheno.txt -ifneq ($(wildcard $(PHENO_FILE)),) - A = $(shell sed '1d' $(PHENO_FILE) | cut -f1) - B = $(shell sed '1d' $(PHENO_FILE) | cut -f2) - $(foreach i,$(shell seq 1 $(words $(A))),$(eval pheno.$(word $i,$(B)) += $(word $i,$(A)))) - PHENOTYPES = $(shell sed '1d' $(PHENO_FILE) | cut -f2 | sort | uniq) -endif - -..DUMMY := $(shell mkdir -p version; $(CUFFLINKS) &> version/tophat.txt; echo "options: $(CUFFLINKS_OPTS)" >> version/cufflinks.txt) -.SECONDARY: -.DELETE_ON_ERROR: -.PHONY : all_cufflinks cufflinks cuffcmp cuffmerge cuffdiff cuffnorm - -all_cufflinks : cufflinks cuffcmp cuffmerge cuffdiff cuffnorm -cufflinks : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf) -cuffcmp : cufflinks/cuffcmp/cc.stats -cuffmerge : cufflinks/gtf/merged.gtf -cuffdiff : cufflinks/cuffdiff/gene_exp.diff -cuffnorm : cufflinks/cuffnorm/gene_exp.txt - -cufflinks/gtf/%.transcripts.gtf cufflinks/fpkm_tracking/%.isoforms.fpkm_tracking cufflinks/fpkm_tracking/%.genes.fpkm_tracking : bam/%.bam - $(call RUN,-n $(NUM_CORES) -s 2G -m 4G,"${CUFFLINKS} ${CUFFLINKS_OPTS} -o cufflinks/$* $< && \ - mkdir -p cufflinks/gtf cufflinks/fpkm_tracking && \ - ln cufflinks/$*/transcripts.gtf cufflinks/gtf/$*.transcripts.gtf && \ - ln cufflinks/$*/isoforms.fpkm_tracking cufflinks/fpkm_tracking/$*.isoforms.fpkm_tracking && \ - ln cufflinks/$*/genes.fpkm_tracking cufflinks/fpkm_tracking/$*.genes.fpkm_tracking") - -cufflinks/cuffcmp/cc.stats : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf) - $(call RUN,-s 10G -m 20G,"$(CUFFCOMPARE) $(CUFFCOMPARE_OPTS) -o $(@:.stats=) $^") - -cufflinks/assembly_list.txt : $(foreach sample,$(SAMPLES),cufflinks/gtf/$(sample).transcripts.gtf) - $(INIT) echo "$^" | tr ' ' '\n' > $@ - -cufflinks/gtf/merged.gtf : cufflinks/assembly_list.txt - $(call RUN,-n 8 -s 1G -m 2.5G,"$(CUFFMERGE) $(CUFFMERGE_OPTS) -o $(@D) -g $(GENES_GTF) -p 8 $<") - -cufflinks/cxb/%.cxb : cufflinks/gtf/merged.gtf bam/%.bam - $(call RUN,-n 4 -s 1G -m 2.5G,"mkdir -p cufflinks/$* && \ - $(CUFFQUANT) $(CUFFQUANT_OPTS) -o cufflinks/$* -b $(REF_FASTA) -p 4 $^ && \ - ln cufflinks/$*/abundances.cxb $@") - -cufflinks/cuffdiff/gene_exp.diff : cufflinks/gtf/merged.gtf $(foreach sample,$(SAMPLES),cufflinks/cxb/$(sample).cxb) - $(call RUN,-n 8 -s 1G -m 4G,"$(CUFFDIFF) $(CUFFDIFF_OPTS) -o $(@D) -p 8 $< \ - $(foreach pheno,$(PHENOTYPES),$(subst $( ),$(,),$(foreach s,$(pheno.$(pheno)),cufflinks/cxb/$s.cxb))) \ - -L $(subst $( ),$(,),$(PHENOTYPES))") - -cufflinks/cuffnorm/gene_exp.txt : cufflinks/gtf/merged.gtf $(foreach sample,$(SAMPLES),cufflinks/cxb/$(sample).cxb) - $(call RUN,-n 8 -s 1G -m 2G,"$(CUFFNORM) $(CUFFNORM_OPTS) -o $(@D) -p 8 $< \ - $(foreach pheno,$(PHENOTYPES),$(subst $( ),$(,),$(foreach s,$(pheno.$(pheno)),cufflinks/cxb/$s.cxb))) \ - -L $(subst $( ),$(,),$(PHENOTYPES))") diff --git a/rnaseq/deseq.Rnw b/rnaseq/deseq.Rnw deleted file mode 100644 index 8cc9f8de..00000000 --- a/rnaseq/deseq.Rnw +++ /dev/null @@ -1,138 +0,0 @@ -%%% Applies DESeq on a matrix of count data -%%% Inputs: counts matrix and pheno design matrix - -\documentclass{article} -\usepackage[margin=1in]{geometry} -\usepackage{here} - -\title{DESeq Analysis} -\author{Raymond Lim} - -\begin{document} - -\maketitle - -\SweaveOpts{cache=T, prefix.string=graphics/deSeq} - - -<>= -dir.create('graphics', showWarnings = F) -options(width = 100) - -includeGraphic <- function(filename, caption = NULL, width = 1) { - if (is.null(caption)) { - cat("\\includegraphics[width=", width, "\\linewidth]{", filename, "}\n", sep = "") - } else { - cat("\\begin{figure}[h!]\n") - cat("\\includegraphics[width=", width, "\\linewidth]{", filename, "}\n", sep = "") - cat("\\caption{", caption, "}\n", sep = "") - cat("\\end{figure}\n") - } - -} - -includeGraphics <- function(filenames, width = 1, caption) { - cat("\\begin{figure}[h!]\n") - for (filename in filenames) { - includeGraphic(filename, width) - } - cat("\\caption{", caption, "}\n", sep = "") - cat("\\end{figure}\n") -} -@ - - -<>= -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("DESeq")) -suppressPackageStartupMessages(library("GenomicFeatures")) -suppressPackageStartupMessages(library("Rsamtools")) -suppressPackageStartupMessages(library("xtable")) -#library(multicore) -@ - -<>= -optList <- list( - make_option("--condition", default = 'Condition', help = "Factor of interest in pheno file [default %default]"), - make_option("--refCondition", default = 'Normal', help = "Reference condition [default %default]"), - make_option("--altCondition", default = 'Variant', help = "Reference condition [default %default]"), - make_option("--outFile", default = NULL, help = "Output results to this file [optional]")); - -parser <- OptionParser(usage = "%prog [options] [counts file] [pheno file]", option_list = optList); - -arguments <- parse_args(parser, positional_arguments = T, arg = arguments); -opt <- arguments$options; - -if (length(arguments$args) != 2) { - cat("Need pheno design file and counts data"); - print_help(parser); - stop(); -} -@ - -<>= -phenoFile <- arguments$args[2]; -countsFile <- arguments$args[1]; - -pheno <- read.table(phenoFile, header = T, sep = '\t', row.names = 1, check.names = F); -pheno[, opt$condition] <- relevel(pheno[, opt$condition], opt$refCondition); - -counts <- read.table(countsFile, header = T, sep = '\t', na.strings = "", comment.char = "", stringsAsFactors = F, check.names = F); -counts <- counts[!duplicated(counts[,1]), ] -rownames(counts) <- counts[,1] -counts <- counts[-1] - -if (!all(colnames(counts) %in% rownames(pheno))) { - cat("Design does not match data"); -} -counts <- counts[, rownames(pheno)] - -cds <- newCountDataSet(counts, pheno[, opt$condition]) -@ - -Estimate the effective library size: - -<>= -cds <- estimateSizeFactors(cds) -sizeFactors(cds) -@ - -Estimate dispersion/variance: - -<>= -cds <- estimateDispersions(cds) - -str(fitInfo(cds)) -@ - -<>= -res <- nbinomTest(cds, levels(pData(cds)$condition)[1], levels(pData(cds)$condition)[2]) -@ - -\begin{figure} -<>= -plot(res$baseMean, res$log2FoldChange, log = "x", pch = 20, cex = .3, col = ifelse(res$padj < .1, "red", "black"), ylab = 'M', xlab = 'A') -@ - \caption{MA plot, normalised mean vs. log2 fold change} -\end{figure} - -\begin{figure} -<>= -hist(res$pval, breaks = 100, col = 'skyblue', border = 'slateblue', main = "", xlab = 'p-value') -@ - \caption{Histogram of p-values} -\end{figure} - -<>= -capt <- 'Top differentially expressed genes' -print(xtable(head(res[order(res$padj), ], 20), caption = capt)) -@ - -<>= -if (!is.null(opt$outFile)) { - write.table(res, file = opt$outFile, sep = '\t', quote = F, col.names=NA) -} -@ - -\end{document} - diff --git a/rnaseq/deseq.mk b/rnaseq/deseq.mk deleted file mode 100644 index bfec8c1e..00000000 --- a/rnaseq/deseq.mk +++ /dev/null @@ -1,23 +0,0 @@ -include modules/Makefile.inc -include modules/variant_callers/gatk.inc - -LOGDIR = log/deseq.$(NOW) - -DESEQ_RNW = modules/rnaseq/deseq.Rnw -SWEAVE = $(RSCRIPT) modules/scripts/Sweave.R - -DESEQ_CONDITION ?= condition -DESEQ_REF_CONDITION ?= ref - -# pheno file: sample\tpheno with header -PHENO_FILE ?= pheno.txt - -.DELETE_ON_ERROR: -.SECONDARY: - -.PHONY : all - -deseq_results.txt : sumreads/geneCounts.txt - mkdir -p graphics; $(SWEAVE) $(DESEQ_RNW) --condition $(DESEQ_CONDITION) --refCondition $(DESEQ_REF_CONDITION) --outFile $@ $< $(PHENO_FILE) - - diff --git a/rnaseq/dexseq.mk b/rnaseq/dexseq.mk deleted file mode 100644 index 23b5668f..00000000 --- a/rnaseq/dexseq.mk +++ /dev/null @@ -1,18 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/exon_counts.$(NOW) -PHONY += dexseq - -dexseq : $(foreach sample,$(TUMOR_SAMPLES),dexseq/$(sample).txt) - -define exon-count -dexseq/%.txt : star/bam/%.star.sorted.filtered.bam - $$(call RUN,-c -s 8G -m 12G -w 1440,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/dexseq && \ - /home/${USER}/share/usr/anaconda-envs/dexseq/lib/R/library/DEXSeq/python_scripts/dexseq_count.py -f bam -p yes -r pos /home/${USER}/share/reference/Ensembl/Homo_sapiens.GRCh37.75.gff $$< dexseq/$$*.txt") -endef -$(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call exon-count,$sample))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/rnaseq/immunedeconv.mk b/rnaseq/immunedeconv.mk index ac2bcbd8..e112137b 100644 --- a/rnaseq/immunedeconv.mk +++ b/rnaseq/immunedeconv.mk @@ -6,15 +6,15 @@ immunedeconv : immunedeconv/quantiseq.txt \ immunedeconv/mcpcounter.txt \ immunedeconv/cibersort.txt -immunedeconv/quantiseq.txt : kallisto/tpm_bygene.txt +immunedeconv/quantiseq.txt : kallisto/tpm_by_gene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 1 --input_file $(<) --output_file $(@)") -immunedeconv/mcpcounter.txt : kallisto/tpm_bygene.txt +immunedeconv/mcpcounter.txt : kallisto/tpm_by_gene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 2 --input_file $(<) --output_file $(@)") -immunedeconv/cibersort.txt : kallisto/tpm_bygene.txt +immunedeconv/cibersort.txt : kallisto/tpm_by_gene.txt $(call RUN, -c -n 1 -s 8G -m 16G -v $(IMMUNE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/immunedeconv.R --option 3 --input_file $(<) --output_file $(@)") @@ -22,4 +22,4 @@ immunedeconv/cibersort.txt : kallisto/tpm_bygene.txt ~/share/usr/env/r-immunedeconv-2.1.0/bin/R --version >> version/immunedeconv.txt;) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: kallisto +.PHONY: immunedeconv diff --git a/rnaseq/kallisto.mk b/rnaseq/kallisto.mk index 5007c13a..6db1acf1 100644 --- a/rnaseq/kallisto.mk +++ b/rnaseq/kallisto.mk @@ -5,7 +5,7 @@ LOGDIR = log/kallisto.$(NOW) kallisto : $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R1.fastq.gz) \ $(foreach sample,$(SAMPLES),kallisto/$(sample)/$(sample)_R2.fastq.gz) \ $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) \ - kallisto/tpm_bygene.txt + kallisto/tpm_by_gene.txt SLEUTH_ANNOT ?= $(HOME)/share/lib/resource_files/Hugo_ENST_ensembl75_fixed.txt KALLISTO_INDEX ?= $(HOME)/share/lib/ref_files/b37/ensembl_v75-0.43.0_kallisto_index @@ -35,7 +35,7 @@ endef $(foreach sample,$(SAMPLES),\ $(eval $(call fastq-to-kallisto,$(sample)))) -kallisto/tpm_bygene.txt : $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) +kallisto/tpm_by_gene.txt : $(foreach sample,$(SAMPLES),kallisto/$(sample)/abundance.tsv) $(call RUN, -c -n 24 -s 1G -m 2G -v $(KALLISTO_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/summarize_sleuth.R --annotation $(SLEUTH_ANNOT) --samples '$(SAMPLES)'") diff --git a/rnaseq/rpkm.mk b/rnaseq/rpkm.mk deleted file mode 100644 index 34a6d9b5..00000000 --- a/rnaseq/rpkm.mk +++ /dev/null @@ -1,7 +0,0 @@ -#Module calculates RPKM values. Depends on the sumRNASeqReads.mk -RPKM_RSCRIPT = ${RSCRIPT} ~/gascoyne/scripts/calculateRPKM.R - -rpkm/%.rpkm.txt : summarized_reads/%.summarized_reads.txt - SGE_RREQ="-N $(@F) -l mem_free=1G -q all.q -now n" \ - $(MKDIR) $(@D)/logs;\ - $(RPKM_RSCRIPT) ${TXDB_FILE} $< $@ > $(@D)/logs/$*.log 2>&1 diff --git a/rnaseq/sumRNASeqReads.mk b/rnaseq/sumRNASeqReads.mk deleted file mode 100644 index 931cf949..00000000 --- a/rnaseq/sumRNASeqReads.mk +++ /dev/null @@ -1,44 +0,0 @@ -include modules/Makefile.inc -include modules/variant_callers/gatk.inc - -LOGDIR = log/sum_reads.$(NOW) - -DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 - -SUM_READS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReads.R -SUM_EXONS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReadsByExon.R -SUM_INTRONS_RSCRIPT = ${RSCRIPT} modules/rnaseq/summarizeRNASeqReadsByIntron.R -SUM_READS_OPTS = - -.DELETE_ON_ERROR: -.SECONDARY: - -.PHONY : all sumreads - -SUM_TYPE = byGene byExon - -all : $(foreach type,$(SUM_TYPE),$(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.$(type).txt)) sumreads/rpkm_by_gene.txt sumreads/rpkm_by_exon.txt sumreads/counts_by_gene.txt sumreads/counts_by_exon.txt - -sumreads/%.sumreads.byGene.txt : bam/%.bam bam/%.bam.bai - $(call RUN,-v $(DEFAULT_ENV) -s 24G -m 48G,"$(SUM_READS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<") - -sumreads/%.sumreads.byExon.txt : bam/%.bam bam/%.bam.bai - $(call RUN,-v $(DEFAULT_ENV) -s 24G -m 48G,"$(SUM_EXONS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<") - -sumreads/rpkm_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byGene.txt) - cut -f 2 $< > $@; \ - for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 7 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done - -sumreads/rpkm_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byExon.txt) - cut -f 1-2 $< > $@; \ - for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 6 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done - -sumreads/counts_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byGene.txt) - cut -f 2 $< > $@; \ - for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 3 $$x | sed "s/countsByGene/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done - -sumreads/counts_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.byExon.txt) - cut -f 1-2 $< > $@; \ - for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 4 $$x | sed "s/exonCount/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done - -include modules/bam_tools/processBam.mk diff --git a/rnaseq/sumreads.mk b/rnaseq/sumreads.mk new file mode 100644 index 00000000..f8a6e0af --- /dev/null +++ b/rnaseq/sumreads.mk @@ -0,0 +1,41 @@ +include modules/Makefile.inc + +LOGDIR = log/sum_reads.$(NOW) + +sumreads : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt) \ + $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt) \ + sumreads/rpkm_by_gene.txt \ + sumreads/rpkm_by_exon.txt \ + sumreads/counts_by_gene.txt \ + sumreads/counts_by_exon.txt + +SUM_READS_OPTS = +REF ?= b37 + +sumreads/%.sumreads.by_gene.txt : bam/%.bam bam/%.bam.bai + $(call RUN,-v $(SUMREADS_ENV) -s 24G -m 48G,"$(SUM_READS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<") + +sumreads/%.sumreads.by_exon.txt : bam/%.bam bam/%.bam.bai + $(call RUN,-v $(SUMREADS_ENV) -s 24G -m 48G,"$(SUM_EXONS_RSCRIPT) --genome $(REF) --outFile $@ $(SUM_READS_OPTS) $<") + +sumreads/rpkm_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt) + cut -f 2 $< > $@; \ + for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 7 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done + +sumreads/rpkm_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt) + cut -f 1-2 $< > $@; \ + for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 6 $$x | sed "s/exonRPKM/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done + +sumreads/counts_by_gene.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_gene.txt) + cut -f 2 $< > $@; \ + for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 3 $$x | sed "s/countsByGene/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done + +sumreads/counts_by_exon.txt : $(foreach sample,$(SAMPLES),sumreads/$(sample).sumreads.by_exon.txt) + cut -f 1-2 $< > $@; \ + for x in $^; do sample=`echo $$x | sed 's/.*\///; s/\..*//'`; cut -f 4 $$x | sed "s/exonCount/$$sample/" | paste $@ - > $@.tmp; mv $@.tmp $@; done + +..DUMMY := $(shell mkdir -p version; \ + $(SUMREADS_ENV)/bin/R --version >> version/sumreads.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: sumreads diff --git a/rnaseq/summarizeRNASeqReads.R b/scripts/summarize_rnaseqreads.R similarity index 100% rename from rnaseq/summarizeRNASeqReads.R rename to scripts/summarize_rnaseqreads.R diff --git a/rnaseq/summarizeRNASeqReadsByExon.R b/scripts/summarize_rnaseqreads_byexon.R similarity index 100% rename from rnaseq/summarizeRNASeqReadsByExon.R rename to scripts/summarize_rnaseqreads_byexon.R diff --git a/rnaseq/summarizeRNASeqReadsByIntron.R b/scripts/summarize_rnaseqreads_byintron.R similarity index 100% rename from rnaseq/summarizeRNASeqReadsByIntron.R rename to scripts/summarize_rnaseqreads_byintron.R diff --git a/scripts/summarize_sleuth.R b/scripts/summarize_sleuth.R index 58985f3f..51f018b4 100644 --- a/scripts/summarize_sleuth.R +++ b/scripts/summarize_sleuth.R @@ -31,4 +31,4 @@ data = sleuth::sleuth_prep(sample_to_covariates = manifest, res = as.data.frame(sleuth_to_matrix(data, "obs_norm", "tpm")) tpm_bygene = dplyr::tibble(gene_symbol = rownames(res)) %>% dplyr::bind_cols(dplyr::as_tibble(res)) -write_tsv(x=tpm_bygene, path="kallisto/tpm_bygene.txt", append=FALSE, col_names=TRUE, quote_escape=FALSE) +write_tsv(x=tpm_bygene, path="kallisto/tpm_by_gene.txt", append=FALSE, col_names=TRUE, quote_escape=FALSE) diff --git a/variant_callers/somatic/macs2TN.mk b/variant_callers/somatic/macs2TN.mk deleted file mode 100644 index 259e4e65..00000000 --- a/variant_callers/somatic/macs2TN.mk +++ /dev/null @@ -1,23 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/macs2TN.$(NOW) -PHONY += macs2 macs2/broadpeaks macs2/narrowpeaks - -macs2TN : $(foreach pair,$(SAMPLE_PAIRS),macs2/$(pair).timestamp) - -define macs2-case-control -macs2/broadpeaks/$1_$2.task.complete : bam/$1.bam bam/$2.bam - $$(call RUN,-c -s 8G -m 12G,"macs2 callpeak -t $$< -c $$(<<) -f BAM -g hs --keep-dup all --broad --outdir macs2/broadpeaks -n $1_$2 -B --verbose 2 --nomodel -p 0.1 && echo $$< $$(<<) > macs2/broadpeaks/$1_$2.task.complete") - -macs2/narrowpeaks/$1_$2.task.complete : bam/$1.bam bam/$2.bam - $$(call RUN,-c -s 8G -m 12G,"macs2 callpeak -t $$< -c $$(<<) -f BAM -g hs --keep-dup all --outdir macs2/narrowpeaks -n $1_$2 -B --verbose 2 --nomodel -p 0.1 && echo $$< $$(<<) > macs2/narrowpeaks/$1_$2.task.complete") - -macs2/$1_$2.timestamp : macs2/broadpeaks/$1_$2.task.complete macs2/narrowpeaks/$1_$2.task.complete - $$(call RUN,-c -s 1G -m 1G,"echo $$< $$(<<) > macs2/$1_$2.timestamp") -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call macs2-case-control,$(tumor.$(pair)),$(normal.$(pair))))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) From 6ab5cbc5a61f260b16ae4ba92279deae353f49e8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 24 Oct 2022 22:44:24 -0400 Subject: [PATCH 245/766] ++ --- Makefile | 54 +++++++++++++------------ bam_tools/{fixBam.mk => fix_bam.mk} | 0 bam_tools/{fixMate.mk => fix_mate.mk} | 0 bam_tools/{fixRG.mk => fix_rg.mk} | 4 +- bam_tools/{mergeBam.mk => merge_bam.mk} | 8 ++-- bam_tools/splitRG.mk | 26 ------------ 6 files changed, 35 insertions(+), 57 deletions(-) rename bam_tools/{fixBam.mk => fix_bam.mk} (100%) rename bam_tools/{fixMate.mk => fix_mate.mk} (100%) rename bam_tools/{fixRG.mk => fix_rg.mk} (91%) rename bam_tools/{mergeBam.mk => merge_bam.mk} (100%) delete mode 100644 bam_tools/splitRG.mk diff --git a/Makefile b/Makefile index 9ee003a2..ead7c67d 100644 --- a/Makefile +++ b/Makefile @@ -300,10 +300,6 @@ oncofuse : # DNA structural variant callers #================================================== -TARGETS += manta -manta : - $(call RUN_MAKE,modules/sv_callers/manta.mk) - TARGETS += mantaTN mantaTN : $(call RUN_MAKE,modules/sv_callers/mantaTN.mk) @@ -312,6 +308,10 @@ TARGETS += svabaTN svabaTN : $(call RUN_MAKE,modules/sv_callers/svabaTN.mk) +TARGETS += manta +manta : + $(call RUN_MAKE,modules/sv_callers/manta.mk) + TARGETS += brass brass : $(call RUN_MAKE,modules/sv_callers/brass.mk) @@ -348,23 +348,40 @@ crest : TARGETS += delly delly : $(call RUN_MAKE,modules/sv_callers/delly.mk) - + #================================================== -# pre-processing +# BAM tools #================================================== -TARGETS += merge_fastq -merge_fastq : - $(call RUN_MAKE,modules/fastq_tools/mergeFastq.mk) - TARGETS += fix_bam fix_bam : - $(call RUN_MAKE,modules/bam_tools/fixBam.mk) + $(call RUN_MAKE,modules/bam_tools/fix_bam.mk) TARGETS += fix_rg fix_rg : - $(call RUN_MAKE,modules/bam_tools/fixRG.mk) + $(call RUN_MAKE,modules/bam_tools/fix_rg.mk) + +TARGETS += fix_mate +fix_mate : + $(call RUN_MAKE,modules/bam_tools/fix_mate.mk) + +TARGETS += merge_bam +merge_bam : + $(call RUN_MAKE,modules/bam_tools/merge_bam.mk) + +TARGETS += process_bam +process_bam : + $(call RUN_MAKE,modules/bam_tools/processBam.mk) + + +#================================================== +# FASTQ tools +#================================================== + +TARGETS += merge_fastq +merge_fastq : + $(call RUN_MAKE,modules/fastq_tools/mergeFastq.mk) TARGETS += merge_split_fastq merge_split_fastq : @@ -385,19 +402,6 @@ extract_unmapped_pairs : TARGETS += bam_to_fasta bam_to_fasta : $(call RUN_MAKE,modules/fastq_tools/bamtoFasta.mk) - -TARGETS += process_bam -process_bam : - $(call RUN_MAKE,modules/bam_tools/processBam.mk) - -TARGETS += merge_bam -merge_bam : - $(call RUN_MAKE,modules/bam_tools/mergeBam.mk) - -TARGETS += split_rg -split_rg : - $(call RUN_MAKE,modules/bam_tools/splitRG.mk) - #================================================== diff --git a/bam_tools/fixBam.mk b/bam_tools/fix_bam.mk similarity index 100% rename from bam_tools/fixBam.mk rename to bam_tools/fix_bam.mk diff --git a/bam_tools/fixMate.mk b/bam_tools/fix_mate.mk similarity index 100% rename from bam_tools/fixMate.mk rename to bam_tools/fix_mate.mk diff --git a/bam_tools/fixRG.mk b/bam_tools/fix_rg.mk similarity index 91% rename from bam_tools/fixRG.mk rename to bam_tools/fix_rg.mk index d957c9ad..0e7735f8 100644 --- a/bam_tools/fixRG.mk +++ b/bam_tools/fix_rg.mk @@ -2,13 +2,13 @@ include modules/Makefile.inc include modules/variant_callers/gatk.inc include modules/aligners/align.inc -LOGDIR ?= log/fixRG.$(NOW) +LOGDIR ?= log/fix_rg.$(NOW) BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) + fixed_bams : $(BAMS) $(addsuffix .bai,$(BAMS)) bam/%.bam : unprocessed_bam/%.rg.bam $(INIT) ln -f $(<) $(@) - include modules/bam_tools/processBam.mk diff --git a/bam_tools/mergeBam.mk b/bam_tools/merge_bam.mk similarity index 100% rename from bam_tools/mergeBam.mk rename to bam_tools/merge_bam.mk index bfaeedb8..68eda7b0 100644 --- a/bam_tools/mergeBam.mk +++ b/bam_tools/merge_bam.mk @@ -2,10 +2,6 @@ include modules/Makefile.inc LOGDIR = log/merge.$(NOW) -.SECONDARY: -.DELETE_ON_ERROR: -.PHONY : merged_bam - merged_bam : $(foreach sample,$(MERGE_SAMPLES),bam/$(sample).bam bam/$(sample).bam.bai) define merged-bam @@ -32,4 +28,8 @@ $(foreach sample,$(MERGE_SAMPLES),\ bam/%.bam : merged_bam/%.rg.bam $(INIT) ln -f $< $@ +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY : merged_bam + include modules/bam_tools/processBam.mk diff --git a/bam_tools/splitRG.mk b/bam_tools/splitRG.mk deleted file mode 100644 index 91dac523..00000000 --- a/bam_tools/splitRG.mk +++ /dev/null @@ -1,26 +0,0 @@ -include modules/Makefile.inc - -LOGDIR = log/split_rg.$(NOW) - -split : $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/MFE296/$(sample).bam.bai) - -define split-rg -bam/MFE296/$1.bam : etc/bam/MFE296-2.bam - $$(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ - mkdir -p bam/MFE296 && \ - $$(SAMTOOLS) view -b -r $1 $$(<) > $$(@)") - -bam/MFE296/$1.bam.bai : bam/MFE296/$1.bam - $$(call RUN,-n 1 -s 2G -m 4G,"set -o pipefail && \ - $$(SAMTOOLS) index $$(<)") - -endef -$(foreach sample,$(SAMPLES),\ - $(eval $(call split-rg,$(sample)))) - -..DUMMY := $(shell mkdir -p version; \ - $(SAMTOOLS) --version > version/split_rg.txt;) -.SECONDARY: -.DELETE_ON_ERROR: -.PHONY: split From 8356d5e0eb124f4693d273fe89e9f476cff6faa4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 15:27:30 -0400 Subject: [PATCH 246/766] sufam --- Makefile | 14 +++++------- scripts/sufam_gt.R | 33 +++++++++++++++++++++++++++++ variant_callers/genotypehotspots.mk | 22 ------------------- variant_callers/genotypepdx.R | 14 ------------ variant_callers/genotypepdx.mk | 29 ------------------------- variant_callers/sufam_gt.mk | 32 ++++++++++++++++++++++++++++ 6 files changed, 70 insertions(+), 74 deletions(-) create mode 100644 scripts/sufam_gt.R delete mode 100644 variant_callers/genotypehotspots.mk delete mode 100644 variant_callers/genotypepdx.R delete mode 100644 variant_callers/genotypepdx.mk create mode 100644 variant_callers/sufam_gt.mk diff --git a/Makefile b/Makefile index ead7c67d..02b38bb2 100644 --- a/Makefile +++ b/Makefile @@ -173,14 +173,6 @@ TARGETS += hotspot hotspot: $(call RUN_MAKE,modules/variant_callers/hotspot.mk) -TARGETS += genotype_hotspot -genotype_hotspot: - $(call RUN_MAKE,modules/variant_callers/genotypehotspots.mk) - -TARGETS += genotype_pdx -genotype_pdx: - $(call RUN_MAKE,modules/variant_callers/genotypepdx.mk) - TARGETS += jsm jsm : $(call RUN_MAKE,modules/variant_callers/somatic/jsm.mk) @@ -188,7 +180,11 @@ jsm : TARGETS += sufam sufam: $(call RUN_MAKE,modules/variant_callers/sufamsampleset.mk) - + +TARGETS += sufam_gt +sufam_gt: + $(call RUN_MAKE,modules/variant_callers/sufam_gt.mk) + TARGETS += get_basecount get_basecount: $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R new file mode 100644 index 00000000..6934862a --- /dev/null +++ b/scripts/sufam_gt.R @@ -0,0 +1,33 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_set", default = NA, type = 'character', help = "sample set"), + make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), + make_option("--input_file", default = NA, type = 'character', help = "input file"), + make_option("--output_file", default = NA, type = 'character', help = "output file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + + +all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) +CHROM = all_vars[,"CHROM"] +POS = all_vars[,"POS"] +ID = all_vars[,"ID"] +REF = all_vars[,"REF"] +ALT = all_vars[,"ALT"] +QUAL = FILTER = rep(".", nrow(all_vars)) +INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"]) +vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO) + +cat("#", file="sufam/pdx.vcf", append=FALSE) +write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE) diff --git a/variant_callers/genotypehotspots.mk b/variant_callers/genotypehotspots.mk deleted file mode 100644 index 6d425f0e..00000000 --- a/variant_callers/genotypehotspots.mk +++ /dev/null @@ -1,22 +0,0 @@ -include modules/Makefile.inc - -SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' - -LOGDIR ?= log/genotype_hotspots.$(NOW) -PHONY += hotspot - -genotype_hotspots : $(foreach sample,$(SAMPLES),hotspot/$(sample).txt) - -define genotype-hotspots -hotspot/%.txt : bam/%.bam - $$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) modules/reference/hotspots/hotspot-dedup.vcf bam/$$(*).bam > hotspot/$$(*).txt") - -endef - $(foreach sample,$(SAMPLES),\ - $(eval $(call genotype-hotspots,$(sample)))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) - diff --git a/variant_callers/genotypepdx.R b/variant_callers/genotypepdx.R deleted file mode 100644 index 2c84bf14..00000000 --- a/variant_callers/genotypepdx.R +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env Rscript - -all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -CHROM = all_vars[,"CHROM"] -POS = all_vars[,"POS"] -ID = all_vars[,"ID"] -REF = all_vars[,"REF"] -ALT = all_vars[,"ALT"] -QUAL = FILTER = rep(".", nrow(all_vars)) -INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"]) -vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO) - -cat("#", file="sufam/pdx.vcf", append=FALSE) -write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE) diff --git a/variant_callers/genotypepdx.mk b/variant_callers/genotypepdx.mk deleted file mode 100644 index c599b82d..00000000 --- a/variant_callers/genotypepdx.mk +++ /dev/null @@ -1,29 +0,0 @@ -include modules/Makefile.inc - -SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' -MOUSE_SAMPLES = $(sample_category.mouse) - -LOGDIR ?= log/genotype_pdx.$(NOW) -PHONY += sufam summary - -genotype_pdx : $(foreach sample,$(sample_category.mouse),sufam/$(sample).txt) sufam/pdx.vcf summary/mouse_summary.xlsx - -sufam/pdx.vcf : summary/tsv/mutation_summary.tsv - $(call RUN, -c -s 8G -m 16G,"$(RSCRIPT) modules/variant_callers/genotypepdx.R") - -define genotype-pdx -sufam/%.txt : bam/%.bam sufam/pdx.vcf - $$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) sufam/pdx.vcf bam/$$(*).bam > sufam/$$(*).txt") - -endef - $(foreach sample,$(sample_category.mouse),\ - $(eval $(call genotype-pdx,$(sample)))) - -summary/mouse_summary.xlsx : $(wildcard $(foreach sample,$(sample_category.mouse),sufam/$(sample).txt)) - $(call RUN,-n 1 -s 4G -m 4G,"$(RSCRIPT) modules/summary/mousesummary.R --sample_names '$(MOUSE_SAMPLES)' --out_file summary/tsv/mouse_summary.tsv && \ - python modules/summary/mouse_summary_excel.py") - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk new file mode 100644 index 00000000..5b0f25c8 --- /dev/null +++ b/variant_callers/sufam_gt.mk @@ -0,0 +1,32 @@ +include modules/Makefile.inc + +LOGDIR ?= log/sufam_gt.$(NOW) + +SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' + +sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) + +define sufam-genotype +sufam/$1.vcf : summary/tsv/all.tsv + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set $1 \ + --normal_sample '$(NORMAL_SAMPLES)' \ + --input_file $$(<) \ + --output_file $$(@)") + + + +#hotspot/%.txt : bam/%.bam +# $$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) modules/reference/hotspots/hotspot-dedup.vcf bam/$$(*).bam > hotspot/$$(*).txt") + +endef + $(foreach set,$(SAMPLE_SETS),\ + $(eval $(call sufam-genotype,$(set)))) + +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: $(PHONY) + From 6f2b9e84cb870fa48f1afb3bfa497aa8a9488f46 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 15:50:19 -0400 Subject: [PATCH 247/766] Update sufam_gt.R --- scripts/sufam_gt.R | 57 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 6934862a..06716dc4 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -11,23 +11,54 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--sample_set", default = NA, type = 'character', help = "sample set"), - make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), + make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"), make_option("--input_file", default = NA, type = 'character', help = "input file"), make_option("--output_file", default = NA, type = 'character', help = "output file")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options +if (as.numeric(opt$option)==1) { + sample_names = unlist(strsplit(x = as.character(opt$sample_set), split="_", fixed=TRUE)) + normal_sample = intersect(sample_names, unlist(strsplit(x = as.character(opt$normal_samples), split=" ", fixed=TRUE))) + sample_names = setdiff(sample_names, normal_sample) + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(TUMOR_SAMPLE %in% sample_names) %>% + dplyr::filter(NORMAL_SAMPLE == normal_sample) %>% + dplyr::mutate(UUID = paste0(CHROM, ":", POS, "_", REF, ">", ALT)) %>% + dplyr::filter(!duplicated(UUID)) %>% + dplyr::mutate(`#CHROM` = CHROM, + POS = POS, + ID = ".", + REF = REF, + ALT = ALT, + QUAL = 100, + FILTER = "PASS", + INFO = ".") %>% + dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, INFO) %>% + dplyr::mutate(chr_n = case_when( + `#CHROM` == "X" ~ "23", + `#CHROM` == "Y" ~ "24", + TRUE ~ `#CHROM` + )) %>% + readr::type_convert() %>% + dplyr::arrange(chr_n) %>% + dplyr::select(-chr_n) + + cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) + readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) +} -all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -CHROM = all_vars[,"CHROM"] -POS = all_vars[,"POS"] -ID = all_vars[,"ID"] -REF = all_vars[,"REF"] -ALT = all_vars[,"ALT"] -QUAL = FILTER = rep(".", nrow(all_vars)) -INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"]) -vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO) - -cat("#", file="sufam/pdx.vcf", append=FALSE) -write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE) +#all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) +#CHROM = all_vars[,"CHROM"] +#POS = all_vars[,"POS"] +#ID = all_vars[,"ID"] +#REF = all_vars[,"REF"] +#ALT = all_vars[,"ALT"] +#QUAL = FILTER = rep(".", nrow(all_vars)) +#INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"]) +#vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO) +# +#cat("#", file="sufam/pdx.vcf", append=FALSE) +#write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE) From dfa5e1c83ed84a5561a5377918390026f97a123f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 15:51:51 -0400 Subject: [PATCH 248/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 5b0f25c8..7cfb17b2 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -13,7 +13,7 @@ sufam/$1.vcf : summary/tsv/all.tsv $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ --sample_set $1 \ - --normal_sample '$(NORMAL_SAMPLES)' \ + --normal_samples '$(NORMAL_SAMPLES)' \ --input_file $$(<) \ --output_file $$(@)") From e2a8f4da4efa72880b167e6178e50155f653be26 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:07:41 -0400 Subject: [PATCH 249/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 7cfb17b2..d58cfe5a 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -6,8 +6,9 @@ SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) + $(foreach sample,$(SAMPLES),sufam/$(sample).txt) -define sufam-genotype +define tsv-2-vcf sufam/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ @@ -17,16 +18,26 @@ sufam/$1.vcf : summary/tsv/all.tsv --input_file $$(<) \ --output_file $$(@)") +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call tsv-2-vcf,$(set)))) +define sufam-genotype +sufam/$1.txt : $$(foreach sample,$2,$$(word 1, $$(set.$$(sample)))) + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set $1 \ + --normal_samples '$(NORMAL_SAMPLES)' \ + --input_file $$(<) \ + --output_file $$(@)") -#hotspot/%.txt : bam/%.bam -# $$(call RUN,-v $$(SUFAM_ENV) -c -s 2G -m 4G -w 2880,"sufam --sample_name $$(*) $$(SUFAM_OPTS) $$(REF_FASTA) modules/reference/hotspots/hotspot-dedup.vcf bam/$$(*).bam > hotspot/$$(*).txt") - endef - $(foreach set,$(SAMPLE_SETS),\ - $(eval $(call sufam-genotype,$(set)))) +$(foreach sample,$(SAMPLES),\ + $(eval $(call sufam-genotype,$(sample),$(set.$(sample))))) + + .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) - +.PHONY: From de6a8fa6e61d9c4d79cfe3afe7c960f453f4e666 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:07:55 -0400 Subject: [PATCH 250/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index d58cfe5a..60c5e696 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -25,12 +25,7 @@ $(foreach set,$(SAMPLE_SETS),\ define sufam-genotype sufam/$1.txt : $$(foreach sample,$2,$$(word 1, $$(set.$$(sample)))) $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 1 \ - --sample_set $1 \ - --normal_samples '$(NORMAL_SAMPLES)' \ - --input_file $$(<) \ - --output_file $$(@)") + ") endef $(foreach sample,$(SAMPLES),\ From f26f30cb35883270b93a3a10f3875fcbd55fa40b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:08:43 -0400 Subject: [PATCH 251/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 60c5e696..f3fc098a 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -5,7 +5,7 @@ LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' -sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) +sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) \ $(foreach sample,$(SAMPLES),sufam/$(sample).txt) define tsv-2-vcf From ee59c65e8e1ed0e07e4ac4cfc815cee1f46a97ac Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:20:48 -0400 Subject: [PATCH 252/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index f3fc098a..3eb4f834 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -6,7 +6,7 @@ SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) \ - $(foreach sample,$(SAMPLES),sufam/$(sample).txt) + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) define tsv-2-vcf sufam/$1.vcf : summary/tsv/all.tsv @@ -23,13 +23,13 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call tsv-2-vcf,$(set)))) define sufam-genotype -sufam/$1.txt : $$(foreach sample,$2,$$(word 1, $$(set.$$(sample)))) +sufam/$2.txt : sufam/$1.vcf $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ ") endef -$(foreach sample,$(SAMPLES),\ - $(eval $(call sufam-genotype,$(sample),$(set.$(sample))))) +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call sufam-genotype,$(set),$(set.$(sample))))) From bf73e70c15d85d1d827051b7895c9620c76ef9dc Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:23:53 -0400 Subject: [PATCH 253/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 3eb4f834..c3b24257 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -6,7 +6,7 @@ SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) + $(foreach set,$(SAMPLE_SETS),sufam/$(set).taskcomplete) define tsv-2-vcf sufam/$1.vcf : summary/tsv/all.tsv @@ -23,7 +23,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call tsv-2-vcf,$(set)))) define sufam-genotype -sufam/$2.txt : sufam/$1.vcf +sufam/$1.taskcomplete : sufam/$1.vcf $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ ") From c8786bcc36270df15a15fe1e2e7921ef4760d3f1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:33:22 -0400 Subject: [PATCH 254/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index c3b24257..03d51cfb 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -5,8 +5,7 @@ LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' -sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set).taskcomplete) +sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) define tsv-2-vcf sufam/$1.vcf : summary/tsv/all.tsv @@ -22,17 +21,6 @@ endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call tsv-2-vcf,$(set)))) -define sufam-genotype -sufam/$1.taskcomplete : sufam/$1.vcf - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - ") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call sufam-genotype,$(set),$(set.$(sample))))) - - - .DELETE_ON_ERROR: .SECONDARY: .PHONY: From bdc57005ae16509d1803d206b60c1e95c7ba294e Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:41:24 -0400 Subject: [PATCH 255/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 03d51cfb..c9a8814b 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -5,22 +5,23 @@ LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' -sufam_gt : $(foreach set,$(SAMPLE_SETS),sufam/$(set).vcf) +sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) define tsv-2-vcf sufam/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ - --sample_set $1 \ - --normal_samples '$(NORMAL_SAMPLES)' \ + --sample $1 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ --input_file $$(<) \ --output_file $$(@)") endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call tsv-2-vcf,$(set)))) - +$(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call tsv-2-vcf,$(sample)))) + .DELETE_ON_ERROR: .SECONDARY: .PHONY: From 37cff4017a80ddd2792711e195efa41b4c445b6a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:46:19 -0400 Subject: [PATCH 256/766] Update sufam_gt.R --- scripts/sufam_gt.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 06716dc4..2d69086f 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -10,8 +10,9 @@ if (!interactive()) { } optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample", default = NA, type = 'character', help = "sample"), make_option("--sample_set", default = NA, type = 'character', help = "sample set"), - make_option("--normal_samples", default = NA, type = 'character', help = "normal samples"), + make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), make_option("--input_file", default = NA, type = 'character', help = "input file"), make_option("--output_file", default = NA, type = 'character', help = "output file")) parser = OptionParser(usage = "%prog", option_list = optList) @@ -19,12 +20,12 @@ arguments = parse_args(parser, positional_arguments = T) opt = arguments$options if (as.numeric(opt$option)==1) { - sample_names = unlist(strsplit(x = as.character(opt$sample_set), split="_", fixed=TRUE)) - normal_sample = intersect(sample_names, unlist(strsplit(x = as.character(opt$normal_samples), split=" ", fixed=TRUE))) - sample_names = setdiff(sample_names, normal_sample) + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + sample_set = setdiff(sample_set, normal_sample) smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::filter(TUMOR_SAMPLE %in% sample_names) %>% + dplyr::filter(TUMOR_SAMPLE %in% sample_set) %>% dplyr::filter(NORMAL_SAMPLE == normal_sample) %>% dplyr::mutate(UUID = paste0(CHROM, ":", POS, "_", REF, ">", ALT)) %>% dplyr::filter(!duplicated(UUID)) %>% @@ -45,7 +46,6 @@ if (as.numeric(opt$option)==1) { readr::type_convert() %>% dplyr::arrange(chr_n) %>% dplyr::select(-chr_n) - cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) } From 4a2eb7b87c129c5f8733b12453ace6e4821c5d5b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:47:24 -0400 Subject: [PATCH 257/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index c9a8814b..14fcafb4 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -7,7 +7,7 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) -define tsv-2-vcf +define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ @@ -20,7 +20,7 @@ sufam/$1.vcf : summary/tsv/all.tsv endef $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call tsv-2-vcf,$(sample)))) + $(eval $(call sufam-gt,$(sample)))) .DELETE_ON_ERROR: .SECONDARY: From 0fc9b6aa708290094d0fc2b6e10271eaeffae643 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:47:41 -0400 Subject: [PATCH 258/766] Update sufam_gt.R --- scripts/sufam_gt.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 2d69086f..e2fdde5b 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -49,16 +49,3 @@ if (as.numeric(opt$option)==1) { cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) } - -#all_vars = read.csv(file="summary/tsv/mutation_summary.tsv", header=TRUE, sep="\t", stringsAsFactors=FALSE) -#CHROM = all_vars[,"CHROM"] -#POS = all_vars[,"POS"] -#ID = all_vars[,"ID"] -#REF = all_vars[,"REF"] -#ALT = all_vars[,"ALT"] -#QUAL = FILTER = rep(".", nrow(all_vars)) -#INFO = paste0(all_vars[,"SYMBOL"], all_vars[,"HGVSp_Short"]) -#vcf = data.frame(CHROM, POS, ID, REF, ALT, QUAL, INFO) -# -#cat("#", file="sufam/pdx.vcf", append=FALSE) -#write.table(vcf, file="sufam/pdx.vcf", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE, append=TRUE) From 5414d09debf4ff99a3d2f48c104d13ecc1bbfc7a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:48:32 -0400 Subject: [PATCH 259/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 14fcafb4..4ae82730 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -21,7 +21,9 @@ sufam/$1.vcf : summary/tsv/all.tsv endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call sufam-gt,$(sample)))) - + +..DUMMY := $(shell mkdir -p version; \ + R --version > version/sufam_gt.txt) .DELETE_ON_ERROR: .SECONDARY: .PHONY: From 8fc93c752359f77880f6f70ca4f5734e5f0eef7b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 16:54:18 -0400 Subject: [PATCH 260/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 4ae82730..90b49aac 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -5,7 +5,8 @@ LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' -sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) +sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv @@ -17,6 +18,16 @@ sufam/$1.vcf : summary/tsv/all.tsv --normal_sample '$(normal.$1)' \ --input_file $$(<) \ --output_file $$(@)") + +sufam/$1.txt : sufam/$1.vcf bam/$1.bam + $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ + sufam \ + --sample_name $1 \ + $$(SUFAM_OPTS) \ + $$(REF_FASTA) \ + $$(<) \ + $$(<<) \ + > $$(@)") endef $(foreach sample,$(TUMOR_SAMPLES),\ From a2cebc3c430f59970b49c4ee48cb8210252629b3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 17:05:45 -0400 Subject: [PATCH 261/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 90b49aac..0255481a 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -6,7 +6,8 @@ SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv @@ -29,6 +30,19 @@ sufam/$1.txt : sufam/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") +sufam/$1.maf : sufam/$1.vcf + $$(call RUN,-c -n 12 -s 1G -m 2G,"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$< \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") + + endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call sufam-gt,$(sample)))) From 602a94972f8bcc405a28a50dfc49c4fb09ac10e1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 17:10:47 -0400 Subject: [PATCH 262/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 0255481a..1bfad717 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -31,16 +31,16 @@ sufam/$1.txt : sufam/$1.vcf bam/$1.bam > $$(@)") sufam/$1.maf : sufam/$1.vcf - $$(call RUN,-c -n 12 -s 1G -m 2G,"set -o pipefail && \ - $$(VCF2MAF) \ - --input-vcf $$< \ - --tumor-id $1 \ - --filter-vcf $$(EXAC_NONTCGA) \ - --ref-fasta $$(REF_FASTA) \ - --vep-path $$(VEP_PATH) \ - --vep-data $$(VEP_DATA) \ - --tmp-dir `mktemp -d` \ - --output-maf $$(@)") + $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$< \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") endef From c68ec2b20be0636b573f4448dd7cb47351a4075f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 17:50:24 -0400 Subject: [PATCH 263/766] ++ --- scripts/sufam_gt.R | 9 +++++++-- variant_callers/sufam_gt.mk | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index e2fdde5b..17f40129 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -10,7 +10,6 @@ if (!interactive()) { } optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), - make_option("--sample", default = NA, type = 'character', help = "sample"), make_option("--sample_set", default = NA, type = 'character', help = "sample set"), make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), make_option("--input_file", default = NA, type = 'character', help = "input file"), @@ -48,4 +47,10 @@ if (as.numeric(opt$option)==1) { dplyr::select(-chr_n) cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) -} + +} else if (as.numeric(opt$option)==2) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + sample_set = setdiff(sample_set, normal_sample) + +} \ No newline at end of file diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 1bfad717..5f7f6ed6 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -7,14 +7,14 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ + $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ - --sample $1 \ --sample_set '$(set.$1)' \ --normal_sample '$(normal.$1)' \ --input_file $$(<) \ @@ -46,6 +46,19 @@ sufam/$1.maf : sufam/$1.vcf endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call sufam-gt,$(sample)))) + +define combine-maf +sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 2 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call combine-maf,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ R --version > version/sufam_gt.txt) From db35c6fd888549086ba2b91f578b240580cda453 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 17:50:55 -0400 Subject: [PATCH 264/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 5f7f6ed6..d456ab4a 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -58,7 +58,7 @@ sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach endef $(foreach set,$(SAMPLE_SETS),\ - $(eval $(call combine-maf,$(sample)))) + $(eval $(call combine-maf,$(set)))) ..DUMMY := $(shell mkdir -p version; \ R --version > version/sufam_gt.txt) From 5e787a02f5300a5c37d67cf4196a678e8828b834 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:14:48 -0400 Subject: [PATCH 265/766] ++ --- scripts/sufam_gt.R | 29 +++++++++++++++++++++++++++-- variant_callers/sufam_gt.mk | 8 ++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 17f40129..b3c6d9a6 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -52,5 +52,30 @@ if (as.numeric(opt$option)==1) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) sample_set = setdiff(sample_set, normal_sample) - -} \ No newline at end of file + maf = list() + for (i in 1:length(sample_set)) { + sufam = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(Chromosome = chrom, + Start_Position = pos, + Reference_Allele = ref, + t_depth = cov, + t_alt_count = val_al_count) %>% + dplyr::mutate(t_ref_count = t_depth - t_alt_count) + + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-t_depth, -t_alt_count, -t_ref_count) %>% + dplyr::bind_cols(sufam) + } + maf = do.call(bind_rows, maf) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==3) { + maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(t_alt_count > 0) %>% + dplyr::filter(t_ref_count > 0) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index d456ab4a..fc5eb9ec 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -56,6 +56,14 @@ sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach --normal_sample '$(normal.$1)' \ --output_file $$(@)") +sufam/$1_ft.maf : sufam/$1.maf + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 3 \ + --input_file $$(<) \ + --output_file $$(@)") + + endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call combine-maf,$(set)))) From 515b4eea9c7338305d43f3ca4ec088dff7b6491d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:15:15 -0400 Subject: [PATCH 266/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index fc5eb9ec..fbd6ed7b 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -8,7 +8,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) + $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ + $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv From 0da236644abf7ac33ca86cdb132134f3c1e36aca Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:21:56 -0400 Subject: [PATCH 267/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index fbd6ed7b..b7d354e8 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -9,7 +9,9 @@ sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) + $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) \ + sufam/mutation_summary.maf \ + sufam/mutation_summary_ft.maf define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv @@ -68,6 +70,22 @@ sufam/$1_ft.maf : sufam/$1.maf endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call combine-maf,$(set)))) + + +sufam/mutation_summary.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/summary/sufam_gt.R \ + --option 4 \ + --sample_set '$(SAMPLE_SETS)' \ + --output_file $(@)") + + +sufam/mutation_summary_ft.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/summary/sufam_gt.R \ + --option 5 \ + --sample_set '$(SAMPLE_SETS)' \ + --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ R --version > version/sufam_gt.txt) From bc04dfba12cb7cc3aa6689342b9b5b28f380dd28 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:24:23 -0400 Subject: [PATCH 268/766] Update sufam_gt.R --- scripts/sufam_gt.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index b3c6d9a6..ed3aa502 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -78,4 +78,25 @@ if (as.numeric(opt$option)==1) { dplyr::filter(t_ref_count > 0) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) +} else if (as.numeric(opt$option)==4) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + maf = list() + for (i in 1:length(sample_set)) { + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + maf = do.call(bind_rows, maf) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==5) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + maf = list() + for (i in 1:length(sample_set)) { + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + maf = do.call(bind_rows, maf) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + } + From 57598a060fca4d7ef935ce666857e28b837cf657 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:25:25 -0400 Subject: [PATCH 269/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index b7d354e8..18de5419 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -74,7 +74,7 @@ $(foreach set,$(SAMPLE_SETS),\ sufam/mutation_summary.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/summary/sufam_gt.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 4 \ --sample_set '$(SAMPLE_SETS)' \ --output_file $(@)") @@ -82,7 +82,7 @@ sufam/mutation_summary.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) sufam/mutation_summary_ft.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/summary/sufam_gt.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 5 \ --sample_set '$(SAMPLE_SETS)' \ --output_file $(@)") From 2e93f8559a7f131169c118b90c0001f6f97c656d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Oct 2022 18:27:56 -0400 Subject: [PATCH 270/766] Update sufam_gt.R --- scripts/sufam_gt.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index ed3aa502..a97f1337 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -82,8 +82,7 @@ if (as.numeric(opt$option)==1) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) maf = list() for (i in 1:length(sample_set)) { - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } maf = do.call(bind_rows, maf) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) @@ -92,8 +91,7 @@ if (as.numeric(opt$option)==1) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) maf = list() for (i in 1:length(sample_set)) { - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } maf = do.call(bind_rows, maf) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) From 19271c879798ebb9b3d1ef2c43923ff2717ae767 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 12:56:44 -0400 Subject: [PATCH 271/766] Update sufam_gt.R --- scripts/sufam_gt.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index a97f1337..30c07292 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -56,9 +56,9 @@ if (as.numeric(opt$option)==1) { for (i in 1:length(sample_set)) { sufam = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::select(Chromosome = chrom, - Start_Position = pos, - Reference_Allele = ref, + dplyr::select(CHROM = chrom, + POS = pos, + REF = ref, t_depth = cov, t_alt_count = val_al_count) %>% dplyr::mutate(t_ref_count = t_depth - t_alt_count) From 32f67cb1ab316efe1a612525cc2742c3ebecd32a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 15:38:13 -0400 Subject: [PATCH 272/766] ++ --- scripts/sufam_gt.R | 29 +++++++++++++++++++++++++++++ variant_callers/sufam_gt.mk | 6 ++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 30c07292..a821bc6f 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -59,6 +59,7 @@ if (as.numeric(opt$option)==1) { dplyr::select(CHROM = chrom, POS = pos, REF = ref, + ALT = val_alt, t_depth = cov, t_alt_count = val_al_count) %>% dplyr::mutate(t_ref_count = t_depth - t_alt_count) @@ -85,6 +86,20 @@ if (as.numeric(opt$option)==1) { maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } maf = do.call(bind_rows, maf) + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% + dplyr::mutate(is_loh = facetsLOHCall) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::group_by(CHROM, POS, REF, ALT) %>% + dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% + dplyr::ungroup(), + by = c("CHROM", "POS", "REF", "ALT")) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), + by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option)==5) { @@ -94,6 +109,20 @@ if (as.numeric(opt$option)==1) { maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } maf = do.call(bind_rows, maf) + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% + dplyr::mutate(is_loh = facetsLOHCall) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::group_by(CHROM, POS, REF, ALT) %>% + dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% + dplyr::ungroup(), + by = c("CHROM", "POS", "REF", "ALT")) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), + by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) } diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 18de5419..f4e7a78d 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -72,19 +72,21 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call combine-maf,$(set)))) -sufam/mutation_summary.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) +sufam/mutation_summary.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 4 \ --sample_set '$(SAMPLE_SETS)' \ + --input_file $$(<) \ --output_file $(@)") -sufam/mutation_summary_ft.maf : $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) +sufam/mutation_summary_ft.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 5 \ --sample_set '$(SAMPLE_SETS)' \ + --input_file $$(<) \ --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From 63b4fa968e888d49ffd8eeae49858102cb3e8e78 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 15:41:38 -0400 Subject: [PATCH 273/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index f4e7a78d..d332ff00 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -77,7 +77,7 @@ sufam/mutation_summary.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),su $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 4 \ --sample_set '$(SAMPLE_SETS)' \ - --input_file $$(<) \ + --input_file $(<) \ --output_file $(@)") @@ -86,7 +86,7 @@ sufam/mutation_summary_ft.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS) $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 5 \ --sample_set '$(SAMPLE_SETS)' \ - --input_file $$(<) \ + --input_file $(<) \ --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From c0f4b7812c02e124e4ce0cc09b6ebcb6cda30044 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 15:52:09 -0400 Subject: [PATCH 274/766] Update sufam_gt.R --- scripts/sufam_gt.R | 48 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index a821bc6f..bdf01e89 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -87,9 +87,29 @@ if (as.numeric(opt$option)==1) { } maf = do.call(bind_rows, maf) smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% + dplyr::mutate(HOTSPOT = case_when( + is.na(HOTSPOT) ~ FALSE, + HOTSPOT == "True" ~ TRUE, + HOTSPOT == "False" ~ FALSE + )) %>% + dplyr::mutate(HOTSPOT_INTERNAL = case_when( + is.na(HOTSPOT_INTERNAL) ~ FALSE, + HOTSPOT_INTERNAL == "True" ~ TRUE, + HOTSPOT_INTERNAL == "False" ~ FALSE + )) %>% + dplyr::mutate(cmo_hotspot = case_when( + is.na(cmo_hotspot) ~ FALSE, + cmo_hotspot == "True" ~ TRUE, + cmo_hotspot == "False" ~ FALSE + )) %>% dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% - dplyr::mutate(is_loh = facetsLOHCall) + dplyr::mutate(facetsLOHCall = case_when( + is.na(facetsLOHCall) ~ FALSE, + facetsLOHCall == "True" ~ TRUE, + facetsLOHCall == "False" ~ FALSE + )) %>% + dplyr::mutate(is_loh = facetsLOHCall) %>% + readr::type_convert() maf = maf %>% dplyr::left_join(smry %>% dplyr::group_by(CHROM, POS, REF, ALT) %>% @@ -110,9 +130,29 @@ if (as.numeric(opt$option)==1) { } maf = do.call(bind_rows, maf) smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% + dplyr::mutate(HOTSPOT = case_when( + is.na(HOTSPOT) ~ FALSE, + HOTSPOT == "True" ~ TRUE, + HOTSPOT == "False" ~ FALSE + )) %>% + dplyr::mutate(HOTSPOT_INTERNAL = case_when( + is.na(HOTSPOT_INTERNAL) ~ FALSE, + HOTSPOT_INTERNAL == "True" ~ TRUE, + HOTSPOT_INTERNAL == "False" ~ FALSE + )) %>% + dplyr::mutate(cmo_hotspot = case_when( + is.na(cmo_hotspot) ~ FALSE, + cmo_hotspot == "True" ~ TRUE, + cmo_hotspot == "False" ~ FALSE + )) %>% dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% - dplyr::mutate(is_loh = facetsLOHCall) + dplyr::mutate(facetsLOHCall = case_when( + is.na(facetsLOHCall) ~ FALSE, + facetsLOHCall == "True" ~ TRUE, + facetsLOHCall == "False" ~ FALSE + )) %>% + dplyr::mutate(is_loh = facetsLOHCall) %>% + readr::type_convert() maf = maf %>% dplyr::left_join(smry %>% dplyr::group_by(CHROM, POS, REF, ALT) %>% From 3ee967cea5d8163300295346409f6f91f2c83c76 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 16:31:55 -0400 Subject: [PATCH 275/766] Update sufam_gt.R --- scripts/sufam_gt.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index bdf01e89..c3674a24 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -85,7 +85,8 @@ if (as.numeric(opt$option)==1) { for (i in 1:length(sample_set)) { maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } - maf = do.call(bind_rows, maf) + maf = do.call(bind_rows, maf) %>% + readr::type_convert() smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% dplyr::mutate(HOTSPOT = case_when( is.na(HOTSPOT) ~ FALSE, @@ -128,7 +129,8 @@ if (as.numeric(opt$option)==1) { for (i in 1:length(sample_set)) { maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } - maf = do.call(bind_rows, maf) + maf = do.call(bind_rows, maf) %>% + readr::type_convert() smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% dplyr::mutate(HOTSPOT = case_when( is.na(HOTSPOT) ~ FALSE, From 75659241f881a299c7c0895bcd8c848502852fbe Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 30 Oct 2022 19:43:55 -0400 Subject: [PATCH 276/766] Update sufam_gt.R --- scripts/sufam_gt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index c3674a24..0d808503 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -58,7 +58,7 @@ if (as.numeric(opt$option)==1) { readr::type_convert() %>% dplyr::select(CHROM = chrom, POS = pos, - REF = ref, + REF = val_ref, ALT = val_alt, t_depth = cov, t_alt_count = val_al_count) %>% From c9d5a7ec713d5b0424c5c383ad77166227ae796a Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 31 Oct 2022 17:53:01 -0400 Subject: [PATCH 277/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index d332ff00..fcfb25d2 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -3,7 +3,7 @@ include modules/Makefile.inc LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff 1024' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ From c0bbcb44be0abd55ad4cbd0892dc8c6a5c4a4ac7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 31 Oct 2022 18:03:54 -0400 Subject: [PATCH 278/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index fcfb25d2..5b98420c 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -3,7 +3,7 @@ include modules/Makefile.inc LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff 1024' +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY,QCFAIL' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ From c6c7ebf97d5c096b7ed6d7def655d356b2e51c96 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 19:30:31 -0400 Subject: [PATCH 279/766] pyclone --- Makefile | 21 +++++-------- ploidy/pyclone_multi_sample.mk | 54 ++++++++++++++++++++++++++++++++++ variant_callers/sufam_gt.mk | 2 +- 3 files changed, 63 insertions(+), 14 deletions(-) create mode 100644 ploidy/pyclone_multi_sample.mk diff --git a/Makefile b/Makefile index 02b38bb2..5a63d7b7 100644 --- a/Makefile +++ b/Makefile @@ -401,7 +401,7 @@ bam_to_fasta : #================================================== -# quality control +# QC #================================================== TARGETS += bam_metrics @@ -455,18 +455,13 @@ immune_deconv : #================================================== -# ploidy +# Ploidy/ Clonality #================================================== TARGETS += pyloh pyloh : $(call RUN_MAKE,modules/ploidy/pyloh.mk) - -#================================================== -# clonality -#================================================== - TARGETS += clonehd clonehd : $(call RUN_MAKE,modules/clonality/clonehd.mk) @@ -475,13 +470,13 @@ TARGETS += absolute_seq absolute_seq : $(call RUN_MAKE,modules/clonality/absoluteSeq.mk) -TARGETS += ms_pyclone -ms_pyclone : - $(call RUN_MAKE,modules/test/workflows/mspyclone.mk) +TARGETS += pyclone_single_sample +pyclone_single_sample : + $(call RUN_MAKE,modules/ploidy/pyclone_single_sample.mk) -TARGETS += ss_pyclone -ss_pyclone : - $(call RUN_MAKE,modules/test/workflows/pyclone.mk) +TARGETS += pyclone_multi_sample +pyclone_multi_sample : + $(call RUN_MAKE,modules/ploidy/pyclone_multi_sample.mk) #================================================== diff --git a/ploidy/pyclone_multi_sample.mk b/ploidy/pyclone_multi_sample.mk new file mode 100644 index 00000000..650d473c --- /dev/null +++ b/ploidy/pyclone_multi_sample.mk @@ -0,0 +1,54 @@ +include modules/Makefile.inc + +LOGDIR ?= log/pyclone_multi_sample.$(NOW) + +SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY,QCFAIL' + +pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) + + +define sufam-gt +sufam/$1.vcf : summary/tsv/all.tsv + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") + +sufam/$1.txt : sufam/$1.vcf bam/$1.bam + $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ + sufam \ + --sample_name $1 \ + $$(SUFAM_OPTS) \ + $$(REF_FASTA) \ + $$(<) \ + $$(<<) \ + > $$(@)") + +sufam/$1.maf : sufam/$1.vcf + $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$< \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") + + +endef +$(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call sufam-gt,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + R --version > version/pyclone_multi_sample.txt) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: pyclone diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 5b98420c..53251a05 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -93,4 +93,4 @@ sufam/mutation_summary_ft.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS) R --version > version/sufam_gt.txt) .DELETE_ON_ERROR: .SECONDARY: -.PHONY: +.PHONY: sufam_gt From bafba2f30666c9bd3a804ed99ebea081eaf72806 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 19:46:40 -0400 Subject: [PATCH 280/766] Update pyclone_multi_sample.mk --- ploidy/pyclone_multi_sample.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_multi_sample.mk b/ploidy/pyclone_multi_sample.mk index 650d473c..97e836be 100644 --- a/ploidy/pyclone_multi_sample.mk +++ b/ploidy/pyclone_multi_sample.mk @@ -3,7 +3,7 @@ include modules/Makefile.inc LOGDIR ?= log/pyclone_multi_sample.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY,QCFAIL' +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ From 19bb29d16d1314dd777e4b8d89afe856669b043b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 19:47:46 -0400 Subject: [PATCH 281/766] Update pyclone_multi_sample.mk --- ploidy/pyclone_multi_sample.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ploidy/pyclone_multi_sample.mk b/ploidy/pyclone_multi_sample.mk index 97e836be..fd318c2d 100644 --- a/ploidy/pyclone_multi_sample.mk +++ b/ploidy/pyclone_multi_sample.mk @@ -11,7 +11,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ define sufam-gt -sufam/$1.vcf : summary/tsv/all.tsv +pyclone/$1.vcf : pyclone/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ @@ -20,7 +20,7 @@ sufam/$1.vcf : summary/tsv/all.tsv --input_file $$(<) \ --output_file $$(@)") -sufam/$1.txt : sufam/$1.vcf bam/$1.bam +pyclone/$1.txt : pyclone/$1.vcf bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ sufam \ --sample_name $1 \ @@ -30,7 +30,7 @@ sufam/$1.txt : sufam/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") -sufam/$1.maf : sufam/$1.vcf +pyclone/$1.maf : pyclone/$1.vcf $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ $$(VCF2MAF) \ --input-vcf $$< \ From 556b46086e9d3d77c0d091407c8cbb031d28119a Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 19:48:15 -0400 Subject: [PATCH 282/766] Update pyclone_multi_sample.mk --- ploidy/pyclone_multi_sample.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_multi_sample.mk b/ploidy/pyclone_multi_sample.mk index fd318c2d..cc0bab16 100644 --- a/ploidy/pyclone_multi_sample.mk +++ b/ploidy/pyclone_multi_sample.mk @@ -11,7 +11,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ define sufam-gt -pyclone/$1.vcf : pyclone/tsv/all.tsv +pyclone/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ From a386858b7a8d6b3bd4780b2147e4a01468d42bce Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 20:26:20 -0400 Subject: [PATCH 283/766] ++ --- Makefile | 11 +- .../{pyclone_multi_sample.mk => pyclone.mk} | 4 +- scripts/pyclone.R | 171 ++++++++++++++++++ 3 files changed, 176 insertions(+), 10 deletions(-) rename ploidy/{pyclone_multi_sample.mk => pyclone.mk} (93%) create mode 100644 scripts/pyclone.R diff --git a/Makefile b/Makefile index 5a63d7b7..be02314c 100644 --- a/Makefile +++ b/Makefile @@ -470,14 +470,9 @@ TARGETS += absolute_seq absolute_seq : $(call RUN_MAKE,modules/clonality/absoluteSeq.mk) -TARGETS += pyclone_single_sample -pyclone_single_sample : - $(call RUN_MAKE,modules/ploidy/pyclone_single_sample.mk) - -TARGETS += pyclone_multi_sample -pyclone_multi_sample : - $(call RUN_MAKE,modules/ploidy/pyclone_multi_sample.mk) - +TARGETS += pyclone +pyclone : + $(call RUN_MAKE,modules/ploidy/pyclone.mk) #================================================== # mutational signatures diff --git a/ploidy/pyclone_multi_sample.mk b/ploidy/pyclone.mk similarity index 93% rename from ploidy/pyclone_multi_sample.mk rename to ploidy/pyclone.mk index cc0bab16..047cd07f 100644 --- a/ploidy/pyclone_multi_sample.mk +++ b/ploidy/pyclone.mk @@ -1,6 +1,6 @@ include modules/Makefile.inc -LOGDIR ?= log/pyclone_multi_sample.$(NOW) +LOGDIR ?= log/pyclone.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' @@ -48,7 +48,7 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call sufam-gt,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - R --version > version/pyclone_multi_sample.txt) + R --version > version/pyclone.txt) .DELETE_ON_ERROR: .SECONDARY: .PHONY: pyclone diff --git a/scripts/pyclone.R b/scripts/pyclone.R new file mode 100644 index 00000000..0d808503 --- /dev/null +++ b/scripts/pyclone.R @@ -0,0 +1,171 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_set", default = NA, type = 'character', help = "sample set"), + make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), + make_option("--input_file", default = NA, type = 'character', help = "input file"), + make_option("--output_file", default = NA, type = 'character', help = "output file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + sample_set = setdiff(sample_set, normal_sample) + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(TUMOR_SAMPLE %in% sample_set) %>% + dplyr::filter(NORMAL_SAMPLE == normal_sample) %>% + dplyr::mutate(UUID = paste0(CHROM, ":", POS, "_", REF, ">", ALT)) %>% + dplyr::filter(!duplicated(UUID)) %>% + dplyr::mutate(`#CHROM` = CHROM, + POS = POS, + ID = ".", + REF = REF, + ALT = ALT, + QUAL = 100, + FILTER = "PASS", + INFO = ".") %>% + dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, INFO) %>% + dplyr::mutate(chr_n = case_when( + `#CHROM` == "X" ~ "23", + `#CHROM` == "Y" ~ "24", + TRUE ~ `#CHROM` + )) %>% + readr::type_convert() %>% + dplyr::arrange(chr_n) %>% + dplyr::select(-chr_n) + cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) + readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) + +} else if (as.numeric(opt$option)==2) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + sample_set = setdiff(sample_set, normal_sample) + maf = list() + for (i in 1:length(sample_set)) { + sufam = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(CHROM = chrom, + POS = pos, + REF = val_ref, + ALT = val_alt, + t_depth = cov, + t_alt_count = val_al_count) %>% + dplyr::mutate(t_ref_count = t_depth - t_alt_count) + + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-t_depth, -t_alt_count, -t_ref_count) %>% + dplyr::bind_cols(sufam) + } + maf = do.call(bind_rows, maf) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==3) { + maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(t_alt_count > 0) %>% + dplyr::filter(t_ref_count > 0) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==4) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + maf = list() + for (i in 1:length(sample_set)) { + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) + } + maf = do.call(bind_rows, maf) %>% + readr::type_convert() + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::mutate(HOTSPOT = case_when( + is.na(HOTSPOT) ~ FALSE, + HOTSPOT == "True" ~ TRUE, + HOTSPOT == "False" ~ FALSE + )) %>% + dplyr::mutate(HOTSPOT_INTERNAL = case_when( + is.na(HOTSPOT_INTERNAL) ~ FALSE, + HOTSPOT_INTERNAL == "True" ~ TRUE, + HOTSPOT_INTERNAL == "False" ~ FALSE + )) %>% + dplyr::mutate(cmo_hotspot = case_when( + is.na(cmo_hotspot) ~ FALSE, + cmo_hotspot == "True" ~ TRUE, + cmo_hotspot == "False" ~ FALSE + )) %>% + dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% + dplyr::mutate(facetsLOHCall = case_when( + is.na(facetsLOHCall) ~ FALSE, + facetsLOHCall == "True" ~ TRUE, + facetsLOHCall == "False" ~ FALSE + )) %>% + dplyr::mutate(is_loh = facetsLOHCall) %>% + readr::type_convert() + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::group_by(CHROM, POS, REF, ALT) %>% + dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% + dplyr::ungroup(), + by = c("CHROM", "POS", "REF", "ALT")) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), + by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==5) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) + maf = list() + for (i in 1:length(sample_set)) { + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) + } + maf = do.call(bind_rows, maf) %>% + readr::type_convert() + smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::mutate(HOTSPOT = case_when( + is.na(HOTSPOT) ~ FALSE, + HOTSPOT == "True" ~ TRUE, + HOTSPOT == "False" ~ FALSE + )) %>% + dplyr::mutate(HOTSPOT_INTERNAL = case_when( + is.na(HOTSPOT_INTERNAL) ~ FALSE, + HOTSPOT_INTERNAL == "True" ~ TRUE, + HOTSPOT_INTERNAL == "False" ~ FALSE + )) %>% + dplyr::mutate(cmo_hotspot = case_when( + is.na(cmo_hotspot) ~ FALSE, + cmo_hotspot == "True" ~ TRUE, + cmo_hotspot == "False" ~ FALSE + )) %>% + dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% + dplyr::mutate(facetsLOHCall = case_when( + is.na(facetsLOHCall) ~ FALSE, + facetsLOHCall == "True" ~ TRUE, + facetsLOHCall == "False" ~ FALSE + )) %>% + dplyr::mutate(is_loh = facetsLOHCall) %>% + readr::type_convert() + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::group_by(CHROM, POS, REF, ALT) %>% + dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% + dplyr::ungroup(), + by = c("CHROM", "POS", "REF", "ALT")) + maf = maf %>% + dplyr::left_join(smry %>% + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), + by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} + From aaf5a29e997379bcbe734a563426f6889a755573 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:07:37 -0400 Subject: [PATCH 284/766] ++ --- ploidy/pyclone.mk | 20 ++++- scripts/pyclone.R | 197 +++++++++++++--------------------------------- 2 files changed, 71 insertions(+), 146 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 047cd07f..e5d2ac3b 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -7,10 +7,11 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) -define sufam-gt +define run-sufam pyclone/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ @@ -45,7 +46,20 @@ pyclone/$1.maf : pyclone/$1.vcf endef $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call sufam-gt,$(sample)))) + $(eval $(call run-sufam,$(sample)))) + +define run-pyclone +pyclone/$1.tsv : + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set $1 \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call run-pyclone,$(set)))) ..DUMMY := $(shell mkdir -p version; \ R --version > version/pyclone.txt) diff --git a/scripts/pyclone.R b/scripts/pyclone.R index 0d808503..5eab230a 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -4,6 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("fuzzyjoin")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -19,153 +20,63 @@ arguments = parse_args(parser, positional_arguments = T) opt = arguments$options if (as.numeric(opt$option)==1) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) - normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_")) + normal_sample = as.character(opt$normal_sample) sample_set = setdiff(sample_set, normal_sample) - smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::filter(TUMOR_SAMPLE %in% sample_set) %>% - dplyr::filter(NORMAL_SAMPLE == normal_sample) %>% - dplyr::mutate(UUID = paste0(CHROM, ":", POS, "_", REF, ">", ALT)) %>% - dplyr::filter(!duplicated(UUID)) %>% - dplyr::mutate(`#CHROM` = CHROM, - POS = POS, - ID = ".", - REF = REF, - ALT = ALT, - QUAL = 100, - FILTER = "PASS", - INFO = ".") %>% - dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, INFO) %>% - dplyr::mutate(chr_n = case_when( - `#CHROM` == "X" ~ "23", - `#CHROM` == "Y" ~ "24", - TRUE ~ `#CHROM` - )) %>% - readr::type_convert() %>% - dplyr::arrange(chr_n) %>% - dplyr::select(-chr_n) - cat("##fileformat=VCFv4.2\n", file = as.character(opt$output_file), append=FALSE) - readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) - -} else if (as.numeric(opt$option)==2) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) - normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) - sample_set = setdiff(sample_set, normal_sample) - maf = list() + pyclone = list() for (i in 1:length(sample_set)) { - sufam = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sufam = readr::read_tsv(file = paste0("pyclone/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::select(CHROM = chrom, - POS = pos, - REF = val_ref, - ALT = val_alt, + dplyr::select(Chromosome = chrom, + Position = pos, + Reference_Allele = val_ref, + Alternate_Allele = val_alt, t_depth = cov, t_alt_count = val_al_count) %>% - dplyr::mutate(t_ref_count = t_depth - t_alt_count) - - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::select(-t_depth, -t_alt_count, -t_ref_count) %>% - dplyr::bind_cols(sufam) - } - maf = do.call(bind_rows, maf) - write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==3) { - maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::filter(t_alt_count > 0) %>% - dplyr::filter(t_ref_count > 0) - write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==4) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) - maf = list() - for (i in 1:length(sample_set)) { - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) + dplyr::mutate(t_ref_count = t_depth - t_alt_count) %>% + dplyr::mutate(mutation_id = paste0(Chromosome, ":", Position, ":", Reference_Allele, ":", Alternate_Allele), + ref_counts = t_ref_count, + alt_counts = t_alt_count, + normal_cn = 2) + + facets = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>% + dplyr::mutate(Chromosome = chrom, + Start_Position = loc.start, + End_Position = loc.end, + minor_cn = ifelse(is.na(lcn.em), "0", lcn.em), + major_cn = tcn.em) %>% + readr::type_convert() %>% + dplyr::mutate(major_cn = major_cn - minor_cn) %>% + dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn) + + pyclone[[i]] = sufam %>% + dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>% + dplyr::mutate(Start_Position = Position, + End_Position = Position +1) %>% + readr::type_convert() %>% + fuzzyjoin::genome_left_join(facets %>% + dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>% + readr::type_convert(), + by = c("Chromosome", "Start_Position", "End_Position")) %>% + dplyr::mutate(sample_id = sample_set[i]) %>% + dplyr::select(mutation_id, sample_id, ref_counts, alt_counts, normal_cn, major_cn, minor_cn) + + params = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(grepl("# Purity", X1)) %>% + dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>% + readr::type_convert() %>% + .[["X1"]] + + pyclone[[i]] = pyclone[[i]] %>% + dplyr::mutate(tumour_content = params) } - maf = do.call(bind_rows, maf) %>% - readr::type_convert() - smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - dplyr::mutate(HOTSPOT = case_when( - is.na(HOTSPOT) ~ FALSE, - HOTSPOT == "True" ~ TRUE, - HOTSPOT == "False" ~ FALSE - )) %>% - dplyr::mutate(HOTSPOT_INTERNAL = case_when( - is.na(HOTSPOT_INTERNAL) ~ FALSE, - HOTSPOT_INTERNAL == "True" ~ TRUE, - HOTSPOT_INTERNAL == "False" ~ FALSE - )) %>% - dplyr::mutate(cmo_hotspot = case_when( - is.na(cmo_hotspot) ~ FALSE, - cmo_hotspot == "True" ~ TRUE, - cmo_hotspot == "False" ~ FALSE - )) %>% - dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% - dplyr::mutate(facetsLOHCall = case_when( - is.na(facetsLOHCall) ~ FALSE, - facetsLOHCall == "True" ~ TRUE, - facetsLOHCall == "False" ~ FALSE - )) %>% - dplyr::mutate(is_loh = facetsLOHCall) %>% - readr::type_convert() - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::group_by(CHROM, POS, REF, ALT) %>% - dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% - dplyr::ungroup(), - by = c("CHROM", "POS", "REF", "ALT")) - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), - by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) - write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==5) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) - maf = list() - for (i in 1:length(sample_set)) { - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) - } - maf = do.call(bind_rows, maf) %>% - readr::type_convert() - smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - dplyr::mutate(HOTSPOT = case_when( - is.na(HOTSPOT) ~ FALSE, - HOTSPOT == "True" ~ TRUE, - HOTSPOT == "False" ~ FALSE - )) %>% - dplyr::mutate(HOTSPOT_INTERNAL = case_when( - is.na(HOTSPOT_INTERNAL) ~ FALSE, - HOTSPOT_INTERNAL == "True" ~ TRUE, - HOTSPOT_INTERNAL == "False" ~ FALSE - )) %>% - dplyr::mutate(cmo_hotspot = case_when( - is.na(cmo_hotspot) ~ FALSE, - cmo_hotspot == "True" ~ TRUE, - cmo_hotspot == "False" ~ FALSE - )) %>% - dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% - dplyr::mutate(facetsLOHCall = case_when( - is.na(facetsLOHCall) ~ FALSE, - facetsLOHCall == "True" ~ TRUE, - facetsLOHCall == "False" ~ FALSE - )) %>% - dplyr::mutate(is_loh = facetsLOHCall) %>% - readr::type_convert() - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::group_by(CHROM, POS, REF, ALT) %>% - dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% - dplyr::ungroup(), - by = c("CHROM", "POS", "REF", "ALT")) - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), - by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) - write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - + pyclone = do.call(rbind, pyclone) %>% + dplyr::filter(!is.na(ref_counts)) %>% + dplyr::filter(!is.na(alt_counts)) %>% + dplyr::filter(!is.na(major_cn)) %>% + dplyr::filter(!is.na(minor_cn)) + + readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) } - From 39b88553dfe70cbda959b1cba9660b2604fa6045 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:10:34 -0400 Subject: [PATCH 285/766] Update pyclone.mk --- ploidy/pyclone.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index e5d2ac3b..b532f3d0 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -49,9 +49,9 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call run-sufam,$(sample)))) define run-pyclone -pyclone/$1.tsv : +pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ --option 1 \ --sample_set $1 \ --normal_sample '$(normal.$1)' \ From 90c83fb70b3deb3967ec2ebb453c7c68ea2aa26b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:13:53 -0400 Subject: [PATCH 286/766] ++ --- config.inc | 1 + ploidy/pyclone.mk | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/config.inc b/config.inc index 9c1bfc6a..419599b3 100644 --- a/config.inc +++ b/config.inc @@ -27,6 +27,7 @@ PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6 KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0 SUMREADS_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 +PYCLONE_ENV = $(HOME)/share/usr/env/pyclone-vi-0.1.2 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index b532f3d0..ad7a51dd 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -50,12 +50,12 @@ $(foreach sample,$(TUMOR_SAMPLES),\ define run-pyclone pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ - --option 1 \ - --sample_set $1 \ - --normal_sample '$(normal.$1)' \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + --option 1 \ + --sample_set $1 \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ From 3fd464bd4c6a2bf64a61911c03f941b3e53e0542 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:24:09 -0400 Subject: [PATCH 287/766] Update pyclone.mk --- ploidy/pyclone.mk | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index ad7a51dd..690c122a 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -8,7 +8,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) define run-sufam @@ -56,6 +57,18 @@ pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) --sample_set $1 \ --normal_sample '$(normal.$1)' \ --output_file $$(@)") + +pyclone/pyclone/$1.hd5 : pyclone/$1.tsv + $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ + pyclone-vi fit \ + --in-file $$(<) \ + --out-file $$(@) \ + --num-clusters 10 \ + --density beta-binomial \ + --num-grid-points 100 \ + --max-iters 10000 \ + --mix-weight-prior 1 \ + --precision 200") endef $(foreach set,$(SAMPLE_SETS),\ From 1e76bfd03300587f66e7035442ea6e591fbaf96f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:24:51 -0400 Subject: [PATCH 288/766] Update pyclone.mk --- ploidy/pyclone.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 690c122a..dcbc94df 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -58,7 +58,7 @@ pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) --normal_sample '$(normal.$1)' \ --output_file $$(@)") -pyclone/pyclone/$1.hd5 : pyclone/$1.tsv +pyclone/$1.hd5 : pyclone/$1.tsv $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ pyclone-vi fit \ --in-file $$(<) \ From 94d27c550a7b244516729388bc09d3fe44255ab1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 22:27:30 -0400 Subject: [PATCH 289/766] Update pyclone.mk --- ploidy/pyclone.mk | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index dcbc94df..9f27f7d6 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -7,12 +7,11 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) -define run-sufam +define r-sufam pyclone/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ @@ -32,24 +31,11 @@ pyclone/$1.txt : pyclone/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") -pyclone/$1.maf : pyclone/$1.vcf - $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ - $$(VCF2MAF) \ - --input-vcf $$< \ - --tumor-id $1 \ - --filter-vcf $$(EXAC_NONTCGA) \ - --ref-fasta $$(REF_FASTA) \ - --vep-path $$(VEP_PATH) \ - --vep-data $$(VEP_DATA) \ - --tmp-dir `mktemp -d` \ - --output-maf $$(@)") - - endef $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call run-sufam,$(sample)))) + $(eval $(call r-sufam,$(sample)))) -define run-pyclone +define r-pyclone pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ @@ -66,13 +52,13 @@ pyclone/$1.hd5 : pyclone/$1.tsv --num-clusters 10 \ --density beta-binomial \ --num-grid-points 100 \ - --max-iters 10000 \ + --max-iters 1000000 \ --mix-weight-prior 1 \ --precision 200") endef $(foreach set,$(SAMPLE_SETS),\ - $(eval $(call run-pyclone,$(set)))) + $(eval $(call r-pyclone,$(set)))) ..DUMMY := $(shell mkdir -p version; \ R --version > version/pyclone.txt) From 0bc038595bf9440bbd166c55cc171e8893dd5a98 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 23:27:19 -0400 Subject: [PATCH 290/766] Update pyclone.mk --- ploidy/pyclone.mk | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 9f27f7d6..1afe88a9 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -8,7 +8,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) define r-sufam @@ -54,7 +55,14 @@ pyclone/$1.hd5 : pyclone/$1.tsv --num-grid-points 100 \ --max-iters 1000000 \ --mix-weight-prior 1 \ - --precision 200") + --precision 200 \ + --num-restarts 100") + +pyclone/$1.txt : pyclone/$1.hd5 + $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV),"set -o pipefail && \ + write-results-file \ + --in-file $$(<) \ + --out-file $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ From 17a74f005f4851442389795bdd024c31d6cecf27 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 1 Nov 2022 23:30:33 -0400 Subject: [PATCH 291/766] Update pyclone.mk --- ploidy/pyclone.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 1afe88a9..a904bf91 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -60,7 +60,7 @@ pyclone/$1.hd5 : pyclone/$1.tsv pyclone/$1.txt : pyclone/$1.hd5 $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV),"set -o pipefail && \ - write-results-file \ + pyclone-vi write-results-file \ --in-file $$(<) \ --out-file $$(@)") From 5924ace289b30e6ffd52f08fcedd8fbf20941b6a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 12:10:54 -0400 Subject: [PATCH 292/766] Update pyclone.R --- scripts/pyclone.R | 55 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/scripts/pyclone.R b/scripts/pyclone.R index 5eab230a..cce4cb97 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -5,6 +5,8 @@ suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("fuzzyjoin")) +suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("reshape2")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -19,7 +21,7 @@ parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options -if (as.numeric(opt$option)==1) { +if (as.numeric(opt$option) == 1) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_")) normal_sample = as.character(opt$normal_sample) sample_set = setdiff(sample_set, normal_sample) @@ -79,4 +81,55 @@ if (as.numeric(opt$option)==1) { dplyr::filter(!is.na(minor_cn)) readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option) == 2) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + + pyclone_ft = list() + index = 1 + for (i in 1:(length(sample_set)-1)) { + for (j in 2:length(sample_set)) { + pyclone_ft[[index]] = pyclone %>% + dplyr::filter(sample_id == sample_set[i]) %>% + dplyr::rename(sample_id_x = sample_id, + cellular_prevalence_x = cellular_prevalence, + cellular_prevalence_std_x = cellular_prevalence_std) %>% + dplyr::full_join(pyclone %>% + dplyr::filter(sample_id == sample_set[j]) %>% + dplyr::rename(sample_id_y = sample_id, + cellular_prevalence_y = cellular_prevalence, + cellular_prevalence_std_y = cellular_prevalence_std)) %>% + readr::type_convert() + index = index + 1 + } + } + pyclone_ft = do.call(bind_rows, pyclone_ft) %>% + dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) + smry_c = pyclone_ft %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(cluster_id = unique(cluster_id)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(n = n()) + pyclone_ft = pyclone_ft %>% + dplyr::left_join(smry_c, by = "cluster_id") + + plot_ = pyclone_ft %>% + ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + + geom_point(stat = "identity", alpha = .75, shape = 21) + + scale_color_brewer(type = "qual", palette = 6) + + xlab("\n\nCCF (%)\n") + + ylab("\nCCF (%)\n\n") + + guides(color = guide_legend(title = "Cluster"), + size = guide_legend(title = "N")) + + facet_wrap(sample_id_x~sample_id_y) + pdf(file = as.character(opt$output_file), width = 21, height = 21) + print(plot_) + dev.off() + + + + } From 48ccbc47e53ab630e614302a650eb0d14bfa30e8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 12:13:32 -0400 Subject: [PATCH 293/766] Update pyclone.mk --- ploidy/pyclone.mk | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index a904bf91..68720161 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -9,7 +9,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set).pdf) define r-sufam @@ -59,10 +60,18 @@ pyclone/$1.hd5 : pyclone/$1.tsv --num-restarts 100") pyclone/$1.txt : pyclone/$1.hd5 - $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV),"set -o pipefail && \ - pyclone-vi write-results-file \ - --in-file $$(<) \ - --out-file $$(@)") + $$(call RUN,-c -n 1 -s 8 -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + pyclone-vi write-results-file \ + --in-file $$(<) \ + --out-file $$(@)") + +pyclone/$1.pdf : pyclone/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + --option 2 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ From afd521e092454cc10ca287aaaebf05396d7e5c05 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 12:16:57 -0400 Subject: [PATCH 294/766] Update pyclone.mk --- ploidy/pyclone.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 68720161..4a841eaa 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -60,7 +60,7 @@ pyclone/$1.hd5 : pyclone/$1.tsv --num-restarts 100") pyclone/$1.txt : pyclone/$1.hd5 - $$(call RUN,-c -n 1 -s 8 -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ pyclone-vi write-results-file \ --in-file $$(<) \ --out-file $$(@)") From a0375e5c88a26d087eca7285aaa5fb99ef4c1f44 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 12:31:26 -0400 Subject: [PATCH 295/766] Update pyclone.R --- scripts/pyclone.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/pyclone.R b/scripts/pyclone.R index cce4cb97..acca540b 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -113,9 +113,17 @@ if (as.numeric(opt$option) == 1) { dplyr::ungroup() %>% dplyr::group_by(cluster_id) %>% dplyr::summarize(n = n()) - pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_c, by = "cluster_id") + smry_p = pyclone %>% + dplyr::group_by(cluster_id, sample_id) %>% + dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) + pyclone_ft = pyclone_ft %>% + dplyr::left_join(smry_c, by = "cluster_id") %>% + dplyr::left_join(smry_p, by = "cluster_id") + plot_ = pyclone_ft %>% ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + geom_point(stat = "identity", alpha = .75, shape = 21) + @@ -128,8 +136,5 @@ if (as.numeric(opt$option) == 1) { pdf(file = as.character(opt$output_file), width = 21, height = 21) print(plot_) dev.off() - - - } From 599deeaa6e2868f13776408c168cccb1f0429bf7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 14:13:50 -0400 Subject: [PATCH 296/766] ++ --- ploidy/pyclone.mk | 13 +++++++++++++ scripts/pyclone.R | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 4a841eaa..fae19fe0 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -7,6 +7,7 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) \ @@ -32,6 +33,18 @@ pyclone/$1.txt : pyclone/$1.vcf bam/$1.bam $$(<) \ $$(<<) \ > $$(@)") + +sufam/$1.maf : sufam/$1.vcf + $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$< \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") endef $(foreach sample,$(TUMOR_SAMPLES),\ diff --git a/scripts/pyclone.R b/scripts/pyclone.R index acca540b..792435c1 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -72,7 +72,7 @@ if (as.numeric(opt$option) == 1) { .[["X1"]] pyclone[[i]] = pyclone[[i]] %>% - dplyr::mutate(tumour_content = params) + dplyr::mutate(tumour_content = params/1.5) } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% From 65170ff4c852a4020edac4b8a0c2b0a7149bbbc6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 14:17:35 -0400 Subject: [PATCH 297/766] Update pyclone.mk --- ploidy/pyclone.mk | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index fae19fe0..28f6430c 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -5,17 +5,17 @@ LOGDIR ?= log/pyclone.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set).pdf) +pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).maf) +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) \ +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).pdf) define r-sufam -pyclone/$1.vcf : summary/tsv/all.tsv +pyclone/$1/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ @@ -24,7 +24,7 @@ pyclone/$1.vcf : summary/tsv/all.tsv --input_file $$(<) \ --output_file $$(@)") -pyclone/$1.txt : pyclone/$1.vcf bam/$1.bam +pyclone/$1/$1.txt : pyclone/$1/$1.vcf bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ sufam \ --sample_name $1 \ @@ -34,7 +34,7 @@ pyclone/$1.txt : pyclone/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") -sufam/$1.maf : sufam/$1.vcf +pyclone/$1/$1.maf : pyclone/$1/$1.vcf $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ $$(VCF2MAF) \ --input-vcf $$< \ From f878809bce58c1617a598cf153d937677e159704 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 14:25:34 -0400 Subject: [PATCH 298/766] ++ --- ploidy/pyclone.mk | 21 +++++++++++---------- scripts/pyclone.R | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 28f6430c..d76ad6d8 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -7,11 +7,12 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).maf) -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).tsv) \ -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).hd5) \ -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).txt) \ -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set).pdf) + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).maf) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).tsv) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).hd5) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)_CCF_PSP.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)_CCF_HM.pdf) define r-sufam @@ -51,7 +52,7 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) define r-pyclone -pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) +pyclone/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ --option 1 \ @@ -59,7 +60,7 @@ pyclone/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample).txt) --normal_sample '$(normal.$1)' \ --output_file $$(@)") -pyclone/$1.hd5 : pyclone/$1.tsv +pyclone/$1/$1.hd5 : pyclone/$1/$1.tsv $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ pyclone-vi fit \ --in-file $$(<) \ @@ -69,16 +70,16 @@ pyclone/$1.hd5 : pyclone/$1.tsv --num-grid-points 100 \ --max-iters 1000000 \ --mix-weight-prior 1 \ - --precision 200 \ + --precision 500 \ --num-restarts 100") -pyclone/$1.txt : pyclone/$1.hd5 +pyclone/$1/$1.txt : pyclone/$1/$1.hd5 $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ pyclone-vi write-results-file \ --in-file $$(<) \ --out-file $$(@)") -pyclone/$1.pdf : pyclone/$1.txt +pyclone/$1/$1_CCF_PSP.pdf : pyclone/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ --option 2 \ diff --git a/scripts/pyclone.R b/scripts/pyclone.R index 792435c1..4b135aba 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -27,7 +27,7 @@ if (as.numeric(opt$option) == 1) { sample_set = setdiff(sample_set, normal_sample) pyclone = list() for (i in 1:length(sample_set)) { - sufam = readr::read_tsv(file = paste0("pyclone/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sufam = readr::read_tsv(file = paste0("pyclone/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::select(Chromosome = chrom, Position = pos, From 3171eba86ac6933445b852c48f8207a11f75a61e Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 15:44:19 -0400 Subject: [PATCH 299/766] Update pyclone.mk --- ploidy/pyclone.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index d76ad6d8..cd56b687 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -11,8 +11,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).hd5) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)_CCF_PSP.pdf) \ -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)_CCF_HM.pdf) +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__HM__.pdf) define r-sufam @@ -79,7 +79,7 @@ pyclone/$1/$1.txt : pyclone/$1/$1.hd5 --in-file $$(<) \ --out-file $$(@)") -pyclone/$1/$1_CCF_PSP.pdf : pyclone/$1/$1.txt +pyclone/$1/$1__CCF__.pdf : pyclone/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ --option 2 \ From e23ff6c8f7839eb476dd8f063e8d061ce8b9b786 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 15:44:59 -0400 Subject: [PATCH 300/766] Update pyclone.mk --- ploidy/pyclone.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index cd56b687..8e6c09c1 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -10,8 +10,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) # $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__HM__.pdf) @@ -79,7 +79,7 @@ pyclone/$1/$1.txt : pyclone/$1/$1.hd5 --in-file $$(<) \ --out-file $$(@)") -pyclone/$1/$1__CCF__.pdf : pyclone/$1/$1.txt +pyclone/$1/$1__PS__.pdf : pyclone/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ --option 2 \ From 353fece82e8a839bf86bb8a7d9ec3038c5b3be49 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 17:24:34 -0400 Subject: [PATCH 301/766] ++ --- ploidy/pyclone.mk | 12 +++++++++-- scripts/pyclone.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone.mk b/ploidy/pyclone.mk index 8e6c09c1..bc363b36 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone.mk @@ -11,8 +11,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).hd5) \ $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) -# $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__HM__.pdf) + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__HM__.pdf) define r-sufam @@ -86,6 +86,14 @@ pyclone/$1/$1__PS__.pdf : pyclone/$1/$1.txt --sample_set '$(tumors.$1)' \ --input_file $$(<) \ --output_file $$(@)") + +pyclone/$1/$1__HM__.pdf : pyclone/$1/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + --option 3 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ diff --git a/scripts/pyclone.R b/scripts/pyclone.R index 4b135aba..d7866b1c 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -7,6 +7,7 @@ suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("reshape2")) +suppressPackageStartupMessages(library("superheat")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -137,4 +138,55 @@ if (as.numeric(opt$option) == 1) { print(plot_) dev.off() +} else if (as.numeric(opt$option) == 3) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_id = paste0(sample_id, " ")) + + pyclone_mt = pyclone %>% + reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>% + dplyr::left_join(pyclone %>% + dplyr::select(mutation_id, cluster_id), by = "mutation_id") + + smry_cl = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::arrange(desc(mean)) %>% + dplyr::mutate(cluster_id_ordered = nrow(.):1) + + pyclone_mt = pyclone_mt %>% + dplyr::left_join(smry_cl, by = "cluster_id") + + index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) + pyclone_mt = pyclone_mt[index,,drop=FALSE] + pyclone_mt = pyclone_mt %>% + dplyr::arrange(cluster_id_ordered) + + + pdf(file = as.character(opt$output_file), width = 10, height = 21) + superheat(X = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), + membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], + pretty.order.rows = FALSE, + pretty.order.cols = TRUE, + row.dendrogram = FALSE, + col.dendrogram = FALSE, + smooth.heat = FALSE, + scale = FALSE, + heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + legend = FALSE, + grid.hline = TRUE, + grid.vline = TRUE, + force.grid.hline = TRUE, + force.grid.vline = TRUE, + grid.hline.col = "white", + grid.vline.col = "white", + grid.hline.size = .1, + grid.vline.size = 1, + bottom.label.text.angle = 90, + bottom.label.text.alignment = "right") + dev.off() + } From 4cbf896ed276a1b6417323f16edc3d59e278d6d2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 18:14:57 -0400 Subject: [PATCH 302/766] Update pyclone.R --- scripts/pyclone.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/pyclone.R b/scripts/pyclone.R index d7866b1c..67742fc9 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -73,13 +73,13 @@ if (as.numeric(opt$option) == 1) { .[["X1"]] pyclone[[i]] = pyclone[[i]] %>% - dplyr::mutate(tumour_content = params/1.5) + dplyr::mutate(tumour_content = params) } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% dplyr::filter(!is.na(alt_counts)) %>% - dplyr::filter(!is.na(major_cn)) %>% - dplyr::filter(!is.na(minor_cn)) + dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>% + dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) @@ -147,7 +147,8 @@ if (as.numeric(opt$option) == 1) { pyclone_mt = pyclone %>% reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>% dplyr::left_join(pyclone %>% - dplyr::select(mutation_id, cluster_id), by = "mutation_id") + dplyr::select(mutation_id, cluster_id) %>% + dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") smry_cl = pyclone %>% dplyr::group_by(cluster_id) %>% @@ -177,13 +178,13 @@ if (as.numeric(opt$option) == 1) { scale = FALSE, heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), legend = FALSE, - grid.hline = TRUE, + grid.hline = FALSE, grid.vline = TRUE, force.grid.hline = TRUE, force.grid.vline = TRUE, grid.hline.col = "white", grid.vline.col = "white", - grid.hline.size = .1, + grid.hline.size = .05, grid.vline.size = 1, bottom.label.text.angle = 90, bottom.label.text.alignment = "right") From df5a9435312002b1617aa20a4f0ae02ddeaf9801 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 2 Nov 2022 21:35:26 -0400 Subject: [PATCH 303/766] Update pyclone.R --- scripts/pyclone.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone.R b/scripts/pyclone.R index 67742fc9..5bfebbef 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone.R @@ -91,7 +91,7 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = list() index = 1 for (i in 1:(length(sample_set)-1)) { - for (j in 2:length(sample_set)) { + for (j in (i+1):length(sample_set)) { pyclone_ft[[index]] = pyclone %>% dplyr::filter(sample_id == sample_set[i]) %>% dplyr::rename(sample_id_x = sample_id, From ed961a0da01a6d8076011213eec29c5990e14407 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 15:58:21 -0400 Subject: [PATCH 304/766] pyclone-vi --- Makefile | 6 ++-- ploidy/{pyclone.mk => pyclone_vi.mk} | 42 ++++++++++++++-------------- scripts/{pyclone.R => pyclone_vi.R} | 2 +- 3 files changed, 25 insertions(+), 25 deletions(-) rename ploidy/{pyclone.mk => pyclone_vi.mk} (66%) rename scripts/{pyclone.R => pyclone_vi.R} (98%) diff --git a/Makefile b/Makefile index be02314c..13f4ca9e 100644 --- a/Makefile +++ b/Makefile @@ -470,9 +470,9 @@ TARGETS += absolute_seq absolute_seq : $(call RUN_MAKE,modules/clonality/absoluteSeq.mk) -TARGETS += pyclone -pyclone : - $(call RUN_MAKE,modules/ploidy/pyclone.mk) +TARGETS += pyclone_vi +pyclone_vi : + $(call RUN_MAKE,modules/ploidy/pyclone_vi.mk) #================================================== # mutational signatures diff --git a/ploidy/pyclone.mk b/ploidy/pyclone_vi.mk similarity index 66% rename from ploidy/pyclone.mk rename to ploidy/pyclone_vi.mk index bc363b36..c10994b0 100644 --- a/ploidy/pyclone.mk +++ b/ploidy/pyclone_vi.mk @@ -1,22 +1,22 @@ include modules/Makefile.inc -LOGDIR ?= log/pyclone.$(NOW) +LOGDIR ?= log/pyclone_vi.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).tsv) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__PS__.pdf) \ - $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/$(set)__HM__.pdf) +pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).maf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) define r-sufam -pyclone/$1/$1.vcf : summary/tsv/all.tsv +pyclone_vi/$1/$1.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 1 \ @@ -25,7 +25,7 @@ pyclone/$1/$1.vcf : summary/tsv/all.tsv --input_file $$(<) \ --output_file $$(@)") -pyclone/$1/$1.txt : pyclone/$1/$1.vcf bam/$1.bam +pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.vcf bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ sufam \ --sample_name $1 \ @@ -35,7 +35,7 @@ pyclone/$1/$1.txt : pyclone/$1/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") -pyclone/$1/$1.maf : pyclone/$1/$1.vcf +pyclone_vi/$1/$1.maf : pyclone_vi/$1/$1.vcf $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ $$(VCF2MAF) \ --input-vcf $$< \ @@ -52,15 +52,15 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) define r-pyclone -pyclone/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(sample)/$(sample).txt) +pyclone_vi/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 1 \ --sample_set $1 \ --normal_sample '$(normal.$1)' \ --output_file $$(@)") -pyclone/$1/$1.hd5 : pyclone/$1/$1.tsv +pyclone_vi/$1/$1.hd5 : pyclone_vi/$1/$1.tsv $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ pyclone-vi fit \ --in-file $$(<) \ @@ -73,23 +73,23 @@ pyclone/$1/$1.hd5 : pyclone/$1/$1.tsv --precision 500 \ --num-restarts 100") -pyclone/$1/$1.txt : pyclone/$1/$1.hd5 +pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.hd5 $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ pyclone-vi write-results-file \ --in-file $$(<) \ --out-file $$(@)") -pyclone/$1/$1__PS__.pdf : pyclone/$1/$1.txt +pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 2 \ --sample_set '$(tumors.$1)' \ --input_file $$(<) \ --output_file $$(@)") -pyclone/$1/$1__HM__.pdf : pyclone/$1/$1.txt +pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 3 \ --sample_set '$(tumors.$1)' \ --input_file $$(<) \ @@ -100,7 +100,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone,$(set)))) ..DUMMY := $(shell mkdir -p version; \ - R --version > version/pyclone.txt) + R --version > version/pyclone_vi.txt) .DELETE_ON_ERROR: .SECONDARY: .PHONY: pyclone diff --git a/scripts/pyclone.R b/scripts/pyclone_vi.R similarity index 98% rename from scripts/pyclone.R rename to scripts/pyclone_vi.R index 5bfebbef..378b3160 100644 --- a/scripts/pyclone.R +++ b/scripts/pyclone_vi.R @@ -28,7 +28,7 @@ if (as.numeric(opt$option) == 1) { sample_set = setdiff(sample_set, normal_sample) pyclone = list() for (i in 1:length(sample_set)) { - sufam = readr::read_tsv(file = paste0("pyclone/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sufam = readr::read_tsv(file = paste0("pyclone_vi/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::select(Chromosome = chrom, Position = pos, From 53fb768b98a764547a62286be32350f8a2afb8dc Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:05:38 -0400 Subject: [PATCH 305/766] pyclone 13 --- Makefile | 4 ++ ploidy/pyclone_13.mk | 106 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 ploidy/pyclone_13.mk diff --git a/Makefile b/Makefile index 13f4ca9e..2c138d64 100644 --- a/Makefile +++ b/Makefile @@ -470,6 +470,10 @@ TARGETS += absolute_seq absolute_seq : $(call RUN_MAKE,modules/clonality/absoluteSeq.mk) +TARGETS += pyclone_13 +pyclone_13 : + $(call RUN_MAKE,modules/ploidy/pyclone_13.mk) + TARGETS += pyclone_vi pyclone_vi : $(call RUN_MAKE,modules/ploidy/pyclone_vi.mk) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk new file mode 100644 index 00000000..01b9fbe8 --- /dev/null +++ b/ploidy/pyclone_13.mk @@ -0,0 +1,106 @@ +include modules/Makefile.inc + +LOGDIR ?= log/pyclone_13.$(NOW) + +SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' + +pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) + + +define r-sufam +pyclone_13/$1/$1.vcf : summary/tsv/all.tsv + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") + +pyclone_13/$1/$1.txt : pyclone_13/$1/$1.vcf bam/$1.bam + $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ + sufam \ + --sample_name $1 \ + $$(SUFAM_OPTS) \ + $$(REF_FASTA) \ + $$(<) \ + $$(<<) \ + > $$(@)") + +pyclone_13/$1/$1.maf : pyclone_13/$1/$1.vcf + $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$< \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") + +endef +$(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call r-sufam,$(sample)))) + +define r-pyclone +pyclone_vi/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + --option 1 \ + --sample_set $1 \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") + +pyclone_vi/$1/$1.hd5 : pyclone_vi/$1/$1.tsv + $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ + pyclone-vi fit \ + --in-file $$(<) \ + --out-file $$(@) \ + --num-clusters 10 \ + --density beta-binomial \ + --num-grid-points 100 \ + --max-iters 1000000 \ + --mix-weight-prior 1 \ + --precision 500 \ + --num-restarts 100") + +pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.hd5 + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + pyclone-vi write-results-file \ + --in-file $$(<) \ + --out-file $$(@)") + +pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + --option 2 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") + +pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + --option 3 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call r-pyclone,$(set)))) + +..DUMMY := $(shell mkdir -p version; \ + R --version > version/pyclone_vi.txt) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: pyclone From b24124dd0932845c591f3572e2afc07bbe26143d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:06:06 -0400 Subject: [PATCH 306/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 01b9fbe8..e7ce31e4 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -100,7 +100,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone,$(set)))) ..DUMMY := $(shell mkdir -p version; \ - R --version > version/pyclone_vi.txt) + R --version > version/pyclone_13.txt) .DELETE_ON_ERROR: .SECONDARY: .PHONY: pyclone From 009635c819f310a73d5561b9c1b92958da7f6fe2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:21:04 -0400 Subject: [PATCH 307/766] Create pyclone_13.R --- scripts/pyclone_13.R | 204 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 scripts/pyclone_13.R diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R new file mode 100644 index 00000000..39c688db --- /dev/null +++ b/scripts/pyclone_13.R @@ -0,0 +1,204 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("fuzzyjoin")) +suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("reshape2")) +suppressPackageStartupMessages(library("superheat")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_set", default = NA, type = 'character', help = "sample set"), + make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), + make_option("--input_file", default = NA, type = 'character', help = "input file"), + make_option("--output_file", default = NA, type = 'character', help = "output file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option) == 1) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_")) + normal_sample = as.character(opt$normal_sample) + sample_set = setdiff(sample_set, normal_sample) + pyclone = params = list() + for (i in 1:length(sample_set)) { + sufam = readr::read_tsv(file = paste0("pyclone_13/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(Chromosome = chrom, + Position = pos, + Reference_Allele = val_ref, + Alternate_Allele = val_alt, + t_depth = cov, + t_alt_count = val_al_count) %>% + dplyr::mutate(t_ref_count = t_depth - t_alt_count) %>% + dplyr::mutate(mutation_id = paste0(Chromosome, ":", Position, ":", Reference_Allele, ":", Alternate_Allele), + ref_counts = t_ref_count, + var_counts = t_alt_count, + normal_cn = 2) + + facets = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>% + dplyr::mutate(Chromosome = chrom, + Start_Position = loc.start, + End_Position = loc.end, + minor_cn = ifelse(is.na(lcn.em), "0", lcn.em), + major_cn = tcn.em) %>% + readr::type_convert() %>% + dplyr::mutate(major_cn = major_cn - minor_cn) %>% + dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn) + + pyclone[[i]] = sufam %>% + dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>% + dplyr::mutate(Start_Position = Position, + End_Position = Position +1) %>% + readr::type_convert() %>% + fuzzyjoin::genome_left_join(facets %>% + dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>% + readr::type_convert(), + by = c("Chromosome", "Start_Position", "End_Position")) %>% + dplyr::mutate(sample_id = sample_set[i]) %>% + dplyr::select(mutation_id, sample_id, ref_counts, alt_counts, normal_cn, major_cn, minor_cn) + + params[[i]] = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(grepl("# Purity", X1)) %>% + dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>% + readr::type_convert() %>% + .[["X1"]] + } + pyclone = do.call(rbind, pyclone) %>% + dplyr::filter(!is.na(ref_counts)) %>% + dplyr::filter(!is.na(var_counts)) %>% + dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>% + dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) + + smry = pyclone %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(n = n()) %>% + dplyr::ungroup() + + pyclone = pyclone %>% + dplyr::left_join(smry, by = "mutation_id") %>% + dplyr::filter(n != length(sample_set)) + + for (i in 1:length(sample_set)) { + pyclone_ft = pyclone %>% + dplyr::filter(sample_id == sample_set[i]) %>% + dplyr::select(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn) + readr::write_tsv(x = pyclone_ft, file = paste0("pyclone_13/", opt$sample_set, "/", sample_set[i]), append = FALSE, col_names = TRUE) + } + +} else if (as.numeric(opt$option) == 2) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + + pyclone_ft = list() + index = 1 + for (i in 1:(length(sample_set)-1)) { + for (j in (i+1):length(sample_set)) { + pyclone_ft[[index]] = pyclone %>% + dplyr::filter(sample_id == sample_set[i]) %>% + dplyr::rename(sample_id_x = sample_id, + cellular_prevalence_x = cellular_prevalence, + cellular_prevalence_std_x = cellular_prevalence_std) %>% + dplyr::full_join(pyclone %>% + dplyr::filter(sample_id == sample_set[j]) %>% + dplyr::rename(sample_id_y = sample_id, + cellular_prevalence_y = cellular_prevalence, + cellular_prevalence_std_y = cellular_prevalence_std)) %>% + readr::type_convert() + index = index + 1 + } + } + pyclone_ft = do.call(bind_rows, pyclone_ft) %>% + dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) + smry_c = pyclone_ft %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(cluster_id = unique(cluster_id)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(n = n()) + smry_p = pyclone %>% + dplyr::group_by(cluster_id, sample_id) %>% + dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) + + pyclone_ft = pyclone_ft %>% + dplyr::left_join(smry_c, by = "cluster_id") %>% + dplyr::left_join(smry_p, by = "cluster_id") + + plot_ = pyclone_ft %>% + ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + + geom_point(stat = "identity", alpha = .75, shape = 21) + + scale_color_brewer(type = "qual", palette = 6) + + xlab("\n\nCCF (%)\n") + + ylab("\nCCF (%)\n\n") + + guides(color = guide_legend(title = "Cluster"), + size = guide_legend(title = "N")) + + facet_wrap(sample_id_x~sample_id_y) + pdf(file = as.character(opt$output_file), width = 21, height = 21) + print(plot_) + dev.off() + +} else if (as.numeric(opt$option) == 3) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_id = paste0(sample_id, " ")) + + pyclone_mt = pyclone %>% + reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>% + dplyr::left_join(pyclone %>% + dplyr::select(mutation_id, cluster_id) %>% + dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") + + smry_cl = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::arrange(desc(mean)) %>% + dplyr::mutate(cluster_id_ordered = nrow(.):1) + + pyclone_mt = pyclone_mt %>% + dplyr::left_join(smry_cl, by = "cluster_id") + + index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) + pyclone_mt = pyclone_mt[index,,drop=FALSE] + pyclone_mt = pyclone_mt %>% + dplyr::arrange(cluster_id_ordered) + + + pdf(file = as.character(opt$output_file), width = 10, height = 21) + superheat(X = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), + membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], + pretty.order.rows = FALSE, + pretty.order.cols = TRUE, + row.dendrogram = FALSE, + col.dendrogram = FALSE, + smooth.heat = FALSE, + scale = FALSE, + heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + legend = FALSE, + grid.hline = FALSE, + grid.vline = TRUE, + force.grid.hline = TRUE, + force.grid.vline = TRUE, + grid.hline.col = "white", + grid.vline.col = "white", + grid.hline.size = .05, + grid.vline.size = 1, + bottom.label.text.angle = 90, + bottom.label.text.alignment = "right") + dev.off() + +} From 5b928ae8e1e2eef8923746d9cff09f0f5a42fd64 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:24:34 -0400 Subject: [PATCH 308/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 47 ++++++-------------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index e7ce31e4..08ae787b 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -7,8 +7,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ @@ -52,49 +52,14 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) define r-pyclone -pyclone_vi/$1/$1.tsv : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) +pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 1 \ --sample_set $1 \ - --normal_sample '$(normal.$1)' \ - --output_file $$(@)") + --normal_sample '$(normal.$1)' && \ + echo 'taskcomplete' > $$(@)") -pyclone_vi/$1/$1.hd5 : pyclone_vi/$1/$1.tsv - $$(call RUN,-c -n 1 -s 12G -m 24G -v $(PYCLONE_ENV) -w 72:00:00,"set -o pipefail && \ - pyclone-vi fit \ - --in-file $$(<) \ - --out-file $$(@) \ - --num-clusters 10 \ - --density beta-binomial \ - --num-grid-points 100 \ - --max-iters 1000000 \ - --mix-weight-prior 1 \ - --precision 500 \ - --num-restarts 100") - -pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.hd5 - $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ - pyclone-vi write-results-file \ - --in-file $$(<) \ - --out-file $$(@)") - -pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt - $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ - --option 2 \ - --sample_set '$(tumors.$1)' \ - --input_file $$(<) \ - --output_file $$(@)") - -pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt - $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ - --option 3 \ - --sample_set '$(tumors.$1)' \ - --input_file $$(<) \ - --output_file $$(@)") - endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone,$(set)))) From c89a32dbeba5306bed4fab6f154a53dcd7b8a917 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:54:52 -0400 Subject: [PATCH 309/766] ++ --- ploidy/pyclone_13.mk | 18 ++++++++++++++++-- scripts/pyclone_13.R | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 08ae787b..9f40e865 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -51,7 +51,7 @@ endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) -define r-pyclone +define r-pyclone-input pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ @@ -62,8 +62,22 @@ pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(s endef $(foreach set,$(SAMPLE_SETS),\ - $(eval $(call r-pyclone,$(set)))) + $(eval $(call r-pyclone-input,$(set)))) +define r-pyclone-process +pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ + PyClone build_mutations_file \ + --in_file pyclone_13/$1/$2.tsv \ + --out_file $(@) \ + --prior total_copy_number") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(foreach sample,$(tumor.$(set)),\ + $(eval $(call r-pyclone-process,$(set),$(sample))))) + + ..DUMMY := $(shell mkdir -p version; \ R --version > version/pyclone_13.txt) .DELETE_ON_ERROR: diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 39c688db..ac6a055a 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -4,8 +4,8 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("reshape2")) suppressPackageStartupMessages(library("superheat")) @@ -91,7 +91,7 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = pyclone %>% dplyr::filter(sample_id == sample_set[i]) %>% dplyr::select(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn) - readr::write_tsv(x = pyclone_ft, file = paste0("pyclone_13/", opt$sample_set, "/", sample_set[i]), append = FALSE, col_names = TRUE) + readr::write_tsv(x = pyclone_ft, file = paste0("pyclone_13/", opt$sample_set, "/", sample_set[i], ".tsv"), append = FALSE, col_names = TRUE) } } else if (as.numeric(opt$option) == 2) { From 0b0321cdc15217ee584725b52c2fedaf12f0214e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 16:58:04 -0400 Subject: [PATCH 310/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index ac6a055a..220f6d88 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -63,7 +63,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert(), by = c("Chromosome", "Start_Position", "End_Position")) %>% dplyr::mutate(sample_id = sample_set[i]) %>% - dplyr::select(mutation_id, sample_id, ref_counts, alt_counts, normal_cn, major_cn, minor_cn) + dplyr::select(mutation_id, sample_id, ref_counts, var_counts, normal_cn, major_cn, minor_cn) params[[i]] = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% From 8d0ec0b2086868eead37daf7d26b12e5664c1a25 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:00:16 -0400 Subject: [PATCH 311/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 220f6d88..046ae9b0 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -85,7 +85,7 @@ if (as.numeric(opt$option) == 1) { pyclone = pyclone %>% dplyr::left_join(smry, by = "mutation_id") %>% - dplyr::filter(n != length(sample_set)) + dplyr::filter(n == length(sample_set)) for (i in 1:length(sample_set)) { pyclone_ft = pyclone %>% From 676c7a271c93c71b96f70970a02e684bf16f801e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:05:18 -0400 Subject: [PATCH 312/766] ++ --- config.inc | 1 + ploidy/pyclone_13.mk | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/config.inc b/config.inc index 419599b3..3b597e92 100644 --- a/config.inc +++ b/config.inc @@ -28,6 +28,7 @@ KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0 SUMREADS_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 PYCLONE_ENV = $(HOME)/share/usr/env/pyclone-vi-0.1.2 +PYCLONE_13_ENV = $(HOME)/share/usr/env/pyclone-0.13.1 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 9f40e865..dfea6121 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -8,8 +8,9 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ + $(foreach set,$(SAMPLE_SETS), \ + $(foreach sample,$(tumor.$(set)),pyclone/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) From eff761a99dc9c110ecf10e41a8069f80efe35609 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:06:45 -0400 Subject: [PATCH 313/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index dfea6121..c363c490 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -10,7 +10,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(tumor.$(set)),pyclone/$(set)/$(sample).yaml)) + $(foreach sample,$(tumors.$(set)),pyclone/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) @@ -75,7 +75,7 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ - $(foreach sample,$(tumor.$(set)),\ + $(foreach sample,$(tumors.$(set)),\ $(eval $(call r-pyclone-process,$(set),$(sample))))) From f6c9aa7f0420ebacc1c59ca6b185e83313c331c0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:09:36 -0400 Subject: [PATCH 314/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index c363c490..70a208db 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -76,7 +76,7 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ $(foreach sample,$(tumors.$(set)),\ - $(eval $(call r-pyclone-process,$(set),$(sample))))) + $(eval $(call r-pyclone-process,$$(set),$$(sample))))) ..DUMMY := $(shell mkdir -p version; \ From 057d138cee95ffc82ae38a3aa46394feacd45c7c Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:13:53 -0400 Subject: [PATCH 315/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 70a208db..5b3aa412 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -66,7 +66,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-input,$(set)))) define r-pyclone-process -pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete +pyclone_13/$1/$2.yaml : #pyclone_13/$1/$1.taskcomplete $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ --in_file pyclone_13/$1/$2.tsv \ From cd99add1d08cff26bc1de83d09bcb9eda5a7a3ca Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:14:31 -0400 Subject: [PATCH 316/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 5b3aa412..1f497018 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -75,7 +75,7 @@ pyclone_13/$1/$2.yaml : #pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ - $(foreach sample,$(tumors.$(set)),\ + $(foreach sample,$$(tumors.$$(set)),\ $(eval $(call r-pyclone-process,$$(set),$$(sample))))) From ee73d1f1fe4a6ac0b65242394c529d16b9da0c02 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:21:43 -0400 Subject: [PATCH 317/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 1f497018..c363c490 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -66,7 +66,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-input,$(set)))) define r-pyclone-process -pyclone_13/$1/$2.yaml : #pyclone_13/$1/$1.taskcomplete +pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ --in_file pyclone_13/$1/$2.tsv \ @@ -75,8 +75,8 @@ pyclone_13/$1/$2.yaml : #pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ - $(foreach sample,$$(tumors.$$(set)),\ - $(eval $(call r-pyclone-process,$$(set),$$(sample))))) + $(foreach sample,$(tumors.$(set)),\ + $(eval $(call r-pyclone-process,$(set),$(sample))))) ..DUMMY := $(shell mkdir -p version; \ From 1870f58f53f12153415e4d324cfb680578b9736f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:27:55 -0400 Subject: [PATCH 318/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index c363c490..b026d702 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -10,7 +10,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(tumors.$(set)),pyclone/$(set)/$(sample).yaml)) + $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) @@ -75,7 +75,7 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ - $(foreach sample,$(tumors.$(set)),\ + $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-pyclone-process,$(set),$(sample))))) From 49ca6e05963a6ccf58e26abeba63678f822a9fb1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:29:07 -0400 Subject: [PATCH 319/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index b026d702..89667074 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -10,7 +10,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone/$(set)/$(sample).yaml)) + $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) @@ -75,7 +75,7 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete endef $(foreach set,$(SAMPLE_SETS),\ - $(foreach sample,$(TUMOR_SAMPLES),\ + $(foreach sample,$(tumors.$(set)),\ $(eval $(call r-pyclone-process,$(set),$(sample))))) From cc2d02c22ee89cfe41522a03dd48c65e3710f26f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 17:32:09 -0400 Subject: [PATCH 320/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 89667074..1c666bab 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -70,7 +70,7 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ --in_file pyclone_13/$1/$2.tsv \ - --out_file $(@) \ + --out_file $$(@) \ --prior total_copy_number") endef From 9658629515906eef92c49207d3b7322e3263e1f8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 19:58:39 -0400 Subject: [PATCH 321/766] ++ --- ploidy/pyclone_13.mk | 11 ++- scripts/pyclone_13.R | 166 ++++++++++++++----------------------------- 2 files changed, 63 insertions(+), 114 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 1c666bab..1ccd6fbe 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -9,6 +9,7 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ $(foreach set,$(SAMPLE_SETS), \ $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ @@ -61,12 +62,20 @@ pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(s --normal_sample '$(normal.$1)' && \ echo 'taskcomplete' > $$(@)") +pyclone_13/$1/config.yaml : pyclone_13/$1/$1.taskcomplete + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ + --option 2 \ + --sample_set $1 \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") + endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-input,$(set)))) define r-pyclone-process -pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete +pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ --in_file pyclone_13/$1/$2.tsv \ diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 046ae9b0..261e6e6d 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -26,7 +26,7 @@ if (as.numeric(opt$option) == 1) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_")) normal_sample = as.character(opt$normal_sample) sample_set = setdiff(sample_set, normal_sample) - pyclone = params = list() + pyclone = list() for (i in 1:length(sample_set)) { sufam = readr::read_tsv(file = paste0("pyclone_13/", sample_set[i], "/", sample_set[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% @@ -64,13 +64,7 @@ if (as.numeric(opt$option) == 1) { by = c("Chromosome", "Start_Position", "End_Position")) %>% dplyr::mutate(sample_id = sample_set[i]) %>% dplyr::select(mutation_id, sample_id, ref_counts, var_counts, normal_cn, major_cn, minor_cn) - - params[[i]] = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::filter(grepl("# Purity", X1)) %>% - dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>% - readr::type_convert() %>% - .[["X1"]] + } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% @@ -95,110 +89,56 @@ if (as.numeric(opt$option) == 1) { } } else if (as.numeric(opt$option) == 2) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) - pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - - pyclone_ft = list() - index = 1 - for (i in 1:(length(sample_set)-1)) { - for (j in (i+1):length(sample_set)) { - pyclone_ft[[index]] = pyclone %>% - dplyr::filter(sample_id == sample_set[i]) %>% - dplyr::rename(sample_id_x = sample_id, - cellular_prevalence_x = cellular_prevalence, - cellular_prevalence_std_x = cellular_prevalence_std) %>% - dplyr::full_join(pyclone %>% - dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::rename(sample_id_y = sample_id, - cellular_prevalence_y = cellular_prevalence, - cellular_prevalence_std_y = cellular_prevalence_std)) %>% - readr::type_convert() - index = index + 1 - } + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = "_")) + normal_sample = as.character(opt$normal_sample) + sample_set = setdiff(sample_set, normal_sample) + params = list() + for (i in 1:length(sample_set)) { + params[[i]] = readr::read_tsv(file = paste0("facets/cncf/", sample_set[i], "_", normal_sample, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(grepl("# Purity", X1)) %>% + dplyr::mutate(X1 = gsub("# Purity = ", "", X1)) %>% + readr::type_convert() %>% + .[["X1"]] + } + cat("num_iters: 10000\n\n", file = as.character(opt$output_file), append = FALSE) + cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) + cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) + cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat("concentration:\n", file = as.character(opt$output_file), append = TRUE) + cat(" value: 1.0\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat(" prior:\n", file = as.character(opt$output_file), append = TRUE) + cat(" shape: 1.0\n", file = as.character(opt$output_file), append = TRUE) + cat(" rate: 0.001\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat("density: pyclone_beta_binomial\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat("beta_binomial_precision_params:\n", file = as.character(opt$output_file), append = TRUE) + cat(" value: 1000\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat(" prior:\n", file = as.character(opt$output_file), append = TRUE) + cat(" shape: 1.0\n", file = as.character(opt$output_file), append = TRUE) + cat(" rate: 0.0001\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat(" proposal:\n", file = as.character(opt$output_file), append = TRUE) + cat(" precision: 0.1\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat("working_dir: pyclone/", file = as.character(opt$output_file), append = TRUE) + cat(as.character(opt$sample_set), file = as.character(opt$output_file), append = TRUE) + cat("\n\n", file = as.character(opt$output_file), append = TRUE) + cat("trace_dir: trace\n", file = as.character(opt$output_file), append = TRUE) + cat("init_method: connected\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat("samples:\n", file = as.character(opt$output_file), append = TRUE) + for (i in 1:length(sample_set)) { + cat(paste0(" ", sample_set[i], ":\n"), file = as.character(opt$output_file), append = TRUE) + cat(paste0(" mutations_file: ", sample_set[i], ".yaml\n\n"), file = as.character(opt$output_file), append = TRUE) + cat(" tumour_content:\n", file = as.character(opt$output_file), append = TRUE) + cat(paste0(" value: ", params[[i]], "\n"), file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) + cat(" error_rate: 0.01\n", file = as.character(opt$output_file), append = TRUE) + cat("\n", file = as.character(opt$output_file), append = TRUE) } - pyclone_ft = do.call(bind_rows, pyclone_ft) %>% - dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) - smry_c = pyclone_ft %>% - dplyr::group_by(mutation_id) %>% - dplyr::summarize(cluster_id = unique(cluster_id)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(n = n()) - smry_p = pyclone %>% - dplyr::group_by(cluster_id, sample_id) %>% - dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) - - pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_c, by = "cluster_id") %>% - dplyr::left_join(smry_p, by = "cluster_id") - - plot_ = pyclone_ft %>% - ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + - geom_point(stat = "identity", alpha = .75, shape = 21) + - scale_color_brewer(type = "qual", palette = 6) + - xlab("\n\nCCF (%)\n") + - ylab("\nCCF (%)\n\n") + - guides(color = guide_legend(title = "Cluster"), - size = guide_legend(title = "N")) + - facet_wrap(sample_id_x~sample_id_y) - pdf(file = as.character(opt$output_file), width = 21, height = 21) - print(plot_) - dev.off() - -} else if (as.numeric(opt$option) == 3) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) - pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(sample_id = paste0(sample_id, " ")) - - pyclone_mt = pyclone %>% - reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>% - dplyr::left_join(pyclone %>% - dplyr::select(mutation_id, cluster_id) %>% - dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") - - smry_cl = pyclone %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::arrange(desc(mean)) %>% - dplyr::mutate(cluster_id_ordered = nrow(.):1) - - pyclone_mt = pyclone_mt %>% - dplyr::left_join(smry_cl, by = "cluster_id") - - index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) - pyclone_mt = pyclone_mt[index,,drop=FALSE] - pyclone_mt = pyclone_mt %>% - dplyr::arrange(cluster_id_ordered) - - - pdf(file = as.character(opt$output_file), width = 10, height = 21) - superheat(X = pyclone_mt %>% - dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), - membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], - pretty.order.rows = FALSE, - pretty.order.cols = TRUE, - row.dendrogram = FALSE, - col.dendrogram = FALSE, - smooth.heat = FALSE, - scale = FALSE, - heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), - legend = FALSE, - grid.hline = FALSE, - grid.vline = TRUE, - force.grid.hline = TRUE, - force.grid.vline = TRUE, - grid.hline.col = "white", - grid.vline.col = "white", - grid.hline.size = .05, - grid.vline.size = 1, - bottom.label.text.angle = 90, - bottom.label.text.alignment = "right") - dev.off() - } From dde75d232d622acf164f3b04a31bc4429017b17d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:10:43 -0400 Subject: [PATCH 322/766] PyClone --- ploidy/pyclone_13.mk | 18 ++++++++++++++---- scripts/pyclone_13.R | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 1ccd6fbe..59f9f982 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -11,8 +11,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ + $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) @@ -74,7 +74,7 @@ endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-input,$(set)))) -define r-pyclone-process +define r-pyclone-build-mutations pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ @@ -85,7 +85,17 @@ pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete pyclone_13/$1/config.yaml endef $(foreach set,$(SAMPLE_SETS),\ $(foreach sample,$(tumors.$(set)),\ - $(eval $(call r-pyclone-process,$(set),$(sample))))) + $(eval $(call r-pyclone-build-mutations,$(set),$(sample))))) + +define r-pyclone-run-analysis +pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/$(sample).yaml) + $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV) -w 72:00:00,"set -o pipefail && \ + PyClone run_analysis \ + --config_file pyclone_13/$1/config.yaml") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call r-pyclone-run-analysis,$(set)))) ..DUMMY := $(shell mkdir -p version; \ diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 261e6e6d..fafbf24e 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -101,7 +101,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] } - cat("num_iters: 10000\n\n", file = as.character(opt$output_file), append = FALSE) + cat("num_iters: 100\n\n", file = as.character(opt$output_file), append = FALSE) cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) From b5e5bc04677440431973baa2819f6a56d3ddad98 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:12:40 -0400 Subject: [PATCH 323/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 59f9f982..42766e36 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -88,7 +88,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-build-mutations,$(set),$(sample))))) define r-pyclone-run-analysis -pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/$(sample).yaml) +pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/$(sample).yaml) pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV) -w 72:00:00,"set -o pipefail && \ PyClone run_analysis \ --config_file pyclone_13/$1/config.yaml") From ecbd49e2ed3019152694a72262c86e0b3a107451 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:19:22 -0400 Subject: [PATCH 324/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index fafbf24e..6dd109bc 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -125,7 +125,7 @@ if (as.numeric(opt$option) == 1) { cat(" proposal:\n", file = as.character(opt$output_file), append = TRUE) cat(" precision: 0.1\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) - cat("working_dir: pyclone/", file = as.character(opt$output_file), append = TRUE) + cat("working_dir: pyclone_13/", file = as.character(opt$output_file), append = TRUE) cat(as.character(opt$sample_set), file = as.character(opt$output_file), append = TRUE) cat("\n\n", file = as.character(opt$output_file), append = TRUE) cat("trace_dir: trace\n", file = as.character(opt$output_file), append = TRUE) From c476013746c1310ebdb37bcc90a649bcd7a36c0e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:22:29 -0400 Subject: [PATCH 325/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 6dd109bc..f92dc041 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -134,7 +134,7 @@ if (as.numeric(opt$option) == 1) { cat("samples:\n", file = as.character(opt$output_file), append = TRUE) for (i in 1:length(sample_set)) { cat(paste0(" ", sample_set[i], ":\n"), file = as.character(opt$output_file), append = TRUE) - cat(paste0(" mutations_file: ", sample_set[i], ".yaml\n\n"), file = as.character(opt$output_file), append = TRUE) + cat(paste0(" mutations_file: pyclone_13/", as.character(opt$sample_set), "/", sample_set[i], ".yaml\n\n"), file = as.character(opt$output_file), append = TRUE) cat(" tumour_content:\n", file = as.character(opt$output_file), append = TRUE) cat(paste0(" value: ", params[[i]], "\n"), file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) From 2319a982ad79e1eebd9d0f532aac6caedb912379 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:35:57 -0400 Subject: [PATCH 326/766] Update pyclone_13.R --- scripts/pyclone_13.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index f92dc041..e19278e2 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -69,7 +69,8 @@ if (as.numeric(opt$option) == 1) { pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% dplyr::filter(!is.na(var_counts)) %>% - dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>% + dplyr::filter(!is.na(major_cn)) %>% + dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) smry = pyclone %>% From 86e5b4b99980641b7258e7e06a5eb9e56290657f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:50:18 -0400 Subject: [PATCH 327/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 42766e36..74d5faf6 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -12,7 +12,9 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ $(foreach set,$(SAMPLE_SETS), \ $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/by_clusters.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/by_loci.txt) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) @@ -92,6 +94,25 @@ pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/ $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV) -w 72:00:00,"set -o pipefail && \ PyClone run_analysis \ --config_file pyclone_13/$1/config.yaml") + +pyclone_13/$1/by_clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 + $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ + PyClone build_table \ + --config_file pyclone_13/$1/config.yaml \ + --out_file pyclone_13/$1/by_clusters.txt \ + --table_type cluster \ + --burnin 50 \ + --thin 1") + +pyclone_13/$1/by_loci.txt : pyclone_13/$1/trace/alpha.tsv.bz2 + $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ + PyClone build_table \ + --config_file pyclone_13/$1/config.yaml \ + --out_file pyclone_13/$1/by_loci.txt \ + --table_type loci \ + --burnin 50 \ + --thin 1") + endef $(foreach set,$(SAMPLE_SETS),\ From 3a245574d148b73c46ccc8e93b54cd6deae0202b Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:55:26 -0400 Subject: [PATCH 328/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 74d5faf6..cfb4d8df 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -95,20 +95,20 @@ pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/ PyClone run_analysis \ --config_file pyclone_13/$1/config.yaml") -pyclone_13/$1/by_clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 +pyclone_13/$1/clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_table \ --config_file pyclone_13/$1/config.yaml \ - --out_file pyclone_13/$1/by_clusters.txt \ + --out_file pyclone_13/$1/clusters.txt \ --table_type cluster \ --burnin 50 \ --thin 1") -pyclone_13/$1/by_loci.txt : pyclone_13/$1/trace/alpha.tsv.bz2 +pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_table \ --config_file pyclone_13/$1/config.yaml \ - --out_file pyclone_13/$1/by_loci.txt \ + --out_file pyclone_13/$1/$1.txt \ --table_type loci \ --burnin 50 \ --thin 1") From cf7e90fd3f25e0f7e39158009b6b4eda317f860e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 20:56:24 -0400 Subject: [PATCH 329/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index cfb4d8df..b5ce0326 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -13,8 +13,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS), \ $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/by_clusters.txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/by_loci.txt) + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) From c796131d9182a96a44ad2d0540541e1ac851c27f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:01:48 -0400 Subject: [PATCH 330/766] 13+ --- ploidy/pyclone_13.mk | 22 +++++++-- scripts/pyclone_13.R | 107 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 3 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index b5ce0326..9948df32 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -14,9 +14,9 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__HM__.pdf) define r-sufam @@ -112,6 +112,22 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --table_type loci \ --burnin 50 \ --thin 1") + +pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ + --option 3 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") + +pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt + $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ + --option 4 \ + --sample_set '$(tumors.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") endef diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index e19278e2..42f6a393 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -142,4 +142,111 @@ if (as.numeric(opt$option) == 1) { cat(" error_rate: 0.01\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) } +} else if (as.numeric(opt$option) == 3) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + + pyclone_ft = list() + index = 1 + for (i in 1:(length(sample_set)-1)) { + for (j in (i+1):length(sample_set)) { + pyclone_ft[[index]] = pyclone %>% + dplyr::filter(sample_id == sample_set[i]) %>% + dplyr::rename(sample_id_x = sample_id, + cellular_prevalence_x = cellular_prevalence, + cellular_prevalence_std_x = cellular_prevalence_std) %>% + dplyr::full_join(pyclone %>% + dplyr::filter(sample_id == sample_set[j]) %>% + dplyr::rename(sample_id_y = sample_id, + cellular_prevalence_y = cellular_prevalence, + cellular_prevalence_std_y = cellular_prevalence_std)) %>% + readr::type_convert() + index = index + 1 + } + } + pyclone_ft = do.call(bind_rows, pyclone_ft) %>% + dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) + smry_c = pyclone_ft %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(cluster_id = unique(cluster_id)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(n = n()) + smry_p = pyclone %>% + dplyr::group_by(cluster_id, sample_id) %>% + dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) + + pyclone_ft = pyclone_ft %>% + dplyr::left_join(smry_c, by = "cluster_id") %>% + dplyr::left_join(smry_p, by = "cluster_id") + + plot_ = pyclone_ft %>% + ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + + geom_point(stat = "identity", alpha = .75, shape = 21) + + scale_color_brewer(type = "qual", palette = 6) + + xlab("\n\nCCF (%)\n") + + ylab("\nCCF (%)\n\n") + + guides(color = guide_legend(title = "Cluster"), + size = guide_legend(title = "N")) + + facet_wrap(sample_id_x~sample_id_y) + pdf(file = as.character(opt$output_file), width = 21, height = 21) + print(plot_) + dev.off() + +} else if (as.numeric(opt$option) == 4) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_id = paste0(sample_id, " ")) + + pyclone_mt = pyclone %>% + reshape2::dcast(formula = mutation_id~sample_id, value.var = "cellular_prevalence") %>% + dplyr::left_join(pyclone %>% + dplyr::select(mutation_id, cluster_id) %>% + dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") + + smry_cl = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::arrange(desc(mean)) %>% + dplyr::mutate(cluster_id_ordered = nrow(.):1) + + pyclone_mt = pyclone_mt %>% + dplyr::left_join(smry_cl, by = "cluster_id") + + index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) + pyclone_mt = pyclone_mt[index,,drop=FALSE] + pyclone_mt = pyclone_mt %>% + dplyr::arrange(cluster_id_ordered) + + + pdf(file = as.character(opt$output_file), width = 10, height = 21) + superheat(X = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), + membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], + pretty.order.rows = FALSE, + pretty.order.cols = TRUE, + row.dendrogram = FALSE, + col.dendrogram = FALSE, + smooth.heat = FALSE, + scale = FALSE, + heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + legend = FALSE, + grid.hline = FALSE, + grid.vline = TRUE, + force.grid.hline = TRUE, + force.grid.vline = TRUE, + grid.hline.col = "white", + grid.vline.col = "white", + grid.hline.size = .05, + grid.vline.size = 1, + bottom.label.text.angle = 90, + bottom.label.text.alignment = "right") + dev.off() + } From f6bf69b4b97a1df94ff345b4629c2956eece39a8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:02:39 -0400 Subject: [PATCH 331/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 9948df32..1b1dce07 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -113,7 +113,7 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --burnin 50 \ --thin 1") -pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt +pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 3 \ @@ -121,7 +121,7 @@ pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt --input_file $$(<) \ --output_file $$(@)") -pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt +pyclone_13/$1/$1__HM__.pdf : pyclone_13/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 4 \ From ff1c1f58178a70fb0f6c5124978c9d6e2eb32ba2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:21:20 -0400 Subject: [PATCH 332/766] ++ --- scripts/pyclone_13.R | 6 ++++-- scripts/pyclone_vi.R | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 42f6a393..431f5232 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -160,13 +160,15 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(sample_id == sample_set[j]) %>% dplyr::rename(sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, - cellular_prevalence_std_y = cellular_prevalence_std)) %>% + cellular_prevalence_std_y = cellular_prevalence_std), + by = "mutation_id") %>% readr::type_convert() index = index + 1 } } pyclone_ft = do.call(bind_rows, pyclone_ft) %>% - dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) + readr::type_convert() + smry_c = pyclone_ft %>% dplyr::group_by(mutation_id) %>% dplyr::summarize(cluster_id = unique(cluster_id)) %>% diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 378b3160..c83c66bf 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -101,7 +101,8 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(sample_id == sample_set[j]) %>% dplyr::rename(sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, - cellular_prevalence_std_y = cellular_prevalence_std)) %>% + cellular_prevalence_std_y = cellular_prevalence_std), + by = "mutation_id") %>% readr::type_convert() index = index + 1 } From 11b0f6c39179b4cb6645ebfa88d01e62752baf40 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:25:47 -0400 Subject: [PATCH 333/766] ++ --- scripts/pyclone_13.R | 4 ++-- scripts/pyclone_vi.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 431f5232..b628a670 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -158,10 +158,10 @@ if (as.numeric(opt$option) == 1) { cellular_prevalence_std_x = cellular_prevalence_std) %>% dplyr::full_join(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::rename(sample_id_y = sample_id, + dplyr::select(sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, cellular_prevalence_std_y = cellular_prevalence_std), - by = "mutation_id") %>% + by = c("mutation_id", "cluster_id")) %>% readr::type_convert() index = index + 1 } diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index c83c66bf..b0d72dcd 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -102,7 +102,7 @@ if (as.numeric(opt$option) == 1) { dplyr::rename(sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, cellular_prevalence_std_y = cellular_prevalence_std), - by = "mutation_id") %>% + by = c("mutation_id", "cluster_id")) %>% readr::type_convert() index = index + 1 } From 9588b1ad82ddfd0236aa917b2035aa6bcac21b6e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:32:33 -0400 Subject: [PATCH 334/766] ++ --- scripts/pyclone_13.R | 9 ++++++--- scripts/pyclone_vi.R | 14 +++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index b628a670..b3a57461 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -153,15 +153,18 @@ if (as.numeric(opt$option) == 1) { for (j in (i+1):length(sample_set)) { pyclone_ft[[index]] = pyclone %>% dplyr::filter(sample_id == sample_set[i]) %>% - dplyr::rename(sample_id_x = sample_id, + dplyr::select(mutation_id, + cluster_id, + sample_id_x = sample_id, cellular_prevalence_x = cellular_prevalence, cellular_prevalence_std_x = cellular_prevalence_std) %>% dplyr::full_join(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::select(sample_id_y = sample_id, + dplyr::select(mutation_id, + sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, cellular_prevalence_std_y = cellular_prevalence_std), - by = c("mutation_id", "cluster_id")) %>% + by = "mutation_id") %>% readr::type_convert() index = index + 1 } diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index b0d72dcd..a2cb636c 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -94,21 +94,25 @@ if (as.numeric(opt$option) == 1) { for (j in (i+1):length(sample_set)) { pyclone_ft[[index]] = pyclone %>% dplyr::filter(sample_id == sample_set[i]) %>% - dplyr::rename(sample_id_x = sample_id, + dplyr::select(mutation_id, + cluster_id, + sample_id_x = sample_id, cellular_prevalence_x = cellular_prevalence, cellular_prevalence_std_x = cellular_prevalence_std) %>% dplyr::full_join(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::rename(sample_id_y = sample_id, + dplyr::select(mutation_id, + sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence, cellular_prevalence_std_y = cellular_prevalence_std), - by = c("mutation_id", "cluster_id")) %>% + by = "mutation_id") %>% readr::type_convert() index = index + 1 } } - pyclone_ft = do.call(bind_rows, pyclone_ft) %>% - dplyr::filter(cellular_prevalence_x > 0 & cellular_prevalence_y > 0) + pyclone_ft = do.call(bind_rows, pyclone_ft) + readr::type_convert() + smry_c = pyclone_ft %>% dplyr::group_by(mutation_id) %>% dplyr::summarize(cluster_id = unique(cluster_id)) %>% From 78f86f96a7aa7944337cfb44bbdc7b942b64c55a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:35:17 -0400 Subject: [PATCH 335/766] Update pyclone_vi.R --- scripts/pyclone_vi.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index a2cb636c..38816d88 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -110,7 +110,7 @@ if (as.numeric(opt$option) == 1) { index = index + 1 } } - pyclone_ft = do.call(bind_rows, pyclone_ft) + pyclone_ft = do.call(bind_rows, pyclone_ft) %>% readr::type_convert() smry_c = pyclone_ft %>% From 93bfa90636dc6d1c069864a4100be448cd7408f2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 21:39:49 -0400 Subject: [PATCH 336/766] ++ --- ploidy/pyclone_13.mk | 2 +- scripts/pyclone_13.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 1b1dce07..280db312 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -110,7 +110,7 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --config_file pyclone_13/$1/config.yaml \ --out_file pyclone_13/$1/$1.txt \ --table_type loci \ - --burnin 50 \ + --burnin 2000 \ --thin 1") pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index b3a57461..29983ce8 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -102,7 +102,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] } - cat("num_iters: 100\n\n", file = as.character(opt$output_file), append = FALSE) + cat("num_iters: 10000\n\n", file = as.character(opt$output_file), append = FALSE) cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) From c682232659666d6fa18a9b39e2d93de6ffc771bb Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:08:08 -0400 Subject: [PATCH 337/766] ++ --- Makefile | 42 +++++++++++++++++++++++------------------- bam_tools/get_bam.mk | 21 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 19 deletions(-) create mode 100644 bam_tools/get_bam.mk diff --git a/Makefile b/Makefile index 2c138d64..c08a8168 100644 --- a/Makefile +++ b/Makefile @@ -29,11 +29,11 @@ RUN_MAKE = $(if $(findstring false,$(USE_CLUSTER))$(findstring n,$(MAKEFLAGS)),+ #================================================== TARGETS += somatic_indels -somatic_indels: +somatic_indels : $(call RUN_MAKE,modules/variant_callers/somatic/somaticIndels.mk) TARGETS += somatic_variants -somatic_variants: +somatic_variants : $(call RUN_MAKE,modules/variant_callers/somatic/somaticVariants.mk) @@ -69,11 +69,11 @@ tophat : $(call RUN_MAKE,modules/aligners/tophatAligner.mk) TARGETS += star -star: +star : $(call RUN_MAKE,modules/aligners/starAligner.mk) TARGETS += star_fusion_aligner -star_fusion_aligner: +star_fusion_aligner : $(call RUN_MAKE,modules/aligners/starFusionAligner.mk) TARGETS += blast_reads @@ -102,11 +102,11 @@ snvmix : $(call RUN_MAKE,modules/variant_callers/snvmix.mk) TARGETS += tvcTN -tvcTN: +tvcTN : $(call RUN_MAKE,modules/variant_callers/somatic/tvcTN.mk) TARGETS += tvc -tvc: +tvc : $(call RUN_MAKE,modules/variant_callers/tvc.mk) TARGETS += varscanTN @@ -150,7 +150,7 @@ samtools_het : $(call RUN_MAKE,modules/variant_callers/samtoolsHet.mk) TARGETS += platypus -platypus: +platypus : $(call RUN_MAKE,modules/variant_callers/somatic/platypus.mk) TARGETS += msisensor @@ -170,7 +170,7 @@ museqTN : $(call RUN_MAKE,modules/variant_callers/somatic/museqTN.mk) TARGETS += hotspot -hotspot: +hotspot : $(call RUN_MAKE,modules/variant_callers/hotspot.mk) TARGETS += jsm @@ -182,15 +182,15 @@ sufam: $(call RUN_MAKE,modules/variant_callers/sufamsampleset.mk) TARGETS += sufam_gt -sufam_gt: +sufam_gt : $(call RUN_MAKE,modules/variant_callers/sufam_gt.mk) TARGETS += get_basecount -get_basecount: +get_basecount : $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) TARGETS += strelka_varscan_indels -strelka_varscan_indels: +strelka_varscan_indels : $(call RUN_MAKE,modules/variant_callers/somatic/strelkaVarscanIndels.mk) @@ -260,7 +260,7 @@ cnv_kit : #================================================== TARGETS += star_fusion -star_fusion: +star_fusion : $(call RUN_MAKE,modules/sv_callers/starFusion.mk) TARGETS += tophat_fusion @@ -512,7 +512,7 @@ virus_detection_bowtie2 : $(call RUN_MAKE,modules/virus/virus_detection_bowtie2.mk) TARGETS += viral_detection -viral_detection: +viral_detection : $(call RUN_MAKE,modules/test/workflows/viral_detection.mk) TARGETS += krona_classify @@ -550,15 +550,15 @@ delmh_summary : #================================================== TARGETS += ann_ext_vcf -ann_ext_vcf: +ann_ext_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateExtVcf.mk) TARGETS += ann_somatic_vcf -ann_somatic_vcf: +ann_somatic_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateSomaticVcf.mk) TARGETS += ann_vcf -ann_vcf: +ann_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateVcf.mk) TARGETS += cravat_annotation @@ -570,7 +570,7 @@ cravat_annotate : $(call RUN_MAKE,modules/vcf_tools/cravat_annotation.mk) TARGETS += ann_summary_vcf -ann_summary_vcf: +ann_summary_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateSummaryVcf.mk) @@ -579,12 +579,16 @@ ann_summary_vcf: #================================================== TARGETS += hotspot_summary -hotspot_summary: +hotspot_summary : $(MAKE) -f modules/variant_callers/genotypehotspots.mk -j $(NUM_JOBS) $(call RUN_MAKE,modules/summary/hotspotsummary.mk) TARGETS += merge_sv -merge_sv: +merge_sv : $(call RUN_MAKE,modules/vcf_tools/merge_sv.mk) + +TARGETS += get_bam +get_bam : + $(call RUN_MAKE,modules/bam_tools/get_bam.mk) .PHONY : $(TARGETS) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk new file mode 100644 index 00000000..44655180 --- /dev/null +++ b/bam_tools/get_bam.mk @@ -0,0 +1,21 @@ +include modules/Makefile.inc + +LOGDIR = log/get_bam.$(NOW) + +get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) + +define get-bam +bam/$1.bam : + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(cut -c 1-1 $1)/$(cut -c 2-2 $1)/$1.bam \ + bam/") + +endef + $(foreach sample,$(SAMPLES),\ + $(eval $(call get-bam,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + scp > version/get_bam.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: get_bam \ No newline at end of file From dab65c8daa559367d7d70514953551c1faaed6e6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:12:39 -0400 Subject: [PATCH 338/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 44655180..c96b123c 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -15,7 +15,7 @@ endef $(eval $(call get-bam,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - scp > version/get_bam.txt) + which scp > version/get_bam.txt) .SECONDARY: .DELETE_ON_ERROR: .PHONY: get_bam \ No newline at end of file From 21dced32091571904060ac773d52fdb6df4cc38a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:13:20 -0400 Subject: [PATCH 339/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index c96b123c..15b7a9a9 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(cut -c 1-1 $1)/$(cut -c 2-2 $1)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/'$(cut -c 1-1 $1)'/$(cut -c 2-2 $1)/$1.bam \ bam/") endef From 8241563f8df3b44ccc4759d86c32545ba9f0f1ea Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:14:09 -0400 Subject: [PATCH 340/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 15b7a9a9..c96b123c 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/'$(cut -c 1-1 $1)'/$(cut -c 2-2 $1)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(cut -c 1-1 $1)/$(cut -c 2-2 $1)/$1.bam \ bam/") endef From f9015d2a6d365c5a591e252dd9496a120aeb205e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:15:29 -0400 Subject: [PATCH 341/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index c96b123c..7da02864 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,9 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(cut -c 1-1 $1)/$(cut -c 2-2 $1)/$1.bam \ + I=$(cut -c 1-1 $1) && \ + J=$(cut -c 2-2 $1) && \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(I)/$$(J)/$1.bam \ bam/") endef From 24461bc37a13f08d3c2596fffa255b39faf05738 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:17:25 -0400 Subject: [PATCH 342/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 7da02864..1d85f855 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,9 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - I=$(cut -c 1-1 $1) && \ - J=$(cut -c 2-2 $1) && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(I)/$$(J)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(echo $1 | cut -c 1-1)/$(echo $1 | cut -c 2-2)/$1.bam \ bam/") endef From 2bc4708ec9d92daf652cdb2bf5c807cefbed376a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:18:22 -0400 Subject: [PATCH 343/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 1d85f855..c288895a 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(echo $1 | cut -c 1-1)/$(echo $1 | cut -c 2-2)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(echo $1 | cut -c 1-1)/$$(echo $1 | cut -c 2-2)/$1.bam \ bam/") endef From 57b61b625400646702bc843af16282436c700d6a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:19:21 -0400 Subject: [PATCH 344/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index c288895a..21d9ddb4 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,9 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(echo $1 | cut -c 1-1)/$$(echo $1 | cut -c 2-2)/$1.bam \ + I=$(echo $1 | cut -c 1-1) && \ + J=$(echo $1 | cut -c 2-2) && \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ bam/") endef From 5420c09bfb74978f04f960f9b8538c969c30328a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:19:41 -0400 Subject: [PATCH 345/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 21d9ddb4..b9fc84df 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=$(echo $1 | cut -c 1-1) && \ J=$(echo $1 | cut -c 2-2) && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(I)/$$(J)/$1.bam \ bam/") endef From 7b810b594f1e05b924f0dde793e89aeb5ecc0da9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:20:02 -0400 Subject: [PATCH 346/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index b9fc84df..ab894730 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,8 +7,8 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - I=$(echo $1 | cut -c 1-1) && \ - J=$(echo $1 | cut -c 2-2) && \ + export I=$(echo $1 | cut -c 1-1) && \ + export J=$(echo $1 | cut -c 2-2) && \ scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(I)/$$(J)/$1.bam \ bam/") From fc13fbd79e13d89ef505bb28f026ffacf33513b1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:20:35 -0400 Subject: [PATCH 347/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index ab894730..b85909db 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ export I=$(echo $1 | cut -c 1-1) && \ export J=$(echo $1 | cut -c 2-2) && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$$(I)/$$(J)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ bam/") endef From 2eaa8e333b21be4185244f7fae55187e4ce1f4b1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:24:38 -0400 Subject: [PATCH 348/766] Update get_bam.mk --- bam_tools/get_bam.mk | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index b85909db..03195ba4 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -4,12 +4,11 @@ LOGDIR = log/get_bam.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) -define get-bam bam/$1.bam : - $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - export I=$(echo $1 | cut -c 1-1) && \ - export J=$(echo $1 | cut -c 2-2) && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ + $(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + I=`$(echo "$1" | cut -c 1-1)`; \ + J=`$(echo "$1" | cut -c 2-2)`; \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$${I}/$${J}/$1.bam \ bam/") endef From 4578af79f5e1a612dffbb2d9e5ac95aedbb48a20 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:25:15 -0400 Subject: [PATCH 349/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 03195ba4..3ca814bb 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -5,7 +5,7 @@ LOGDIR = log/get_bam.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) bam/$1.bam : - $(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`$(echo "$1" | cut -c 1-1)`; \ J=`$(echo "$1" | cut -c 2-2)`; \ scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$${I}/$${J}/$1.bam \ From 9650cffd4333792ee2fe17da42c8bc515ca85bca Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:25:51 -0400 Subject: [PATCH 350/766] Update get_bam.mk --- bam_tools/get_bam.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 3ca814bb..961362c0 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -4,6 +4,7 @@ LOGDIR = log/get_bam.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) +define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`$(echo "$1" | cut -c 1-1)`; \ From 91264eed9fb6c7f7b7224d948c1afd2ff134596d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:26:34 -0400 Subject: [PATCH 351/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 961362c0..f70a8a1c 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,8 +7,8 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - I=`$(echo "$1" | cut -c 1-1)`; \ - J=`$(echo "$1" | cut -c 2-2)`; \ + I=`echo "$1" | cut -c 1-1`; \ + J=`echo "$1" | cut -c 2-2`; \ scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$${I}/$${J}/$1.bam \ bam/") From ae1f44577855f7bd6b499ed54708f4f1a040ff88 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:27:55 -0400 Subject: [PATCH 352/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index f70a8a1c..0e508938 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`echo "$1" | cut -c 1-1`; \ J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$${I}/$${J}/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ bam/") endef From ec457d7c0a90f70637b39e0f0f291db31ded2f44 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:28:12 -0400 Subject: [PATCH 353/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 0e508938..722e632c 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`echo "$1" | cut -c 1-1`; \ J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(I)/$(J)/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$I/$J/$1.bam \ bam/") endef From 1d4c02c5205531824105a5b60d8107116959acad Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:29:12 -0400 Subject: [PATCH 354/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 722e632c..0e8f59c5 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`echo "$1" | cut -c 1-1`; \ J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$I/$J/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/\"$I\"/\"$J\"/$1.bam \ bam/") endef From d08a1dfd44f6493d6db150e02ba063219faaa8c7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:29:58 -0400 Subject: [PATCH 355/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 0e8f59c5..b8b40713 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`echo "$1" | cut -c 1-1`; \ J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/\"$I\"/\"$J\"/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/\"$${I}\"/\"$${J}\"/$1.bam \ bam/") endef From 7f958e61a49430a490970c6ea8252bb97da34532 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:30:55 -0400 Subject: [PATCH 356/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index b8b40713..17245cfb 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -9,7 +9,7 @@ bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ I=`echo "$1" | cut -c 1-1`; \ J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/\"$${I}\"/\"$${J}\"/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`$${I}`/`$${J}`/$1.bam \ bam/") endef From 19cc398bcf003db56e622b583ab1d6a5557a69e0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:31:40 -0400 Subject: [PATCH 357/766] Update get_bam.mk --- bam_tools/get_bam.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 17245cfb..63276c82 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,9 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - I=`echo "$1" | cut -c 1-1`; \ - J=`echo "$1" | cut -c 2-2`; \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`$${I}`/`$${J}`/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo "$1" | cut -c 1-1`/`echo "$1" | cut -c 2-2`/$1.bam \ bam/") endef From 1fc09bc04c68f2ca8fe451fcfb9afc7991673095 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:32:10 -0400 Subject: [PATCH 358/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 63276c82..eb11e6ce 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo "$1" | cut -c 1-1`/`echo "$1" | cut -c 2-2`/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ bam/") endef From 95167fd3a86145d74eb0ae8315e56007988f6e0e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:32:41 -0400 Subject: [PATCH 359/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index eb11e6ce..e10b0dfc 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(echo $1 | cut -c 1-1)/`echo $1 | cut -c 2-2`/$1.bam \ bam/") endef From 7e70bb5df36c66bd206c07989cc2f996301586ee Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:33:11 -0400 Subject: [PATCH 360/766] Update get_bam.mk --- bam_tools/get_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index e10b0dfc..eb11e6ce 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -7,7 +7,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/$(echo $1 | cut -c 1-1)/`echo $1 | cut -c 2-2`/$1.bam \ + scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ bam/") endef From 1acadbb87e6eb3996b9c1e2119cf694766799ab1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:39:06 -0400 Subject: [PATCH 361/766] Update get_bam.mk --- bam_tools/get_bam.mk | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index eb11e6ce..4e09ead8 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -2,13 +2,23 @@ include modules/Makefile.inc LOGDIR = log/get_bam.$(NOW) -get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) +get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ bam/") + +bam/$1.bam.bai : bam/$1.bam + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + $(SAMTOOLS) index $$(<)") + +bam/$1.bai : bam/$1.bam bam/$1/bam.bai + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + cp $$(<<) $$(@)") + endef $(foreach sample,$(SAMPLES),\ From a53e0cbd3b482522bcc07f71c1604cfab4c406e3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 3 Nov 2022 22:41:00 -0400 Subject: [PATCH 362/766] Update get_bam.mk --- bam_tools/get_bam.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bam_tools/get_bam.mk b/bam_tools/get_bam.mk index 4e09ead8..9bff77cd 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/get_bam.mk @@ -3,7 +3,8 @@ include modules/Makefile.inc LOGDIR = log/get_bam.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ - $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) + $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ + $(foreach sample,$(SAMPLES),bam/$(sample).bai) define get-bam bam/$1.bam : @@ -15,7 +16,7 @@ bam/$1.bam.bai : bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ $(SAMTOOLS) index $$(<)") -bam/$1.bai : bam/$1.bam bam/$1/bam.bai +bam/$1.bai : bam/$1.bam bam/$1.bam.bai $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ cp $$(<<) $$(@)") From 8db52a5504f07970180f61b629da1e1da2672bb8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 13:27:16 -0400 Subject: [PATCH 363/766] plot --- scripts/pyclone_13.R | 8 ++++++-- scripts/pyclone_vi.R | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 29983ce8..2eab93d2 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -8,6 +8,7 @@ suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("reshape2")) suppressPackageStartupMessages(library("superheat")) +suppressPackageStartupMessages(library("RColorBrewer")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -188,16 +189,19 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = pyclone_ft %>% dplyr::left_join(smry_c, by = "cluster_id") %>% dplyr::left_join(smry_p, by = "cluster_id") + + colourCount = length(unique(pyclone_ft$cluster_id)) + getPalette = colorRampPalette(brewer.pal(9, "Set1")) plot_ = pyclone_ft %>% ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + geom_point(stat = "identity", alpha = .75, shape = 21) + - scale_color_brewer(type = "qual", palette = 6) + + scale_color_manual(values = getPalette(colourCount)) + xlab("\n\nCCF (%)\n") + ylab("\nCCF (%)\n\n") + guides(color = guide_legend(title = "Cluster"), size = guide_legend(title = "N")) + - facet_wrap(sample_id_x~sample_id_y) + facet_wrap(~sample_id_x+sample_id_y) pdf(file = as.character(opt$output_file), width = 21, height = 21) print(plot_) dev.off() diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 38816d88..038f91ec 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -8,6 +8,7 @@ suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("reshape2")) suppressPackageStartupMessages(library("superheat")) +suppressPackageStartupMessages(library("RColorBrewer")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -129,16 +130,19 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = pyclone_ft %>% dplyr::left_join(smry_c, by = "cluster_id") %>% dplyr::left_join(smry_p, by = "cluster_id") + + colourCount = length(unique(pyclone_ft$cluster_id)) + getPalette = colorRampPalette(brewer.pal(9, "Set1")) plot_ = pyclone_ft %>% ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + geom_point(stat = "identity", alpha = .75, shape = 21) + - scale_color_brewer(type = "qual", palette = 6) + + scale_color_manual(values = getPalette(colourCount)) + xlab("\n\nCCF (%)\n") + ylab("\nCCF (%)\n\n") + guides(color = guide_legend(title = "Cluster"), size = guide_legend(title = "N")) + - facet_wrap(sample_id_x~sample_id_y) + facet_wrap(~sample_id_x+sample_id_y) pdf(file = as.character(opt$output_file), width = 21, height = 21) print(plot_) dev.off() From acd95968ff9e1c5270374bd3911f4f17ee419a9f Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 13:33:54 -0400 Subject: [PATCH 364/766] Update pyclone_13.R --- scripts/pyclone_13.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 2eab93d2..f4e507b3 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -103,7 +103,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] } - cat("num_iters: 10000\n\n", file = as.character(opt$output_file), append = FALSE) + cat("num_iters: 1000\n\n", file = as.character(opt$output_file), append = FALSE) cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) @@ -125,7 +125,7 @@ if (as.numeric(opt$option) == 1) { cat(" rate: 0.0001\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat(" proposal:\n", file = as.character(opt$output_file), append = TRUE) - cat(" precision: 0.1\n", file = as.character(opt$output_file), append = TRUE) + cat(" precision: 0.01\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat("working_dir: pyclone_13/", file = as.character(opt$output_file), append = TRUE) cat(as.character(opt$sample_set), file = as.character(opt$output_file), append = TRUE) From 417b7ce9ecb286b2a053b2a33d0523a5f7687dd1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 13:34:46 -0400 Subject: [PATCH 365/766] Update pyclone_13.mk --- ploidy/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ploidy/pyclone_13.mk b/ploidy/pyclone_13.mk index 280db312..31daa1d0 100644 --- a/ploidy/pyclone_13.mk +++ b/ploidy/pyclone_13.mk @@ -101,7 +101,7 @@ pyclone_13/$1/clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --config_file pyclone_13/$1/config.yaml \ --out_file pyclone_13/$1/clusters.txt \ --table_type cluster \ - --burnin 50 \ + --burnin 200 \ --thin 1") pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 @@ -110,7 +110,7 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --config_file pyclone_13/$1/config.yaml \ --out_file pyclone_13/$1/$1.txt \ --table_type loci \ - --burnin 2000 \ + --burnin 200 \ --thin 1") pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt From 080a3f9cfbe5cb9c2f81ebf9d8822f44e5e92c69 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 13:54:23 -0400 Subject: [PATCH 366/766] ++ --- Makefile | 12 ++++++------ sv_callers/{mantaTN.mk => manta_tumor_normal.mk} | 12 ++++++------ sv_callers/{svabaTN.mk => svaba_tumor_normal.mk} | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) rename sv_callers/{mantaTN.mk => manta_tumor_normal.mk} (81%) rename sv_callers/{svabaTN.mk => svaba_tumor_normal.mk} (84%) diff --git a/Makefile b/Makefile index c08a8168..f7e05da9 100644 --- a/Makefile +++ b/Makefile @@ -296,13 +296,13 @@ oncofuse : # DNA structural variant callers #================================================== -TARGETS += mantaTN -mantaTN : - $(call RUN_MAKE,modules/sv_callers/mantaTN.mk) +TARGETS += manta_tumor_normal +manta_tumor_normal : + $(call RUN_MAKE,modules/sv_callers/manta_tumor_normal.mk) -TARGETS += svabaTN -svabaTN : - $(call RUN_MAKE,modules/sv_callers/svabaTN.mk) +TARGETS += svaba_tumor_normal +svaba_tumor_normal : + $(call RUN_MAKE,modules/sv_callers/svaba_tumor_normal.mk) TARGETS += manta manta : diff --git a/sv_callers/mantaTN.mk b/sv_callers/manta_tumor_normal.mk similarity index 81% rename from sv_callers/mantaTN.mk rename to sv_callers/manta_tumor_normal.mk index 8939ac72..93380e40 100644 --- a/sv_callers/mantaTN.mk +++ b/sv_callers/manta_tumor_normal.mk @@ -1,11 +1,11 @@ include modules/Makefile.inc include modules/sv_callers/manta.inc -LOGDIR ?= log/mantaTN.$(NOW) +LOGDIR ?= log/manta_tumor_normal.$(NOW) -manta_tn : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ - vcf/$(pair).manta_indels.vcf \ - vcf/$(pair).manta_candidate_sv.vcf) +manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ + vcf/$(pair).manta_indels.vcf \ + vcf/$(pair).manta_candidate_sv.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai @@ -36,7 +36,7 @@ $(foreach pair,$(SAMPLE_PAIRS), \ $(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) ..DUMMY := $(shell mkdir -p version; \ - python --version &> version/mantaTN.txt) + python --version &> version/manta_tumor_normal.txt) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: manta_tn +.PHONY: manta diff --git a/sv_callers/svabaTN.mk b/sv_callers/svaba_tumor_normal.mk similarity index 84% rename from sv_callers/svabaTN.mk rename to sv_callers/svaba_tumor_normal.mk index 819aa64e..3636ad22 100644 --- a/sv_callers/svabaTN.mk +++ b/sv_callers/svaba_tumor_normal.mk @@ -1,6 +1,6 @@ include modules/Makefile.inc -LOGDIR = log/svabaTN.$(NOW) +LOGDIR = log/svaba_tumor_normal.$(NOW) SVABA_CORES ?= 8 SVABA_MEM_CORE ?= 6G @@ -10,9 +10,9 @@ SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.be SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SVABA ?= svaba -svaba_tn : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ - vcf/$(pair).svaba_indels.vcf \ - vcf/$(pair).svaba_candidate_sv.vcf) +svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ + vcf/$(pair).svaba_indels.vcf \ + vcf/$(pair).svaba_candidate_sv.vcf) define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam @@ -49,7 +49,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ ..DUMMY := $(shell mkdir -p version; \ - $(SVABA) --help &> version/svabaTN.txt) + $(SVABA) --help &> version/svaba_tumor_normal.txt) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: svaba_tn +.PHONY: svaba From 850568de9593a3004853674790aeb2d0c9ab77b0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 14:18:17 -0400 Subject: [PATCH 367/766] ++ --- Makefile | 6 +++- config.inc | 2 ++ sv_callers/gridss_tumor_normal.mk | 47 +++++++++++++++++++++++++++++++ sv_callers/svaba_tumor_normal.mk | 1 - 4 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 sv_callers/gridss_tumor_normal.mk diff --git a/Makefile b/Makefile index f7e05da9..ea3c638c 100644 --- a/Makefile +++ b/Makefile @@ -303,7 +303,11 @@ manta_tumor_normal : TARGETS += svaba_tumor_normal svaba_tumor_normal : $(call RUN_MAKE,modules/sv_callers/svaba_tumor_normal.mk) - + +TARGETS += gridss_tumor_normal +gridss_tumor_normal : + $(call RUN_MAKE,modules/sv_callers/gridss_tumor_normal.mk) + TARGETS += manta manta : $(call RUN_MAKE,modules/sv_callers/manta.mk) diff --git a/config.inc b/config.inc index 3b597e92..ab86c32e 100644 --- a/config.inc +++ b/config.inc @@ -29,6 +29,8 @@ IMMUNE_ENV ?= $(HOME)/share/usr/env/r-immunedeconv-2.1.0 SUMREADS_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.6 PYCLONE_ENV = $(HOME)/share/usr/env/pyclone-vi-0.1.2 PYCLONE_13_ENV = $(HOME)/share/usr/env/pyclone-0.13.1 +GRIDSS_ENV = $(HOME)/share/usr/env/gridss-2.13.2 +SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk new file mode 100644 index 00000000..43cc4856 --- /dev/null +++ b/sv_callers/gridss_tumor_normal.mk @@ -0,0 +1,47 @@ +include modules/Makefile.inc + +LOGDIR = log/gridss_tumor_normal.$(NOW) + +GRIDSS_CORES ?= 8 +GRIDSS_MEM_CORE ?= 6G +GRIDSS_REF ?= $(REF_FASTA) +GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/ENCFF001TDO.bed +GRIDSS ?= gridss + +gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair).gridss_sv.vcf) + +define gridss-tumor-normal +gridss/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam + $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV) -w 72:00:00,"set -o pipefail && \ + mkdir -p gridss/$1_$2 && \ + cd gridss/$1_$2 && \ + $$(GRIDSS) \ + -r $$(GRIDSS_REF) \ + -o $1_$2.gridss_sv.vcf \ + -b $$(GRIDSS_BLACKLIST) \ + ../../bam/$2.bam \ + ../../bam/$1.bam") + +#svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf + +#svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf + +#vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf +# $$(INIT) cat $$< > $$@ + +#vcf/$1_$2.svaba_indels.vcf : svaba/$1_$2.svaba.somatic.indel.vcf +# $$(INIT) cat $$< > $$@ + +#vcf/$1_$2.svaba_candidate_sv.vcf : svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf +# $$(INIT) cat $$< > $$@ + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call gridss-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) + + +..DUMMY := $(shell mkdir -p version; \ + $(SVABA) --help &> version/gridss_tumor_normal.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: gridss diff --git a/sv_callers/svaba_tumor_normal.mk b/sv_callers/svaba_tumor_normal.mk index 3636ad22..0937171c 100644 --- a/sv_callers/svaba_tumor_normal.mk +++ b/sv_callers/svaba_tumor_normal.mk @@ -7,7 +7,6 @@ SVABA_MEM_CORE ?= 6G SVABA_REF ?= $(REF_FASTA) SVABA_DBSNP ?= $(HOME)/share/lib/resource_files/svaba/dbsnp_indel.vcf SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.bed -SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SVABA ?= svaba svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ From fad2307868c10234b0f9184405fe238c5bcbad2e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 14:19:56 -0400 Subject: [PATCH 368/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 43cc4856..43b71996 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -16,6 +16,7 @@ gridss/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam mkdir -p gridss/$1_$2 && \ cd gridss/$1_$2 && \ $$(GRIDSS) \ + -t $$(GRIDSS_CORES) \ -r $$(GRIDSS_REF) \ -o $1_$2.gridss_sv.vcf \ -b $$(GRIDSS_BLACKLIST) \ From 53dcd2dd9a8bfc9a79a6c9bfcaf46d423f0d01c8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 14:20:44 -0400 Subject: [PATCH 369/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 43b71996..08478da1 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -11,7 +11,7 @@ GRIDSS ?= gridss gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair).gridss_sv.vcf) define gridss-tumor-normal -gridss/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam +gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV) -w 72:00:00,"set -o pipefail && \ mkdir -p gridss/$1_$2 && \ cd gridss/$1_$2 && \ From da56cd00b7db3dc8ec12271e9200e2dfdaa6b4ec Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 14:21:14 -0400 Subject: [PATCH 370/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 08478da1..40a24a7e 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -8,7 +8,7 @@ GRIDSS_REF ?= $(REF_FASTA) GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/ENCFF001TDO.bed GRIDSS ?= gridss -gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair).gridss_sv.vcf) +gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) define gridss-tumor-normal gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam From 8d046d27fa08868e8551fdbc4059734c5d4c5873 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 14:31:24 -0400 Subject: [PATCH 371/766] ++ --- sv_callers/gridss_tumor_normal.mk | 2 +- sv_callers/manta_tumor_normal.mk | 15 +-------------- sv_callers/svaba_tumor_normal.mk | 12 +----------- 3 files changed, 3 insertions(+), 26 deletions(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 40a24a7e..92bf585b 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -5,7 +5,7 @@ LOGDIR = log/gridss_tumor_normal.$(NOW) GRIDSS_CORES ?= 8 GRIDSS_MEM_CORE ?= 6G GRIDSS_REF ?= $(REF_FASTA) -GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/ENCFF001TDO.bed +GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/example/ENCFF001TDO.bed GRIDSS ?= gridss gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) diff --git a/sv_callers/manta_tumor_normal.mk b/sv_callers/manta_tumor_normal.mk index 93380e40..3128f5ca 100644 --- a/sv_callers/manta_tumor_normal.mk +++ b/sv_callers/manta_tumor_normal.mk @@ -3,9 +3,7 @@ include modules/sv_callers/manta.inc LOGDIR ?= log/manta_tumor_normal.$(NOW) -manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf \ - vcf/$(pair).manta_indels.vcf \ - vcf/$(pair).manta_candidate_sv.vcf) +manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai @@ -17,20 +15,9 @@ manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp -manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz : manta/$1_$2.manta_timestamp - -manta/$1_$2/results/variants/candidateSV.vcf.gz : manta/$1_$2.manta_timestamp - - vcf/$1_$2.manta_sv.vcf : manta/$1_$2/results/variants/somaticSV.vcf.gz $$(INIT) zcat $$< > $$@ -vcf/$1_$2.manta_indels.vcf : manta/$1_$2/results/variants/candidateSmallIndels.vcf.gz - $$(INIT) zcat $$< > $$@ - -vcf/$1_$2.manta_candidate_sv.vcf : manta/$1_$2/results/variants/candidateSV.vcf.gz - $$(INIT) zcat $$< > $$@ - endef $(foreach pair,$(SAMPLE_PAIRS), \ $(eval $(call manta-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) diff --git a/sv_callers/svaba_tumor_normal.mk b/sv_callers/svaba_tumor_normal.mk index 0937171c..88e382e7 100644 --- a/sv_callers/svaba_tumor_normal.mk +++ b/sv_callers/svaba_tumor_normal.mk @@ -9,9 +9,7 @@ SVABA_DBSNP ?= $(HOME)/share/lib/resource_files/svaba/dbsnp_indel.vcf SVABA_BLACKLIST ?= $(HOME)/share/lib/resource_files/svaba/wgs_blacklist_meres.bed SVABA ?= svaba -svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf \ - vcf/$(pair).svaba_indels.vcf \ - vcf/$(pair).svaba_candidate_sv.vcf) +svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf) define svaba-tumor-normal svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam @@ -31,17 +29,9 @@ svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf -svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf $$(INIT) cat $$< > $$@ -vcf/$1_$2.svaba_indels.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - $$(INIT) cat $$< > $$@ - -vcf/$1_$2.svaba_candidate_sv.vcf : svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf - $$(INIT) cat $$< > $$@ - endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call svaba-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) From ac7b87af1ece13bdf582ede2ef6ccbedf53f37e0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 17:05:40 -0400 Subject: [PATCH 372/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 92bf585b..c76248ae 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -4,7 +4,7 @@ LOGDIR = log/gridss_tumor_normal.$(NOW) GRIDSS_CORES ?= 8 GRIDSS_MEM_CORE ?= 6G -GRIDSS_REF ?= $(REF_FASTA) +GRIDSS_REF ?= $(HOME)/share/lib/ref_files/b37/human_g1k_v37.fasta GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/example/ENCFF001TDO.bed GRIDSS ?= gridss From 3b44c744aa31ed16003fb76f67e3d0836fdf552d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 17:17:37 -0400 Subject: [PATCH 373/766] +++ --- Makefile | 29 ++++++--- clonality/plotpyclone.R | 97 ----------------------------- clonality/plotpyclone.mk | 15 ----- {ploidy => clonality}/pyclone_13.mk | 0 {ploidy => clonality}/pyclone_vi.mk | 0 clonality/pycloneconfig.R | 69 -------------------- clonality/runpyclone.mk | 20 ------ clonality/setuppyclone.mk | 22 ------- clonality/tsvforpyclone.R | 52 ---------------- sv_callers/gridss_tumor_normal.mk | 2 +- 10 files changed, 20 insertions(+), 286 deletions(-) delete mode 100644 clonality/plotpyclone.R delete mode 100644 clonality/plotpyclone.mk rename {ploidy => clonality}/pyclone_13.mk (100%) rename {ploidy => clonality}/pyclone_vi.mk (100%) delete mode 100644 clonality/pycloneconfig.R delete mode 100644 clonality/runpyclone.mk delete mode 100644 clonality/setuppyclone.mk delete mode 100644 clonality/tsvforpyclone.R diff --git a/Makefile b/Makefile index ea3c638c..d3fa7987 100644 --- a/Makefile +++ b/Makefile @@ -373,7 +373,23 @@ merge_bam : TARGETS += process_bam process_bam : $(call RUN_MAKE,modules/bam_tools/processBam.mk) + +TARGETS += get_bam +get_bam : + $(call RUN_MAKE,modules/bam_tools/get_bam.mk) + +#================================================== +# VCF tools +#================================================== +TARGETS += merge_sv +merge_sv : + $(call RUN_MAKE,modules/vcf_tools/merge_sv.mk) + +TARGETS += annot_sv +annot_sv : + $(call RUN_MAKE,modules/vcf_tools/annot_sv.mk) + #================================================== # FASTQ tools @@ -459,7 +475,7 @@ immune_deconv : #================================================== -# Ploidy/ Clonality +# Ploidy / Clonality #================================================== TARGETS += pyloh @@ -476,11 +492,11 @@ absolute_seq : TARGETS += pyclone_13 pyclone_13 : - $(call RUN_MAKE,modules/ploidy/pyclone_13.mk) + $(call RUN_MAKE,modules/clonality/pyclone_13.mk) TARGETS += pyclone_vi pyclone_vi : - $(call RUN_MAKE,modules/ploidy/pyclone_vi.mk) + $(call RUN_MAKE,modules/clonality/pyclone_vi.mk) #================================================== # mutational signatures @@ -587,12 +603,5 @@ hotspot_summary : $(MAKE) -f modules/variant_callers/genotypehotspots.mk -j $(NUM_JOBS) $(call RUN_MAKE,modules/summary/hotspotsummary.mk) -TARGETS += merge_sv -merge_sv : - $(call RUN_MAKE,modules/vcf_tools/merge_sv.mk) -TARGETS += get_bam -get_bam : - $(call RUN_MAKE,modules/bam_tools/get_bam.mk) - .PHONY : $(TARGETS) diff --git a/clonality/plotpyclone.R b/clonality/plotpyclone.R deleted file mode 100644 index 1648e82c..00000000 --- a/clonality/plotpyclone.R +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("ggplot2")) - -optList = list( - make_option("--sample_set", default = NULL, help = "sample set name"), - make_option("--normal_samples", default = NULL, help = "normal sample names"), - make_option("--min_depth", default = NA, help = "minimum depth to consider") - ) - -parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) -normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)) -normal_sample = tumor_samples[tumor_samples %in% normal_sample] -tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)] -min_depth = ifelse(is.na(opt$min_depth) | is.null(opt$min_depth) | opt$min_depth=="" | opt$min_depth==" ", 50, opt$min_depth) - -mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv")) %>% - mutate(mutation_id = paste0(Gene_Symbol, "_", HGVSp)) -index = apply(mutation_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=min_depth)})==length(tumor_samples) -mutation_summary = mutation_summary[index,,drop=FALSE] -pyclone_summary = read_tsv(file=paste0("pyclone/", opt$sample_set, "/report/pyclone.tsv"), col_types = cols(.default = col_character())) %>% - type_convert() %>% - full_join(mutation_summary, by="mutation_id") %>% - arrange(cluster_id) %>% - mutate(mutation_type = ifelse(Variant_Caller=="mutect", "SNV", "Indel")) %>% - mutate(nref = nchar(Ref)) %>% - mutate(nalt = nchar(Alt)) %>% - filter(nref<=2 & nalt<=2) - -df = pyclone_summary[,c("mutation_id", "cluster_id", "mutation_type"),drop=FALSE] -for (i in 1:length(tumor_samples)) { - x = pyclone_summary[,tumor_samples[i]] %>% - .[[1]] - c_x = pyclone_summary %>% - .[[paste0("CALL_", tumor_samples[i])]] - m_x = pyclone_summary %>% - .[[paste0("MAF_", tumor_samples[i])]] - x[x<.025 | c_x==0 | m_x<.05] = 0 - df = cbind(df, x) - colnames(df)[i+3] = tumor_samples[i] -} -index = apply(df[,tumor_samples], 1, function(x) {sum(x==0)})==length(tumor_samples) -df = df[!index,,drop=FALSE] -pyclone_summary = pyclone_summary[!index,,drop=FALSE] -index = apply(pyclone_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=500)})>=1 -df = df[!index,,drop=FALSE] -pyclone_summary = pyclone_summary[!index,,drop=FALSE] - - -pyclone_summary[,tumor_samples] = df[,tumor_samples] - - -clusters = table(pyclone_summary$cluster_id) -if (any(clusters==1)) { - pyclone_summary = pyclone_summary %>% - filter(!(cluster_id %in% names(clusters)[clusters==1])) -} - -df = pyclone_summary[,c("mutation_id", "cluster_id", "mutation_type"),drop=FALSE] -for (i in 1:length(tumor_samples)) { - x = pyclone_summary[,tumor_samples[i]] %>% - .[[1]] - df = cbind(df, x) - colnames(df)[i+3] = tumor_samples[i] -} - - -pdf(file=paste0("pyclone/", opt$sample_set, "/report/pyclone.pdf"), width=6.5, height=6) -for (i in 1:(length(tumor_samples)-1)) { - for (j in (i+1):length(tumor_samples)) { - x = df[,tumor_samples[i]] - y = df[,tumor_samples[j]] - z1 = df[,"cluster_id"] - z2 = df[,"mutation_type"] - tmp.0 = data_frame(x=x, y=y, z1=factor(z1, ordered=TRUE), z2=z2) - plot.0 = ggplot(tmp.0, aes(x=x, y=y, fill=z1, color=z1, shape=z2)) + - geom_point(alpha = .55, size=2.5) + - theme_classic() + - coord_cartesian(xlim=c(0,1), ylim=c(0,1)) + - theme(axis.text.y = element_text(size=15), axis.text.x = element_text(size=15), legend.text=element_text(size=9), legend.title=element_text(size=10), legend.background = element_blank(), legend.key.size = unit(1, 'lines')) + - labs(x=paste0("\n",tumor_samples[i],"\n"), y=paste0("\n",tumor_samples[j],"\n")) + - guides(color=guide_legend(title=c("Cluster")), shape=guide_legend(title=c("Type"))) + - guides(fill=FALSE) - print(plot.0) - } -} -dev.off() - -write_tsv(pyclone_summary, path=paste0("pyclone/", opt$sample_set, "/report/summary.tsv")) diff --git a/clonality/plotpyclone.mk b/clonality/plotpyclone.mk deleted file mode 100644 index 20abed4c..00000000 --- a/clonality/plotpyclone.mk +++ /dev/null @@ -1,15 +0,0 @@ -include modules/Makefile.inc -include modules/clonality/setuppyclone.mk - -LOGDIR ?= log/plot_pyclone.$(NOW) -PHONY += pyclone - -plot_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/report/pyclone.pdf) - -define plot-pyclone -pyclone/%/report/pyclone.pdf : pyclone/%/report/pyclone.tsv - $$(call RUN,-s 4G -m 6G -w 7200,"$(RSCRIPT) modules/clonality/plotpyclone.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES) --min_depth $(MIN_DEPTH)") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call plot-pyclone,$(set)))) diff --git a/ploidy/pyclone_13.mk b/clonality/pyclone_13.mk similarity index 100% rename from ploidy/pyclone_13.mk rename to clonality/pyclone_13.mk diff --git a/ploidy/pyclone_vi.mk b/clonality/pyclone_vi.mk similarity index 100% rename from ploidy/pyclone_vi.mk rename to clonality/pyclone_vi.mk diff --git a/clonality/pycloneconfig.R b/clonality/pycloneconfig.R deleted file mode 100644 index 110c8866..00000000 --- a/clonality/pycloneconfig.R +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) - -optList = list(make_option("--sample_set", default = NULL, help = "sample set name"), - make_option("--normal_samples", default = NULL, help = "normal sample names")) - -parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) -normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)) -normal_sample = tumor_samples[tumor_samples %in% normal_sample] -tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)] - -cat("num_iters: 10000\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = FALSE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("base_measure_params:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" alpha: 1\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" beta: 1\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("concentration:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" value: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" prior:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" shape: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" rate: 0.001\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("density: pyclone_beta_binomial\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("beta_binomial_precision_params:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" value: 1000\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" prior:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" shape: 1.0\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" rate: 0.0001\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" proposal:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(" precision: 0.5\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat(paste0("working_dir: pyclone/",opt$sample_set, "\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("trace_dir: trace", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("init_method: connected\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) -cat("samples:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - -for (i in 1:length(tumor_samples)) { - if (i!=1) { - cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - } - cat(paste0(" ", tumor_samples[i], ":\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - cat(paste0(" mutations_file: ", tumor_samples[i], ".yaml\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - cat(" tumour_content:\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - load(paste0("ascat/ascat/", tumor_samples[i], "_", normal_sample, ".RData")) - cat(paste0(" value: ", ifelse(is.na(purity), 1.0, signif(purity, 2)),"\n"), file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - cat(" error_rate: 0.01", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - if (i!=length(tumor_samples)) { - cat("\n", file=paste0("pyclone/", opt$sample_set, "/config.yaml"), append = TRUE) - } -} - -for (i in 1:length(tumor_samples)) { - system(paste0("source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/PyClone-0.13.1 && PyClone build_mutations_file --in_file pyclone/", opt$sample_set, "/", tumor_samples[i], ".tsv --out_file pyclone/", opt$sample_set, "/", tumor_samples[i], ".yaml --prior parental_copy_number")) -} diff --git a/clonality/runpyclone.mk b/clonality/runpyclone.mk deleted file mode 100644 index 0c86ddfa..00000000 --- a/clonality/runpyclone.mk +++ /dev/null @@ -1,20 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/run_pyclone.$(NOW) -PHONY += pyclone - -run_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/report/pyclone.tsv) - -define run-pyclone -pyclone/%/trace/alpha.tsv.bz2 : pyclone/%/config.yaml - $$(call RUN,-s 4G -m 6G -w 7200,"source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \ - PyClone run_analysis --config_file pyclone/$$*/config.yaml --seed 0") - -pyclone/%/report/pyclone.tsv : pyclone/%/trace/alpha.tsv.bz2 - $$(call RUN,-s 4G -m 6G -w 7200,"make -p pyclone/$$*/report && \ - source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate /home/${USER}/share/usr/anaconda-envs/PyClone-0.13.1 && \ - PyClone build_table --config_file pyclone/$$*/config.yaml --out_file pyclone/$$*/report/pyclone.tsv --max_cluster 10 --table_type old_style --burnin 5000") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call run-pyclone,$(set)))) diff --git a/clonality/setuppyclone.mk b/clonality/setuppyclone.mk deleted file mode 100644 index ebd2f8f9..00000000 --- a/clonality/setuppyclone.mk +++ /dev/null @@ -1,22 +0,0 @@ -include modules/Makefile.inc - -LOGDIR ?= log/setup_pyclone.$(NOW) -PHONY += pyclone - -MIN_DEPTH ?= 50 - -setup_pyclone : $(foreach set,$(SAMPLE_SETS),pyclone/$(set)/config.yaml) - -define make-input-pyclone -pyclone/%/config.yaml : sufam/%.tsv - $$(call RUN, -s 4G -m 6G,"mkdir -p pyclone/$$(*) && \ - $(RSCRIPT) modules/clonality/tsvforpyclone.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES) --min_depth $(MIN_DEPTH) && \ - $(RSCRIPT) modules/clonality/pycloneconfig.R --sample_set $$(*) --normal_samples $(NORMAL_SAMPLES)") - -endef -$(foreach set,$(SAMPLE_SETS),\ - $(eval $(call make-input-pyclone,$(set)))) - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: $(PHONY) diff --git a/clonality/tsvforpyclone.R b/clonality/tsvforpyclone.R deleted file mode 100644 index e6bc7be6..00000000 --- a/clonality/tsvforpyclone.R +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) - -optList = list( - make_option("--sample_set", default = NULL, help = "sample set name"), - make_option("--normal_samples", default = NULL, help = "normal sample names"), - make_option("--min_depth", default = NA, help = "minimum depth to consider") - ) - -parser = OptionParser(usage = "%prog [options] mutation_file", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -tumor_samples = unlist(strsplit(opt$sample_set, split="_", fixed=TRUE)) -normal_sample = unlist(strsplit(opt$normal_samples, split=" ", fixed=TRUE)) -normal_sample = tumor_samples[tumor_samples %in% normal_sample] -tumor_samples = tumor_samples[!(tumor_samples %in% normal_sample)] -min_depth = ifelse(is.na(opt$min_depth) | is.null(opt$min_depth) | opt$min_depth=="" | opt$min_depth==" ", 50, opt$min_depth) - -mutation_summary = read_tsv(file=paste0("sufam/", opt$sample_set, ".tsv")) -index = apply(mutation_summary[,paste0("DP_", tumor_samples)], 1, function(x) {sum(x>=min_depth)})==length(tumor_samples) -mutation_summary = mutation_summary[index,,drop=FALSE] -index = apply(mutation_summary[,paste0("CALL_", tumor_samples)], 1, function(x) {sum(x==0)})==length(tumor_samples) -mutation_summary = mutation_summary[!index,,drop=FALSE] - -for (i in 1:length(tumor_samples)) { - mutation_id = paste0(mutation_summary$Gene_Symbol, "_", mutation_summary$HGVSp) - fsq = mutation_summary %>% - .[[paste0("MAF_", tumor_samples[i])]] - qt = mutation_summary %>% - .[[paste0("qt_", tumor_samples[i])]] - q2 = mutation_summary %>% - .[[paste0("q2_", tumor_samples[i])]] - q1 = qt - q2 - n = mutation_summary %>% - .[[paste0("DP_", tumor_samples[i])]] - flag = mutation_summary %>% - .[[paste0("CALL_", tumor_samples[i])]] - - fsq[flag==0] = 0 - var_counts = round(fsq*n) - ref_counts = round((1-fsq)*n) - normal_cn = rep(2, length(mutation_id)) - major_cn = q2 - minor_cn = q1 - sample_summary = data.frame(mutation_id, ref_counts, var_counts, normal_cn, minor_cn, major_cn) - write.table(sample_summary, paste0("pyclone/", opt$sample_set, "/", tumor_samples[i], ".tsv"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE, append=FALSE) -} diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index c76248ae..e935074c 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -42,7 +42,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ ..DUMMY := $(shell mkdir -p version; \ - $(SVABA) --help &> version/gridss_tumor_normal.txt) + echo 'gridss' > version/gridss_tumor_normal.txt) .SECONDARY: .DELETE_ON_ERROR: .PHONY: gridss From 86afd7bf54544a1af476356f1148a078a20f821b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 19:04:30 -0400 Subject: [PATCH 374/766] filter base counts --- scripts/pyclone_13.R | 2 ++ scripts/pyclone_vi.R | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index f4e507b3..7825ab2c 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -69,7 +69,9 @@ if (as.numeric(opt$option) == 1) { } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% + dplyr::filter(ref_counts>0) %>% dplyr::filter(!is.na(var_counts)) %>% + dplyr::mutate(var_counts = ifelse(var_counts<=2, 0, var_counts)) %>% dplyr::filter(!is.na(major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 038f91ec..63803b58 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -78,8 +78,11 @@ if (as.numeric(opt$option) == 1) { } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% + dplyr::filter(ref_counts>0) %>% dplyr::filter(!is.na(alt_counts)) %>% - dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>% + dplyr::mutate(var_coalt_countsunts = ifelse(alt_counts<=2, 0, alt_counts)) %>% + dplyr::filter(!is.na(major_cn)) %>% + dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) From c7dc2efc8974269f509b5619fb8444de0ec77d3a Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 19:24:56 -0400 Subject: [PATCH 375/766] ++ --- clonality/pyclone_13.mk | 2 +- scripts/pyclone_vi.R | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 31daa1d0..1e9c4ffa 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -64,7 +64,7 @@ pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(s --normal_sample '$(normal.$1)' && \ echo 'taskcomplete' > $$(@)") -pyclone_13/$1/config.yaml : pyclone_13/$1/$1.taskcomplete +pyclone_13/$1/config.yaml : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 2 \ diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 63803b58..2f7ecccb 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -85,6 +85,15 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) + smry = pyclone %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(n = n()) %>% + dplyr::ungroup() + + pyclone = pyclone %>% + dplyr::left_join(smry, by = "mutation_id") %>% + dplyr::filter(n == length(sample_set)) + readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option) == 2) { From ca312d8c0a6575be86036112ce99d098e35db44a Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 20:38:21 -0400 Subject: [PATCH 376/766] ++ --- scripts/pyclone_13.R | 4 ++-- scripts/pyclone_vi.R | 4 ++-- sv_callers/gridss_tumor_normal.mk | 6 ------ sv_callers/svaba_tumor_normal.mk | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 7825ab2c..5b68d0df 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -69,9 +69,9 @@ if (as.numeric(opt$option) == 1) { } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% - dplyr::filter(ref_counts>0) %>% dplyr::filter(!is.na(var_counts)) %>% - dplyr::mutate(var_counts = ifelse(var_counts<=2, 0, var_counts)) %>% + dplyr::mutate(var_counts = ifelse(var_counts<=3, 0, var_counts)) %>% + dplyr::filter((ref_counts+var_counts)>10) %>% dplyr::filter(!is.na(major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 2f7ecccb..843ad694 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -78,9 +78,9 @@ if (as.numeric(opt$option) == 1) { } pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% - dplyr::filter(ref_counts>0) %>% dplyr::filter(!is.na(alt_counts)) %>% - dplyr::mutate(var_coalt_countsunts = ifelse(alt_counts<=2, 0, alt_counts)) %>% + dplyr::mutate(var_coalt_countsunts = ifelse(alt_counts<=3, 0, alt_counts)) %>% + dplyr::filter((ref_counts+alt_counts)>10) %>% dplyr::filter(!is.na(major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index e935074c..e707b66f 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -30,12 +30,6 @@ gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam #vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf # $$(INIT) cat $$< > $$@ -#vcf/$1_$2.svaba_indels.vcf : svaba/$1_$2.svaba.somatic.indel.vcf -# $$(INIT) cat $$< > $$@ - -#vcf/$1_$2.svaba_candidate_sv.vcf : svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf -# $$(INIT) cat $$< > $$@ - endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call gridss-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) diff --git a/sv_callers/svaba_tumor_normal.mk b/sv_callers/svaba_tumor_normal.mk index 88e382e7..cbb03b34 100644 --- a/sv_callers/svaba_tumor_normal.mk +++ b/sv_callers/svaba_tumor_normal.mk @@ -12,7 +12,7 @@ SVABA ?= svaba svaba : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).svaba_sv.vcf) define svaba-tumor-normal -svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam +svaba/$1_$2.svaba.somatic.sv.vcf : bam/$1.bam bam/$2.bam $$(call RUN,-c -n $(SVABA_CORES) -s 4G -m $(SVABA_MEM_CORE) -v $(SVABA_ENV) -w 72:00:00,"set -o pipefail && \ mkdir -p svaba && \ cd svaba && \ @@ -27,8 +27,6 @@ svaba/$1_$2.svaba.somatic.indel.vcf : bam/$1.bam bam/$2.bam -a $1_$2 \ -G $$(SVABA_REF)") -svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf $$(INIT) cat $$< > $$@ From 801dd93dffec777c8f937dac80ff64899daa0437 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 20:55:37 -0400 Subject: [PATCH 377/766] MCMC iterations --- clonality/pyclone_13.mk | 15 ++++++++++----- scripts/pyclone_13.R | 5 +++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 1e9c4ffa..b669b9de 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -5,6 +5,10 @@ LOGDIR ?= log/pyclone_13.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' +MCMC_ITER = 1000 +MCMC_BURNIN = 200 +MCMC_THIN = 1 + pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ @@ -70,7 +74,8 @@ pyclone_13/$1/config.yaml : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sampl --option 2 \ --sample_set $1 \ --normal_sample '$(normal.$1)' \ - --output_file $$(@)") + --output_file $$(@) \ + --num_iter $$(MCMC_ITER)") endef $(foreach set,$(SAMPLE_SETS),\ @@ -101,8 +106,8 @@ pyclone_13/$1/clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --config_file pyclone_13/$1/config.yaml \ --out_file pyclone_13/$1/clusters.txt \ --table_type cluster \ - --burnin 200 \ - --thin 1") + --burnin $$(MCMC_BURNIN) \ + --thin $$(MCMC_THIN)") pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ @@ -110,8 +115,8 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 --config_file pyclone_13/$1/config.yaml \ --out_file pyclone_13/$1/$1.txt \ --table_type loci \ - --burnin 200 \ - --thin 1") + --burnin $$(MCMC_BURNIN) \ + --thin $$(MCMC_THIN)") pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 5b68d0df..f5e9d342 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -18,7 +18,8 @@ optList = list(make_option("--option", default = NA, type = 'character', help = make_option("--sample_set", default = NA, type = 'character', help = "sample set"), make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), make_option("--input_file", default = NA, type = 'character', help = "input file"), - make_option("--output_file", default = NA, type = 'character', help = "output file")) + make_option("--output_file", default = NA, type = 'character', help = "output file"), + make_option("--num_iter", default = NA, type = 'character', help = "mcmc iterations")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options @@ -105,7 +106,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] } - cat("num_iters: 1000\n\n", file = as.character(opt$output_file), append = FALSE) + cat(paste0("num_iters: ", as.numeric(opt$num_iter), "\n\n", file = as.character(opt$output_file), append = FALSE) cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) From 53495fc9d94d7783630afc4b7275536a0c1767ec Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 21:14:00 -0400 Subject: [PATCH 378/766] Update pyclone_13.R --- scripts/pyclone_13.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index f5e9d342..88c187d4 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -193,11 +193,21 @@ if (as.numeric(opt$option) == 1) { dplyr::left_join(smry_c, by = "cluster_id") %>% dplyr::left_join(smry_p, by = "cluster_id") - colourCount = length(unique(pyclone_ft$cluster_id)) + smry_cl = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::arrange(desc(mean)) %>% + dplyr::mutate(cluster_id_ordered = nrow(.):1) + + pyclone_ft = pyclone_ft %>% + dplyr::left_join(smry_cl, by = "cluster_id") + + colourCount = length(unique(pyclone_ft$cluster_id_ordered)) getPalette = colorRampPalette(brewer.pal(9, "Set1")) plot_ = pyclone_ft %>% - ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + + ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id_ordered), size = n)) + geom_point(stat = "identity", alpha = .75, shape = 21) + scale_color_manual(values = getPalette(colourCount)) + xlab("\n\nCCF (%)\n") + From e31ee912ae1727d47ed2e8ecda972a83ece79db3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 21:15:26 -0400 Subject: [PATCH 379/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 88c187d4..439b4d00 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -106,7 +106,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] } - cat(paste0("num_iters: ", as.numeric(opt$num_iter), "\n\n", file = as.character(opt$output_file), append = FALSE) + cat(paste0("num_iters: ", as.numeric(opt$num_iter), "\n\n"), file = as.character(opt$output_file), append = FALSE) cat("base_measure_params:\n", file = as.character(opt$output_file), append = TRUE) cat(" alpha: 1\n", file = as.character(opt$output_file), append = TRUE) cat(" beta: 1\n", file = as.character(opt$output_file), append = TRUE) From e4fc49c445842a296c3ef25a2c846c9d533f9d57 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:23:47 -0400 Subject: [PATCH 380/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 439b4d00..19927488 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -257,7 +257,7 @@ if (as.numeric(opt$option) == 1) { col.dendrogram = FALSE, smooth.heat = FALSE, scale = FALSE, - heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + heat.pal = c(rep("#d9d9d9", 10), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), legend = FALSE, grid.hline = FALSE, grid.vline = TRUE, From 8e5ec52a7952e434a30caf09c17388355d03e5c4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:24:25 -0400 Subject: [PATCH 381/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 19927488..b99acefd 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -233,7 +233,7 @@ if (as.numeric(opt$option) == 1) { smry_cl = pyclone %>% dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::summarize(mean = sum(cellular_prevalence)) %>% dplyr::ungroup() %>% dplyr::arrange(desc(mean)) %>% dplyr::mutate(cluster_id_ordered = nrow(.):1) From f7e03297a4f2f605885b5d4f1766d630930955e0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:26:25 -0400 Subject: [PATCH 382/766] Update pyclone_13.R --- scripts/pyclone_13.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index b99acefd..94c04c34 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -233,7 +233,7 @@ if (as.numeric(opt$option) == 1) { smry_cl = pyclone %>% dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = sum(cellular_prevalence)) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% dplyr::ungroup() %>% dplyr::arrange(desc(mean)) %>% dplyr::mutate(cluster_id_ordered = nrow(.):1) @@ -257,7 +257,7 @@ if (as.numeric(opt$option) == 1) { col.dendrogram = FALSE, smooth.heat = FALSE, scale = FALSE, - heat.pal = c(rep("#d9d9d9", 10), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + heat.pal = c(rep("#d9d9d9", 6), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), legend = FALSE, grid.hline = FALSE, grid.vline = TRUE, From d2a80e1d75bb264d8d5f6e4a8aed5a4ddf578ee1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:28:54 -0400 Subject: [PATCH 383/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 94c04c34..1d1af463 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -257,7 +257,7 @@ if (as.numeric(opt$option) == 1) { col.dendrogram = FALSE, smooth.heat = FALSE, scale = FALSE, - heat.pal = c(rep("#d9d9d9", 6), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + heat.pal = c(rep("#d9d9d9", 7), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), legend = FALSE, grid.hline = FALSE, grid.vline = TRUE, From 70d651b8b9cb1dbb90d6ce4fcc9df6ad66c48136 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:34:06 -0400 Subject: [PATCH 384/766] Update pyclone_13.R --- scripts/pyclone_13.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 1d1af463..70e5bbb2 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -257,7 +257,7 @@ if (as.numeric(opt$option) == 1) { col.dendrogram = FALSE, smooth.heat = FALSE, scale = FALSE, - heat.pal = c(rep("#d9d9d9", 7), "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), + heat.pal = c(rep("#d9d9d9", 4), rep("#9ecae1", 2), "#4292c6", "#2171b5", "#08519c", "#08306b"), legend = FALSE, grid.hline = FALSE, grid.vline = TRUE, From 388e3233fede8875b435e25259a5e8fb1cbbdf32 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:38:10 -0400 Subject: [PATCH 385/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index b669b9de..111955a1 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -5,8 +5,8 @@ LOGDIR ?= log/pyclone_13.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -MCMC_ITER = 1000 -MCMC_BURNIN = 200 +MCMC_ITER = 100 +MCMC_BURNIN = 50 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ From aa5777a688c690ad802baa26a8cdc98c7dfb9e91 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 4 Nov 2022 23:57:07 -0400 Subject: [PATCH 386/766] ++ --- clonality/pyclone_13.mk | 4 ++-- scripts/pyclone_13.R | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 111955a1..b669b9de 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -5,8 +5,8 @@ LOGDIR ?= log/pyclone_13.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -MCMC_ITER = 100 -MCMC_BURNIN = 50 +MCMC_ITER = 1000 +MCMC_BURNIN = 200 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 70e5bbb2..f37a16c4 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -116,7 +116,7 @@ if (as.numeric(opt$option) == 1) { cat("\n", file = as.character(opt$output_file), append = TRUE) cat(" prior:\n", file = as.character(opt$output_file), append = TRUE) cat(" shape: 1.0\n", file = as.character(opt$output_file), append = TRUE) - cat(" rate: 0.001\n", file = as.character(opt$output_file), append = TRUE) + cat(" rate: 1.0\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat("density: pyclone_beta_binomial\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) @@ -124,11 +124,11 @@ if (as.numeric(opt$option) == 1) { cat(" value: 1000\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat(" prior:\n", file = as.character(opt$output_file), append = TRUE) - cat(" shape: 1.0\n", file = as.character(opt$output_file), append = TRUE) - cat(" rate: 0.0001\n", file = as.character(opt$output_file), append = TRUE) + cat(" shape: 10\n", file = as.character(opt$output_file), append = TRUE) + cat(" rate: 10\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat(" proposal:\n", file = as.character(opt$output_file), append = TRUE) - cat(" precision: 0.01\n", file = as.character(opt$output_file), append = TRUE) + cat(" precision: 0.1\n", file = as.character(opt$output_file), append = TRUE) cat("\n", file = as.character(opt$output_file), append = TRUE) cat("working_dir: pyclone_13/", file = as.character(opt$output_file), append = TRUE) cat(as.character(opt$sample_set), file = as.character(opt$output_file), append = TRUE) From b8167d195917900b1080822cc9e008ea02ef2474 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 00:02:04 -0400 Subject: [PATCH 387/766] ++ --- scripts/pyclone_13.R | 6 ++++-- scripts/pyclone_vi.R | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index f37a16c4..25d264c8 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -79,12 +79,14 @@ if (as.numeric(opt$option) == 1) { smry = pyclone %>% dplyr::group_by(mutation_id) %>% - dplyr::summarize(n = n()) %>% + dplyr::summarize(n_x = n(), + n_y = sum(var_counts)) %>% dplyr::ungroup() pyclone = pyclone %>% dplyr::left_join(smry, by = "mutation_id") %>% - dplyr::filter(n == length(sample_set)) + dplyr::filter(n_x == length(sample_set)) %>% + dplyr::filter(n_y > 0) for (i in 1:length(sample_set)) { pyclone_ft = pyclone %>% diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 843ad694..c07f5168 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -87,12 +87,14 @@ if (as.numeric(opt$option) == 1) { smry = pyclone %>% dplyr::group_by(mutation_id) %>% - dplyr::summarize(n = n()) %>% + dplyr::summarize(n_x = n(), + n_y = sum(var_counts)) %>% dplyr::ungroup() pyclone = pyclone %>% dplyr::left_join(smry, by = "mutation_id") %>% - dplyr::filter(n == length(sample_set)) + dplyr::filter(n_x == length(sample_set)) %>% + dplyr::filter(n_y > 0) readr::write_tsv(x = pyclone, file = opt$output_file, append = FALSE, col_names = TRUE) From 725a8e529833b71a6de0b8122857255792ee5dfb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 01:20:19 -0400 Subject: [PATCH 388/766] Update pyclone_13.R --- scripts/pyclone_13.R | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 25d264c8..44031e2c 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -151,7 +151,8 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 3) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) pyclone = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() + readr::type_convert() %>% + dplyr::arrange(mutation_id) pyclone_ft = list() index = 1 @@ -162,14 +163,12 @@ if (as.numeric(opt$option) == 1) { dplyr::select(mutation_id, cluster_id, sample_id_x = sample_id, - cellular_prevalence_x = cellular_prevalence, - cellular_prevalence_std_x = cellular_prevalence_std) %>% - dplyr::full_join(pyclone %>% + cellular_prevalence_x = cellular_prevalence) %>% + dplyr::bind_cols(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% dplyr::select(mutation_id, sample_id_y = sample_id, - cellular_prevalence_y = cellular_prevalence, - cellular_prevalence_std_y = cellular_prevalence_std), + cellular_prevalence_y = cellular_prevalence), by = "mutation_id") %>% readr::type_convert() index = index + 1 @@ -178,32 +177,25 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = do.call(bind_rows, pyclone_ft) %>% readr::type_convert() - smry_c = pyclone_ft %>% + smry_x = pyclone_ft %>% dplyr::group_by(mutation_id) %>% dplyr::summarize(cluster_id = unique(cluster_id)) %>% dplyr::ungroup() %>% dplyr::group_by(cluster_id) %>% dplyr::summarize(n = n()) - smry_p = pyclone %>% - dplyr::group_by(cluster_id, sample_id) %>% - dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_c, by = "cluster_id") %>% - dplyr::left_join(smry_p, by = "cluster_id") + dplyr::left_join(smry_x, by = "cluster_id") - smry_cl = pyclone %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::arrange(desc(mean)) %>% - dplyr::mutate(cluster_id_ordered = nrow(.):1) + smry_y = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() %>% + dplyr::arrange(desc(mean)) %>% + dplyr::mutate(cluster_id_ordered = nrow(.):1) pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_cl, by = "cluster_id") + dplyr::left_join(smry_y, by = "cluster_id") colourCount = length(unique(pyclone_ft$cluster_id_ordered)) getPalette = colorRampPalette(brewer.pal(9, "Set1")) From 48b0553dd661a09be72aa4fa53e2b3c922b9c25d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 01:27:52 -0400 Subject: [PATCH 389/766] Update pyclone_13.R --- scripts/pyclone_13.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 44031e2c..790c3532 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -166,8 +166,7 @@ if (as.numeric(opt$option) == 1) { cellular_prevalence_x = cellular_prevalence) %>% dplyr::bind_cols(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::select(mutation_id, - sample_id_y = sample_id, + dplyr::select(sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence), by = "mutation_id") %>% readr::type_convert() From b3f9589262ca00f293793a32cf8182190053c46f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 03:41:48 -0400 Subject: [PATCH 390/766] Update pyclone_13.R --- scripts/pyclone_13.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 790c3532..93b58a6a 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -164,9 +164,10 @@ if (as.numeric(opt$option) == 1) { cluster_id, sample_id_x = sample_id, cellular_prevalence_x = cellular_prevalence) %>% - dplyr::bind_cols(pyclone %>% + dplyr::full_join(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% - dplyr::select(sample_id_y = sample_id, + dplyr::select(mutation_id, + sample_id_y = sample_id, cellular_prevalence_y = cellular_prevalence), by = "mutation_id") %>% readr::type_convert() @@ -174,7 +175,9 @@ if (as.numeric(opt$option) == 1) { } } pyclone_ft = do.call(bind_rows, pyclone_ft) %>% - readr::type_convert() + readr::type_convert() %>% + dplyr::filter(!is.na(cellular_prevalence_x)) %>% + dplyr::filter(!is.na(cellular_prevalence_y)) smry_x = pyclone_ft %>% dplyr::group_by(mutation_id) %>% From c9e8a92afab126aa9b13a1bd22067c1369f8e911 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:24:37 -0400 Subject: [PATCH 391/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index e707b66f..e2fac5a4 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -7,6 +7,8 @@ GRIDSS_MEM_CORE ?= 6G GRIDSS_REF ?= $(HOME)/share/lib/ref_files/b37/human_g1k_v37.fasta GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/example/ENCFF001TDO.bed GRIDSS ?= gridss +GRIDSS_FILTER ?= gridss_somatic_filter +GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) @@ -22,6 +24,21 @@ gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam -b $$(GRIDSS_BLACKLIST) \ ../../bam/$2.bam \ ../../bam/$1.bam") + +gridss/$1_$2/$1_$2.gridss_sv_ft.vcf : gridss/$1_$2/$1_$2.gridss_sv.vcf + $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV) -w 72:00:00,"set -o pipefail && \ + cd gridss/$1_$2 && \ + $$(GRIDSS_FILTER) \ + --pondir $$(GRIDSS_PON_DIR) \ + --ref $$(GRIDSS_REF) \ + --input $1_$2.gridss.sv.vcf \ + --output $1_$2.gridss_sv_ft.vcf.gz \ + --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ + --scriptdir $$(GRIDSS_ENV)/bin \ + -n 1 \ + -t 2") + + #svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf From 773482f3b618aa5192a2b31e92c1695628772718 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:25:34 -0400 Subject: [PATCH 392/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index e2fac5a4..be5fcf2c 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -26,17 +26,17 @@ gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam ../../bam/$1.bam") gridss/$1_$2/$1_$2.gridss_sv_ft.vcf : gridss/$1_$2/$1_$2.gridss_sv.vcf - $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV) -w 72:00:00,"set -o pipefail && \ - cd gridss/$1_$2 && \ - $$(GRIDSS_FILTER) \ - --pondir $$(GRIDSS_PON_DIR) \ - --ref $$(GRIDSS_REF) \ - --input $1_$2.gridss.sv.vcf \ - --output $1_$2.gridss_sv_ft.vcf.gz \ - --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ - --scriptdir $$(GRIDSS_ENV)/bin \ - -n 1 \ - -t 2") + $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV),"set -o pipefail && \ + cd gridss/$1_$2 && \ + $$(GRIDSS_FILTER) \ + --pondir $$(GRIDSS_PON_DIR) \ + --ref $$(GRIDSS_REF) \ + --input $1_$2.gridss.sv.vcf \ + --output $1_$2.gridss_sv_ft.vcf.gz \ + --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ + --scriptdir $$(GRIDSS_ENV)/bin \ + -n 1 \ + -t 2") From 90905b4d6dc8162c9f4d9405844e59e9d07830be Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:26:11 -0400 Subject: [PATCH 393/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index be5fcf2c..29d1c838 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -10,7 +10,8 @@ GRIDSS ?= gridss GRIDSS_FILTER ?= gridss_somatic_filter GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon -gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) +gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf) define gridss-tumor-normal gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam From d7b99f4d2a28ff7bdc2fcdbc37b71f8d0e697404 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:28:55 -0400 Subject: [PATCH 394/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 29d1c838..20f6fcd8 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -30,12 +30,12 @@ gridss/$1_$2/$1_$2.gridss_sv_ft.vcf : gridss/$1_$2/$1_$2.gridss_sv.vcf $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV),"set -o pipefail && \ cd gridss/$1_$2 && \ $$(GRIDSS_FILTER) \ - --pondir $$(GRIDSS_PON_DIR) \ + --pondir $$(GRIDSS_PON_DIR) \ --ref $$(GRIDSS_REF) \ - --input $1_$2.gridss.sv.vcf \ + --input $1_$2.gridss_sv.vcf \ --output $1_$2.gridss_sv_ft.vcf.gz \ --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ - --scriptdir $$(GRIDSS_ENV)/bin \ + --scriptdir '$$(GRIDSS_ENV)/bin' \ -n 1 \ -t 2") From 8a8901c99b59ea3928b2198b3ba72a46645bca75 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:29:47 -0400 Subject: [PATCH 395/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 20f6fcd8..117864d2 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -8,7 +8,7 @@ GRIDSS_REF ?= $(HOME)/share/lib/ref_files/b37/human_g1k_v37.fasta GRIDSS_BLACKLIST ?= $(HOME)/share/lib/resource_files/gridss/example/ENCFF001TDO.bed GRIDSS ?= gridss GRIDSS_FILTER ?= gridss_somatic_filter -GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon +GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon/ gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf) From 2af81adc419ce2ad82f9113987232e96ae02a3ba Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:31:11 -0400 Subject: [PATCH 396/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 117864d2..f4ebd4cc 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -35,7 +35,7 @@ gridss/$1_$2/$1_$2.gridss_sv_ft.vcf : gridss/$1_$2/$1_$2.gridss_sv.vcf --input $1_$2.gridss_sv.vcf \ --output $1_$2.gridss_sv_ft.vcf.gz \ --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ - --scriptdir '$$(GRIDSS_ENV)/bin' \ + --scriptdir '$(GRIDSS_ENV)/bin' \ -n 1 \ -t 2") From 3730bcdd57a14a32bb59651471b284ac6d6a2859 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 14:47:47 -0400 Subject: [PATCH 397/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 37 +++++++++++++------------------ 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index f4ebd4cc..ab916de8 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -11,7 +11,8 @@ GRIDSS_FILTER ?= gridss_somatic_filter GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon/ gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) \ - $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf) + $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).gridss_sv.vcf) define gridss-tumor-normal gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam @@ -26,27 +27,19 @@ gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam ../../bam/$2.bam \ ../../bam/$1.bam") -gridss/$1_$2/$1_$2.gridss_sv_ft.vcf : gridss/$1_$2/$1_$2.gridss_sv.vcf - $$(call RUN,-c -n $(GRIDSS_CORES) -s 4G -m $(GRIDSS_MEM_CORE) -v $(GRIDSS_ENV),"set -o pipefail && \ - cd gridss/$1_$2 && \ - $$(GRIDSS_FILTER) \ - --pondir $$(GRIDSS_PON_DIR) \ - --ref $$(GRIDSS_REF) \ - --input $1_$2.gridss_sv.vcf \ - --output $1_$2.gridss_sv_ft.vcf.gz \ - --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.gz \ - --scriptdir '$(GRIDSS_ENV)/bin' \ - -n 1 \ - -t 2") - - - -#svaba/$1_$2.svaba.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - -#svaba/$1_$2.svaba.unfiltered.somatic.sv.vcf : svaba/$1_$2.svaba.somatic.indel.vcf - -#vcf/$1_$2.svaba_sv.vcf : svaba/$1_$2.svaba.somatic.sv.vcf -# $$(INIT) cat $$< > $$@ +gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz : gridss/$1_$2/$1_$2.gridss_sv.vcf + $$(call RUN,-c -n 1 -s 12G -m 18G -v $(GRIDSS_ENV),"set -o pipefail && \ + cd gridss/$1_$2 && \ + $$(GRIDSS_FILTER) \ + --pondir $$(GRIDSS_PON_DIR) \ + --input $1_$2.gridss_sv.vcf \ + --output $1_$2.gridss_sv_ft.vcf.bgz \ + --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.bgz \ + -n 1 \ + -t 2") + +vcf/$1_$2.gridss_sv.vcf : gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz + $$(INIT) zcat $$(<) > $$(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From a5a4e822b895a768c77c789100b83d583a76b377 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:03:07 -0400 Subject: [PATCH 398/766] ++ --- config.inc | 1 + sv_callers/gridss_tumor_normal.mk | 4 ++-- vcf_tools/merge_sv.mk | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config.inc b/config.inc index ab86c32e..16ac1c6e 100644 --- a/config.inc +++ b/config.inc @@ -31,6 +31,7 @@ PYCLONE_ENV = $(HOME)/share/usr/env/pyclone-vi-0.1.2 PYCLONE_13_ENV = $(HOME)/share/usr/env/pyclone-0.13.1 GRIDSS_ENV = $(HOME)/share/usr/env/gridss-2.13.2 SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 +SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index ab916de8..29707a0f 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -33,8 +33,8 @@ gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz : gridss/$1_$2/$1_$2.gridss_sv.vcf $$(GRIDSS_FILTER) \ --pondir $$(GRIDSS_PON_DIR) \ --input $1_$2.gridss_sv.vcf \ - --output $1_$2.gridss_sv_ft.vcf.bgz \ - --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf.bgz \ + --output $1_$2.gridss_sv_ft.vcf \ + --fulloutput $1_$2.gridss_sv_high_and_low_confidence_somatic.vcf \ -n 1 \ -t 2") diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index b2d59473..1d9b26d1 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -2,7 +2,6 @@ include modules/Makefile.inc LOGDIR ?= log/merge_sv.$(NOW) -SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 SV_CALLERS = svaba manta MAX_DIST = 500 NUM_CALLERS = 1 From ebc1083690d6bd5bc04c0cc2b32ee41f9eda9a02 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:08:22 -0400 Subject: [PATCH 399/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 1d9b26d1..15347179 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,37 +4,30 @@ LOGDIR ?= log/merge_sv.$(NOW) SV_CALLERS = svaba manta MAX_DIST = 500 -NUM_CALLERS = 1 +NUM_CALLERS = 2 TYPE = 1 STRAND = 1 MIN_SIZE = 30 -merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_sv.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) \ - $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/sample_list_candidate_sv.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_candidate_sv.vcf) +merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) +# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) define merge-sv -merge_sv/$1_$2/sample_list_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - mkdir -p merge_sv/$1_$2 - echo vcf/$1_$2.svaba_sv.vcf > $$(@) - echo vcf/$1_$2.manta_sv.vcf >> $$(@) - -vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ - SURVIVOR merge $$(<) \ - $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") - -merge_sv/$1_$2/sample_list_candidate_sv.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_candidate_sv.vcf) - mkdir -p merge_sv/$1_$2 - echo vcf/$1_$2.svaba_candidate_sv.vcf > $$(@) - echo vcf/$1_$2.manta_candidate_sv.vcf >> $$(@) - -vcf/$1_$2.merged_candidate_sv.vcf : merge_sv/$1_$2/sample_list_candidate_sv.txt - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ - SURVIVOR merge $$(<) \ - $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") +merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) + $$(INIT) mkdir -p merge_sv/$1_$2 && \ + for caller in $(SV_CALLERS); do \ + echo vcf/$1_$2.$(caller)_sv.vcf > $$(@); + done +#merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) +# mkdir -p merge_sv/$1_$2 +# echo vcf/$1_$2.svaba_sv.vcf > $$(@) +# echo vcf/$1_$2.manta_sv.vcf >> $$(@) +# +#vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt +# $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ +# SURVIVOR merge $$(<) \ +# $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From f4bcf8058256fe2238a9c955dbb10e5bd882d671 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:14:19 -0400 Subject: [PATCH 400/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 15347179..86eab331 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -14,10 +14,10 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) - $$(INIT) mkdir -p merge_sv/$1_$2 && \ - for caller in $(SV_CALLERS); do \ - echo vcf/$1_$2.$(caller)_sv.vcf > $$(@); - done + mkdir -p merge_sv/$1_$2 && \ + for caller in $(SV_CALLERS); do \ + echo vcf/$1_$2.$$(caller)_sv.vcf > $$(@); \ + done #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) # mkdir -p merge_sv/$1_$2 From 06e3c22cef065ed4da9cdd7db5d4f93c2c6ae04a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:14:58 -0400 Subject: [PATCH 401/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 86eab331..31bb4849 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -16,7 +16,7 @@ define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ for caller in $(SV_CALLERS); do \ - echo vcf/$1_$2.$$(caller)_sv.vcf > $$(@); \ + echo \"vcf/$1_$2.$$(caller)_sv.vcf\" > $$(@); \ done #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) From 44fe173cac2610d87a54ee81139bb18ecdc1d4fe Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:15:20 -0400 Subject: [PATCH 402/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 31bb4849..c8bdac77 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -16,7 +16,7 @@ define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ for caller in $(SV_CALLERS); do \ - echo \"vcf/$1_$2.$$(caller)_sv.vcf\" > $$(@); \ + echo 'vcf/$1_$2.$$(caller)_sv.vcf' > $$(@); \ done #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) From 175dd5e990c6783ec4d14901e12e91bda0e4396e Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:15:55 -0400 Subject: [PATCH 403/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index c8bdac77..b4b0caeb 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -16,7 +16,7 @@ define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ for caller in $(SV_CALLERS); do \ - echo 'vcf/$1_$2.$$(caller)_sv.vcf' > $$(@); \ + echo `vcf/$1_$2.$$(caller)_sv.vcf` > $$(@); \ done #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) From 2e6ff4e7974b3037f0f790de20470c0360a69ceb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:16:58 -0400 Subject: [PATCH 404/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index b4b0caeb..7e790a7b 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -15,8 +15,8 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ - for caller in $(SV_CALLERS); do \ - echo `vcf/$1_$2.$$(caller)_sv.vcf` > $$(@); \ + for caller in $$(SV_CALLERS); do \ + echo vcf/$1_$2.$(caller)_sv.vcf > $$(@); \ done #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) From c4af660479e03761fe63d9c7b56a71b58e144eb7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:17:19 -0400 Subject: [PATCH 405/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 7e790a7b..b1895e6f 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -2,7 +2,7 @@ include modules/Makefile.inc LOGDIR ?= log/merge_sv.$(NOW) -SV_CALLERS = svaba manta +SV_CALLERS = svaba manta gridss MAX_DIST = 500 NUM_CALLERS = 2 TYPE = 1 From fc4da0af613e35b34bdf4c7d9d9d0fd2b6cbcd1e Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:18:29 -0400 Subject: [PATCH 406/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index b1895e6f..e3719489 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -15,9 +15,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ - for caller in $$(SV_CALLERS); do \ - echo vcf/$1_$2.$(caller)_sv.vcf > $$(@); \ - done + $(foreach caller,$(SV_CALLERS),echo vcf/$1_$2.$(caller)_sv.vcf > $$(@);) #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) # mkdir -p merge_sv/$1_$2 From f6efc1de8627b7a864a9f9646aa58822e4de075b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:19:15 -0400 Subject: [PATCH 407/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index e3719489..a0783e1b 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -15,7 +15,7 @@ merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ - $(foreach caller,$(SV_CALLERS),echo vcf/$1_$2.$(caller)_sv.vcf > $$(@);) + $(foreach caller,$(SV_CALLERS),echo vcf/$1_$2.$(caller)_sv.vcf >> $$(@);) #merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) # mkdir -p merge_sv/$1_$2 From 900389978333ee453d5cdcf83007bc43c77f6868 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:21:37 -0400 Subject: [PATCH 408/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index a0783e1b..c2b765d7 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -10,22 +10,17 @@ STRAND = 1 MIN_SIZE = 30 merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) -# $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) + $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) mkdir -p merge_sv/$1_$2 && \ $(foreach caller,$(SV_CALLERS),echo vcf/$1_$2.$(caller)_sv.vcf >> $$(@);) -#merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) -# mkdir -p merge_sv/$1_$2 -# echo vcf/$1_$2.svaba_sv.vcf > $$(@) -# echo vcf/$1_$2.manta_sv.vcf >> $$(@) -# -#vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/sample_list_sv.txt -# $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ -# SURVIVOR merge $$(<) \ -# $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") +merge_sv/$1_$2/$1_$2.merged_sv.vcf : merge_sv/$1_$2/samples.txt + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR merge $$(<) \ + $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 6876a79597afed6c6372b84c2d5bf1975f31d200 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:22:31 -0400 Subject: [PATCH 409/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index c2b765d7..34addfaa 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -9,7 +9,7 @@ TYPE = 1 STRAND = 1 MIN_SIZE = 30 -merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) +merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) \ $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) define merge-sv From dc78fc3957c149d4ae737968b60c4dbd5e9c60ef Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:30:52 -0400 Subject: [PATCH 410/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 34addfaa..08d85a74 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -10,7 +10,8 @@ STRAND = 1 MIN_SIZE = 30 merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) + $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) define merge-sv merge_sv/$1_$2/samples.txt : $(foreach caller,$(SV_CALLERS),vcf/$1_$2.$(caller)_sv.vcf) @@ -22,6 +23,9 @@ merge_sv/$1_$2/$1_$2.merged_sv.vcf : merge_sv/$1_$2/samples.txt SURVIVOR merge $$(<) \ $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") +vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/$1_$2.merged_sv.vcf + $$(INIT) cat $$(<) > $$(@) + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) From d40a57d5747e939ed1e74391e73c4ff2b5e284e7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:42:10 -0400 Subject: [PATCH 411/766] ++ --- Makefile | 7 ++++--- vcf_tools/annotate_sv.mk | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 vcf_tools/annotate_sv.mk diff --git a/Makefile b/Makefile index d3fa7987..b2bf1f07 100644 --- a/Makefile +++ b/Makefile @@ -378,6 +378,7 @@ TARGETS += get_bam get_bam : $(call RUN_MAKE,modules/bam_tools/get_bam.mk) + #================================================== # VCF tools #================================================== @@ -386,9 +387,9 @@ TARGETS += merge_sv merge_sv : $(call RUN_MAKE,modules/vcf_tools/merge_sv.mk) -TARGETS += annot_sv -annot_sv : - $(call RUN_MAKE,modules/vcf_tools/annot_sv.mk) +TARGETS += annotate_sv +annotate_sv : + $(call RUN_MAKE,modules/vcf_tools/annotate_sv.mk) #================================================== diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk new file mode 100644 index 00000000..2f932caa --- /dev/null +++ b/vcf_tools/annotate_sv.mk @@ -0,0 +1,21 @@ +include modules/Makefile.inc + +LOGDIR ?= log/anotate_sv.$(NOW) + +SV_CALLERS = svaba manta gridss merged + +annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ + $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.txt)) + +define annotate-sv +annotate_sv/$1/$1.$2_sv.txt : vcf/$1.$2_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(foreach caller,$(SV_CALLER), \ + $(eval $(call annotate-sv,$(pair),$(caller))))) + +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: annotate_sv From 252cb26069747fa5f169ce249f05449727ca06a4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:43:15 -0400 Subject: [PATCH 412/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 2f932caa..6d4f5e0a 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -13,7 +13,7 @@ annotate_sv/$1/$1.$2_sv.txt : vcf/$1.$2_sv.vcf endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(foreach caller,$(SV_CALLER), \ + $(foreach caller,$(SV_CALLERS), \ $(eval $(call annotate-sv,$(pair),$(caller))))) .DELETE_ON_ERROR: From e8224d7c225466085cdfe10bf5717ad0041f38c0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:58:20 -0400 Subject: [PATCH 413/766] ++ --- config.inc | 1 + vcf_tools/annotate_sv.mk | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/config.inc b/config.inc index 16ac1c6e..d5e8021e 100644 --- a/config.inc +++ b/config.inc @@ -32,6 +32,7 @@ PYCLONE_13_ENV = $(HOME)/share/usr/env/pyclone-0.13.1 GRIDSS_ENV = $(HOME)/share/usr/env/gridss-2.13.2 SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 +ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 6d4f5e0a..22abbbac 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -3,13 +3,19 @@ include modules/Makefile.inc LOGDIR ?= log/anotate_sv.$(NOW) SV_CALLERS = svaba manta gridss merged +ANNOTATE_SV ?= $(HOME)/share/usr/env/annot_sv-3.1.3/opt/AnnotSV/bin/AnnotSV annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ - $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.txt)) + $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) define annotate-sv -annotate_sv/$1/$1.$2_sv.txt : vcf/$1.$2_sv.vcf - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail") +annotate_sv/$1/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + mkdir -p annotate_sv/$1 && \ + $$(ANNOTATE_SV) \ + -SVinputFile $$(<) \ + -outputFile ./$$(@) \ + -genomeBuild GRCh37") endef $(foreach pair,$(SAMPLE_PAIRS),\ From ed56ee79f240dd17dd6341d4c2b9b0ec47907e2c Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 15:58:56 -0400 Subject: [PATCH 414/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 22abbbac..61b816a1 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -10,12 +10,12 @@ annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ define annotate-sv annotate_sv/$1/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ - mkdir -p annotate_sv/$1 && \ - $$(ANNOTATE_SV) \ - -SVinputFile $$(<) \ - -outputFile ./$$(@) \ - -genomeBuild GRCh37") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(ANNOTATE_SV_ENV),"set -o pipefail && \ + mkdir -p annotate_sv/$1 && \ + $$(ANNOTATE_SV) \ + -SVinputFile $$(<) \ + -outputFile ./$$(@) \ + -genomeBuild GRCh37") endef $(foreach pair,$(SAMPLE_PAIRS),\ From af4a3cdad65b87362fe1bb40454e1a655f3ac159 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 16:07:44 -0400 Subject: [PATCH 415/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 61b816a1..13a8bb76 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -9,13 +9,16 @@ annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) define annotate-sv -annotate_sv/$1/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf +annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(ANNOTATE_SV_ENV),"set -o pipefail && \ - mkdir -p annotate_sv/$1 && \ + mkdir -p annotate_sv/$1/$2 && \ $$(ANNOTATE_SV) \ -SVinputFile $$(<) \ - -outputFile ./$$(@) \ + -outputFile ./annotate_sv/$1/$2/$1.$2_sv.tsv \ -genomeBuild GRCh37") + +annotate_sv/$1/$1.$2_sv.tsv : annotate_sv/$1/$2/$1.$2_sv.tsv + $$(INIT) cat $$(<) > $$(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From 8b8ef1c1e4a094f02b1fe7bb27f2d8eee9856419 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 17:58:49 -0400 Subject: [PATCH 416/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 13a8bb76..56c1f5e9 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -7,6 +7,8 @@ ANNOTATE_SV ?= $(HOME)/share/usr/env/annot_sv-3.1.3/opt/AnnotSV/bin/AnnotSV annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) + $(foreach pair,$(SAMPLE_PAIRS), \ + $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.maf)) define annotate-sv annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf @@ -19,6 +21,22 @@ annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf annotate_sv/$1/$1.$2_sv.tsv : annotate_sv/$1/$2/$1.$2_sv.tsv $$(INIT) cat $$(<) > $$(@) + +annotate_sv/$1/$1.$2_sv.maf : vcf/$1.$2_sv.vcf + $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$(<) \ + --tumor-id $1 \ + --filter-vcf $$(EXAC_NONTCGA) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") + +endef +$(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call r-sufam,$(sample)))) endef $(foreach pair,$(SAMPLE_PAIRS),\ From a7e85f5996d0a523013d03561addcd8cbda584db Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:00:12 -0400 Subject: [PATCH 417/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 56c1f5e9..bea238f3 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -34,10 +34,6 @@ annotate_sv/$1/$1.$2_sv.maf : vcf/$1.$2_sv.vcf --tmp-dir `mktemp -d` \ --output-maf $$(@)") -endef -$(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call r-sufam,$(sample)))) - endef $(foreach pair,$(SAMPLE_PAIRS),\ $(foreach caller,$(SV_CALLERS), \ From 9142af4c4eeaac50efb916de83fc6ad6688174a7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:01:17 -0400 Subject: [PATCH 418/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index bea238f3..78d340d7 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -6,7 +6,7 @@ SV_CALLERS = svaba manta gridss merged ANNOTATE_SV ?= $(HOME)/share/usr/env/annot_sv-3.1.3/opt/AnnotSV/bin/AnnotSV annotate_sv : $(foreach pair,$(SAMPLE_PAIRS), \ - $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) + $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.tsv)) \ $(foreach pair,$(SAMPLE_PAIRS), \ $(foreach caller,$(SV_CALLERS),annotate_sv/$(pair)/$(pair).$(caller)_sv.maf)) From c0c10c60eeec693486b4c5081e2d2ae52c6abc17 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:14:54 -0400 Subject: [PATCH 419/766] -- --- clonality/pyclone_13.mk | 33 ++++++++++----------------------- scripts/pyclone_13.R | 2 +- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index b669b9de..bf02fe61 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -10,17 +10,16 @@ MCMC_BURNIN = 200 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ - $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__HM__.pdf) + $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ +# $(foreach set,$(SAMPLE_SETS), \ +# $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__HM__.pdf) define r-sufam @@ -43,18 +42,6 @@ pyclone_13/$1/$1.txt : pyclone_13/$1/$1.vcf bam/$1.bam $$(<<) \ > $$(@)") -pyclone_13/$1/$1.maf : pyclone_13/$1/$1.vcf - $$(call RUN,-c -n 12 -s 1G -m 2G -v $(VEP_ENV),"set -o pipefail && \ - $$(VCF2MAF) \ - --input-vcf $$< \ - --tumor-id $1 \ - --filter-vcf $$(EXAC_NONTCGA) \ - --ref-fasta $$(REF_FASTA) \ - --vep-path $$(VEP_PATH) \ - --vep-data $$(VEP_DATA) \ - --tmp-dir `mktemp -d` \ - --output-maf $$(@)") - endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 93b58a6a..d2b93a2c 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -7,7 +7,7 @@ suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("reshape2")) -suppressPackageStartupMessages(library("superheat")) +suppressPackageStartupMessages(library("ComplexHeatmap")) suppressPackageStartupMessages(library("RColorBrewer")) if (!interactive()) { From 2fac328c94e0ccc9e56947c1fdb28e040d6051fd Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:16:56 -0400 Subject: [PATCH 420/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index bf02fe61..01de0f1f 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -10,8 +10,8 @@ MCMC_BURNIN = 200 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).taskcomplete) \ + $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ # $(foreach set,$(SAMPLE_SETS), \ # $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ @@ -47,13 +47,13 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call r-sufam,$(sample)))) define r-pyclone-input -pyclone_13/$1/$1.taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) +pyclone_13/$1/taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 1 \ --sample_set $1 \ --normal_sample '$(normal.$1)' && \ - echo 'taskcomplete' > $$(@)") + touch $$(@)") pyclone_13/$1/config.yaml : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ From 768e79a8d92cc77c493924f36ba36a4b9692c4f9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:21:27 -0400 Subject: [PATCH 421/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 01de0f1f..1f22395f 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -53,7 +53,7 @@ pyclone_13/$1/taskcomplete : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(samp --option 1 \ --sample_set $1 \ --normal_sample '$(normal.$1)' && \ - touch $$(@)") + echo 'success' > $$(@)") pyclone_13/$1/config.yaml : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_ENV),"set -o pipefail && \ From f0aa606dfccb631b334c8afdbcfb060797810c3d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:23:11 -0400 Subject: [PATCH 422/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 1f22395f..3882a245 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -5,14 +5,14 @@ LOGDIR ?= log/pyclone_13.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -MCMC_ITER = 1000 -MCMC_BURNIN = 200 +MCMC_ITER = 100 +MCMC_BURNIN = 20 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) # $(foreach set,$(SAMPLE_SETS), \ # $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ From 553c819d886cdb292ed7bc0e01dad6cb24096092 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:27:59 -0400 Subject: [PATCH 423/766] ++ --- clonality/pyclone_13.mk | 6 +++--- scripts/pyclone_13.R | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 3882a245..b45f6814 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -12,9 +12,9 @@ MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).txt) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) -# $(foreach set,$(SAMPLE_SETS), \ -# $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ + $(foreach set,$(SAMPLE_SETS), \ + $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index d2b93a2c..e3dc83ce 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -71,8 +71,7 @@ if (as.numeric(opt$option) == 1) { pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% dplyr::filter(!is.na(var_counts)) %>% - dplyr::mutate(var_counts = ifelse(var_counts<=3, 0, var_counts)) %>% - dplyr::filter((ref_counts+var_counts)>10) %>% + dplyr::mutate(var_counts = ifelse(var_counts<=1, 0, var_counts)) %>% dplyr::filter(!is.na(major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) From 3265b12dbf5cf07caedfd5eace89059518da2fb9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:30:17 -0400 Subject: [PATCH 424/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index b45f6814..bc523690 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -69,7 +69,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone-input,$(set)))) define r-pyclone-build-mutations -pyclone_13/$1/$2.yaml : pyclone_13/$1/$1.taskcomplete pyclone_13/$1/config.yaml +pyclone_13/$1/$2.yaml : pyclone_13/$1/taskcomplete pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 4G -m 8G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_mutations_file \ --in_file pyclone_13/$1/$2.tsv \ From 705898ba59383e1ab546153ce9295902a29caf59 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:32:08 -0400 Subject: [PATCH 425/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index bc523690..886d27f8 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -14,8 +14,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/taskcomplete) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ $(foreach set,$(SAMPLE_SETS), \ - $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ + $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ From 624b08859ee6b7f6e660855c789bfe36a5ce9ee2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:36:16 -0400 Subject: [PATCH 426/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 886d27f8..181fedde 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -15,9 +15,9 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/config.yaml) \ $(foreach set,$(SAMPLE_SETS), \ $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/clusters.txt) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_clusters.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_loci.txt) # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ # $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__HM__.pdf) @@ -87,20 +87,20 @@ pyclone_13/$1/trace/alpha.tsv.bz2 : $(foreach sample,$(tumors.$1),pyclone_13/$1/ PyClone run_analysis \ --config_file pyclone_13/$1/config.yaml") -pyclone_13/$1/clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 +pyclone_13/$1/summary/by_clusters.txt : pyclone_13/$1/trace/alpha.tsv.bz2 pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_table \ - --config_file pyclone_13/$1/config.yaml \ - --out_file pyclone_13/$1/clusters.txt \ + --config_file $$(<<) \ + --out_file $$(@) \ --table_type cluster \ --burnin $$(MCMC_BURNIN) \ --thin $$(MCMC_THIN)") -pyclone_13/$1/$1.txt : pyclone_13/$1/trace/alpha.tsv.bz2 +pyclone_13/$1/summary/by_loci.txt : pyclone_13/$1/trace/alpha.tsv.bz2 pyclone_13/$1/config.yaml $$(call RUN,-c -n 1 -s 8G -m 16G -v $(PYCLONE_13_ENV),"set -o pipefail && \ PyClone build_table \ - --config_file pyclone_13/$1/config.yaml \ - --out_file pyclone_13/$1/$1.txt \ + --config_file $$(<<) \ + --out_file $$(@) \ --table_type loci \ --burnin $$(MCMC_BURNIN) \ --thin $$(MCMC_THIN)") From 5aa0f65c8ea2a9c6e55ce42002501346e2b53763 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 18:44:10 -0400 Subject: [PATCH 427/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 181fedde..aab4737d 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -18,8 +18,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_clusters.txt) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_loci.txt) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__PS__.pdf) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/$(set)__HM__.pdf) +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/scatter_by_sample.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/heatmap_by_sample.pdf) define r-sufam @@ -105,7 +105,7 @@ pyclone_13/$1/summary/by_loci.txt : pyclone_13/$1/trace/alpha.tsv.bz2 pyclone_13 --burnin $$(MCMC_BURNIN) \ --thin $$(MCMC_THIN)") -pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt +pyclone_13/$1/summary/scatter_by_sample.pdf : pyclone_13/$1/summary/by_loci.txt pyclone_13/$1/summary/by_clusters.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 3 \ @@ -113,7 +113,7 @@ pyclone_13/$1/$1__PS__.pdf : pyclone_13/$1/$1.txt --input_file $$(<) \ --output_file $$(@)") -pyclone_13/$1/$1__HM__.pdf : pyclone_13/$1/$1.txt +pyclone_13/$1/summary/heatmap_by_sample.pdf : pyclone_13/$1/summary/by_loci.txt pyclone_13/$1/summary/by_clusters.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_13.R \ --option 4 \ From 51bfa4af804fb83a05aac3deb89ef6136f17cdd4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:25:31 -0400 Subject: [PATCH 428/766] Update pyclone_13.R --- scripts/pyclone_13.R | 109 +++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 55 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index e3dc83ce..bc2a7fad 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -176,41 +176,34 @@ if (as.numeric(opt$option) == 1) { pyclone_ft = do.call(bind_rows, pyclone_ft) %>% readr::type_convert() %>% dplyr::filter(!is.na(cellular_prevalence_x)) %>% - dplyr::filter(!is.na(cellular_prevalence_y)) + dplyr::filter(!is.na(cellular_prevalence_y)) %>% + dplyr::mutate(sample_id_x = factor(sample_id_x, levels = sample_set, ordered = TRUE)) %>% + dplyr::mutate(sample_id_y = factor(sample_id_y, levels = sample_set, ordered = TRUE)) - smry_x = pyclone_ft %>% - dplyr::group_by(mutation_id) %>% - dplyr::summarize(cluster_id = unique(cluster_id)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(n = n()) + smry_ = pyclone_ft %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(cluster_id = unique(cluster_id)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(n = n()) pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_x, by = "cluster_id") + dplyr::left_join(smry_, by = "cluster_id") - smry_y = pyclone %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::arrange(desc(mean)) %>% - dplyr::mutate(cluster_id_ordered = nrow(.):1) - - pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_y, by = "cluster_id") - - colourCount = length(unique(pyclone_ft$cluster_id_ordered)) + colourCount = nrow(smry_) getPalette = colorRampPalette(brewer.pal(9, "Set1")) plot_ = pyclone_ft %>% - ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id_ordered), size = n)) + + ggplot(aes(x = 100*cellular_prevalence_x, y = 100*cellular_prevalence_y, color = factor(cluster_id), size = n)) + geom_point(stat = "identity", alpha = .75, shape = 21) + scale_color_manual(values = getPalette(colourCount)) + xlab("\n\nCCF (%)\n") + ylab("\nCCF (%)\n\n") + - guides(color = guide_legend(title = "Cluster"), + guides(color = guide_legend(title = "Cluster", override.aes = list(shape = 19)), size = guide_legend(title = "N")) + facet_wrap(~sample_id_x+sample_id_y) - pdf(file = as.character(opt$output_file), width = 21, height = 21) + + pdf(file = as.character(opt$output_file), width = 18, height = 18) print(plot_) dev.off() @@ -226,44 +219,50 @@ if (as.numeric(opt$option) == 1) { dplyr::select(mutation_id, cluster_id) %>% dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") - smry_cl = pyclone %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::arrange(desc(mean)) %>% - dplyr::mutate(cluster_id_ordered = nrow(.):1) + smry_ = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(cluster_mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() pyclone_mt = pyclone_mt %>% - dplyr::left_join(smry_cl, by = "cluster_id") + dplyr::left_join(smry_, by = "cluster_id") + + index = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_mean) %>% + apply(., 1, mean) - index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) - pyclone_mt = pyclone_mt[index,,drop=FALSE] pyclone_mt = pyclone_mt %>% - dplyr::arrange(cluster_id_ordered) - + dplyr::mutate(index = index) %>% + dplyr::arrange(desc(cluster_mean), desc(cluster_id), desc(index)) + + cp = c("#f0f0f0","#c6dbef","#9ecae1","#6baed6","#4292c6","#2171b5","#08519c","#08519c","#08306b","#08306b","#08306b") + ca = colorRampPalette(brewer.pal(9, "Set1"))(nrow(smry_)) + names(ca) = smry_ %>% .[["cluster_id"]] + + ha = rowAnnotation( + `Cluster ID` = pyclone_mt %>% .[["cluster_id"]], + col = list(`Cluster ID` = ca), + annotation_width = unit(3, "cm") + ) - pdf(file = as.character(opt$output_file), width = 10, height = 21) - superheat(X = pyclone_mt %>% - dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), - membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], - pretty.order.rows = FALSE, - pretty.order.cols = TRUE, - row.dendrogram = FALSE, - col.dendrogram = FALSE, - smooth.heat = FALSE, - scale = FALSE, - heat.pal = c(rep("#d9d9d9", 4), rep("#9ecae1", 2), "#4292c6", "#2171b5", "#08519c", "#08306b"), - legend = FALSE, - grid.hline = FALSE, - grid.vline = TRUE, - force.grid.hline = TRUE, - force.grid.vline = TRUE, - grid.hline.col = "white", - grid.vline.col = "white", - grid.hline.size = .05, - grid.vline.size = 1, - bottom.label.text.angle = 90, - bottom.label.text.alignment = "right") + pdf(file = as.character(opt$output_file), width = 12, height = 18) + Heatmap(matrix = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_mean, -index), + col = cp, + name = "CCF", + na_col = "#f0f0f0", + border = "white", + border_gp = gpar(lwd = 0), + cluster_rows = TRUE, + show_row_dend = FALSE, + cluster_row_slices = FALSE, + cluster_columns = TRUE, + show_column_dend = FALSE, + use_raster = FALSE, + left_annotation = ha, + row_split = pyclone_mt %>% .[["cluster_id"]], + width = unit(20, "cm"), + height = unit(40, "cm")) dev.off() } From e41caa6176509f82ff8c833e4aaba6bb67b36675 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:26:24 -0400 Subject: [PATCH 429/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index aab4737d..6052508f 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -17,9 +17,9 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) $(foreach sample,$(tumors.$(set)),pyclone_13/$(set)/$(sample).yaml)) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/trace/alpha.tsv.bz2) \ $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_clusters.txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_loci.txt) -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/scatter_by_sample.pdf) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/heatmap_by_sample.pdf) + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/by_loci.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/scatter_by_sample.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_13/$(set)/summary/heatmap_by_sample.pdf) define r-sufam From e2830ee9de07eb2fabd0a67bfa293e2895235f10 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:28:06 -0400 Subject: [PATCH 430/766] Update pyclone_13.R --- scripts/pyclone_13.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index bc2a7fad..37af84a9 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -246,7 +246,7 @@ if (as.numeric(opt$option) == 1) { ) pdf(file = as.character(opt$output_file), width = 12, height = 18) - Heatmap(matrix = pyclone_mt %>% + draw(Heatmap(matrix = pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_mean, -index), col = cp, name = "CCF", @@ -262,7 +262,7 @@ if (as.numeric(opt$option) == 1) { left_annotation = ha, row_split = pyclone_mt %>% .[["cluster_id"]], width = unit(20, "cm"), - height = unit(40, "cm")) + height = unit(40, "cm"))) dev.off() } From c491c2fd4fe7f2d4b14c092677ade999ed24d156 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:30:23 -0400 Subject: [PATCH 431/766] Update pyclone_13.mk --- clonality/pyclone_13.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clonality/pyclone_13.mk b/clonality/pyclone_13.mk index 6052508f..7df04299 100644 --- a/clonality/pyclone_13.mk +++ b/clonality/pyclone_13.mk @@ -5,8 +5,8 @@ LOGDIR ?= log/pyclone_13.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' -MCMC_ITER = 100 -MCMC_BURNIN = 20 +MCMC_ITER = 10000 +MCMC_BURNIN = 2000 MCMC_THIN = 1 pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_13/$(sample)/$(sample).vcf) \ From e8ecf961298f8609fd2945db7ea047db1d638631 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:41:55 -0400 Subject: [PATCH 432/766] ++ --- clonality/pyclone_vi.mk | 13 ++--- scripts/pyclone_vi.R | 119 ++++++++++++++++++++-------------------- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index c10994b0..331e59de 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -7,12 +7,11 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).maf) \ $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__PS__.pdf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set)__HM__.pdf) + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) define r-sufam @@ -73,13 +72,13 @@ pyclone_vi/$1/$1.hd5 : pyclone_vi/$1/$1.tsv --precision 500 \ --num-restarts 100") -pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.hd5 +pyclone_vi/$1/summary/by_loci.txt : pyclone_vi/$1/$1.hd5 $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ pyclone-vi write-results-file \ --in-file $$(<) \ --out-file $$(@)") -pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt +pyclone_vi/$1/summary/scatter_by_sample.pdf : pyclone_vi/$1/sumary/by_loci.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 2 \ @@ -87,7 +86,7 @@ pyclone_vi/$1/$1__PS__.pdf : pyclone_vi/$1/$1.txt --input_file $$(<) \ --output_file $$(@)") -pyclone_vi/$1/$1__HM__.pdf : pyclone_vi/$1/$1.txt +pyclone_vi/$1/summary/heatmap_by_sample.pdf : pyclone_vi/$1/sumary/by_loci.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 3 \ diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index c07f5168..b583496b 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -7,7 +7,7 @@ suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("fuzzyjoin")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("reshape2")) -suppressPackageStartupMessages(library("superheat")) +suppressPackageStartupMessages(library("ComplexHeatmap")) suppressPackageStartupMessages(library("RColorBrewer")) if (!interactive()) { @@ -79,8 +79,7 @@ if (as.numeric(opt$option) == 1) { pyclone = do.call(rbind, pyclone) %>% dplyr::filter(!is.na(ref_counts)) %>% dplyr::filter(!is.na(alt_counts)) %>% - dplyr::mutate(var_coalt_countsunts = ifelse(alt_counts<=3, 0, alt_counts)) %>% - dplyr::filter((ref_counts+alt_counts)>10) %>% + dplyr::mutate(alt_counts = ifelse(alt_counts<=1, 0, alt_counts)) %>% dplyr::filter(!is.na(major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) @@ -88,7 +87,7 @@ if (as.numeric(opt$option) == 1) { smry = pyclone %>% dplyr::group_by(mutation_id) %>% dplyr::summarize(n_x = n(), - n_y = sum(var_counts)) %>% + n_y = sum(alt_counts)) %>% dplyr::ungroup() pyclone = pyclone %>% @@ -112,40 +111,35 @@ if (as.numeric(opt$option) == 1) { dplyr::select(mutation_id, cluster_id, sample_id_x = sample_id, - cellular_prevalence_x = cellular_prevalence, - cellular_prevalence_std_x = cellular_prevalence_std) %>% + cellular_prevalence_x = cellular_prevalence) %>% dplyr::full_join(pyclone %>% dplyr::filter(sample_id == sample_set[j]) %>% dplyr::select(mutation_id, sample_id_y = sample_id, - cellular_prevalence_y = cellular_prevalence, - cellular_prevalence_std_y = cellular_prevalence_std), + cellular_prevalence_y = cellular_prevalence), by = "mutation_id") %>% readr::type_convert() index = index + 1 } } pyclone_ft = do.call(bind_rows, pyclone_ft) %>% - readr::type_convert() - - smry_c = pyclone_ft %>% - dplyr::group_by(mutation_id) %>% - dplyr::summarize(cluster_id = unique(cluster_id)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(n = n()) - smry_p = pyclone %>% - dplyr::group_by(cluster_id, sample_id) %>% - dplyr::summarize(mean_cellular_prevalence = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(is_clonal = max(mean_cellular_prevalence)) + readr::type_convert() %>% + dplyr::filter(!is.na(cellular_prevalence_x)) %>% + dplyr::filter(!is.na(cellular_prevalence_y)) %>% + dplyr::mutate(sample_id_x = factor(sample_id_x, levels = sample_set, ordered = TRUE)) %>% + dplyr::mutate(sample_id_y = factor(sample_id_y, levels = sample_set, ordered = TRUE)) + + smry_ = pyclone_ft %>% + dplyr::group_by(mutation_id) %>% + dplyr::summarize(cluster_id = unique(cluster_id)) %>% + dplyr::ungroup() %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(n = n()) pyclone_ft = pyclone_ft %>% - dplyr::left_join(smry_c, by = "cluster_id") %>% - dplyr::left_join(smry_p, by = "cluster_id") + dplyr::left_join(smry_, by = "cluster_id") - colourCount = length(unique(pyclone_ft$cluster_id)) + colourCount = nrow(smry_) getPalette = colorRampPalette(brewer.pal(9, "Set1")) plot_ = pyclone_ft %>% @@ -157,7 +151,8 @@ if (as.numeric(opt$option) == 1) { guides(color = guide_legend(title = "Cluster"), size = guide_legend(title = "N")) + facet_wrap(~sample_id_x+sample_id_y) - pdf(file = as.character(opt$output_file), width = 21, height = 21) + + pdf(file = as.character(opt$output_file), width = 18, height = 18) print(plot_) dev.off() @@ -173,44 +168,50 @@ if (as.numeric(opt$option) == 1) { dplyr::select(mutation_id, cluster_id) %>% dplyr::filter(!duplicated(mutation_id)), by = "mutation_id") - smry_cl = pyclone %>% - dplyr::group_by(cluster_id) %>% - dplyr::summarize(mean = mean(cellular_prevalence)) %>% - dplyr::ungroup() %>% - dplyr::arrange(desc(mean)) %>% - dplyr::mutate(cluster_id_ordered = nrow(.):1) + smry_ = pyclone %>% + dplyr::group_by(cluster_id) %>% + dplyr::summarize(cluster_mean = mean(cellular_prevalence)) %>% + dplyr::ungroup() pyclone_mt = pyclone_mt %>% - dplyr::left_join(smry_cl, by = "cluster_id") + dplyr::left_join(smry_, by = "cluster_id") + + index = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_mean) %>% + apply(., 1, mean) - index = order(apply(pyclone_mt %>% dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered), 1, mean), decreasing = TRUE) - pyclone_mt = pyclone_mt[index,,drop=FALSE] pyclone_mt = pyclone_mt %>% - dplyr::arrange(cluster_id_ordered) - + dplyr::mutate(index = index) %>% + dplyr::arrange(desc(cluster_mean), desc(cluster_id), desc(index)) + + cp = c("#f0f0f0","#c6dbef","#9ecae1","#6baed6","#4292c6","#2171b5","#08519c","#08519c","#08306b","#08306b","#08306b") + ca = colorRampPalette(brewer.pal(9, "Set1"))(nrow(smry_)) + names(ca) = smry_ %>% .[["cluster_id"]] + + ha = rowAnnotation( + `Cluster ID` = pyclone_mt %>% .[["cluster_id"]], + col = list(`Cluster ID` = ca), + annotation_width = unit(3, "cm") + ) - pdf(file = as.character(opt$output_file), width = 10, height = 21) - superheat(X = pyclone_mt %>% - dplyr::select(-mutation_id, -cluster_id, -cluster_id_ordered, -mean), - membership.rows = pyclone_mt %>% .[["cluster_id_ordered"]], - pretty.order.rows = FALSE, - pretty.order.cols = TRUE, - row.dendrogram = FALSE, - col.dendrogram = FALSE, - smooth.heat = FALSE, - scale = FALSE, - heat.pal = c("#d9d9d9", "#d9d9d9", "#d9d9d9", "#9ecae1", "#4292c6", "#2171b5", "#08519c", "#08306b"), - legend = FALSE, - grid.hline = FALSE, - grid.vline = TRUE, - force.grid.hline = TRUE, - force.grid.vline = TRUE, - grid.hline.col = "white", - grid.vline.col = "white", - grid.hline.size = .05, - grid.vline.size = 1, - bottom.label.text.angle = 90, - bottom.label.text.alignment = "right") + pdf(file = as.character(opt$output_file), width = 12, height = 18) + draw(Heatmap(matrix = pyclone_mt %>% + dplyr::select(-mutation_id, -cluster_id, -cluster_mean, -index), + col = cp, + name = "CCF", + na_col = "#f0f0f0", + border = "white", + border_gp = gpar(lwd = 0), + cluster_rows = TRUE, + show_row_dend = FALSE, + cluster_row_slices = FALSE, + cluster_columns = TRUE, + show_column_dend = FALSE, + use_raster = FALSE, + left_annotation = ha, + row_split = pyclone_mt %>% .[["cluster_id"]], + width = unit(20, "cm"), + height = unit(40, "cm"))) dev.off() } From f366b1f61e4fecab599837f6c6a86d4716e7dd0c Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 20:43:26 -0400 Subject: [PATCH 433/766] Update pyclone_vi.mk --- clonality/pyclone_vi.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index 331e59de..da78320d 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -78,7 +78,7 @@ pyclone_vi/$1/summary/by_loci.txt : pyclone_vi/$1/$1.hd5 --in-file $$(<) \ --out-file $$(@)") -pyclone_vi/$1/summary/scatter_by_sample.pdf : pyclone_vi/$1/sumary/by_loci.txt +pyclone_vi/$1/summary/scatter_by_sample.pdf : pyclone_vi/$1/summary/by_loci.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 2 \ @@ -86,7 +86,7 @@ pyclone_vi/$1/summary/scatter_by_sample.pdf : pyclone_vi/$1/sumary/by_loci.txt --input_file $$(<) \ --output_file $$(@)") -pyclone_vi/$1/summary/heatmap_by_sample.pdf : pyclone_vi/$1/sumary/by_loci.txt +pyclone_vi/$1/summary/heatmap_by_sample.pdf : pyclone_vi/$1/summary/by_loci.txt $$(call RUN,-c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ --option 3 \ From 5e543af2766748852f7dda110dc16d47f7451ffc Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 21:21:20 -0400 Subject: [PATCH 434/766] Update pyclone_vi.R --- scripts/pyclone_vi.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index b583496b..772bd699 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -202,9 +202,9 @@ if (as.numeric(opt$option) == 1) { na_col = "#f0f0f0", border = "white", border_gp = gpar(lwd = 0), - cluster_rows = TRUE, + cluster_rows = FALSE, show_row_dend = FALSE, - cluster_row_slices = FALSE, + cluster_row_slices = TRUE, cluster_columns = TRUE, show_column_dend = FALSE, use_raster = FALSE, From be7da3785b858c57ed46bc32775fb9847fa934ba Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 21:48:23 -0400 Subject: [PATCH 435/766] cluster = TRUE --- scripts/pyclone_13.R | 4 ++-- scripts/pyclone_vi.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/pyclone_13.R b/scripts/pyclone_13.R index 37af84a9..0c79350e 100644 --- a/scripts/pyclone_13.R +++ b/scripts/pyclone_13.R @@ -242,7 +242,7 @@ if (as.numeric(opt$option) == 1) { ha = rowAnnotation( `Cluster ID` = pyclone_mt %>% .[["cluster_id"]], col = list(`Cluster ID` = ca), - annotation_width = unit(3, "cm") + simple_anno_size = unit(7, "mm") ) pdf(file = as.character(opt$output_file), width = 12, height = 18) @@ -255,7 +255,7 @@ if (as.numeric(opt$option) == 1) { border_gp = gpar(lwd = 0), cluster_rows = TRUE, show_row_dend = FALSE, - cluster_row_slices = FALSE, + cluster_row_slices = TRUE, cluster_columns = TRUE, show_column_dend = FALSE, use_raster = FALSE, diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 772bd699..0cbffb4e 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -191,7 +191,7 @@ if (as.numeric(opt$option) == 1) { ha = rowAnnotation( `Cluster ID` = pyclone_mt %>% .[["cluster_id"]], col = list(`Cluster ID` = ca), - annotation_width = unit(3, "cm") + simple_anno_size = unit(7, "mm") ) pdf(file = as.character(opt$output_file), width = 12, height = 18) @@ -202,7 +202,7 @@ if (as.numeric(opt$option) == 1) { na_col = "#f0f0f0", border = "white", border_gp = gpar(lwd = 0), - cluster_rows = FALSE, + cluster_rows = TRUE, show_row_dend = FALSE, cluster_row_slices = TRUE, cluster_columns = TRUE, From baeaf4db9b715bfeb6844ce772bb25fb9d52505b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 5 Nov 2022 23:34:58 -0400 Subject: [PATCH 436/766] + --- Makefile | 2 +- .../{getBaseCount.mk => get_basecounts.mk} | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) rename variant_callers/{getBaseCount.mk => get_basecounts.mk} (79%) diff --git a/Makefile b/Makefile index b2bf1f07..d5a655f6 100644 --- a/Makefile +++ b/Makefile @@ -187,7 +187,7 @@ sufam_gt : TARGETS += get_basecount get_basecount : - $(call RUN_MAKE,modules/variant_callers/getBaseCount.mk) + $(call RUN_MAKE,modules/variant_callers/get_basecounts.mk) TARGETS += strelka_varscan_indels strelka_varscan_indels : diff --git a/variant_callers/getBaseCount.mk b/variant_callers/get_basecounts.mk similarity index 79% rename from variant_callers/getBaseCount.mk rename to variant_callers/get_basecounts.mk index 56ad36fa..bb75a1ab 100644 --- a/variant_callers/getBaseCount.mk +++ b/variant_callers/get_basecounts.mk @@ -2,15 +2,14 @@ include modules/Makefile.inc LOGDIR ?= log/get_basecount.$(NOW) -GBC_ENV = $(HOME)/share/data/common/eec_sc_split/etc/conda -GBC_EXE = $(HOME)/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts -MAPQ := 10 -BAQ := 15 +MAPQ := 0 +BAQ := 0 +COV := 0 -getbasecount : $(foreach sample,$(SAMPLES),gbc/MFE296/$(sample).tsv) +getbasecount : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) define get-basecount -gbc/MFE296/$1.txt : bam/MFE296/$1.bam +gbc/$1.txt.gz : bam/$1.bam $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ mkdir -p gbc/MFE296 && \ $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ From 84bc191ccac122d551d9bf40f50e62e68a83d5b0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 17:16:20 -0500 Subject: [PATCH 437/766] ++ --- config.inc | 5 ++-- variant_callers/get_basecounts.mk | 39 +++++++++++++++---------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/config.inc b/config.inc index d5e8021e..9eee85e8 100644 --- a/config.inc +++ b/config.inc @@ -84,7 +84,6 @@ SUM_READS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads.R SUM_EXONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byexon.R SUM_INTRONS_RSCRIPT = $(RSCRIPT) $(SCRIPTS_DIR)/summarize_rnaseqreads_byintron.R - JAVA_BIN ?= $(JAVA8_BIN) JAVA6_BIN ?= $(HOME)/share/usr/jdk1.6.0_45/bin/java JAVA7_BIN ?= $(HOME)/share/usr/jdk1.7.0_45/bin/java @@ -148,8 +147,8 @@ SNP_FILTER_VCF = python modules/vcf_tools/snp_filter_vcf.py MERGE_VCF = python modules/vcf_tools/merge_vcf.py -MEDICC_VAR = $(MEDICC_ENV)/PROFILE -MEDICC_BIN = $(MEDICC_ENV)/opt/medicc +# gbc command line +GBC ?= $(HOME)/share/usr/GetBaseCounts/GetBaseCounts endif CONFIG_INC = true diff --git a/variant_callers/get_basecounts.mk b/variant_callers/get_basecounts.mk index bb75a1ab..8f30e082 100644 --- a/variant_callers/get_basecounts.mk +++ b/variant_callers/get_basecounts.mk @@ -9,32 +9,31 @@ COV := 0 getbasecount : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) define get-basecount -gbc/$1.txt.gz : bam/$1.bam - $$(call RUN,-n 6 -s 3G -m 6G -v $(GBC_ENV),"set -o pipefail && \ - mkdir -p gbc/MFE296 && \ - $(GBC_EXE) --fasta ~/share/reference/ucsc_gatk_bundle_2.8/ucsc.hg19.fasta \ - --bam $$(<) \ - --vcf etc/vcf/MFE296.vcf \ - --output $$(@) \ - --maq $(MAPQ) \ - --baq $(BAQ) \ - --filter_duplicate 0 \ - --filter_improper_pair 0 \ - --filter_qc_failed 1 \ - --thread 6") +gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf + $$(call RUN,-n 6 -s 3G -m 6G,"set -o pipefail && \ + $(GBC) --fasta $(REF_FASTA) \ + --bam $$(<) \ + --vcf $$(<<) \ + --output $$(@) \ + --thread 6 \ + --maq $(MAPQ) \ + -sort_output \ + --compress_output \ + --maq $(MAPQ) \ + --baq $(BAQ) \ + --cov $(COV) \ + --filter_duplicate 0 \ + --filter_improper_pair 0 \ + --filter_qc_failed 1 \ + --filter_indel 0 \ + --filter_non_primary 1") -gbc/MFE296/$1.tsv : gbc/MFE296/$1.txt - $$(call RUN,-n 1 -s 12G -m 18G,"set -o pipefail && \ - $(RSCRIPT) modules/variant_callers/getBaseCount.R --file_name $$(<) && \ - rm $$(<)") - - endef $(foreach sample,$(SAMPLES),\ $(eval $(call get-basecount,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - /lila/home/brownd7/share/data/common/eec_sc_split/etc/GetBaseCounts/GetBaseCounts &> version/get_basecount.txt;) + ${GBC} &> version/get_basecount.txt;) .SECONDARY: .DELETE_ON_ERROR: .PHONY: getbasecount From cd0dc8d532d0ba72dd84d2a3ff2e1ecc9f943534 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 17:31:46 -0500 Subject: [PATCH 438/766] Update get_basecounts.mk --- variant_callers/get_basecounts.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/variant_callers/get_basecounts.mk b/variant_callers/get_basecounts.mk index 8f30e082..9530a098 100644 --- a/variant_callers/get_basecounts.mk +++ b/variant_callers/get_basecounts.mk @@ -16,8 +16,7 @@ gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf --vcf $$(<<) \ --output $$(@) \ --thread 6 \ - --maq $(MAPQ) \ - -sort_output \ + --sort_output \ --compress_output \ --maq $(MAPQ) \ --baq $(BAQ) \ From 8c583fd53af142a3fc2af177436ae24902b4414c Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 22:07:21 -0500 Subject: [PATCH 439/766] Update sufam_gt.R --- scripts/sufam_gt.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 0d808503..fa5243b7 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -91,23 +91,31 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(HOTSPOT = case_when( is.na(HOTSPOT) ~ FALSE, HOTSPOT == "True" ~ TRUE, - HOTSPOT == "False" ~ FALSE + HOTSPOT == "False" ~ FALSE, + HOTSPOT == "TRUE" ~ TRUE, + HOTSPOT == "FALSE" ~ FALSE )) %>% dplyr::mutate(HOTSPOT_INTERNAL = case_when( is.na(HOTSPOT_INTERNAL) ~ FALSE, HOTSPOT_INTERNAL == "True" ~ TRUE, HOTSPOT_INTERNAL == "False" ~ FALSE + HOTSPOT_INTERNAL == "TRUE" ~ TRUE, + HOTSPOT_INTERNAL == "FALSE" ~ FALSE )) %>% dplyr::mutate(cmo_hotspot = case_when( is.na(cmo_hotspot) ~ FALSE, cmo_hotspot == "True" ~ TRUE, - cmo_hotspot == "False" ~ FALSE + cmo_hotspot == "False" ~ FALSE, + cmo_hotspot == "TRUE" ~ TRUE, + cmo_hotspot == "FALSE" ~ FALSE )) %>% dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% dplyr::mutate(facetsLOHCall = case_when( is.na(facetsLOHCall) ~ FALSE, facetsLOHCall == "True" ~ TRUE, - facetsLOHCall == "False" ~ FALSE + facetsLOHCall == "False" ~ FALSE, + facetsLOHCall == "TRUE" ~ TRUE, + facetsLOHCall == "FALSE" ~ FALSE )) %>% dplyr::mutate(is_loh = facetsLOHCall) %>% readr::type_convert() From 95af6cf80e750602860370d58642bc2005ecb9e6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 22:09:39 -0500 Subject: [PATCH 440/766] Update sufam_gt.R --- scripts/sufam_gt.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index fa5243b7..c9e2fd47 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -143,23 +143,31 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(HOTSPOT = case_when( is.na(HOTSPOT) ~ FALSE, HOTSPOT == "True" ~ TRUE, - HOTSPOT == "False" ~ FALSE + HOTSPOT == "False" ~ FALSE, + HOTSPOT == "TRUE" ~ TRUE, + HOTSPOT == "FALSE" ~ FALSE )) %>% dplyr::mutate(HOTSPOT_INTERNAL = case_when( is.na(HOTSPOT_INTERNAL) ~ FALSE, HOTSPOT_INTERNAL == "True" ~ TRUE, HOTSPOT_INTERNAL == "False" ~ FALSE + HOTSPOT_INTERNAL == "TRUE" ~ TRUE, + HOTSPOT_INTERNAL == "FALSE" ~ FALSE )) %>% dplyr::mutate(cmo_hotspot = case_when( is.na(cmo_hotspot) ~ FALSE, cmo_hotspot == "True" ~ TRUE, - cmo_hotspot == "False" ~ FALSE + cmo_hotspot == "False" ~ FALSE, + cmo_hotspot == "TRUE" ~ TRUE, + cmo_hotspot == "FALSE" ~ FALSE )) %>% dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% dplyr::mutate(facetsLOHCall = case_when( is.na(facetsLOHCall) ~ FALSE, facetsLOHCall == "True" ~ TRUE, - facetsLOHCall == "False" ~ FALSE + facetsLOHCall == "False" ~ FALSE, + facetsLOHCall == "TRUE" ~ TRUE, + facetsLOHCall == "FALSE" ~ FALSE )) %>% dplyr::mutate(is_loh = facetsLOHCall) %>% readr::type_convert() From 4498497ee8339bdd1491ebf580d60e9e0c8f6887 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 22:10:55 -0500 Subject: [PATCH 441/766] Update sufam_gt.R --- scripts/sufam_gt.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index c9e2fd47..cab21886 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -98,7 +98,7 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(HOTSPOT_INTERNAL = case_when( is.na(HOTSPOT_INTERNAL) ~ FALSE, HOTSPOT_INTERNAL == "True" ~ TRUE, - HOTSPOT_INTERNAL == "False" ~ FALSE + HOTSPOT_INTERNAL == "False" ~ FALSE, HOTSPOT_INTERNAL == "TRUE" ~ TRUE, HOTSPOT_INTERNAL == "FALSE" ~ FALSE )) %>% @@ -150,7 +150,7 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(HOTSPOT_INTERNAL = case_when( is.na(HOTSPOT_INTERNAL) ~ FALSE, HOTSPOT_INTERNAL == "True" ~ TRUE, - HOTSPOT_INTERNAL == "False" ~ FALSE + HOTSPOT_INTERNAL == "False" ~ FALSE, HOTSPOT_INTERNAL == "TRUE" ~ TRUE, HOTSPOT_INTERNAL == "FALSE" ~ FALSE )) %>% From c50d3ae85edc501adb531ed7bfec5aeaa4da8f71 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 22:12:09 -0500 Subject: [PATCH 442/766] Update sufam_gt.R --- scripts/sufam_gt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index cab21886..d921e2e7 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -75,7 +75,7 @@ if (as.numeric(opt$option)==1) { } else if (as.numeric(opt$option)==3) { maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::filter(t_alt_count > 0) %>% + dplyr::filter(t_alt_count > 1) %>% dplyr::filter(t_ref_count > 0) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) From 055651a13f2aa23f18eb0fc5557954ad26dd7d9a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 6 Nov 2022 23:11:38 -0500 Subject: [PATCH 443/766] Update sufam_gt.R --- scripts/sufam_gt.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index d921e2e7..eb2e9a18 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -75,8 +75,7 @@ if (as.numeric(opt$option)==1) { } else if (as.numeric(opt$option)==3) { maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::filter(t_alt_count > 1) %>% - dplyr::filter(t_ref_count > 0) + dplyr::filter(t_alt_count > 1) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option)==4) { From da613c906353c5ea830e2a71a076fa8b3e283c38 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 10 Nov 2022 14:49:35 -0500 Subject: [PATCH 444/766] ++/-- --- Makefile | 13 +- signatures/createNMFinput.m | 17 -- signatures/deconstruct_sigs.mk | 21 +- signatures/emu.mk | 63 ------ signatures/mut_sig.mk | 28 --- signatures/mut_sig_report.Rmd | 330 ------------------------------- signatures/nmfMutSig.mk | 45 ----- signatures/plotNMF.m | 20 -- signatures/plot_emu_signatures.R | 121 ------------ signatures/plot_signatures.R | 121 ------------ signatures/runNMF.m | 46 ----- signatures/sv_signature.mk | 21 ++ signatures/vcf_2_vranges.R | 95 --------- 13 files changed, 38 insertions(+), 903 deletions(-) delete mode 100644 signatures/createNMFinput.m delete mode 100644 signatures/emu.mk delete mode 100644 signatures/mut_sig.mk delete mode 100644 signatures/mut_sig_report.Rmd delete mode 100644 signatures/nmfMutSig.mk delete mode 100644 signatures/plotNMF.m delete mode 100644 signatures/plot_emu_signatures.R delete mode 100644 signatures/plot_signatures.R delete mode 100644 signatures/runNMF.m create mode 100644 signatures/sv_signature.mk delete mode 100644 signatures/vcf_2_vranges.R diff --git a/Makefile b/Makefile index d5a655f6..95d19e80 100644 --- a/Makefile +++ b/Makefile @@ -503,17 +503,14 @@ pyclone_vi : # mutational signatures #================================================== -TARGETS += emu -emu : - $(call RUN_MAKE,modules/signatures/emu.mk) - -TARGETS += mut_sig -mut_sig : - $(call RUN_MAKE,modules/signatures/mut_sig.mk) - TARGETS += deconstruct_sigs deconstruct_sigs : $(call RUN_MAKE,modules/signatures/deconstruct_sigs.mk) + + +TARGETS += sv_signature +sv_signature : + $(call RUN_MAKE,modules/signatures/sv_signature.mk) #================================================== diff --git a/signatures/createNMFinput.m b/signatures/createNMFinput.m deleted file mode 100644 index e61040b2..00000000 --- a/signatures/createNMFinput.m +++ /dev/null @@ -1,17 +0,0 @@ -function createNMFinput( mutationFile, sampleNameFile, typesFile, cancerType, inputFile) -%create WTSI input -% convert mutsig mutation matrix file and sample name file into input for -% WTSI mutation signature package - -originalGenomes = importdata(mutationFile)'; - -fid = fopen(sampleNameFile); -sampleNames = textscan(fid, '%s'); -fclose(fid); -sampleNames = sampleNames{1}; - -load(typesFile); - -save(inputFile, 'originalGenomes', 'subtypes', 'types', 'sampleNames', 'cancerType'); -quit -end diff --git a/signatures/deconstruct_sigs.mk b/signatures/deconstruct_sigs.mk index c6721dc2..ba309ad8 100644 --- a/signatures/deconstruct_sigs.mk +++ b/signatures/deconstruct_sigs.mk @@ -1,24 +1,27 @@ include modules/Makefile.inc LOGDIR = log/deconstruct_sigs.$(NOW) -PHONY += deconstructsigs deconstructsigs/signatures deconstructsigs/plots/context -deconstructsigs : $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/signatures/$(sample).RData) $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/plots/context/$(sample).pdf) +deconstructsigs : $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/signatures/$(sample).RData) \ + $(foreach sample,$(TUMOR_SAMPLES),deconstructsigs/plots/context/$(sample).pdf) define extract-signatures deconstructsigs/signatures/%.RData : summary/tsv/mutation_summary.tsv - $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"$(RSCRIPT) modules/signatures/extract_signatures.R --sample_name $$(*)") + $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefial && \ + $(RSCRIPT) modules/signatures/extract_signatures.R \ + --sample_name $$()") deconstructsigs/plots/context/%.pdf : deconstructsigs/signatures/%.RData - $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"mkdir -p deconstructsigs/plots/context && \ - mkdir -p deconstructsigs/plots/exposures && \ - $(RSCRIPT) modules/signatures/plot_signatures.R --sample_name $$(*)") + $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/signatures/plot_signatures.R \ + --sample_name $$(*)") endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call extract-signatures,$(sample)))) - -.DELETE_ON_ERROR: +..DUMMY := $(shell mkdir -p version; \ + $(DECONSTRUCTSIGS_ENV)/bin/R --version > version/deconstruct_sigs.txt) .SECONDARY: -.PHONY: $(PHONY) +.DELETE_ON_ERROR: +.PHONY: deconstructsigs \ No newline at end of file diff --git a/signatures/emu.mk b/signatures/emu.mk deleted file mode 100644 index 7d48e555..00000000 --- a/signatures/emu.mk +++ /dev/null @@ -1,63 +0,0 @@ -include modules/Makefile.inc - -LOGDIR = log/emu.$(NOW) - -EMU_PREPARE = $(HOME)/usr/bin/EMu-prepare -EMU_PREPARE_OPTS := --chr $(EMU_REF_DIR) -ifdef EMU_TARGETS_FILE -EMU_PREPARE_OPTS += --regions $(EMU_TARGETS_FILE) -endif -EMU = $(HOME)/usr/bin/EMu - -PLOT_EMU = $(RSCRIPT) modules/signatures/plot_emu_signatures.R - -NO_CNV ?= false - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: all - -ALL := emu/emu_results_bic.txt emu/report/index.html -ifdef NUM_SPECTRA -ALL += emu/emu_$(NUM_SPECTRA).timestamp -endif - -all : $(ALL) - -ALL_TABLE ?= alltables/allTN.mutect_snps.tab.txt - -emu/mutations.txt : $(ALL_TABLE) - $(INIT) awk 'NR > 1 { sub("X", "23", $$3); sub("Y", "24", $$3); sub("MT", "25", $$3); print $$1 "_" $$2, $$3, $$4, $$6 ">" $$7 }' $< | cat - $(EMU_REF_MUTATIONS) > $@ - -emu/cnv.txt : $(foreach pair,$(SAMPLE_PAIRS),freec/$(pair)/$(tumor.$(pair)).bam_CNVs) - $(INIT) rm -f $@; for x in $^; do \ - sample=`echo $$x | sed 's:freec/::; s:/.*::'`; \ - awk -v sample=$$sample 'NR > 1 { sub("chr", "", $$1); sub("X", "23" , $$1); sub("Y", "24", $$1); sub("MT", "25", $$1); print sample, $$1, $$2, $$3, $$4; }' $$x >> $@; \ - done && cat $(EMU_REF_CNV) >> $@ - -ifeq ($(NO_CNV),false) -emu/mutations.txt.mut.matrix : emu/mutations.txt emu/cnv.txt - $(call RUN,-s 4G -m 8G,"$(EMU_PREPARE) $(EMU_PREPARE_OPTS) --cnv $(<<) --mut $< --pre $(@D) --regions $(EMU_TARGETS_FILE)") -else -emu/mutations.txt.mut.matrix : emu/mutations.txt - $(call RUN,-s 4G -m 8G,"$(EMU_PREPARE) $(EMU_PREPARE_OPTS) --chr $(EMU_REF_DIR) --mut $< --pre $(@D)") -endif - -emu/emu_results_bic.txt : emu/mutations.txt.mut.matrix - $(call RUN,-s 4G -m 8G,"$(EMU) --mut $< --opp human-exome --pre emu/emu_results") - -RESULT_TIMESTAMPS = -ifdef NUM_SPECTRA -emu/emu_$(NUM_SPECTRA).timestamp : emu/mutations.txt.mut.matrix - $(call RUN,-s 4G -m 8G,"$(EMU) --force $(NUM_SPECTRA) --mut $< --opp human-exome --pre emu/emu_results && touch $@") - -RESULT_TIMESTAMPS += emu/emu_$(NUM_SPECTRA).timestamp -endif - -emu/samples.txt : - $(INIT) echo "$(SAMPLE_PAIRS)" | sed 's/ /\n/g' > $@ - -emu/report/index.html : emu/emu_results_bic.txt emu/samples.txt emu/mutations.txt $(RESULT_TIMESTAMPS) - $(call RUN,-s 4G -m 16G,"$(PLOT_EMU) --inPrefix $( 1) { - vrFiles <- arguments$args -} -if (length(vrFiles) < 1) { - cat("Need VRange file(s)\n"); - print_help(parser); - stop(); -} - -outFile <- opt$outFile -if (opt$genome == "b37" || opt$genome == "hg19") { - library("BSgenome.Hsapiens.UCSC.hg19"); - library("TxDb.Hsapiens.UCSC.hg19.knownGene") - genome <- BSgenome.Hsapiens.UCSC.hg19 - txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene - genomeName <- 'hg19' -} else if (opt$genome == "mm10" || opt$genome == "GRCm38") { - library("TxDb.Mmusculus.UCSC.mm10.knownGene") - library("BSgenome.Mmusculus.UCSC.mm10"); - genome <- BSgenome.Mmusculus.UCSC.mm10 - txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene - genomeName <- 'mm10' -} - -txdb <- renameSeqlevels(txdb, sub('chr', '', seqlevels(txdb))) -txByGenes <- transcriptsBy(txdb, 'gene') -bases <- c("A", "C", "G", "T") - -if (!is.null(opt$targetBed)) { - bed <- ucsc(import(opt$targetBed)) - k3t <- kmerFrequency(genome, 1e5, 3, bed) - k3wg <- kmerFrequency(genome, 1e5, 3) - norms <- k3wg[names(k3t)] / k3t -} - -registerDoMC(opt$ncores) - -theme_set(theme_bw(base_size = 18)) -``` - -# `r opt$name` Mutational Signature Report ---- -### Raymond Lim - -```{r createMotifMatrices, include = F, cache = F} -vrs <- list() -mm <- list() -mmNorm <- list() -mmTranscribed <- list() -mmUntranscribed <- list() -for (vrFile in vrFiles) { - s <- sub('\\..*', '', vrFile) - s <- sub('.*/', '', s) - load(vrFile) - vrs[[s]] <- vr -} -vrs <- VRangesList(vrs) -allVr <- unlist(vrs) - -mm <- motifMatrix(allVr, normalize = F) -mmNorm <- motifMatrix(allVr, normalize = T) -x <- subset(allVr, allVr$transcribed) -sampleNames(x) <- factor(as.factor(sampleNames(x))) -mmTranscribed <- motifMatrix(x, normalize = T) -x <- subset(allVr, !allVr$transcribed) -sampleNames(x) <- factor(as.factor(sampleNames(x))) -mmUntranscribed <- motifMatrix(x, normalize = T) -if (!is.null(opt$targetBed)) { - mmNorm <- normalizeMotifs(mmNorm, norms) - mmTranscribed <- normalizeMotifs(mmTranscribed, norms) - mmUntranscribed <- normalizeMotifs(mmUntranscribed, norms) -} -``` - -```{r writeMotifMatrices} -if (!is.null(opt$outDir)) { - fn <- paste(opt$outDir, "/mm.tsv", sep = "") - write.table(mm, file = fn, quote = F, sep = '\t') - fn <- paste(opt$outDir, "/mm_transcribed.tsv", sep = "") - write.table(mmTranscribed, file = fn, quote = F, sep = '\t') - fn <- paste(opt$outDir, "/mm_untranscribed.tsv", sep = "") - write.table(mmUntranscribed, file = fn, quote = F, sep = '\t') - fn <- paste(opt$outDir, "/mm_norm.tsv", sep = "") - write.table(mmNorm, file = fn, quote = F, sep = '\t') -} -``` - -```{r loadAlexandrovData} -alexandrov <- read.table(opt$alexandrovData, sep = '\t', header = T, as.is = T) -rownames(alexandrov) <- paste(sub('>', '', as.character(alexandrov$Substitution.Type)), - ' ', subseq(as.character(alexandrov$Trinucleotide), 1, 1), '.', - subseq(as.character(alexandrov$Trinucleotide), 3, 3), sep = '') - -alexandrovM <- alexandrov[rownames(mm), grepl('Signature', colnames(alexandrov))] - -``` - -```{r nmf, fig.width = 10, fig.height = 10} -solveNMF <- function(x, signatures){ - coef <- fcnnls(x, signatures[rownames(x),, drop = F]) # reorder the rownames of the in matrix - colsum <- apply(coef$x, 2, sum) - coef_x_scaled <- scale(coef$x, center = F, scale = colsum) - return(coef_x_scaled) -} -nmfCoefs <- solveNMF(as.matrix(alexandrovM), as.matrix(mm)) - -if (!is.null(opt$outDir)) { - fn <- paste(opt$outDir, "/nnls_coefs.tsv", sep = "") - write.table(nmfCoefs, file = fn, quote = F, sep = '\t') -} - -cols <- brewer.pal(9, 'Blues') -if (ncol(nmfCoefs) > 2) { - heatmap.2(nmfCoefs, trace = 'none', margins = c(13, 8), cexCol = 0.8, col = cols, Rowv = F, Colv = F, dendrogram = 'none') -} - -par(mar = c(10,5,5,5)) -for (s in colnames(nmfCoefs)) { - barplot(nmfCoefs[,s], main = s, las = 2) -} -``` - - -```{r pie, eval = F, echo = F} -for (s in names(vrs)) { - vr <- vrs[[s]] - cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen", "T>G" = "pink") - main <- paste(s, " (n = ", length(vr), ")", sep = '') - pie(table(vr$alteration), col = cols, main = main) - if (sum(vr$transcribed, na.rm = T) > 0 && sum(!vr$transcribed, na.rm = T) > 0) { - main <- paste('transcribed', s, " (n=", sum(vr$transcribed, na.rm = T), ")", sep = '') - pie(table(subset(vr, vr$transcribed)$alteration), col = cols, main = main) - main <- paste('untranscribed', s, " (n=", sum(!info(vcf)$transcribed, na.rm = T), ")", sep = '') - pie(table(subset(vr, !vr$transcribed)$alteration), col = cols, main = paste('untranscribed', main)) - } -} -``` - - -```{r mutCountPlots, fig.height = 7, fig.width = 28} -plotMutBarplot <- function(samp, mm) { - cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen", "T>G" = "pink") - mdf <- melt(mm, varnames = c('motif', 'sample')) - mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", - mdf$motif) - mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif) - tit <- paste(samp, ' (n = ', sum(mdf %>% filter(sample == samp) %$% value, na.rm = T), ")", sep = '') - mdf %>% filter(sample == samp) %>% - ggplot(aes(x = context, y = value, fill = alteration)) + - geom_bar(stat = 'identity') + - facet_grid(~ alteration, switch = 'x') + - xlab("") + ylab("") + ggtitle(tit) + - theme(axis.text.x = element_text(angle = 90, hjust = 1), - legend.position = 'none', - panel.border = element_blank(), - axis.line.x = element_line(color = 'black', size = 1), - axis.line.y = element_line(color = 'black', size = 1), - panel.grid = element_blank(), - strip.background = element_blank(), - strip.text.x = element_text(size = 20)) + - scale_fill_manual(values = cols) -} - -plotMutBarplotStranded <- function(samp, mmTranscribed, mmUntranscribed) { - mdfTranscribed <- melt(mmTranscribed, varnames = c('motif', 'sample')) - mdfTranscribed$transcribed <- T - mdfUntranscribed <- melt(mmUntranscribed, varnames = c('motif', 'sample')) - mdfUntranscribed$transcribed <- F - mdf <- rbind(mdfTranscribed, mdfUntranscribed) - mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", - mdf$motif) - mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif) - tit <- paste(samp, ' (n = ', sum(mdf %>% filter(sample == samp) %$% value, na.rm = T), ")", sep = '') - mdf %>% filter(sample == samp) %>% - ggplot(aes(x = context, y = value, fill = transcribed)) + - geom_bar(stat = 'identity', position = 'dodge') + - facet_grid(~ alteration, switch = 'x') + - theme(axis.text.x = element_text(angle = 90, hjust = 1), - panel.border = element_blank(), - axis.line.x = element_line(color = 'black', size = 1), - axis.line.y = element_line(color = 'black', size = 1), - panel.grid = element_blank(), - strip.background = element_blank(), - strip.text.x = element_text(size = 20)) + - scale_fill_manual(name = "", values = c('blue', 'red'), - labels = c("Transcribed strand", - 'Untranscribed strand')) + - xlab("") + ylab("") + ggtitle(tit) -} - -plotMutPiechart <- function(samp, mm) { - mdf <- melt(mm, varnames = c('motif', 'sample')) - mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", - mdf$motif) - mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif) - cols <- c("C>A" = "lightblue", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "lightgreen", "T>G" = "pink") - mdf %>% filter(sample == samp) %>% group_by(alteration) %>% summarise(value = sum(value)) %>% - ggplot(aes(x = "", y = value, fill = alteration)) + - geom_bar(width = 1, stat = 'identity') + scale_fill_manual(values = cols) + - coord_polar("y") + xlab("") + ylab("") -} - -for (s in colnames(mm)) { - p1 <- plotMutBarplot(s, mm) - p2 <- plotMutPiechart(s, mm) - grid.arrange(p1, p2, ncol = 2, widths = c(4, 2)) - - p1 <- plotMutBarplot(s, mmNorm) - p2 <- plotMutPiechart(s, mmNorm) - grid.arrange(p1, p2, ncol = 2, widths = c(4, 2)) -} - -for (s in colnames(mm)) { - vr <- vrs[[s]] - if (sum(vr$transcribed, na.rm = T) > 0 && sum(!vr$transcribed, na.rm = T) > 0) { - p1 <- plotMutBarplotStranded(s, mmTranscribed, mmUntranscribed) - p2 <- plotMutPiechart(s, mmTranscribed) - p3 <- plotMutPiechart(s, mmUntranscribed) - lom <- matrix(c(1,1, 2,3), nrow = 2, ncol = 2) - grid.arrange(p1, p2, p3, layout_matrix = lom, widths = c(4, 2)) - } -} - -``` - - -```{r bootPlot, fig.width = 12} -bootFun <- function(x) { - baseMotif = subseq(as.character(x$motif), 4, 6) - subseq(baseMotif, 2, 2) = subseq(as.character(x$motif), 1, 1) - if (!is.null(opt$targetBed)) { - nval <- x$value * norms[baseMotif] - } else { - nval <- x$value - } - nval <- nval / sum(nval) - apply(alexandrovM, 2, function(y) fcnnls(nval, y)$x) -} - -ranFun <- function(p, d) { - # create a vector of same # of mutations using original context probabilities - s <- sample.int(nrow(p), size = sum(p$value), replace = T, prob = p$value / sum(p$value)) - pp <- p - tab <- table(p[s, "motif"]) - pp[match(names(tab), pp$motif), "value"] <- tab - pp -} - -mm <- motifMatrix(allVr, normalize = F) -bootDf <- foreach(s = colnames(mm), .combine = 'rbind', .errorhandling = 'remove') %dopar% { - mdf <- melt(mm[, s, drop = F], varnames = c('motif', 'sample')) - mdf$alteration = sub("([ACGTN])([ACGTN]) .+", "\\1>\\2", - mdf$motif) - mdf$context = sub("[ACGTN][ACGTN] (.+)", "\\1", mdf$motif) - boots <- boot(mdf, bootFun, R = 1000, ran.gen = ranFun, sim = 'parametric', parallel = 'multicore') - boots.sd <- apply(boots$t, 2, sd) - ci <- norm.ci(boots, index = 1:ncol(alexandrovM)) - Df <- as.data.frame(boots$t0, row.names = 1:ncol(alexandrovM)) - colnames(Df) <- 'bootCor' - Df$signature <- sub('Signature.', '', colnames(alexandrovM)) - Df$bootSD <- boots.sd - Df$lowerCI95 <- ci[,2] - Df$upperCI95 <- ci[,3] - n <- colnames(alexandrovM) - Df$votes <- table(factor(n[apply(boots$t, 1, which.max)], levels = n)) - Df$sample <- s - Df$nCalls <- sum(mm[, s]) - Df$significant <- ! with(Df, bootCor > lowerCI95 & bootCor < upperCI95) - Df$rank = rank(-Df$votes, ties.method = 'max') - Df -} - -fn <- paste(opt$outDir, "/vote_results.tsv", sep = "") -write.table(bootDf, row.names = F, quote = F, sep = '\t', file = fn) - -maxBootDf <- bootDf %>% group_by(sample) %>% filter(votes == max(votes)) -fn <- paste(opt$outDir, "/max_vote_results.tsv", sep = "") -write.table(maxBootDf, row.names = F, quote = F, sep = '\t', file = fn) - -for (s in colnames(mm)) { - bdf <- filter(bootDf, sample == s) - cols <- ifelse(bdf$signficant, 'red', 'grey') - n <- sub('Signature.', '', colnames(alexandrovM)) - par(mfrow = c(2,1), mar = c(3,5,3,3)) - barCenters <- barplot(bdf$bootCor, ylim = c(min(bdf$bootCor - bdf$bootSD), max(bdf$bootCor + bdf$bootSD)), names.arg = n, col = cols, - main = s, ylab = 'Correlation') - segments(barCenters, bdf$bootCor - bdf$bootSD, barCenters, bdf$bootCor + bdf$bootSD, lwd = 1) - # vote barplot - barplot(bdf$votes, ylab = '# Votes', names.arg = bdf$signature) -} -``` - diff --git a/signatures/nmfMutSig.mk b/signatures/nmfMutSig.mk deleted file mode 100644 index 3b69c1ee..00000000 --- a/signatures/nmfMutSig.mk +++ /dev/null @@ -1,45 +0,0 @@ -# Run wtsi NMF mutation sig on tumour/normal data -# Detect mutation signatures using mutect calls -##### DEFAULTS ###### - -include modules/Makefile.inc - -LOGDIR = log/nmf_mutsig.$(NOW) - -EMU_PREPARE = $(HOME)/usr/bin/EMu-prepare -MATLABPATH := modules/mut_sigs -ifeq ($(HOSTNAME),ika.cbio.mskcc.org) -export MATLAB_BIN := /usr/local/MATLAB/R2013a/bin/matlab -else -export MATLAB_BIN := /usr/local/bin/matlab -endif -MATLAB = export MATLABPATH=$(MATLABPATH); $(MATLAB_BIN) -nodisplay -nosplash - -NMF_DIR = $(HOME)/usr/nmf_mut_sig -NMF_TYPES_FILE = $(NMF_DIR)/types.mat - -NMF_MIN_SIG = 1 -NMF_MAX_SIG = 4 - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY: all - -ALL := nmf_mutsig/mutations.txt.mut.matrix nmf_mutsig/results.mat nmf_mutsig/plot.timestamp - -all : $(ALL) - -nmf_mutsig/mutations.txt : alltables/allTN.mutect_snps.tab.txt - $(INIT) awk 'NR > 1 { sub("X", "23", $$3); sub("Y", "24", $$3); sub("MT", "25", $$3); print $$1 "_" $$2, $$3, $$4, $$6 ">" $$7 }' $< > $@ - -nmf_mutsig/mutations.txt.mut.matrix : nmf_mutsig/mutations.txt - $(INIT) $(EMU_PREPARE) --chr $(EMU_REF_DIR) --mut $< --pre $(@D) --regions $(EMU_TARGETS_FILE) - -nmf_mutsig/input.mat : nmf_mutsig/mutations.txt.mut.matrix - $(INIT) $(MATLAB) -r "createNMFinput $< $(<:.mut.matrix=.samples) $(NMF_TYPES_FILE) $(PROJECT_NAME) $@" - -nmf_mutsig/results.mat : nmf_mutsig/input.mat - $(INIT) $(MATLAB) -r "runNMF $< $(@:.mat=) $(NMF_DIR) $(NMF_MIN_SIG) $(NMF_MAX_SIG)" - -nmf_mutsig/plot.timestamp : nmf_mutsig/results.mat - $(INIT) $(MATLAB) -r "plotNMF $(<:.mat=) $(NMF_DIR) $(NMF_MIN_SIG) $(NMF_MAX_SIG)" && touch $@ diff --git a/signatures/plotNMF.m b/signatures/plotNMF.m deleted file mode 100644 index a1420ccb..00000000 --- a/signatures/plotNMF.m +++ /dev/null @@ -1,20 +0,0 @@ -function plotNMF( prefix, nmfDir, minNumSig, maxNumSig ) -% run NMF -addpath(strcat(nmfDir, '/source/')); -addpath(strcat(nmfDir, '/plotting/')); -mkdir('temp'); - -minNumSig = str2num(minNumSig); -maxNumSig = str2num(maxNumSig); - -for totalSignatures = minNumSig : maxNumSig - tsPrefix = strcat(prefix, '_ts', num2str(totalSignatures)); - inputFile = strcat(tsPrefix, '.mat'); - S = load(inputFile); - plotSignaturesToFile(tsPrefix, S.processes, S.input, S.allProcesses, S.idx, S.processStabAvg); - plotSignaturesExposureInSamplesToFile(tsPrefix, S.exposures, S.input); -end - -quit -end - diff --git a/signatures/plot_emu_signatures.R b/signatures/plot_emu_signatures.R deleted file mode 100644 index 6b08a32f..00000000 --- a/signatures/plot_emu_signatures.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("VariantAnnotation")) -suppressPackageStartupMessages(library("RColorBrewer")) -suppressPackageStartupMessages(library("hwriter")) -suppressPackageStartupMessages(library("org.Hs.eg.db")) - -options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) - -optList <- list( - make_option("--outDir", default = NULL, help = "output dir (required)"), - make_option("--mutations", default = NULL, help = "mutations file (required)"), - make_option("--samples", default = NULL, help = "samples file"), - make_option("--sampleSubset", default = NULL, help = "sample subset file: list of samples to plot contribution"), - make_option("--inPrefix", default = NULL, help = "EMu input prefix (required)")) - -parser <- OptionParser(usage = "%prog [options]", option_list = optList); - -arguments <- parse_args(parser, positional_arguments = T); -opt <- arguments$options; - -if (is.null(opt$outDir)) { - cat("Need output dir\n"); - print_help(parser); - stop(); -} else if (is.null(opt$inPrefix)) { - cat("Need EMu input prefix\n"); - print_help(parser); - stop(); -} else if (is.null(opt$mutations)) { - cat("Need EMu mutations file\n"); - print_help(parser); - stop(); -} else { - files <- arguments$args; -} - -glob <- paste(opt$inPrefix, '*_ml_spectra.txt', sep = '') -spectraFiles <- Sys.glob(glob) - -glob <- paste(opt$inPrefix, '*_map_activities.txt', sep = '') -activityFiles <- Sys.glob(glob) - -glob <- paste(opt$inPrefix, '*_assigned.txt', sep = '') -assignedFiles <- Sys.glob(glob) - -pg <- openPage('index.html', dirname = opt$outDir, title = 'EMu results') - -set.seed(002) -palette(sample(rainbow(30))) - -for (fn in spectraFiles) { - spectra <- read.table(fn, sep = ' ') - spectra <- spectra[,-97] # remove empty col - for (i in 1:nrow(spectra)) { - ofn <- paste(opt$outDir, "/", basename(fn), sep = '') - ofn <- sub('\\.txt$', paste("_", i, '.pdf', sep = ''), ofn) - pdf(ofn, height = 8, width = 10) - par(cex = 1.5) - cols <- rep(c('LightBlue', 'Black', 'Red', 'Grey', 'Green', 'Magenta'), each = 16) - barplot(t(spectra[i,]) * 100, beside = T, col = cols, border = cols, xaxt = 'n', main = paste("Signature", i), col.main = i, ylab = "% of mutations") - labs <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G") - mtext(labs, side = 1, at = 1:6 * 16 - 7.5) - null <- dev.off() - } - - for (i in 1:nrow(spectra)) { - ofn <- paste(opt$outDir, "/", basename(fn), sep = '') - ofn <- sub('\\.txt$', paste("_", i, '.png', sep = ''), ofn) - png(ofn, height = 500, width = 800, type = 'cairo-png') - par(cex = 2) - cols <- rep(c('LightBlue', 'Black', 'Red', 'Grey', 'Green', 'Magenta'), each = 16) - barplot(t(spectra[i,]) * 100, beside = T, col = cols, border = cols, xaxt = 'n', main = paste("Signature", i), ylab = "% of mutations", col.main = i) - labs <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G") - mtext(labs, side = 1, at = 1:6 * 16 - 7.5) - null <- dev.off() - hwriteImage(basename(ofn), pg, br = T) - } -} - -samples <- scan(opt$samples, what = 'character') -sampleSubset <- scan(opt$sampleSubset, what = 'character') - -mutations <- read.table(opt$mutations, sep = ' ') -colnames(mutations) <- c('sample', 'chr', 'pos', 'snv') -mutations <- subset(mutations, sample %in% sampleSubset) - -for (fn in assignedFiles) { - assigned <- read.table(fn, sep = ' ') - assigned <- as.matrix(assigned[,-ncol(assigned)]) - rownames(assigned) <- samples - assigned <- assigned[sampleSubset, ] - - ofn <- paste(opt$outDir, "/", basename(fn), sep = '') - ofn <- sub('\\.txt$', '.png', ofn) - - tab <- table(factor(mutations$sample)) - tab <- tab[sampleSubset] - oo <- order(tab) - assigned <- assigned[oo, ] - - png(ofn, height = 1000, width = 1000, type = 'cairo-png') - par(mar = c(5, 10, 5, 1), cex = 1, mfrow = c(1, 2), cex = 1.5) - barplot(t(assigned / rowSums(assigned)), col = 1:5, space = 0, border = F, horiz = T, las = 2, xlab = "Contribution of signature") - par(mar = c(5,1,5,5)) - barplot(tab[oo], las = 2, horiz = T, space = 0, border = F, xlab = "Number of Mutations", axisnames = F) - null <- dev.off() - hwriteImage(basename(ofn), pg, br = T) - - ofn <- sub('\\.png$', '.pdf', ofn) - pdf(ofn, height = 12, width = 12) - par(mar = c(5, 10, 5, 1), cex = 1, mfrow = c(1, 2), cex = 1.5) - barplot(t(assigned / rowSums(assigned)), col = 1:5, space = 0, border = F, horiz = T, las = 2, xlab = "Contribution of signature") - par(mar = c(5,1,5,5)) - barplot(tab[oo], las = 2, horiz = T, space = 0, border = F, xlab = "Number of Mutations", axisnames = F) - null <- dev.off() -} - - -closePage(pg) diff --git a/signatures/plot_signatures.R b/signatures/plot_signatures.R deleted file mode 100644 index 86972ef4..00000000 --- a/signatures/plot_signatures.R +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("deconstructSigs")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) -suppressPackageStartupMessages(library("ggplot2")) -suppressPackageStartupMessages(library("RColorBrewer")) -suppressPackageStartupMessages(library("Palimpsest")) -suppressPackageStartupMessages(library("BSgenome.Hsapiens.UCSC.hg19")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list( - make_option("--sample_name", default = NA, type = 'character', help = "tumor sample name") - ) - -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -'plot96_mutation_spectrum' <- function (vcf, sample.col = "sample", mutcat3.col = "mutcat3", - ymax = NULL, averageProp = FALSE, plot.file = NULL) -{ - bases <- c("A", "C", "G", "T") - ctxt16 <- paste(rep(bases, each = 4), rep(bases, 4), sep = ".") - mt <- c("CA", "CG", "CT", "TA", "TC", "TG") - types96 <- paste(rep(mt, each = 16), rep(ctxt16, 6), sep = "_") - types96 <- sapply(types96, function(z) { - sub("\\.", substr(z, 1, 1), z) - }) - context <- substr(types96, 4, 6) - nsamp <- length(unique(vcf[, sample.col])) - if (averageProp & nsamp > 1) { - tmp <- makeMutypeMatFromVcf(vcf, sample.col = "CHCID", - mutcat.col = "mutcat3", mutypes = types96) - freq <- apply(tmp, 1, mean) - } - else { - freq <- sapply(types96, function(z) { - mean(vcf[, mutcat3.col] == z, na.rm = T) - }) - } - if (!is.null(plot.file)) { - pdf(plot.file, width = 24, height = 5) - } - col96 <- c(rep("skyblue3", 16), rep("black", 16), rep("red", - 16), rep("grey", 16), rep("green", 16), rep("pink", 16)) - labs <- c(rep("C>A", 16), rep("C>G", 16), rep("C>T", 16), - rep("T>A", 16), rep("T>C", 16), rep("T>G", 16)) - if (is.null(ymax)) { - ymax <- 100*ceiling(max(freq) * 100)/100 - ymax <- ifelse(ymax>10, 30, 10) - } - bp <- barplot(freq*100, col = col96, border = col96, las = 2, - width = 1, space = .35, yaxt = "n", xaxt = "n", ylim = c(0, - ymax * 1.2)) - title(ylab = "Fraction of mutations (%)", mgp = c(1, 1, 0), - cex.lab = 1.6) - axis(1, at = bp, labels = context, pos = 0, las = 2, cex.axis = 1.5, - tick = F, cex.axis = 1, lwd=-1) - if (ymax==40) { - axis(2, at = c(0,10,20,30,40), labels=c(0,10,20,30,40), pos = 0, las = 1, cex.axis = 1.5) - } else if (ymax==30) { - axis(2, at = c(0,5,10,15,20,25,30), labels=c(0,5,10,15,20,25,30), pos = 0, las = 1, cex.axis = 1.5) - } else if (ymax==20) { - axis(2, at = c(0,5,10,15,20), labels=c(0,5,10,15,20), pos = 0, las = 1, cex.axis = 1.5) - } else if (ymax==10) { - axis(2, at = c(0,2,4,6,8,10), labels=c(0,2,4,6,8,10), pos = 0, las = 1, cex.axis = 1.5) - } - for (i in seq(1, 81, by = 16)) { - rect(bp[i], par()$usr[4], bp[i + 15], par()$usr[4] - - 0.05 * diff(par()$usr[3:4]), col = col96[i], border = col96[i]) - text((bp[i] + bp[i + 15])/2, par()$usr[4] + 0.09 * diff(par()$usr[3:4]), - labels = labs[i], xpd = TRUE, cex = 2) - } - if (!is.null(plot.file)) { - dev.off() - } -} - -load(file=paste0("deconstructsigs/signatures/", opt$sample_name, ".RData")) - -## barplot of base changes with 3' and 5' context -colnames(mutation_summary) = c("Sample", "CHROM", "POS", "REF", "ALT") -mutation_summary = cbind(mutation_summary, "Type"=rep("SNV", nrow(mutation_summary))) -vcf = preprocessInput_snv(input_data = mutation_summary, - ensgene = ensgene, - reference_genome = BSgenome.Hsapiens.UCSC.hg19) -patient_ids = unique(vcf$Sample) -pdf(file=paste0("deconstructsigs/plots/context/", opt$sample_name, ".pdf"), width=18, height=5) -plot96_mutation_spectrum(vcf, ymax=20, sample.col = "Sample", plot.file = NULL) -dev.off() - -## pie-charts of signatures -palette = colorRampPalette(brewer.pal(9, "Set1")) -cols = palette(30) -names(cols) = 1:30 - -df = data.frame(percentage = 100*as.numeric(extracted_signatures$weights[1,]), - signature_name = colnames(extracted_signatures$weights)) %>% - mutate(signature_name = as.numeric(gsub(pattern="Signature.", replacement="", signature_name))) %>% - arrange(signature_name) %>% - filter(percentage!=0) %>% - mutate(signature_name = factor(signature_name, ordered=TRUE, levels=sort(signature_name))) %>% - mutate(lab.ypos = cumsum(percentage) - 0.5*percentage) - -plot.0 = ggplot(df, aes(x = "", y = percentage, fill = signature_name)) + - geom_bar(width = 1, stat = "identity", color = "white") + - scale_fill_manual(values=cols) + - coord_polar("y", start = 0) + - geom_text(aes(y = lab.ypos, label = paste0(signif(percentage,3), "%")), color = "white") + - guides(fill=guide_legend(title="Signature")) + - theme_void() - -pdf(file=paste0("deconstructsigs/plots/exposures/", opt$sample_name, ".pdf"), width=6, height=6) -print(plot.0) -dev.off() diff --git a/signatures/runNMF.m b/signatures/runNMF.m deleted file mode 100644 index 40d372d7..00000000 --- a/signatures/runNMF.m +++ /dev/null @@ -1,46 +0,0 @@ -function runNMF( inputFile, outputPrefix, nmfDir, minNumSig, maxNumSig ) -% run NMF -addpath(strcat(nmfDir, '/source/')); -addpath(strcat(nmfDir, '/plotting/')); -clc; - -mkdir('temp'); - -minNumSig = str2num(minNumSig); -maxNumSig = str2num(maxNumSig); - -%% Open matlabpool -if ( matlabpool('size') == 0 ) - matlabpool open; % opens the default matlabpool, if it is not already opened -end - -%% Define parameters -iterationsPerCore = 100; -stability = zeros(maxNumSig, 1); -reconstructionError = zeros(maxNumSig, 1); -allOutputFile = strcat(outputPrefix, '.mat'); - -for totalSignatures = minNumSig : maxNumSig - outputFile = strcat(outputPrefix, '_ts', num2str(totalSignatures), '.mat'); - - % Decipher the signatures of mutational processes from catalogues of mutations - [input allProcesses allExposures idx processes exposures processStab processStabAvg] = ... - decipherMutationalProcesses(iterationsPerCore, totalSignatures, inputFile, ... - [ outputFile ] ); - % Record the stability and average Frobenius reconstruction error - stability(totalSignatures-minNumSig+1) = mean(processStabAvg); - reconstructionError(totalSignatures-minNumSig+1) = norm(input.originalGenomes - processes*exposures, 'fro'); -end - -%% Plotting the stability and average Frobenius reconstruction error -try %% Some versions of MATLAB plotyy has a bug under linux with -nodisplay -nosplash -nodesktop options - plotSignatureStabilityAndReconstructionToFile(strcat(outputPrefix, '_stab_reconstruction.png'), minNumSig:maxNumSig, stability, reconstructionError, input); -catch ME - %% Do not do anything - just ignore the plot in order to save the final output daya -end - -%% Saving the data -save(allOutputFile); - -quit -end diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk new file mode 100644 index 00000000..38b3f591 --- /dev/null +++ b/signatures/sv_signature.mk @@ -0,0 +1,21 @@ +include modules/Makefile.inc + +LOGDIR = log/sv_signature.$(NOW) + +svsignature : $(foreach sample,$(TUMOR_SAMPLES),sv_signature/$(sample)/) + +define extract-signatures +sv_signature/$1/ : + $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefial && \ + $(RSCRIPT) modules/signatures/extract_signatures.R \ + --sample_name $$()") + +endef +$(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call extract-signatures,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + $(DECONSTRUCTSIGS_ENV)/bin/R --version > version/deconstruct_sigs.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: deconstructsigs \ No newline at end of file diff --git a/signatures/vcf_2_vranges.R b/signatures/vcf_2_vranges.R deleted file mode 100644 index 925ef565..00000000 --- a/signatures/vcf_2_vranges.R +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("VariantAnnotation")) -suppressPackageStartupMessages(library("reshape")) -suppressPackageStartupMessages(library("boot")) -suppressPackageStartupMessages(library("plyr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("ggplot2")) -suppressPackageStartupMessages(library("RColorBrewer")) -suppressPackageStartupMessages(library("reshape2")) -suppressPackageStartupMessages(library("SomaticSignatures")) -suppressPackageStartupMessages(library("foreach")) - -optList <- list( - make_option("--genome", default = 'b37', help = "reference genome"), - make_option("--ignoreFilter", default = F, action = 'store_true', help = "ignore the filter column for vcf files"), - make_option("--outFile", default = NULL, type = "character", action = "store", help = "output directory") - ) - -parser <- OptionParser(usage = "%prog [options] [vcf file(s)]", option_list = optList); -arguments <- parse_args(parser, positional_arguments = T); -opt <- arguments$options; - -if (length(arguments$args) != 1) { - cat("Need vcf file\n"); - print_help(parser); - stop(); -} - -vcfFile <- arguments$args[1] -outFile <- opt$outFile -if (opt$genome == "b37" || opt$genome == "hg19") { - library("BSgenome.Hsapiens.UCSC.hg19"); - library("TxDb.Hsapiens.UCSC.hg19.knownGene") - genome <- BSgenome.Hsapiens.UCSC.hg19 - txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene - genomeName <- 'hg19' - chromosomes <- c(1:22, "X", "Y") - chromosomes <- c(chromosomes, paste('chr', chromosomes, sep = '')) -} else if (opt$genome == "mm10" || opt$genome == "GRCm38") { - library("BSgenome.Mmusculus.UCSC.mm10"); - library("TxDb.Mmusculus.UCSC.mm10.knownGene") - genome <- BSgenome.Mmusculus.UCSC.mm10 - txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene - genomeName <- 'mm10' - chromosomes <- c(1:19, "X", "Y") - chromosomes <- c(chromosomes, paste('chr', chromosomes, sep = '')) -} - -txByGenes <- transcriptsBy(txdb, 'gene') - -temp <- tempfile() -zipped <- bgzip(vcfFile, temp) -idx <- indexTabix(temp, "vcf") -cat('done\n') - -tab <- TabixFile(zipped, idx) -open(tab) - -vcf <- readVcf(tab, genomeName) -passIds <- which(rowRanges(vcf)$FILTER == "PASS") -if (nrow(vcf) > 0 && sum(seqnames(vcf) %in% chromosomes) > 0 && - sum(isSNV(vcf)) > 0 && (opt$ignoreFilter | length(passIds) > 0)) { - if (!opt$ignoreFilter) { - vcf <- vcf[passIds, ] - } - vcf <- vcf[isSNV(vcf) & seqnames(vcf) %in% chromosomes] - s <- sub('\\..*', '', vcfFile) - s <- sub('.*/', '', s) - vr <- VRanges(seqnames = seqnames(vcf), - ranges = ranges(vcf), - ref = as.character(ref(vcf)), - alt = sapply(alt(vcf), function(x) as.character(x[1])), - sampleNames = s) - seqlevels(vr) <- sub('^M$', 'MT', seqlevels(vr)) - vr <- ucsc(vr) - vr <- mutationContext(vr, genome, unify = T) - vr$refalt <- paste(ref(vr), alt(vr), sep = '') - - # query transcript ids - ol <- findOverlaps(vr, txByGenes) - subjectStrands <- sapply(txByGenes[subjectHits(ol)], function(x) paste(unique(as.character(strand(x))), collapse = ',')) - queryStrands <- tapply(subjectStrands, queryHits(ol), function(x) paste(unique(x), collapse = ",")) - vr$txStrand <- NA - vr$txStrand[as.integer(names(queryStrands))] <- queryStrands - vr$transcribed <- F - vr$transcribed[is.na(vr$txStrand)] <- NA - vr$transcribed[vr$refalt %in% c("GA", "GC", "GT", "AC", "AG", "AT") & grepl('\\+', vr$txStrand)] <- T - vr$transcribed[vr$refalt %in% c("CA", "CG", "CT", "TA", "TC", "TG") & grepl('-', vr$txStrand)] <- T - save(vr, file = opt$outFile) -} else { - vr <- NULL - save(vr, file = opt$outFile) -} From 58dd7a81d7c6f66f759a2302ffd79256ff7b54b7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 10 Nov 2022 14:55:17 -0500 Subject: [PATCH 445/766] Create sv_signature.py --- signatures/sv_signature.py | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 signatures/sv_signature.py diff --git a/signatures/sv_signature.py b/signatures/sv_signature.py new file mode 100644 index 00000000..49fb3534 --- /dev/null +++ b/signatures/sv_signature.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +""" compute structural variant signatures +""" + +import argparse +import viola +import numpy as np +import pandas as pd +import os +import sys + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog='sv_signature.py', description='Compute structural variant signatures') + parser.add_argument('--pcawg_bedpe', nargs='?', default='/data/reis-filho/lib/resource_files/viola/pcawg/', help='Path containing PCAWG bedpe files') + parser.add_argument('--fragile_site', nargs='?', default='/data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed', help='Fragile sites') + parser.add_argument('--replication_timing', nargs='?', default='/data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph', help='Replication timing') + parser.add_argument('--sv_definition', nargs='?', default='/data/reis-filho/lib/resource_files/viola/definitions/sv_class_definition.txt', help='SV definition') + parser.add_argument('--feature_matrix', nargs='?', default='feature_maxtrix.txt', help='Feature matrix path') + parser.add_argument('--exposure_matrix', nargs='?', default='exposure_maxtrix.txt', help='Exposure matrix path') + parser.add_argument('--signature_matrix', nargs='?', default='signature_maxtrix.txt', help='Signature matrix path') + parser.add_argument('--name', nargs='?', default='viola-sv', help='Run name') + + args = parser.parse_args() + + pcawg_bedpe = viola.read_bedpe_multi(args.pcawg_bedpe) + bed_fragile = viola.read_bed(args.fragile_site) + bedgraph_timing = viola.read_bed(args.replication_timing) + pcawg_bedpe.annotate_bed(bed=bed_fragile, annotation='fragile', how='flag') + pcawg_bedpe.annotate_bed(bed=bedgraph_timing, annotation='timing', how='value') + pcawg_bedpe.calculate_info('(${timingleft} + ${timingright}) / 2', 'timing') + feature_matrix = pcawg_bedpe.classify_manual_svtype(definitions=args.pcawg_bedpe, return_data_frame=True) + feature_matrix.drop('others', axis=1, inplace=True) + result_silhouette, result_metrics, exposure_matrix, signature_matrix = viola.SV_signature_extractor(feature_matrix, n_iter=10, name=args.name, n_components=12, init='nndsvda', solver='mu', beta_loss='kullback-leibler', max_iter=10000, random_state=1) + + feature_matrix.to_csv(args.feature_matrix, index=False, sep='\t') + + fh = open(args.exposure_matrix, 'r+') + np.savetxt(fh, exposure_matrix) + fh.close() + + fh = open(args.signature_matrix, 'r+') + np.savetxt(fh, signature_matrix) + fh.close() \ No newline at end of file From 9aae7f2e090e27c75385a682b6962ef9a73e43d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:04:24 -0500 Subject: [PATCH 446/766] ++ --- signatures/sv_signature.mk | 34 ++++++++++++++++------------- signatures/sv_signature.py | 44 -------------------------------------- 2 files changed, 19 insertions(+), 59 deletions(-) delete mode 100644 signatures/sv_signature.py diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 38b3f591..96d907b0 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -1,21 +1,25 @@ include modules/Makefile.inc -LOGDIR = log/sv_signature.$(NOW) +LOGDIR ?= log/sv_signature.$(NOW) -svsignature : $(foreach sample,$(TUMOR_SAMPLES),sv_signature/$(sample)/) +MIN_SIZE = 1 +MAX_SIZE = 10000000000000000 -define extract-signatures -sv_signature/$1/ : - $$(call RUN,-s 4G -m 6G -v $(DECONSTRUCTSIGS_ENV),"set -o pipefial && \ - $(RSCRIPT) modules/signatures/extract_signatures.R \ - --sample_name $$()") - -endef -$(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call extract-signatures,$(sample)))) +signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/samples.bed) + +define signature-sv +signature_sv/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR vcftobed \ + $$(<) \ + $(MIN_SIZE) \ + $(MAX_SIZE) \ + $$(@)") -..DUMMY := $(shell mkdir -p version; \ - $(DECONSTRUCTSIGS_ENV)/bin/R --version > version/deconstruct_sigs.txt) -.SECONDARY: +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) + .DELETE_ON_ERROR: -.PHONY: deconstructsigs \ No newline at end of file +.SECONDARY: +.PHONY: signature_sv diff --git a/signatures/sv_signature.py b/signatures/sv_signature.py deleted file mode 100644 index 49fb3534..00000000 --- a/signatures/sv_signature.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -""" compute structural variant signatures -""" - -import argparse -import viola -import numpy as np -import pandas as pd -import os -import sys - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog='sv_signature.py', description='Compute structural variant signatures') - parser.add_argument('--pcawg_bedpe', nargs='?', default='/data/reis-filho/lib/resource_files/viola/pcawg/', help='Path containing PCAWG bedpe files') - parser.add_argument('--fragile_site', nargs='?', default='/data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed', help='Fragile sites') - parser.add_argument('--replication_timing', nargs='?', default='/data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph', help='Replication timing') - parser.add_argument('--sv_definition', nargs='?', default='/data/reis-filho/lib/resource_files/viola/definitions/sv_class_definition.txt', help='SV definition') - parser.add_argument('--feature_matrix', nargs='?', default='feature_maxtrix.txt', help='Feature matrix path') - parser.add_argument('--exposure_matrix', nargs='?', default='exposure_maxtrix.txt', help='Exposure matrix path') - parser.add_argument('--signature_matrix', nargs='?', default='signature_maxtrix.txt', help='Signature matrix path') - parser.add_argument('--name', nargs='?', default='viola-sv', help='Run name') - - args = parser.parse_args() - - pcawg_bedpe = viola.read_bedpe_multi(args.pcawg_bedpe) - bed_fragile = viola.read_bed(args.fragile_site) - bedgraph_timing = viola.read_bed(args.replication_timing) - pcawg_bedpe.annotate_bed(bed=bed_fragile, annotation='fragile', how='flag') - pcawg_bedpe.annotate_bed(bed=bedgraph_timing, annotation='timing', how='value') - pcawg_bedpe.calculate_info('(${timingleft} + ${timingright}) / 2', 'timing') - feature_matrix = pcawg_bedpe.classify_manual_svtype(definitions=args.pcawg_bedpe, return_data_frame=True) - feature_matrix.drop('others', axis=1, inplace=True) - result_silhouette, result_metrics, exposure_matrix, signature_matrix = viola.SV_signature_extractor(feature_matrix, n_iter=10, name=args.name, n_components=12, init='nndsvda', solver='mu', beta_loss='kullback-leibler', max_iter=10000, random_state=1) - - feature_matrix.to_csv(args.feature_matrix, index=False, sep='\t') - - fh = open(args.exposure_matrix, 'r+') - np.savetxt(fh, exposure_matrix) - fh.close() - - fh = open(args.signature_matrix, 'r+') - np.savetxt(fh, signature_matrix) - fh.close() \ No newline at end of file From 3a7f7ea08bee6dd48bee04604d17c47dcf9b8421 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:05:15 -0500 Subject: [PATCH 447/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 96d907b0..f1b4691f 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -5,7 +5,7 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/samples.bed) +signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bed) define signature-sv signature_sv/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf From 17aa17e39cbbbe86e74958aea77132ac0713b166 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:06:55 -0500 Subject: [PATCH 448/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index f1b4691f..6a59f0c4 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -8,7 +8,7 @@ MAX_SIZE = 10000000000000000 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bed) define signature-sv -signature_sv/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf +sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ SURVIVOR vcftobed \ $$(<) \ From 582e21dd38fcb7d6998d1fdaf1eb40b8c3c478ee Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:13:00 -0500 Subject: [PATCH 449/766] Update sv_signature.mk --- signatures/sv_signature.mk | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 6a59f0c4..096144f2 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -5,7 +5,8 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bed) +signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bed) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bedpe) define signature-sv sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf @@ -15,6 +16,12 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf $(MIN_SIZE) \ $(MAX_SIZE) \ $$(@)") + +sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + echo 'chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n' > \ + $$(@) && \ + cat $$(<) >> $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From bb97956ada036f8a8bad05e96260d460a62b8b9f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:14:30 -0500 Subject: [PATCH 450/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 096144f2..6c561663 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -19,7 +19,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - echo 'chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n' > \ + echo \"chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ $$(@) && \ cat $$(<) >> $$(@)") From d8e6b85aa89e49a186811e55693c70d742fc1c7c Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:15:44 -0500 Subject: [PATCH 451/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 6c561663..a228e006 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -19,7 +19,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - echo \"chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ + echo \"chrom1\\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ $$(@) && \ cat $$(<) >> $$(@)") From 05f04247139cfc4fc7daf750d218548f0ccdcdf2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:16:07 -0500 Subject: [PATCH 452/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index a228e006..6c561663 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -19,7 +19,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - echo \"chrom1\\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ + echo \"chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ $$(@) && \ cat $$(<) >> $$(@)") From f14c953912c36d047538e414bd3e9ebcb603a254 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:17:10 -0500 Subject: [PATCH 453/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 6c561663..a228e006 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -19,7 +19,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - echo \"chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ + echo \"chrom1\\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ $$(@) && \ cat $$(<) >> $$(@)") From 71d9580ebcecdc3303956168f930915c406931e1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:18:42 -0500 Subject: [PATCH 454/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index a228e006..e2a95bc4 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -19,7 +19,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - echo \"chrom1\\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\n\" > \ + echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ cat $$(<) >> $$(@)") From 3f3b525a6ff1d81b59a9eb19a82b1626148f6b21 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:21:07 -0500 Subject: [PATCH 455/766] Update sv_signature.mk --- signatures/sv_signature.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index e2a95bc4..6c0a00b3 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -5,11 +5,11 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bed) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).manta.bedpe) +signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) define signature-sv -sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf +sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ SURVIVOR vcftobed \ $$(<) \ @@ -17,7 +17,7 @@ sv_signature/$1_$2/$1_$2.manta.bed : vcf/$1_$2.manta_sv.vcf $(MAX_SIZE) \ $$(@)") -sv_signature/$1_$2/$1_$2.manta.bedpe : sv_signature/$1_$2/$1_$2.manta.bed +sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ From 3eaae74a2cb8ba6358a372744e7b878ef778d317 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:25:35 -0500 Subject: [PATCH 456/766] ++ --- signatures/sv_signature.mk | 7 ++++++- vcf_tools/merge_sv.mk | 4 +++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 6c0a00b3..6d793072 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -22,11 +22,16 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ cat $$(<) >> $$(@)") + + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) - + +..DUMMY := $(shell mkdir -p version; \ + $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/sv_signature.txt; \ + ) .DELETE_ON_ERROR: .SECONDARY: .PHONY: signature_sv diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 08d85a74..c876ca9b 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -29,7 +29,9 @@ vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/$1_$2.merged_sv.vcf endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) - + +..DUMMY := $(shell mkdir -p version; \ + $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/merge_sv.txt;) .DELETE_ON_ERROR: .SECONDARY: .PHONY: merge_sv From de2d6638eb604e0b3d0ccd5c59b4324e4227b5fa Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 15:56:48 -0500 Subject: [PATCH 457/766] ++ --- scripts/sv_signature.py | 34 ++++++++++++++++++++++++++++++++++ signatures/sv_signature.mk | 15 ++++++++++++--- 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 scripts/sv_signature.py diff --git a/scripts/sv_signature.py b/scripts/sv_signature.py new file mode 100644 index 00000000..ec214155 --- /dev/null +++ b/scripts/sv_signature.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +""" extract structural variant signatures +""" + +import argparse +import viola +import numpy as np +import pandas as pd +import os +import sys + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog='sv_signature.py', + description='SV feature extraction') + parser.add_argument('--bedpe_infile', required=True, help='Sample input bedpe file') + parser.add_argument('--fragile_bed', required=True, type=argparse.FileType('r')) + parser.add_argument('--timing_bedgraph', required=True, type=argparse.FileType('r')) + parser.add_argument('--sv_definitions', required=True, type=argparse.FileType('r')) + parser.add_argument('--text_outfile', required=True, type=argparse.FileType('w')) + + args = parser.parse_args() + + sample_bedpe = viola.viola.read_bedpe(args.bedpe_infile) + bed_fragile = viola.read_bed(args.fragile_bed) + bedgraph_timing = viola.read_bed(args.timing_bedgraph) + + sample_bedpe.annotate_bed(bed=bed_fragile, annotation='fragile', how='flag') + sample_bedpe.annotate_bed(bed=bedgraph_timing, annotation='timing', how='value') + sample_bedpe.calculate_info('(${timingleft} + ${timingright}) / 2', 'timing') + + feature_matrix = sample_bedpe.classify_manual_svtype(definitions=args.sv_definitions, return_data_frame=True) + feature_matrix.drop('others', axis=1, inplace=True) + feature_matrix.to_csv(args.text_outfile, index=False, sep='\t') diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 6d793072..970d2fdc 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -4,10 +4,12 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 +VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) - + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) + define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ @@ -23,7 +25,14 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed $$(@) && \ cat $$(<) >> $$(@)") - +sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ + python $(SCRIPTS_DIR)/sv_signature.py \ + --bedpe_infile $$(<) \ + --fragile_bed $(HOME)/share/lib/resource_files/viola/annotation/fragile_site.hg19.bed \ + --timing_bedgraph $(HOME)/share/lib/resource_files/viola/annotation/replication_timing.bedgraph \ + --sv_definitions $(HOME)/share/lib/resource_files/viola/definitions/sv_class_default.txt \ + --text_outfile $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 17ed40954679b7dc64957b5185255238511ffe8c Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 16:02:43 -0500 Subject: [PATCH 458/766] Update sv_signature.py --- scripts/sv_signature.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/sv_signature.py b/scripts/sv_signature.py index ec214155..f6e6d7ba 100644 --- a/scripts/sv_signature.py +++ b/scripts/sv_signature.py @@ -13,11 +13,11 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(prog='sv_signature.py', description='SV feature extraction') - parser.add_argument('--bedpe_infile', required=True, help='Sample input bedpe file') - parser.add_argument('--fragile_bed', required=True, type=argparse.FileType('r')) - parser.add_argument('--timing_bedgraph', required=True, type=argparse.FileType('r')) - parser.add_argument('--sv_definitions', required=True, type=argparse.FileType('r')) - parser.add_argument('--text_outfile', required=True, type=argparse.FileType('w')) + parser.add_argument('--bedpe_infile', required=True) + parser.add_argument('--fragile_bed', required=True) + parser.add_argument('--timing_bedgraph', required=True) + parser.add_argument('--sv_definitions', required=True) + parser.add_argument('--text_outfile', required=True) args = parser.parse_args() From 302f754670e96b1ec3ab6386147fd49ea7df7b50 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 16:30:20 -0500 Subject: [PATCH 459/766] ++ --- scripts/sv_signature.py | 6 +++--- signatures/sv_signature.mk | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/sv_signature.py b/scripts/sv_signature.py index f6e6d7ba..4835cee3 100644 --- a/scripts/sv_signature.py +++ b/scripts/sv_signature.py @@ -29,6 +29,6 @@ sample_bedpe.annotate_bed(bed=bedgraph_timing, annotation='timing', how='value') sample_bedpe.calculate_info('(${timingleft} + ${timingright}) / 2', 'timing') - feature_matrix = sample_bedpe.classify_manual_svtype(definitions=args.sv_definitions, return_data_frame=True) - feature_matrix.drop('others', axis=1, inplace=True) - feature_matrix.to_csv(args.text_outfile, index=False, sep='\t') + feature_matrix = sample_bedpe.classify_manual_svtype(definitions=args.sv_definitions) + feature_matrix.drop('others', inplace=True) + feature_matrix.to_csv(args.text_outfile, index=True, sep='\t') diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 970d2fdc..621d5453 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -29,9 +29,9 @@ sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ python $(SCRIPTS_DIR)/sv_signature.py \ --bedpe_infile $$(<) \ - --fragile_bed $(HOME)/share/lib/resource_files/viola/annotation/fragile_site.hg19.bed \ - --timing_bedgraph $(HOME)/share/lib/resource_files/viola/annotation/replication_timing.bedgraph \ - --sv_definitions $(HOME)/share/lib/resource_files/viola/definitions/sv_class_default.txt \ + --fragile_bed /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed \ + --timing_bedgraph /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph \ + --sv_definitions /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt \ --text_outfile $$(@)") endef From e20993f15498dcb774f10dd1364ad3c64b8a0589 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 16:57:13 -0500 Subject: [PATCH 460/766] ++ --- config.inc | 1 + scripts/sv_signature.R | 31 +++++++++++++++++++++++++++++++ signatures/sv_signature.mk | 21 ++++++++++++++------- 3 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 scripts/sv_signature.R diff --git a/config.inc b/config.inc index 9eee85e8..b239b51e 100644 --- a/config.inc +++ b/config.inc @@ -33,6 +33,7 @@ GRIDSS_ENV = $(HOME)/share/usr/env/gridss-2.13.2 SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 +VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R new file mode 100644 index 00000000..2d2a8548 --- /dev/null +++ b/scripts/sv_signature.R @@ -0,0 +1,31 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_names", default = NA, type = 'character', help = "sample names"), + make_option("--output_file", default = NA, type = 'character', help = "output file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) + feature_counts = list() + for (i in 1:length(sample_names)) { + feature_counts[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_names[i], "/", sample_names[i], ".merged.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(sv_class = X1, + sv_count = manual_sv_type) %>% + dplyr::mutate(sample_name = sample_names[i]) + } + feature_counts = do.call(bind_rows, feature_counts) + write_tsv(x = feature_counts, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) +} diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 621d5453..27cd43f6 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -4,11 +4,14 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 +FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed +REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph +SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ + sv_signature/feature_matrix.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -29,18 +32,22 @@ sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ python $(SCRIPTS_DIR)/sv_signature.py \ --bedpe_infile $$(<) \ - --fragile_bed /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed \ - --timing_bedgraph /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph \ - --sv_definitions /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt \ + --fragile_bed $(FRAGILE_SITES) \ + --timing_bedgraph $(REPLICATION_TIMING) \ + --sv_definitions $(SV_DEFINITIONS) \ --text_outfile $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) + +sv_signature/feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 1 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") + ..DUMMY := $(shell mkdir -p version; \ - $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/sv_signature.txt; \ - ) + $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/sv_signature.txt;) .DELETE_ON_ERROR: .SECONDARY: .PHONY: signature_sv From 16fd00ae77bc77b27e9b9071108cebaf604bbd9b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 13 Nov 2022 17:15:42 -0500 Subject: [PATCH 461/766] Update sv_signature.mk --- signatures/sv_signature.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 27cd43f6..31098b68 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -4,8 +4,8 @@ LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.hg19.bed -REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.bedgraph +FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.b37.bed +REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.b37.bedgraph SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ From 18f19a1d8efdbae136dab5b3c8ad02661bc7a54b Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 18:50:05 -0500 Subject: [PATCH 462/766] Update sv_signature.mk --- signatures/sv_signature.mk | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 31098b68..e68bb7c5 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -7,9 +7,13 @@ MAX_SIZE = 10000000000000000 FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.b37.bed REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.b37.bedgraph SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt +CLUSTER_SV = $(VIOLA_ENV)/opt/ClusterSV/R +CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hs37d5.chrom_sizes +CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hs37d5_centromere_and_telomere_coords.txt signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.tsv) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ sv_signature/feature_matrix.txt @@ -27,7 +31,17 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ cat $$(<) >> $$(@)") - + +sv_signature/$1_$2/$1_$2.merged.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ + $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ + -bedpe $$(<) \ + -chr $(CHROM_SIZES) \ + -cen_telo $(CENTROMERE_TELOMERE) \ + -out sv_signature/$1_$2/ \ + -n 4 \ + > $$(@)") + sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ python $(SCRIPTS_DIR)/sv_signature.py \ From 6662f9768351d8310401acea4b407a2cd0e7891d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 19:14:54 -0500 Subject: [PATCH 463/766] Update sv_signature.mk --- signatures/sv_signature.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index e68bb7c5..38cf353d 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -8,8 +8,8 @@ FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_sit REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.b37.bedgraph SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt CLUSTER_SV = $(VIOLA_ENV)/opt/ClusterSV/R -CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hs37d5.chrom_sizes -CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hs37d5_centromere_and_telomere_coords.txt +CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hg19.chrom_sizes +CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_telomere_coords.txt signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ From d3fb47b65ef6f25a33592478bba6714f2ca1790e Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 19:24:02 -0500 Subject: [PATCH 464/766] Update sv_signature.mk --- signatures/sv_signature.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 38cf353d..97063428 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -13,7 +13,7 @@ CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.tsv) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ sv_signature/feature_matrix.txt @@ -32,15 +32,15 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed $$(@) && \ cat $$(<) >> $$(@)") -sv_signature/$1_$2/$1_$2.merged.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ -bedpe $$(<) \ -chr $(CHROM_SIZES) \ -cen_telo $(CENTROMERE_TELOMERE) \ - -out sv_signature/$1_$2/ \ + -out sv_signature/$1_$2/$1_$2 \ -n 4 \ - > $$(@)") + > sv_signature/$1_$2/$1_$2.merged.log") sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From d6d558335fe536698de8b0f37bf02db6baa8d676 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 19:33:29 -0500 Subject: [PATCH 465/766] Update sv_signature.mk --- signatures/sv_signature.mk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 97063428..926cf3f9 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -13,7 +13,7 @@ CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.taskcomplete) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ sv_signature/feature_matrix.txt @@ -32,7 +32,7 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed $$(@) && \ cat $$(<) >> $$(@)") -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe +sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ -bedpe $$(<) \ @@ -40,7 +40,8 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1 -cen_telo $(CENTROMERE_TELOMERE) \ -out sv_signature/$1_$2/$1_$2 \ -n 4 \ - > sv_signature/$1_$2/$1_$2.merged.log") + > sv_signature/$1_$2/$1_$2.merged.log && \ + echo 'task completed' > $$(@)") sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From cde38ed01c97ae740b438d34bc7f75bf2b5bbe97 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:28:20 -0500 Subject: [PATCH 466/766] ++ --- {signatures => scripts}/extract_signatures.R | 0 scripts/sv_signature.R | 46 +++++++++++++++++++- signatures/sv_signature.mk | 10 ++++- 3 files changed, 53 insertions(+), 3 deletions(-) rename {signatures => scripts}/extract_signatures.R (100%) diff --git a/signatures/extract_signatures.R b/scripts/extract_signatures.R similarity index 100% rename from signatures/extract_signatures.R rename to scripts/extract_signatures.R diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 2d2a8548..012007bf 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -11,12 +11,53 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--sample_names", default = NA, type = 'character', help = "sample names"), - make_option("--output_file", default = NA, type = 'character', help = "output file")) + make_option("--output_file", default = NA, type = 'character', help = "output file"), + make_option("--p_value", default = NA, type = 'character', help = "cluster sv p-value"), + make_option("--n_sv", default = NA, type = 'character', help = "number of sv")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options -if (as.numeric(opt$option)==1) { + +else if (as.numeric(opt$option)==2) { + bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(chrom1 = X1, + start1 = X2, + end1 = X3, + chrom2 = X4, + start2 = X5, + end2 = X6, + sv_id = X7, + pe_support = X8, + strand1 = X9, + strand2 = X10, + n_svs = X12, + p_value = X17) %>% + dplyr::mutate(p_value = case_when( + is.na(p_value) ~ 1, + TRUE ~ p_value + )) %>% + dplyr::left_join(bedpe_org %>% + dplyr::select(chrom1, start1, end1, chrom2, start2, end2, svclass), + by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% + dplyr::filter(!is.na(svclass)) %>% + dplyr::mutate(is_clustered = case_when( + p_value < as.numeric(opt$p_value) & n_svs > 2*as.numeric(opt$n_sv) ~ "Cplx2", + p_value < as.numeric(opt$p_value) & n_svs > as.numeric(opt$n_sv) ~ "Cplx1", + TRUE ~ "" + )) %>% + dplyr::mutate(svclass = case_when( + svclass == "TRA" & is_clustered != "" ~ paste0(is_clustered, svclass), + TRUE ~ svclass + )) %>% + dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) + + write_tsv(x = bedpe_cli, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==2) { sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) feature_counts = list() for (i in 1:length(sample_names)) { @@ -28,4 +69,5 @@ if (as.numeric(opt$option)==1) { } feature_counts = do.call(bind_rows, feature_counts) write_tsv(x = feature_counts, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + } diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 926cf3f9..c4bdf2b2 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -14,6 +14,7 @@ CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.taskcomplete) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ sv_signature/feature_matrix.txt @@ -42,6 +43,13 @@ sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.b -n 4 \ > sv_signature/$1_$2/$1_$2.merged.log && \ echo 'task completed' > $$(@)") + +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.taskcomplete + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --option 1 \ + --sample_names $1_$2 \ + --output_file $$(@)") sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ @@ -58,7 +66,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ sv_signature/feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 1 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From 13640a458a580319888a1703336b2e435de03e07 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:30:56 -0500 Subject: [PATCH 467/766] ++ --- scripts/sv_signature.R | 7 +++---- signatures/sv_signature.mk | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 012007bf..e13a0eab 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -12,14 +12,13 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--sample_names", default = NA, type = 'character', help = "sample names"), make_option("--output_file", default = NA, type = 'character', help = "output file"), - make_option("--p_value", default = NA, type = 'character', help = "cluster sv p-value"), - make_option("--n_sv", default = NA, type = 'character', help = "number of sv")) + make_option("--p_value", default = "0.05", type = 'character', help = "cluster sv p-value"), + make_option("--n_sv", default = "50", type = 'character', help = "number of sv")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options - -else if (as.numeric(opt$option)==2) { +if (as.numeric(opt$option)==2) { bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index c4bdf2b2..d28c55a0 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -49,7 +49,9 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ --option 1 \ --sample_names $1_$2 \ - --output_file $$(@)") + --output_file $$(@) \ + --p_value 0.05 \ + --n_sv 50") sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From 813aff86e5deb3ca2d69d8f29ec268480ec5ac49 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:31:37 -0500 Subject: [PATCH 468/766] Update sv_signature.mk --- signatures/sv_signature.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index d28c55a0..bb5ed927 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -10,6 +10,8 @@ SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_ CLUSTER_SV = $(VIOLA_ENV)/opt/ClusterSV/R CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hg19.chrom_sizes CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_telomere_coords.txt +P_VALUE = 0.05 +N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ @@ -50,8 +52,8 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/ --option 1 \ --sample_names $1_$2 \ --output_file $$(@) \ - --p_value 0.05 \ - --n_sv 50") + --p_value $(P_VALUE) \ + --n_sv $(N_SV)") sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From 9ba46332f3ee80c3f456f55456d3495deaa61d9a Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:43:17 -0500 Subject: [PATCH 469/766] Update sv_signature.R --- scripts/sv_signature.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index e13a0eab..6398b25b 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -19,6 +19,7 @@ arguments = parse_args(parser, positional_arguments = T) opt = arguments$options if (as.numeric(opt$option)==2) { + sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% From 3dfe45ab50200f4db5592f00a5909b6b89eb2545 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:47:13 -0500 Subject: [PATCH 470/766] Update sv_signature.R --- scripts/sv_signature.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 6398b25b..4bb852f0 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -18,8 +18,8 @@ parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options -if (as.numeric(opt$option)==2) { - sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) +if (as.numeric(opt$option)==1) { + sample_names = as.character(opt$sample_names) bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% From 9501d4c9a1ddcfe4e05528d2fe1c41c2cfa81994 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 14 Nov 2022 20:53:28 -0500 Subject: [PATCH 471/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index bb5ed927..86eb3e72 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -55,7 +55,7 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/ --p_value $(P_VALUE) \ --n_sv $(N_SV)") -sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.bedpe +sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ python $(SCRIPTS_DIR)/sv_signature.py \ --bedpe_infile $$(<) \ From 4a9f177cd599bea7ce2ad25842358292c74e42ba Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 19 Nov 2022 22:02:04 -0500 Subject: [PATCH 472/766] Update manta_tumor_normal.mk --- sv_callers/manta_tumor_normal.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sv_callers/manta_tumor_normal.mk b/sv_callers/manta_tumor_normal.mk index 3128f5ca..000bd926 100644 --- a/sv_callers/manta_tumor_normal.mk +++ b/sv_callers/manta_tumor_normal.mk @@ -7,11 +7,11 @@ manta : $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).manta_sv.vcf) define manta-tumor-normal manta/$1_$2/runWorkflow.py : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai - $$(INIT) $$(CONFIG_MANTA) $$(CONFIG_MANTA_OPTS) --tumorBam $$< --normalBam $$(<<) --runDir $$(@D) + $$(INIT) $$(CONFIG_MANTA) $$(CONFIG_MANTA_OPTS) --tumorBam $$(<) --normalBam $$(<<) --runDir $$(@D) manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py - $$(call RUN,-n 8 -s 2G -m 4G,"set -o pipefail && \ - python $$< -m local -j 8 && touch $$@") + $$(call RUN,-n 8 -s 2G -m 4G -w 72:00:00,"set -o pipefail && \ + python $$(<) -m local -j 8 && touch $$(@)") manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp From 0edd86adc1326260412933cdc7c115190e0730bb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 19 Nov 2022 22:02:30 -0500 Subject: [PATCH 473/766] Update manta_tumor_normal.mk --- sv_callers/manta_tumor_normal.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sv_callers/manta_tumor_normal.mk b/sv_callers/manta_tumor_normal.mk index 000bd926..1c24fa3e 100644 --- a/sv_callers/manta_tumor_normal.mk +++ b/sv_callers/manta_tumor_normal.mk @@ -16,7 +16,7 @@ manta/$1_$2.manta_timestamp : manta/$1_$2/runWorkflow.py manta/$1_$2/results/variants/somaticSV.vcf.gz : manta/$1_$2.manta_timestamp vcf/$1_$2.manta_sv.vcf : manta/$1_$2/results/variants/somaticSV.vcf.gz - $$(INIT) zcat $$< > $$@ + $$(INIT) zcat $$(<) > $$(@) endef $(foreach pair,$(SAMPLE_PAIRS), \ From fa8d467c9a6471b1ee87090b5b4de537f89dbb25 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 21 Nov 2022 19:36:40 -0500 Subject: [PATCH 474/766] ++ --- config.inc | 1 + scripts/sv_signature.R | 143 +++++++++++++++++++++++-------------- signatures/sv_signature.mk | 78 +++++++++++--------- 3 files changed, 133 insertions(+), 89 deletions(-) diff --git a/config.inc b/config.inc index b239b51e..35ee33b3 100644 --- a/config.inc +++ b/config.inc @@ -34,6 +34,7 @@ SVABA_ENV ?= $(HOME)/share/usr/env/svaba-1.1.0 SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 +SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 4bb852f0..cefad910 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -4,70 +4,105 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("signature.tools.lib")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) } optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), - make_option("--sample_names", default = NA, type = 'character', help = "sample names"), - make_option("--output_file", default = NA, type = 'character', help = "output file"), - make_option("--p_value", default = "0.05", type = 'character', help = "cluster sv p-value"), - make_option("--n_sv", default = "50", type = 'character', help = "number of sv")) + make_option("--sample_name", default = NA, type = 'character', help = "sample name"), + make_option("--input_file", default = NA, type = 'character', help = "input file") + make_option("--output_file", default = NA, type = 'character', help = "output file")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) opt = arguments$options if (as.numeric(opt$option)==1) { - sample_names = as.character(opt$sample_names) - bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::select(chrom1 = X1, - start1 = X2, - end1 = X3, - chrom2 = X4, - start2 = X5, - end2 = X6, - sv_id = X7, - pe_support = X8, - strand1 = X9, - strand2 = X10, - n_svs = X12, - p_value = X17) %>% - dplyr::mutate(p_value = case_when( - is.na(p_value) ~ 1, - TRUE ~ p_value - )) %>% - dplyr::left_join(bedpe_org %>% - dplyr::select(chrom1, start1, end1, chrom2, start2, end2, svclass), - by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% - dplyr::filter(!is.na(svclass)) %>% - dplyr::mutate(is_clustered = case_when( - p_value < as.numeric(opt$p_value) & n_svs > 2*as.numeric(opt$n_sv) ~ "Cplx2", - p_value < as.numeric(opt$p_value) & n_svs > as.numeric(opt$n_sv) ~ "Cplx1", - TRUE ~ "" - )) %>% - dplyr::mutate(svclass = case_when( - svclass == "TRA" & is_clustered != "" ~ paste0(is_clustered, svclass), - TRUE ~ svclass - )) %>% - dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) - - write_tsv(x = bedpe_cli, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==2) { - sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) - feature_counts = list() - for (i in 1:length(sample_names)) { - feature_counts[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_names[i], "/", sample_names[i], ".merged.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::rename(sv_class = X1, - sv_count = manual_sv_type) %>% - dplyr::mutate(sample_name = sample_names[i]) - } - feature_counts = do.call(bind_rows, feature_counts) - write_tsv(x = feature_counts, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + sample_name = as.character(opt$sample_name) + sv_bedpe = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample = sample_name) %>% + dplyr::select(-svclass) + res_list = bedpeToRearrCatalogue(sv_bedpe %>% data.frame()) + catalogues_mutations = data.frame(row.names = rownames(res_list$rearr_catalogue), stringsAsFactors = FALSE) + bedpecolumns = c("chrom1", "start1", "end1", "chrom2", "start2", "end2" , "sample","svclass","id", "is.clustered", "length") + catalogues_mutations = cbind(catalogues_mutations,res_list$rearr_catalogue) + mtype_mutations = signature.tools.lib:::getTypeOfMutationsFromChannels(catalogues_mutations) + exposureFilterType = "fixedThreshold" + threshold_percent = 5 + optimisation_method = "KLD" + useBootstrap = FALSE + nboot = 1000 + threshold_p.value = 0.05 + nparallel = 4 + randomSeed = 1 + fit = Fit(catalogues = catalogues_mutations, + signatures = signature.tools.lib:::RefSigv1_rearr, + exposureFilterType = exposureFilterType, + threshold_percent = threshold_percent, + method = optimisation_method, + useBootstrap = useBootstrap, + nboot = nboot, + threshold_p.value = threshold_p.value, + nparallel = nparallel, + randomSeed = randomSeed, + verbose = TRUE) + readr::write_tsv(x = fit$catalogues, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) + readr::write_tsv(x = fit$catalogues, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) } + +#if (as.numeric(opt$option)==1) { +# sample_names = as.character(opt$sample_names) +# bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% +# readr::type_convert() +# bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% +# readr::type_convert() %>% +# dplyr::select(chrom1 = X1, +# start1 = X2, +# end1 = X3, +# chrom2 = X4, +# start2 = X5, +# end2 = X6, +# sv_id = X7, +# pe_support = X8, +# strand1 = X9, +# strand2 = X10, +# n_svs = X12, +# p_value = X17) %>% +# dplyr::mutate(p_value = case_when( +# is.na(p_value) ~ 1, +# TRUE ~ p_value +# )) %>% +# dplyr::left_join(bedpe_org %>% +# dplyr::select(chrom1, start1, end1, chrom2, start2, end2, svclass), +# by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% +# dplyr::filter(!is.na(svclass)) %>% +# dplyr::mutate(is_clustered = case_when( +# p_value < as.numeric(opt$p_value) & n_svs > 2*as.numeric(opt$n_sv) ~ "Cplx2", +# p_value < as.numeric(opt$p_value) & n_svs > as.numeric(opt$n_sv) ~ "Cplx1", +# TRUE ~ "" +# )) %>% +# dplyr::mutate(svclass = case_when( +# svclass == "TRA" & is_clustered != "" ~ paste0(is_clustered, svclass), +# TRUE ~ svclass +# )) %>% +# dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) +# +# write_tsv(x = bedpe_cli, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) +# +#} else if (as.numeric(opt$option)==2) { +# sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) +# feature_counts = list() +# for (i in 1:length(sample_names)) { +# feature_counts[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_names[i], "/", sample_names[i], ".merged.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% +# readr::type_convert() %>% +# dplyr::rename(sv_class = X1, +# sv_count = manual_sv_type) %>% +# dplyr::mutate(sample_name = sample_names[i]) +# } +# feature_counts = do.call(bind_rows, feature_counts) +# write_tsv(x = feature_counts, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) +# +#} diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 86eb3e72..4e14880f 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -15,10 +15,11 @@ N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.taskcomplete) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ - sv_signature/feature_matrix.txt + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.taskcomplete) \ +# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ +# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ +# sv_signature/feature_matrix.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -34,43 +35,50 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ cat $$(<) >> $$(@)") + +sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 4 -s 2G -m 4G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --sample_name $1_$2\ + --input_file $$(<)\ + --output_file sv_signature/$1_$2/$1_$2.merged") -sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.bedpe - $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ - $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ - -bedpe $$(<) \ - -chr $(CHROM_SIZES) \ - -cen_telo $(CENTROMERE_TELOMERE) \ - -out sv_signature/$1_$2/$1_$2 \ - -n 4 \ - > sv_signature/$1_$2/$1_$2.merged.log && \ - echo 'task completed' > $$(@)") - -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.taskcomplete - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 1 \ - --sample_names $1_$2 \ - --output_file $$(@) \ - --p_value $(P_VALUE) \ - --n_sv $(N_SV)") - -sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ - python $(SCRIPTS_DIR)/sv_signature.py \ - --bedpe_infile $$(<) \ - --fragile_bed $(FRAGILE_SITES) \ - --timing_bedgraph $(REPLICATION_TIMING) \ - --sv_definitions $(SV_DEFINITIONS) \ - --text_outfile $$(@)") +#sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.bedpe +# $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ +# $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ +# -bedpe $$(<) \ +# -chr $(CHROM_SIZES) \ +# -cen_telo $(CENTROMERE_TELOMERE) \ +# -out sv_signature/$1_$2/$1_$2 \ +# -n 4 \ +# > sv_signature/$1_$2/$1_$2.merged.log && \ +# echo 'task completed' > $$(@)") +# +#sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.taskcomplete +# $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ +# $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ +# --option 1 \ +# --sample_names $1_$2 \ +# --output_file $$(@) \ +# --p_value $(P_VALUE) \ +# --n_sv $(N_SV)") +# +#sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe +# $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ +# python $(SCRIPTS_DIR)/sv_signature.py \ +# --bedpe_infile $$(<) \ +# --fragile_bed $(FRAGILE_SITES) \ +# --timing_bedgraph $(REPLICATION_TIMING) \ +# --sv_definitions $(SV_DEFINITIONS) \ +# --text_outfile $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) -sv_signature/feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) - $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") +#sv_signature/feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) +# $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ +# $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From 565fed9f991c9427b1ac70c5ffbf2d2fbf5f0fe9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 21 Nov 2022 19:39:11 -0500 Subject: [PATCH 475/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index cefad910..7b284ab4 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -12,7 +12,7 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--sample_name", default = NA, type = 'character', help = "sample name"), - make_option("--input_file", default = NA, type = 'character', help = "input file") + make_option("--input_file", default = NA, type = 'character', help = "input file"), make_option("--output_file", default = NA, type = 'character', help = "output file")) parser = OptionParser(usage = "%prog", option_list = optList) arguments = parse_args(parser, positional_arguments = T) From f525107e293c8c84110266164e44591879d3cc68 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 21 Nov 2022 19:40:10 -0500 Subject: [PATCH 476/766] Update sv_signature.mk --- signatures/sv_signature.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 4e14880f..f99fb666 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -39,8 +39,9 @@ sv_signature/$1_$2/$1_$2.merged.bedpe : sv_signature/$1_$2/$1_$2.merged.bed sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 4 -s 2G -m 4G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --sample_name $1_$2\ - --input_file $$(<)\ + --option 1 \ + --sample_name $1_$2 \ + --input_file $$(<) \ --output_file sv_signature/$1_$2/$1_$2.merged") #sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.bedpe From a59ca3add0236055e73611fafb6c152c2c48b605 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 21 Nov 2022 19:47:23 -0500 Subject: [PATCH 477/766] Update sv_signature.R --- scripts/sv_signature.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 7b284ab4..790ebe4b 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -48,8 +48,15 @@ if (as.numeric(opt$option)==1) { nparallel = nparallel, randomSeed = randomSeed, verbose = TRUE) - readr::write_tsv(x = fit$catalogues, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) - readr::write_tsv(x = fit$catalogues, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) + x = dplyr::tibble(feature_name = rownames(fit$catalogues) + feature_count = as.vector(fit$catalogues[,1])) %>% + dplyr::mutate(sample_name = sample_name) + readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) + + x = dplyr::tibble(signature_name = colnames(fit$exposures), + signature_exposure = as.vector(fit$exposures[1,])) %>% + dplyr::mutate(sample_name = sample_name) + readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) } From 4d3b731e0f2eea8b463736cb932f607badda1d70 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 21 Nov 2022 19:48:39 -0500 Subject: [PATCH 478/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 790ebe4b..071d417e 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -48,7 +48,7 @@ if (as.numeric(opt$option)==1) { nparallel = nparallel, randomSeed = randomSeed, verbose = TRUE) - x = dplyr::tibble(feature_name = rownames(fit$catalogues) + x = dplyr::tibble(feature_name = rownames(fit$catalogues), feature_count = as.vector(fit$catalogues[,1])) %>% dplyr::mutate(sample_name = sample_name) readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) From 519be53e2733895142e6d6a755c7040279bf53e4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:30:17 -0500 Subject: [PATCH 479/766] Update sv_signature.mk --- signatures/sv_signature.mk | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index f99fb666..b9316ac1 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -15,8 +15,8 @@ N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) -# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.taskcomplete) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe.sv_clusters_and_footprints.tsv) # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ # sv_signature/feature_matrix.txt @@ -44,17 +44,16 @@ sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged. --input_file $$(<) \ --output_file sv_signature/$1_$2/$1_$2.merged") -#sv_signature/$1_$2/$1_$2.merged.taskcomplete : sv_signature/$1_$2/$1_$2.merged.bedpe -# $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ -# $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ -# -bedpe $$(<) \ -# -chr $(CHROM_SIZES) \ -# -cen_telo $(CENTROMERE_TELOMERE) \ -# -out sv_signature/$1_$2/$1_$2 \ -# -n 4 \ -# > sv_signature/$1_$2/$1_$2.merged.log && \ -# echo 'task completed' > $$(@)") -# +sv_signature/$1_$2/$1_$2.merged.bedpe.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ + $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ + -bedpe $$(<) \ + -chr $(CHROM_SIZES) \ + -cen_telo $(CENTROMERE_TELOMERE) \ + -out sv_signature/$1_$2/$1_$2 \ + -n 4 \ + > sv_signature/$1_$2/$1_$2.merged.log") + #sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.taskcomplete # $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ # $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ From 456fa52d41e0a2b415059a8abee1ab39daf11502 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:35:28 -0500 Subject: [PATCH 480/766] Update sv_signature.mk --- signatures/sv_signature.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index b9316ac1..ab352467 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -44,7 +44,7 @@ sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged. --input_file $$(<) \ --output_file sv_signature/$1_$2/$1_$2.merged") -sv_signature/$1_$2/$1_$2.merged.bedpe.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe +sv_signature/$1_$2/$1_$2.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ -bedpe $$(<) \ @@ -54,7 +54,7 @@ sv_signature/$1_$2/$1_$2.merged.bedpe.sv_clusters_and_footprints.tsv : sv_signat -n 4 \ > sv_signature/$1_$2/$1_$2.merged.log") -#sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.taskcomplete +#sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.bedpe.sv_clusters_and_footprints.tsv # $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ # $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ # --option 1 \ From 74721f6c69a2a3f1a8361d235fb7673d41983573 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:37:40 -0500 Subject: [PATCH 481/766] Update sv_signature.mk --- signatures/sv_signature.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index ab352467..af4ddddf 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -16,7 +16,7 @@ N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe.sv_clusters_and_footprints.tsv) + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ # sv_signature/feature_matrix.txt @@ -44,13 +44,13 @@ sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged. --input_file $$(<) \ --output_file sv_signature/$1_$2/$1_$2.merged") -sv_signature/$1_$2/$1_$2.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ -bedpe $$(<) \ -chr $(CHROM_SIZES) \ -cen_telo $(CENTROMERE_TELOMERE) \ - -out sv_signature/$1_$2/$1_$2 \ + -out sv_signature/$1_$2/$1_$2.merged \ -n 4 \ > sv_signature/$1_$2/$1_$2.merged.log") From a9f6dfe217bf9616f4b1401690e05b08447927ae Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:41:26 -0500 Subject: [PATCH 482/766] Update sv_signature.mk --- signatures/sv_signature.mk | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index af4ddddf..535f7bb7 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -16,8 +16,8 @@ N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) -# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ # sv_signature/feature_matrix.txt @@ -54,15 +54,15 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1 -n 4 \ > sv_signature/$1_$2/$1_$2.merged.log") -#sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.bedpe.sv_clusters_and_footprints.tsv -# $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ -# $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ -# --option 1 \ -# --sample_names $1_$2 \ -# --output_file $$(@) \ -# --p_value $(P_VALUE) \ -# --n_sv $(N_SV)") -# +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --option 1 \ + --sample_names $1_$2 \ + --output_file $$(@) \ + --p_value $(P_VALUE) \ + --n_sv $(N_SV)") + #sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe # $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ # python $(SCRIPTS_DIR)/sv_signature.py \ From cc2f587b803608278eaaddb45e0f20f60da5ebec Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:51:27 -0500 Subject: [PATCH 483/766] ++ --- scripts/sv_signature.R | 79 +++++++++++++++++++------------------- signatures/sv_signature.mk | 10 ++--- 2 files changed, 42 insertions(+), 47 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 071d417e..25229b25 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -58,48 +58,47 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(sample_name = sample_name) readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) +} else if (as.numeric(opt$option)==2) { + sample_names = as.character(opt$sample_names) + bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::filter(chrom1 != "Y") %>% + dplyr::filter(chrom2 != "Y") %>% + readr::type_convert() + bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(chrom1 = X1, + start1 = X2, + end1 = X3, + chrom2 = X4, + start2 = X5, + end2 = X6, + n_svs = X12, + p_value = X17) %>% + dplyr::mutate(p_value = as.numeric(p_value)) %>% + dplyr::mutate(p_value = case_when( + is.na(p_value) ~ 1, + TRUE ~ p_value)) + bedpe_org = bedpe_org %>% + dplyr::left_join(bedpe_cli, by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% + dplyr::mutate(is_clustered = case_when( + p_value<.05 & n_svs>=15 ~ "c1", + TRUE ~ "non_clustered" + )) %>% + dplyr::mutate(is_clustered = case_when( + p_value<.05 & n_svs>=50 ~ "c2", + TRUE ~ is_clustered + )) %>% + dplyr::mutate(svclass = case_when( + svclass == "TRA" & is_clustered == "c1" ~ "c1TRA", + svclass == "TRA" & is_clustered == "c2" ~ "c2TRA", + svclass == "INV" & (is_clustered == "c1" | is_clustered == "c2") ~ "cINV", + TRUE ~ svclass + )) %>% + dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) + write_tsv(x = bedpe_org, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) } -#if (as.numeric(opt$option)==1) { -# sample_names = as.character(opt$sample_names) -# bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% -# readr::type_convert() -# bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% -# readr::type_convert() %>% -# dplyr::select(chrom1 = X1, -# start1 = X2, -# end1 = X3, -# chrom2 = X4, -# start2 = X5, -# end2 = X6, -# sv_id = X7, -# pe_support = X8, -# strand1 = X9, -# strand2 = X10, -# n_svs = X12, -# p_value = X17) %>% -# dplyr::mutate(p_value = case_when( -# is.na(p_value) ~ 1, -# TRUE ~ p_value -# )) %>% -# dplyr::left_join(bedpe_org %>% -# dplyr::select(chrom1, start1, end1, chrom2, start2, end2, svclass), -# by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% -# dplyr::filter(!is.na(svclass)) %>% -# dplyr::mutate(is_clustered = case_when( -# p_value < as.numeric(opt$p_value) & n_svs > 2*as.numeric(opt$n_sv) ~ "Cplx2", -# p_value < as.numeric(opt$p_value) & n_svs > as.numeric(opt$n_sv) ~ "Cplx1", -# TRUE ~ "" -# )) %>% -# dplyr::mutate(svclass = case_when( -# svclass == "TRA" & is_clustered != "" ~ paste0(is_clustered, svclass), -# TRUE ~ svclass -# )) %>% -# dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) -# -# write_tsv(x = bedpe_cli, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -# -#} else if (as.numeric(opt$option)==2) { +#else if (as.numeric(opt$option)==2) { # sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) # feature_counts = list() # for (i in 1:length(sample_names)) { diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 535f7bb7..512f9ae9 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -10,8 +10,6 @@ SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_ CLUSTER_SV = $(VIOLA_ENV)/opt/ClusterSV/R CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hg19.chrom_sizes CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_telomere_coords.txt -P_VALUE = 0.05 -N_SV = 50 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ @@ -57,11 +55,9 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1 sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 1 \ - --sample_names $1_$2 \ - --output_file $$(@) \ - --p_value $(P_VALUE) \ - --n_sv $(N_SV)") + --option 2 \ + --sample_name $1_$2 \ + --output_file $$(@)") #sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe # $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From 7ae158432718b764fa4d8b32b9aa65cbef7ee07b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:52:56 -0500 Subject: [PATCH 484/766] Update sv_signature.mk --- signatures/sv_signature.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 512f9ae9..e25b5098 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -53,11 +53,11 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1 > sv_signature/$1_$2/$1_$2.merged.log") sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 2 \ - --sample_name $1_$2 \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --option 2 \ + --sample_name $1_$2 \ + --output_file $$(@)") #sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe # $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From 14e02fd72bda750ecb9b36d99a7ea779202f8758 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:54:25 -0500 Subject: [PATCH 485/766] Update sv_signature.mk --- signatures/sv_signature.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index e25b5098..8d564969 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -53,11 +53,11 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1 > sv_signature/$1_$2/$1_$2.merged.log") sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv - $$(call RUN,-c -n 1 -s 4G -m 8G $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 2 \ - --sample_name $1_$2 \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --option 2 \ + --sample_name $1_$2 \ + --output_file $$(@)") #sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe # $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ From f66c8cfed4c4ae8801f75a0db101f5f55958a3a8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:55:55 -0500 Subject: [PATCH 486/766] Update sv_signature.R --- scripts/sv_signature.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 25229b25..2c958a6b 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -59,12 +59,12 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==2) { - sample_names = as.character(opt$sample_names) - bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sample_name = as.character(opt$sample_name) + bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% dplyr::filter(chrom1 != "Y") %>% dplyr::filter(chrom2 != "Y") %>% readr::type_convert() - bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_names, "/", sample_names, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::select(chrom1 = X1, start1 = X2, From 3c5f3e8fad3a0ea5d8b0d188a09e976995c82db8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 18:57:50 -0500 Subject: [PATCH 487/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 2c958a6b..5859a79a 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -64,7 +64,7 @@ if (as.numeric(opt$option)==1) { dplyr::filter(chrom1 != "Y") %>% dplyr::filter(chrom2 != "Y") %>% readr::type_convert() - bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".merged.sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::select(chrom1 = X1, start1 = X2, From 98311864d5651c58cc50ba502139950746e70bda Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 19:01:17 -0500 Subject: [PATCH 488/766] Update sv_signature.mk --- signatures/sv_signature.mk | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 8d564969..06e453c0 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -15,7 +15,8 @@ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merg $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.txt) # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ # sv_signature/feature_matrix.txt @@ -59,14 +60,14 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/ --sample_name $1_$2 \ --output_file $$(@)") -#sv_signature/$1_$2/$1_$2.merged.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe -# $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ -# python $(SCRIPTS_DIR)/sv_signature.py \ -# --bedpe_infile $$(<) \ -# --fragile_bed $(FRAGILE_SITES) \ -# --timing_bedgraph $(REPLICATION_TIMING) \ -# --sv_definitions $(SV_DEFINITIONS) \ -# --text_outfile $$(@)") +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ + python $(SCRIPTS_DIR)/sv_signature.py \ + --bedpe_infile $$(<) \ + --fragile_bed $(FRAGILE_SITES) \ + --timing_bedgraph $(REPLICATION_TIMING) \ + --sv_definitions $(SV_DEFINITIONS) \ + --text_outfile $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From e7bea6984ac2005778ff615dbc7f6072c27cbdd0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 19:44:49 -0500 Subject: [PATCH 489/766] ++ --- scripts/sv_signature.R | 58 ++++++++++++++++++++++++++++---------- signatures/sv_signature.mk | 11 +++++++- 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 5859a79a..258a4f8d 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -96,19 +96,47 @@ if (as.numeric(opt$option)==1) { )) %>% dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) write_tsv(x = bedpe_org, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} + +} else if (as.numeric(opt$option)==3) { + sample_name = as.character(opt$sample_name) + catalogues = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + catalogues_mutations = data.frame(catalogues %>% dplyr::select(manual_sv_type)) + colnames(catalogues_mutations) = sample_name + rownames(catalogues_mutations) = catalogues %>% .[["...1"]] + + signatures = readr::read_tsv(file = "~/share/lib/resource_files/viola/NMF/signature_matrix.txt", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + signatures_mutations = data.frame(signatures %>% dplyr::select(-`SV Type`)) + colnames(signatures_mutations) = colnames(signatures)[-1] + rownames(signatures_mutations) = signatures %>% .[["SV Type"]] + exposureFilterType = "fixedThreshold" + threshold_percent = 5 + optimisation_method = "KLD" + useBootstrap = FALSE + nboot = 1000 + threshold_p.value = 0.05 + nparallel = 4 + randomSeed = 1 + fit = Fit(catalogues = catalogues_mutations, + signatures = signatures_mutations, + exposureFilterType = exposureFilterType, + threshold_percent = threshold_percent, + method = optimisation_method, + useBootstrap = useBootstrap, + nboot = nboot, + threshold_p.value = threshold_p.value, + nparallel = nparallel, + randomSeed = randomSeed, + verbose = TRUE) + x = dplyr::tibble(feature_name = rownames(fit$catalogues), + feature_count = as.vector(fit$catalogues[,1])) %>% + dplyr::mutate(sample_name = sample_name) + readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) + + x = dplyr::tibble(signature_name = colnames(fit$exposures), + signature_exposure = as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>% + dplyr::mutate(sample_name = sample_name) + readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) -#else if (as.numeric(opt$option)==2) { -# sample_names = unlist(strsplit(x = as.character(opt$sample_names), split = " ", fixed=TRUE)) -# feature_counts = list() -# for (i in 1:length(sample_names)) { -# feature_counts[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_names[i], "/", sample_names[i], ".merged.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% -# readr::type_convert() %>% -# dplyr::rename(sv_class = X1, -# sv_count = manual_sv_type) %>% -# dplyr::mutate(sample_name = sample_names[i]) -# } -# feature_counts = do.call(bind_rows, feature_counts) -# write_tsv(x = feature_counts, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -# -#} +} diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 06e453c0..c9817762 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -16,7 +16,8 @@ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merg $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.txt) + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) # $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ # sv_signature/feature_matrix.txt @@ -68,6 +69,14 @@ sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.txt : sv_signature/$1 --timing_bedgraph $(REPLICATION_TIMING) \ --sv_definitions $(SV_DEFINITIONS) \ --text_outfile $$(@)") + +sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints_exposures.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.txt + $$(call RUN,-c -n 4 -s 2G -m 4G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ + --option 3 \ + --sample_name $1_$2 \ + --input_file $$(<) \ + --output_file sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 2ee8ece66d36e68916678071c26d534c87331897 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 19:48:21 -0500 Subject: [PATCH 490/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 258a4f8d..42588c54 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -54,7 +54,7 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) x = dplyr::tibble(signature_name = colnames(fit$exposures), - signature_exposure = as.vector(fit$exposures[1,])) %>% + as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>% dplyr::mutate(sample_name = sample_name) readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) From 22598d01079c2551f0948773957b2e66a8bb4ab7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 19:50:11 -0500 Subject: [PATCH 491/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 42588c54..018407ee 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -54,7 +54,7 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) x = dplyr::tibble(signature_name = colnames(fit$exposures), - as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>% + signature_exposure = as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>% dplyr::mutate(sample_name = sample_name) readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) From de752e09254f9c4e482c53566a3ee7d7a06868a1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 19:58:03 -0500 Subject: [PATCH 492/766] Update sv_signature.R --- scripts/sv_signature.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 018407ee..f88ffae9 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -85,7 +85,7 @@ if (as.numeric(opt$option)==1) { TRUE ~ "non_clustered" )) %>% dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=50 ~ "c2", + p_value<.05 & n_svs>=100 ~ "c2", TRUE ~ is_clustered )) %>% dplyr::mutate(svclass = case_when( From 457ce5cfa45c9395307884584562d67e8b7a6d74 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 20:55:31 -0500 Subject: [PATCH 493/766] ++ --- scripts/sv_signature.R | 2 ++ signatures/sv_signature.mk | 11 +++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index f88ffae9..53a98e71 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -139,4 +139,6 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(sample_name = sample_name) readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) +} else if (as.numeric(opt$option)==4) { + } diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index c9817762..de8cb484 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -17,9 +17,8 @@ signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merg $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) -# $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) \ -# sv_signature/feature_matrix.txt + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) \ + sv_signature/summary.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -82,9 +81,9 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) -#sv_signature/feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.txt) -# $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ -# $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") +sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From b53d3158e6e8dc18d3f2e8caa4cf147371d64815 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 21:05:17 -0500 Subject: [PATCH 494/766] ++ --- scripts/sv_signature.R | 18 +++++++++++++++++- signatures/sv_signature.mk | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 53a98e71..aa582f0b 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -140,5 +140,21 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==4) { - + sample_name = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) + signature_x = list() + for (i in 1:length(sample_name)) { + signature_x[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(method = "signature.tools.lib") + } + signature_x = do.call(bind_rows, signature_x) + signature_y = list() + for (i in 1:length(sample_name)) { + signature_y[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged.sv_clusters_and_footprints_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(method = "viola") + } + signature_y = do.call(bind_rows, signature_y) + signature_df = dplyr::bind_rows(signature_x, signature_y) + readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index de8cb484..2ae89301 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -83,7 +83,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_names '$(SAMPLE_PAIRS)' --output_file $(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From 9c11e1e788d8a619af2af2939fc9770867c1815e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 21:07:00 -0500 Subject: [PATCH 495/766] Update sv_signature.mk --- signatures/sv_signature.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 2ae89301..fe0e938d 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -82,8 +82,8 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) - $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") + $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From beb1d20c6aac375c22a550db88a5cda40a8d96e5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 22 Nov 2022 21:13:28 -0500 Subject: [PATCH 496/766] Update sv_signature.R --- scripts/sv_signature.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index aa582f0b..623847cb 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -81,11 +81,11 @@ if (as.numeric(opt$option)==1) { bedpe_org = bedpe_org %>% dplyr::left_join(bedpe_cli, by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=15 ~ "c1", + p_value<.05 & n_svs>=25 ~ "c1", TRUE ~ "non_clustered" )) %>% dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=100 ~ "c2", + p_value<.05 & n_svs>=250 ~ "c2", TRUE ~ is_clustered )) %>% dplyr::mutate(svclass = case_when( From 2df59ca72a686f306a71fea4cdadf90c649433f0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 18:12:12 -0500 Subject: [PATCH 497/766] ++cnvkit --- config.inc | 1 + copy_number/cnvkit.mk | 59 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 copy_number/cnvkit.mk diff --git a/config.inc b/config.inc index 35ee33b3..bc4b4eb0 100644 --- a/config.inc +++ b/config.inc @@ -35,6 +35,7 @@ SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 +CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.8 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk new file mode 100644 index 00000000..c26c2e05 --- /dev/null +++ b/copy_number/cnvkit.mk @@ -0,0 +1,59 @@ +include modules/Makefile.inc +include modules/genome_inc/b37.inc + +LOGDIR ?= log/cnv_kit.$(NOW) + +cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcoverage.cnn) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).antitargetcoverage.cnn) \ + $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn) \ + $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) \ + cnvkit/reference/combined_reference.cnr \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) + +ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed +OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed + +define cnvkit-tumor-cnn +cnvkit/cnn/tumor/$1.targetcoverage.cnn : bam/$1.bam + $$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/tumor/$1.targetcoverage.cnn") + +cnvkit/cnn/tumor/$1.antitargetcoverage.cnn : bam/$1.bam + $$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/tumor/$1.antitargetcoverage.cnn") +endef + $(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call cnvkit-tumor-cnn,$(sample)))) + +define cnvkit-normal-cnn +cnvkit/cnn/normal/$1.targetcoverage.cnn : bam/$1.bam + $$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py coverage -p 4 -q 0 $$(<) $$(ONTARGET_FILE) -o cnvkit/cnn/normal/$1.targetcoverage.cnn") + +cnvkit/cnn/normal/$1.antitargetcoverage.cnn : bam/$1.bam + $$(call RUN,-c -n 4 -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py coverage -p 4 -q 0 $$(<) $$(OFFTARGET_FILE) -o cnvkit/cnn/normal/$1.antitargetcoverage.cnn") +endef + $(foreach sample,$(NORMAL_SAMPLES),\ + $(eval $(call cnvkit-normal-cnn,$(sample)))) + +cnvkit/reference/combined_reference.cnr : $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn) $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) + $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ + sleep 30 && \ + cnvkit.py reference cnvkit/cnn/normal/*.cnn -f $(REF_FASTA) --no-edge -o cnvkit/reference/combined_reference.cnr") + +define cnvkit-tumor-cnr +cnvkit/cnr/$1.cnr : cnvkit/cnn/tumor/$1.targetcoverage.cnn cnvkit/cnn/tumor/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py fix $$(<) $$(<<) $$(<<<) -o cnvkit/cnr/$1.cnr") + +endef + $(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call cnvkit-tumor-cnr,$(sample)))) + + +..DUMMY := $(shell mkdir -p version; \ + python $(CNVKIT_ENV)/bin/cnvkit.py version &> version/cnvkit.txt) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: cnv_kit From 9924406f2a8d9fdb574ae8534176b95a84a950c7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:15:31 -0500 Subject: [PATCH 498/766] ++ --- config.inc | 2 +- copy_number/cnvkit.R | 203 +++++++++++------------------------------- copy_number/cnvkit.mk | 14 ++- 3 files changed, 66 insertions(+), 153 deletions(-) diff --git a/config.inc b/config.inc index bc4b4eb0..abab7457 100644 --- a/config.inc +++ b/config.inc @@ -35,7 +35,7 @@ SURVIVOR_ENV ?= $(HOME)/share/usr/env/survivor-1.0.7 ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 -CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.8 +CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/copy_number/cnvkit.R b/copy_number/cnvkit.R index 95cbe66c..c213e4f3 100644 --- a/copy_number/cnvkit.R +++ b/copy_number/cnvkit.R @@ -1,168 +1,69 @@ #!/usr/bin/env Rscript suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("copynumber")) -suppressPackageStartupMessages(library("colorspace")) -suppressPackageStartupMessages(library("ASCAT")) -suppressPackageStartupMessages(library("GAP")) - -'plot_log2_' <- function(x, y, title = "", alpha=NA, psi=NA) -{ - par(mar=c(5, 5, 4, 2)+.1) - data("CytoBand") - end = NULL - for (j in 1:23) { - end = c(end, max(CytoBand$End[CytoBand$Chromosome==j])) - } - end = cumsum(end) - start = rep(0, 23) - start[2:23] = end[1:22]+1 - for (j in 1:23) { - y[y[,"Chromosome"]==j,"Start"] = y[y[,"Chromosome"]==j,"Start"] + start[j] - y[y[,"Chromosome"]==j,"End"] = y[y[,"Chromosome"]==j,"End"] + start[j] - x[x[,"chrom"]==j,"pos"] = x[x[,"chrom"]==j,"pos"] + start[j] - } - plot(x[,"pos"], x[,"Log2Ratio"], type="p", pch=".", cex=1, col="grey75", axes=FALSE, frame=TRUE, xlab="", ylab="", main="", ylim=c(-4,5)) - for (j in 1:nrow(y)) { - lines(x=c(y[j,"Start"], y[j,"End"]), y=rep(y[j,"Log2Ratio"],2), lty=1, lwd=1.75, col="red") - } - axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) - mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) - abline(v=1, col="goldenrod3", lty=3, lwd=.5) - abline(h=0, col="red", lty=1, lwd=1) - for (j in 2:23) { - v = start[j] - abline(v=v, col="goldenrod3", lty=3, lwd=.5) - } - abline(v=max(x[,"pos"]), col="goldenrod3", lty=3, lwd=.5) - axis(1, at = .5*(start+end), labels=c(1:22, "X"), cex.axis = 0.85, las = 1) - rect(xleft=1-1e10, xright=x[nrow(x),"pos"]+1e10, ybottom=4, ytop=6, col="lightgrey", border="black", lwd=1.5) - title(main = paste0(title, " | alpha = ", signif(alpha, 3), " | psi = ", signif(psi, 3)), line=-1, cex.main=.75, font.main=1) - box(lwd=1.5) -} if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) } -args_list <- list(make_option("--type", default = NA, type = 'character', help = "type of analysis"), - make_option("--sample_name", default = NA, type = 'character', help = "sample name")) - +args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"), + make_option("--sample_name", default = NA, type = 'character', help = "sample name")) parser <- OptionParser(usage = "%prog", option_list = args_list) arguments <- parse_args(parser, positional_arguments = T) opt <- arguments$options -if (opt$type=="total-copy") { +'plot_log2_ratio' <- function(x) +{ + par(mar=c(5, 5, 4, 2)+.1) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5)) + y = x %>% + dplyr::group_by(chromosome) %>% + dplyr::summarize(start = min(start_chr), + end = max(end_chr)) %>% + dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% + dplyr::arrange(chromosome) - 'prunesegments.cn' <- function(x, n=10) - { - cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x)) - for (j in 1:nrow(x)) { - cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"]) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx,"Log2Ratio"]) - scl = sd(x[indx,"Log2Ratio"]) - ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl)) - x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"]) - } else { - x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"]) - } - } - return(x) - } - - data = read.csv(file=paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), header=TRUE, sep="\t", stringsAsFactors=FALSE) - CN = data[,c("chromosome", "start", "log2"),drop=FALSE] - colnames(CN) = c("Chromosome", "Position", "Log2Ratio") - CN[,"Chromosome"] = gsub(pattern="chr", replacement="", x=CN[,"Chromosome"], fixed=TRUE) - CN[CN[,"Chromosome"]=="X","Chromosome"] = 23 - CN[CN[,"Chromosome"]=="Y","Chromosome"] = 24 - CN[,"Chromosome"] = as.numeric(CN[,"Chromosome"]) - CN[CN[,"Log2Ratio"]<(-4) | CN[,"Log2Ratio"]>(4),"Log2Ratio"] = 0 - CN = subset(CN, CN[,"Chromosome"]<=23) - tmp = pcf(data=winsorize(data=CN, method="mad", tau=2.5, k=10, verbose=FALSE), kmin = 10, gamma=40, fast=FALSE, verbose=FALSE)[,2:7,drop=FALSE] - colnames(tmp) = c("Chromosome", "Arm", "Start", "End", "N", "Log2Ratio") - save(CN, tmp, file=paste0("cnvkit/totalcopy/", opt$sample_name, ".RData")) - tmp = prunesegments.cn(x=tmp, n=10) - CN = winsorize(data=CN[,c("Chromosome","Position","Log2Ratio")], tau=2.5, k=15, verbose=FALSE) - pdf(file=paste0("cnvkit/segmented/", opt$sample_name, ".pdf"), width=10, height=4.25) - file_names = dir(path="facets/cncf", pattern=opt$sample_name, full.names=TRUE) - file_names = file_names[grep(".Rdata", file_names, fixed=TRUE)] - if (length(file_names)==1) { - load(file_names) - alpha = fit$purity - psi = fit$ploidy - } else { - alpha = NA - psi = NA - } - plot_log2_(x=CN, y=tmp, title = opt$sample_name, alpha=alpha, psi=psi) - dev.off() + axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) + axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) + mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) +} -} else if (opt$type=="call-cna") { +if (as.numeric(opt$option) == 1) { + data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(weight>.1) %>% + dplyr::filter(chromosome != "Y") %>% + dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) + cytoband = data %>% + dplyr::group_by(chromosome) %>% + dplyr::summarize(start = min(start), + end = max(end)) %>% + dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% + dplyr::arrange(chromosome) %>% + dplyr::mutate(end = cumsum(end)) + start = rep(0, nrow(cytoband)) + start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)] + cytoband$start = start + data = data %>% + dplyr::left_join(cytoband %>% + dplyr::rename(start_chr = start, + end_chr = end), + by = "chromosome") %>% + dplyr::mutate(start = start + start_chr, + end = end + start_chr) %>% + dplyr::mutate(position = .5*(start + end)) %>% + dplyr::mutate(log2 = case_when( + log2 > 4 ~ 4, + log2 < (-4) ~ 4, + TRUE ~ log2 + )) - 'prunesegments.cn' <- function(x, n=10) - { - cnm = matrix(NA, nrow=nrow(x), ncol=nrow(x)) - for (j in 1:nrow(x)) { - cnm[,j] = abs(2^x[j,"Log2Ratio"] - 2^x[,"Log2Ratio"]) - } - cnt = hclust(as.dist(cnm), "average") - cnc = cutree(tree=cnt, k=n) - for (j in unique(cnc)) { - indx = which(cnc==j) - if (length(indx)>2) { - mcl = mean(x[indx,"Log2Ratio"]) - scl = sd(x[indx,"Log2Ratio"]) - ind = which(x[indx,"Log2Ratio"]<(mcl+1.96*scl) & x[indx,"Log2Ratio"]>(mcl-1.96*scl)) - x[indx[ind],"Log2Ratio"] = mean(x[indx[ind],"Log2Ratio"]) - } else { - x[indx,"Log2Ratio"] = mean(x[indx,"Log2Ratio"]) - } - } - return(x) - } - load(paste0("cnvkit/totalcopy/", opt$sample_name, ".RData")) - file_names = dir(path="facets/cncf", pattern=opt$sample_name, full.names=TRUE) - file_names = file_names[grep(".Rdata", file_names, fixed=TRUE)] - if (length(file_names)==1) { - load(file_names) - alpha = ifelse(is.na(fit$purity), 1, fit$purity) - psi = ifelse(is.na(fit$ploidy), 2, fit$ploid) - } else { - alpha = 1 - psi = 2 - } - tmp = prunesegments.cn(x=tmp, n=10) - qt = round((((2^(tmp[,"Log2Ratio"])) * (alpha*psi + 2*(1-alpha))) - 2*(1-alpha))/alpha) - qt[is.na(qt)] = 2 - qt[is.infinite(qt)] = 2 - cat5 = rep(0, length(qt)) - if (round(psi)==1 | round(psi)==2) { - cat5t = c(0, 1, 3, 7) - } else if (round(psi)==3) { - cat5t = c(0, 1, 4, 9) - } else if (round(psi)==4) { - cat5t = c(0, 1, 5, 10) - } else if (round(psi)==5) { - cat5t = c(0, 2, 6, 12) - } else if (round(psi)>=6) { - cat5t = c(0, 2, 7, 15) - } else { - cat5t = c(0, 1, 3, 7) - } - cat5[qt <= cat5t[2]] = -1 - cat5[qt <= cat5t[1]] = -2 - cat5[qt >= cat5t[3]] = 1 - cat5[qt >= cat5t[4]] = 2 - tmp = cbind(tmp, "Cat5"=cat5) - save(CN, tmp, file=paste0("cnvkit/called/", opt$sample_name, ".RData")) - -} + pdf(file = paste0("cnvkit/segmented/", opt$sample_name, ".pdf"), width = 10, height = 4.25) + plot_log2_ratio(x = data) + dev.off() -warnings() +} \ No newline at end of file diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index c26c2e05..de342591 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -8,7 +8,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).targetcoverage.cnn) \ $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) \ cnvkit/reference/combined_reference.cnr \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -50,6 +51,17 @@ cnvkit/cnr/$1.cnr : cnvkit/cnn/tumor/$1.targetcoverage.cnn cnvkit/cnn/tumor/$1.a endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call cnvkit-tumor-cnr,$(sample)))) + +define cnvkit-plot +cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 1 \ + --sample_name $1") + +endef + $(foreach sample,$(TUMOR_SAMPLES),\ + $(eval $(call cnvkit-tumor-cnr,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ From b7bd1ebfb20a69919d3e0455ef76e99e337d5e4d Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:16:20 -0500 Subject: [PATCH 499/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index de342591..201c934d 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -61,7 +61,7 @@ cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr endef $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-tumor-cnr,$(sample)))) + $(eval $(call cnvkit-plot,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ From 095e393214c7ce2ffed0faaf6643840a08113829 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:18:13 -0500 Subject: [PATCH 500/766] -> --- {copy_number => scripts}/cnvkit.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {copy_number => scripts}/cnvkit.R (100%) diff --git a/copy_number/cnvkit.R b/scripts/cnvkit.R similarity index 100% rename from copy_number/cnvkit.R rename to scripts/cnvkit.R From e5f7715b615767a6787f9366f081d1b917ed84c1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:19:20 -0500 Subject: [PATCH 501/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index c213e4f3..f8b7c497 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -62,7 +62,7 @@ if (as.numeric(opt$option) == 1) { TRUE ~ log2 )) - pdf(file = paste0("cnvkit/segmented/", opt$sample_name, ".pdf"), width = 10, height = 4.25) + pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 10, height = 4.25) plot_log2_ratio(x = data) dev.off() From 58fa8644af121c1d623cde091f6f77730090bdc2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:27:07 -0500 Subject: [PATCH 502/766] Update cnvkit.R --- scripts/cnvkit.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index f8b7c497..8c123bd5 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,6 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, lwd = 0, tck = .5) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) @@ -62,8 +63,8 @@ if (as.numeric(opt$option) == 1) { TRUE ~ log2 )) - pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 10, height = 4.25) + pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 7, height = 4.25) plot_log2_ratio(x = data) dev.off() -} \ No newline at end of file +} From 8f23ef358de05c2341aa5c7deee701d7297baa6f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:29:40 -0500 Subject: [PATCH 503/766] ++ --- copy_number/cnvkit.mk | 4 ++-- scripts/cnvkit.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 201c934d..c7dee045 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -9,7 +9,7 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) \ cnvkit/reference/combined_reference.cnr \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -53,7 +53,7 @@ endef $(eval $(call cnvkit-tumor-cnr,$(sample)))) define cnvkit-plot -cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr +cnvkit/plots/log2/$1.pdf : cnvkit/cnr/$1.cnr $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 1 \ diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 8c123bd5..1fa2e3c7 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,7 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, lwd = 0, tck = .5) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .5) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) @@ -63,7 +63,7 @@ if (as.numeric(opt$option) == 1) { TRUE ~ log2 )) - pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 7, height = 4.25) + pdf(file = paste0("cnvkit/plots/log2/", opt$sample_name, ".pdf"), width = 8, height = 3.75) plot_log2_ratio(x = data) dev.off() From c85ed5092e40edd270355641a4566d692941336f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:31:08 -0500 Subject: [PATCH 504/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 1fa2e3c7..83270ce2 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,7 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .5) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.05) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) From 0014203d51c891f06d1e6796d511824f06e6738a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:32:04 -0500 Subject: [PATCH 505/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 83270ce2..f3b0ab98 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,7 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.05) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.01) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) From 785086cd2bd39dca082de4296e843435d661b3e5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:32:50 -0500 Subject: [PATCH 506/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index f3b0ab98..ca13972b 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,7 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.01) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) From fd38c6994929f2f686cbbaf605c5b615abdaba1a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:34:40 -0500 Subject: [PATCH 507/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index ca13972b..9897c343 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4.5,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), From 45d4b00e48df52a20331a7e6762042fdc5fb5e27 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:36:41 -0500 Subject: [PATCH 508/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 9897c343..5a6b58f5 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -58,8 +58,8 @@ if (as.numeric(opt$option) == 1) { end = end + start_chr) %>% dplyr::mutate(position = .5*(start + end)) %>% dplyr::mutate(log2 = case_when( - log2 > 4 ~ 4, - log2 < (-4) ~ 4, + log2 > 4 ~ NA, + log2 < (-4) ~ NA, TRUE ~ log2 )) From efbe714abd7a5abe2a789db323a08713730df423 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:37:29 -0500 Subject: [PATCH 509/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 5a6b58f5..a7032b3c 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -58,8 +58,8 @@ if (as.numeric(opt$option) == 1) { end = end + start_chr) %>% dplyr::mutate(position = .5*(start + end)) %>% dplyr::mutate(log2 = case_when( - log2 > 4 ~ NA, - log2 < (-4) ~ NA, + log2 > 4 ~ 0, + log2 < (-4) ~ 0, TRUE ~ log2 )) From 7d8346c9119cf3b8d44571f05cd8cef785acc891 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:41:07 -0500 Subject: [PATCH 510/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index a7032b3c..d2c9db36 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4.5,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-5,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), @@ -58,7 +58,7 @@ if (as.numeric(opt$option) == 1) { end = end + start_chr) %>% dplyr::mutate(position = .5*(start + end)) %>% dplyr::mutate(log2 = case_when( - log2 > 4 ~ 0, + log2 > 5 ~ 0, log2 < (-4) ~ 0, TRUE ~ log2 )) From dc99f564839c6d002a30528dffb8e97ac74e9e1b Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:42:56 -0500 Subject: [PATCH 511/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index d2c9db36..5c504406 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-5,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-6,6)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), From 75d926754a5e3a3bb07ed417c0a60a288df182c8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 19:44:11 -0500 Subject: [PATCH 512/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 5c504406..63166615 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -58,7 +58,7 @@ if (as.numeric(opt$option) == 1) { end = end + start_chr) %>% dplyr::mutate(position = .5*(start + end)) %>% dplyr::mutate(log2 = case_when( - log2 > 5 ~ 0, + log2 > 6 ~ 0, log2 < (-4) ~ 0, TRUE ~ log2 )) From cdedb9fa6ab195f2b73663d4267a3a1a7a38b589 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:25:04 -0500 Subject: [PATCH 513/766] ++ --- Makefile | 4 ++++ config.inc | 1 + signatures/star_fish.mk | 27 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 signatures/star_fish.mk diff --git a/Makefile b/Makefile index 95d19e80..14b099ff 100644 --- a/Makefile +++ b/Makefile @@ -511,6 +511,10 @@ deconstruct_sigs : TARGETS += sv_signature sv_signature : $(call RUN_MAKE,modules/signatures/sv_signature.mk) + +TARGETS += star_fish +star_fish : + $(call RUN_MAKE,modules/signatures/star_fish.mk) #================================================== diff --git a/config.inc b/config.inc index abab7457..41fbc52b 100644 --- a/config.inc +++ b/config.inc @@ -36,6 +36,7 @@ ANNOTATE_SV_ENV ?= $(HOME)/share/usr/env/annot_sv-3.1.3 VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9 +STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk new file mode 100644 index 00000000..f2f9b6b5 --- /dev/null +++ b/signatures/star_fish.mk @@ -0,0 +1,27 @@ +include modules/Makefile.inc + +LOGDIR ?= log/star_fish.$(NOW) + +MIN_SIZE = 1 +MAX_SIZE = 10000000000000000 + +star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bed) + +define starfish-sv +star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR vcftobed \ + $$(<) \ + $(MIN_SIZE) \ + $(MAX_SIZE) \ + $$(@)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) + +..DUMMY := $(shell mkdir -p version; \ + $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: star_fish From 9b699959843106dd27b786f3b8fbfac8c7c8d0a3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:37:05 -0500 Subject: [PATCH 514/766] ++ --- scripts/star_fish.R | 45 +++++++++++++++++++++++++++++++++++++++++ signatures/star_fish.mk | 11 +++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 scripts/star_fish.R diff --git a/scripts/star_fish.R b/scripts/star_fish.R new file mode 100644 index 00000000..fc362851 --- /dev/null +++ b/scripts/star_fish.R @@ -0,0 +1,45 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("Starfish")) + + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_name", default = NA, type = 'character', help = "sample name"), + make_option("--input_file", default = NA, type = 'character', help = "input file"), + make_option("--output_file", default = NA, type = 'character', help = "output file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_name = as.character(opt$sample_name) + bed = readr::read_tsv(file = as.character(opt$input_file), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(chrom1 = X1 + start1 = X2, + end1 = X3, + chrom2 = X4, + start2 = X5, + end2 = X6, + sv_id = X7, + pe_support = X8, + strand1 = X9, + strand2 = X10, + svclass = X11) %>% + dplyr::select(chrom1, pos1 = start1, chrom2, pos2 = start2, strand1, strand2, svtype = svclass) %>% + dplyr::mutate(svtype = case_when( + svtype == "INV" & strand1 == "+" & strand2 == "+" ~ "h2hINV", + svtype == "INV" & strand1 == "-" & strand2 == "-" ~ "t2tINV", + )) %>% + dplyr::mutate(sample = sample_name) + readr::write_tsv(x = bed, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) + +} diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index f2f9b6b5..a10ac1c4 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -5,7 +5,8 @@ LOGDIR ?= log/star_fish.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bed) +star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bed) \ + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bedpe) define starfish-sv star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -16,6 +17,14 @@ star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf $(MAX_SIZE) \ $$(@)") +star_fish/$1_$2/$1_$2.merged.bedpe : star_fish/$1_$2/$1_$2.merged.bed + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 1 \ + --sample_name $1_$2 \ + --input_file $$(<) \ + --output_file $$(@)") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 15e60b5374a0989182bbc8446c76a012ad17a775 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:38:55 -0500 Subject: [PATCH 515/766] Update star_fish.mk --- signatures/star_fish.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index a10ac1c4..419ff6b9 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -18,12 +18,12 @@ star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf $$(@)") star_fish/$1_$2/$1_$2.merged.bedpe : star_fish/$1_$2/$1_$2.merged.bed - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ - --option 1 \ - --sample_name $1_$2 \ - --input_file $$(<) \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 1 \ + --sample_name $1_$2 \ + --input_file $$(<) \ + --output_file $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 527e4f98643a8fd9d5ec666210ad2baafee51452 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:42:36 -0500 Subject: [PATCH 516/766] Update star_fish.R --- scripts/star_fish.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index fc362851..36fde650 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -23,7 +23,7 @@ if (as.numeric(opt$option)==1) { sample_name = as.character(opt$sample_name) bed = readr::read_tsv(file = as.character(opt$input_file), col_names = FALSE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::rename(chrom1 = X1 + dplyr::rename(chrom1 = X1, start1 = X2, end1 = X3, chrom2 = X4, From 9e65c13c359c5f1b9305b59bab28d0ce0b4c2dbd Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:43:39 -0500 Subject: [PATCH 517/766] Update star_fish.R --- scripts/star_fish.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 36fde650..54c12852 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -38,6 +38,7 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(svtype = case_when( svtype == "INV" & strand1 == "+" & strand2 == "+" ~ "h2hINV", svtype == "INV" & strand1 == "-" & strand2 == "-" ~ "t2tINV", + TRUE ~ svtype )) %>% dplyr::mutate(sample = sample_name) readr::write_tsv(x = bed, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) From 2bea5b411a7ff2780766306ca75a2fb19bc59805 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 23 Nov 2022 21:53:53 -0500 Subject: [PATCH 518/766] ++ --- scripts/star_fish.R | 11 +++++++++++ signatures/star_fish.mk | 11 ++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 54c12852..2913dd8e 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -43,4 +43,15 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(sample = sample_name) readr::write_tsv(x = bed, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) +} else if (as.numeric(opt$option)==2) { + sample_name = as.character(opt$sample_name) + data = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::select(chromosome = chrom, + start = loc.start, + end = loc.end, + total_cn = tcn.em) %>% + dplyr::mutate(sample = sample_name) + readr::write_tsv(x = data, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) + } + diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 419ff6b9..113c91e8 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -6,7 +6,8 @@ MIN_SIZE = 1 MAX_SIZE = 10000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bed) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bedpe) + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.txt) define starfish-sv star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -25,6 +26,14 @@ star_fish/$1_$2/$1_$2.merged.bedpe : star_fish/$1_$2/$1_$2.merged.bed --input_file $$(<) \ --output_file $$(@)") +star_fish/$1_$2/$1_$2.merged.txt : facets/cncf/$1_$2.txt + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 2 \ + --sample_name $1_$2 \ + --input_file $$(<) \ + --output_file $$(@)") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 0f8f3718f8ef950db8682e911718655142e9b8fa Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 24 Nov 2022 13:01:06 -0500 Subject: [PATCH 519/766] Update sv_signature.R --- scripts/sv_signature.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 623847cb..2296219c 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -81,11 +81,11 @@ if (as.numeric(opt$option)==1) { bedpe_org = bedpe_org %>% dplyr::left_join(bedpe_cli, by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=25 ~ "c1", + p_value<.01 & n_svs>=100 ~ "c1", TRUE ~ "non_clustered" )) %>% dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=250 ~ "c2", + p_value<.01 & n_svs>=500 ~ "c2", TRUE ~ is_clustered )) %>% dplyr::mutate(svclass = case_when( From 2a2d0d96bdf2360a93001b6ae568d669a78ad716 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 24 Nov 2022 13:05:21 -0500 Subject: [PATCH 520/766] Update star_fish.mk --- signatures/star_fish.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 113c91e8..9df1289f 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -5,12 +5,12 @@ LOGDIR ?= log/star_fish.$(NOW) MIN_SIZE = 1 MAX_SIZE = 10000000000000000 -star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bed) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged.txt) +star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) define starfish-sv -star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf +star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ SURVIVOR vcftobed \ $$(<) \ @@ -18,7 +18,7 @@ star_fish/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf $(MAX_SIZE) \ $$(@)") -star_fish/$1_$2/$1_$2.merged.bedpe : star_fish/$1_$2/$1_$2.merged.bed +star_fish/$1_$2/$1_$2.merged_sv.bedpe : star_fish/$1_$2/$1_$2.merged_sv.bed $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ --option 1 \ @@ -26,7 +26,7 @@ star_fish/$1_$2/$1_$2.merged.bedpe : star_fish/$1_$2/$1_$2.merged.bed --input_file $$(<) \ --output_file $$(@)") -star_fish/$1_$2/$1_$2.merged.txt : facets/cncf/$1_$2.txt +star_fish/$1_$2/$1_$2.merged_cn.txt : facets/cncf/$1_$2.txt $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ --option 2 \ From 5689571e801a82962248869f83aa382bda61223a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 24 Nov 2022 13:27:40 -0500 Subject: [PATCH 521/766] Update sv_signature.R --- scripts/sv_signature.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 2296219c..1e779257 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -81,11 +81,11 @@ if (as.numeric(opt$option)==1) { bedpe_org = bedpe_org %>% dplyr::left_join(bedpe_cli, by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% dplyr::mutate(is_clustered = case_when( - p_value<.01 & n_svs>=100 ~ "c1", + p_value<.05 & n_svs>=100 ~ "c1", TRUE ~ "non_clustered" )) %>% dplyr::mutate(is_clustered = case_when( - p_value<.01 & n_svs>=500 ~ "c2", + p_value<.01 & n_svs>=250 ~ "c2", TRUE ~ is_clustered )) %>% dplyr::mutate(svclass = case_when( From 71dc6a2aee4ec55dfdc15b7dfd2ee290e3110b62 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 24 Nov 2022 14:55:04 -0500 Subject: [PATCH 522/766] ++ --- scripts/star_fish.R | 28 +++++++++++++++++++++++++++- signatures/star_fish.mk | 9 ++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 2913dd8e..4d69d9f5 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -53,5 +53,31 @@ if (as.numeric(opt$option)==1) { dplyr::mutate(sample = sample_name) readr::write_tsv(x = data, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) -} +} else if (as.numeric(opt$option)==3) { + sample_name = as.character(opt$sample_name) + sv_df = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + cn_df = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + gd_df = dplyr::tibble(sample = sample_name, gender = "unknown") %>% + readr::type_convert() + + starfish_link_out = starfish_link(sv_file = sv_df, prefix = paste0("star_fish/", sample_name, "/", sample_name)) + if (length(starfish_link_out)==1) { + cat(starfish_link_out, file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) + } else { + starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, complex_sv = starfish_link_out$interleave_tra_complex_sv, + cnv_file = cn_df, gender_file = gd_df, prefix = paste0("star_fish/", sample_name, "/", sample_name), + genome_v = "hg19", cnv_factor = "auto", arm_del_rm = TRUE) + starfish_sig_out = starfish_sig(cluster_feature = starfish_feature_out$cluster_feature, + prefix = paste0("star_fish/", sample_name, "/", sample_name), + cmethod = "class") + wd = getwd() + setwd(paste0("star_fish/", sample_name, "/")) + starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19") + setwd(wd) + cat("taskcomplete!!", file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) + + } + diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 9df1289f..59fa574e 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -7,7 +7,8 @@ MAX_SIZE = 10000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -34,6 +35,12 @@ star_fish/$1_$2/$1_$2.merged_cn.txt : facets/cncf/$1_$2.txt --input_file $$(<) \ --output_file $$(@)") +star_fish/$1_$2/$1_$2.taskcomplete : star_fish/$1_$2/$1_$2.merged_sv.bedpe star_fish/$1_$2/$1_$2.merged_cn.txt + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 3 \ + --sample_name $1_$2") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 0a28f6b1e0c27ffb5f2fa8623e675e16a639d7cc Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 24 Nov 2022 14:57:37 -0500 Subject: [PATCH 523/766] Update star_fish.R --- scripts/star_fish.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 4d69d9f5..a80b401a 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -79,5 +79,4 @@ if (as.numeric(opt$option)==1) { cat("taskcomplete!!", file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) } - - +} From d5992df33949fa701a59a1f645b8c9d5700aa0de Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 11:56:58 -0500 Subject: [PATCH 524/766] Update sufam_gt.R --- scripts/sufam_gt.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index eb2e9a18..84634257 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -37,6 +37,7 @@ if (as.numeric(opt$option)==1) { FILTER = "PASS", INFO = ".") %>% dplyr::select(`#CHROM`, POS, ID, REF, ALT, QUAL, INFO) %>% + dplyr::mutate(`#CHROM` = as.character(`#CHROM`)) %>% dplyr::mutate(chr_n = case_when( `#CHROM` == "X" ~ "23", `#CHROM` == "Y" ~ "24", From aee236b0244dc9699a6bf8fa67315225f7543ff3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:06:05 -0500 Subject: [PATCH 525/766] Update cnvkit.R --- scripts/cnvkit.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 63166615..29f6583a 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-6,6)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-5,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), @@ -27,9 +27,9 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = -.035) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .05) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) - axis(2, at = c(-4, -2, 0, 2, 4), labels = c(-4, -2, 0, 2, 4), cex.axis = 1, las = 1) + axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) } From a222afac62786fb6fd316b00be7498751816a9cb Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:09:27 -0500 Subject: [PATCH 526/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 29f6583a..fc67084c 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -27,7 +27,7 @@ opt <- arguments$options dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .05) + axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) From dd792d4e20c86820481a6f12b69ec907c4b23929 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:10:55 -0500 Subject: [PATCH 527/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index fc67084c..b053c2eb 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-5,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), From 557ec577c1e24c41ec4a656d3365693c6a53387e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:12:06 -0500 Subject: [PATCH 528/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index b053c2eb..f5e20286 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,7 +19,7 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-3,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), From 926683f2a4d7e8ba339b45fc9de7879624ce3c18 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:15:16 -0500 Subject: [PATCH 529/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index f5e20286..bac05f87 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -19,14 +19,14 @@ opt <- arguments$options 'plot_log2_ratio' <- function(x) { par(mar=c(5, 5, 4, 2)+.1) - plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-3,5)) + plot(x = x$position, y = x$log2, type = "p", pch = ".", cex = 1, col = "grey75", axes = FALSE, frame = FALSE, xlab = "", ylab = "", main = "", ylim = c(-4,5)) y = x %>% dplyr::group_by(chromosome) %>% dplyr::summarize(start = min(start_chr), end = max(end_chr)) %>% dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - + abline(h = 0, col = "black") axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) From 0a0debeed421a78c07b97730267df6adbdef1e88 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:18:13 -0500 Subject: [PATCH 530/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index bac05f87..a23d5989 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -26,7 +26,7 @@ opt <- arguments$options end = max(end_chr)) %>% dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - abline(h = 0, col = "black") + points(x = c(y$start[1]-1E6, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey10") axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) From 9445ce288b113387bfc1403685a4679c0c7156d1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:19:40 -0500 Subject: [PATCH 531/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index a23d5989..db38026a 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -26,7 +26,7 @@ opt <- arguments$options end = max(end_chr)) %>% dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - points(x = c(y$start[1]-1E6, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey10") + points(x = c(y$start[1]-1E8, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey20", lwd = 1.5) axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) From 85c75329205b65dedca1c217c0fd546d90d282e5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:20:34 -0500 Subject: [PATCH 532/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index db38026a..a68ef020 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -26,7 +26,7 @@ opt <- arguments$options end = max(end_chr)) %>% dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% dplyr::arrange(chromosome) - points(x = c(y$start[1]-1E8, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey20", lwd = 1.5) + points(x = c(y$start[1]-1E9, y$end[nrow(y)]), y = c(0, 0), type = "l", col = "grey20", lwd = 1.15) axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) From d4e6e068a6f7320b5b561a5f89306a965b5f9c7d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 12:28:54 -0500 Subject: [PATCH 533/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index a68ef020..5bbbf7c7 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -30,7 +30,7 @@ opt <- arguments$options axis(1, at = c(y$start, y$end[nrow(y)]), labels = rep(" ", nrow(y)+1), cex.axis = 0.85, las = 1, tck = .035) axis(1, at = .5*(y$start + y$end), labels = y$chromosome, cex.axis = 0.85, las = 1) axis(2, at = c(-2, -1, 0, 1, 2), labels = c(-2, -1, 0, 1, 2), cex.axis = 1, las = 1) - mtext(side = 2, text = expression(Log[2]~"Ratio"), line = 3.15, cex = 1.25) + mtext(side = 2, text = expression(Log[2]~"Ratio "), line = 3.15, cex = 1) } if (as.numeric(opt$option) == 1) { From f73c2dcd7237ca4dfd44813fc451f934aea6fb4d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 13:25:31 -0500 Subject: [PATCH 534/766] ++ --- copy_number/cnvkit.mk | 6 ++++ scripts/cnvkit.R | 69 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index c7dee045..99d22c2b 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -47,6 +47,12 @@ define cnvkit-tumor-cnr cnvkit/cnr/$1.cnr : cnvkit/cnn/tumor/$1.targetcoverage.cnn cnvkit/cnn/tumor/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ cnvkit.py fix $$(<) $$(<<) $$(<<<) -o cnvkit/cnr/$1.cnr") + +cnvkit/segmented/$1.txt : cnvkit/cnr/$1.cnr + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 2 \ + --sample_name $1") endef $(foreach sample,$(TUMOR_SAMPLES),\ diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 5bbbf7c7..6d21b8a8 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -33,6 +33,13 @@ opt <- arguments$options mtext(side = 2, text = expression(Log[2]~"Ratio "), line = 3.15, cex = 1) } +'add_segmented' <- function(x) +{ + for (i in 1:nrow(x)) { + points(x = c(x$Start_Position[i], x$End_Position[i]), y = rep(x$Log2_Ratio[i], 2), type = "l", col = "#e41a1c", lwd = 2.75) + } +} + if (as.numeric(opt$option) == 1) { data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% @@ -67,4 +74,66 @@ if (as.numeric(opt$option) == 1) { plot_log2_ratio(x = data) dev.off() +} else if (as.numeric(opt$option) == 2) { + data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(weight>.1) %>% + dplyr::filter(chromosome != "Y") + smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") + segmented = pcf(data = smoothed, kmin = 10, gamma = 40, normalize = TRUE, fast = FALSE) %>% + dplyr::as_tibble() %>% + dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, + Start_Position = start.pos, End_Position = end.pos, + N = n.probes, Log2_Ratio = mean) %>% + dplyr::mutate(Sample_Name = opt$sample_name) + readr::write_tsv(x = segmented, file = paste0("cnvkit/segmented/", opt$sample_name, ".txt"), col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option) == 3) { + data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(weight>.1) %>% + dplyr::filter(chromosome != "Y") + smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") + segmented = pcf(data = smoothed, kmin = 10, gamma = 40, normalize = TRUE, fast = FALSE) %>% + dplyr::as_tibble() %>% + dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, + Start_Position = start.pos, End_Position = end.pos, + N = n.probes, Log2_Ratio = mean) %>% + dplyr::mutate(Sample_Name = opt$sample_name) + cytoband = data %>% + dplyr::group_by(chromosome) %>% + dplyr::summarize(start = min(start), + end = max(end)) %>% + dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% + dplyr::arrange(chromosome) %>% + dplyr::mutate(end = cumsum(end)) + start = rep(0, nrow(cytoband)) + start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)] + cytoband$start = start + data = data %>% + dplyr::left_join(cytoband %>% + dplyr::rename(start_chr = start, + end_chr = end), + by = "chromosome") %>% + dplyr::mutate(start = start + start_chr, + end = end + start_chr) %>% + dplyr::mutate(position = start) %>% + dplyr::mutate(log2 = case_when( + log2 > 6 ~ 0, + log2 < (-4) ~ 0, + TRUE ~ log2 + )) + segmented = segmented %>% + dplyr::left_join(cytoband %>% + dplyr::rename(Chromosome = chromosome, + start_chr = start, + end_chr = end), + by = "Chromosome") %>% + dplyr::mutate(Start_Position = Start_Position + start_chr, + End_Position = End_Position + start_chr) + + pdf(file = paste0("cnvkit/plots/segmented/", opt$sample_name, ".pdf"), width = 8, height = 3.75) + plot_log2_ratio(x = data) + add_segmented(x = segmented) + dev.off() } From c576f3b96a9e88e8265d719dc34bc538fbc6c8fa Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 13:26:13 -0500 Subject: [PATCH 535/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 99d22c2b..2cad779f 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -9,6 +9,7 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) \ cnvkit/reference/combined_reference.cnr \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed From a96edd8a445492ea84813d58c0c2c8bda56d29af Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 13:27:29 -0500 Subject: [PATCH 536/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 2cad779f..5620f9d4 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -10,7 +10,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov cnvkit/reference/combined_reference.cnr \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -65,6 +66,12 @@ cnvkit/plots/log2/$1.pdf : cnvkit/cnr/$1.cnr $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 1 \ --sample_name $1") + +cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 3 \ + --sample_name $1") endef $(foreach sample,$(TUMOR_SAMPLES),\ From 806426c1375b68d04ad8ccb3c7025f6d0f0fb2d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 13:29:32 -0500 Subject: [PATCH 537/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 6d21b8a8..6d040ff4 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 10, gamma = 40, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 50, normalize = TRUE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 10, gamma = 40, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 50, normalize = TRUE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From 4b538e660513b3d7f26fbbfa66ce7cad7f5e7bce Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 13:37:27 -0500 Subject: [PATCH 538/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 6d040ff4..ab78805e 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 50, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = TRUE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 50, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = TRUE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From 6efeb17cdc136cc999416c87a3d7a288068d75f0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 18:29:50 -0500 Subject: [PATCH 539/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index ab78805e..1a426a45 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 125, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = TRUE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 125, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From 66e1c35767065f3f4888559698e8706f1d3ea5a8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 18:37:27 -0500 Subject: [PATCH 540/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 1a426a45..0e7d2284 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 125, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 115, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 125, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 115, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From ebbf476aef0403d09d80585a1df2bf9053316008 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 18:39:15 -0500 Subject: [PATCH 541/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 0e7d2284..810fdba1 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 115, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 115, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From a8ca9f339382607c7dd8d7f54f7ec6300753c81b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 18:40:42 -0500 Subject: [PATCH 542/766] Update cnvkit.R --- scripts/cnvkit.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 810fdba1..59eadd06 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 75, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, @@ -94,7 +94,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(weight>.1) %>% dplyr::filter(chromosome != "Y") smoothed = winsorize(data = data %>% dplyr::select(chromosome, start, log2) %>% data.frame(), method = "mad") - segmented = pcf(data = smoothed, kmin = 25, gamma = 100, normalize = FALSE, fast = FALSE) %>% + segmented = pcf(data = smoothed, kmin = 25, gamma = 75, normalize = FALSE, fast = FALSE) %>% dplyr::as_tibble() %>% dplyr::select(Sample_Name = sampleID, Chromosome = chrom, Arm = arm, Start_Position = start.pos, End_Position = end.pos, From 1cb9ee34ad974f947ee10f625b4cf29887973523 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:18:01 -0500 Subject: [PATCH 543/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 5620f9d4..b696997e 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -11,7 +11,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -49,22 +50,17 @@ define cnvkit-tumor-cnr cnvkit/cnr/$1.cnr : cnvkit/cnn/tumor/$1.targetcoverage.cnn cnvkit/cnn/tumor/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ cnvkit.py fix $$(<) $$(<<) $$(<<<) -o cnvkit/cnr/$1.cnr") - -cnvkit/segmented/$1.txt : cnvkit/cnr/$1.cnr + +cnvkit/plots/log2/$1.pdf : cnvkit/cnr/$1.cnr $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ - --option 2 \ + --option 1 \ --sample_name $1") - -endef - $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-tumor-cnr,$(sample)))) - -define cnvkit-plot -cnvkit/plots/log2/$1.pdf : cnvkit/cnr/$1.cnr + +cnvkit/segmented/$1.txt : cnvkit/cnr/$1.cnr $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ - --option 1 \ + --option 2 \ --sample_name $1") cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr @@ -75,7 +71,19 @@ cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr endef $(foreach sample,$(TUMOR_SAMPLES),\ - $(eval $(call cnvkit-plot,$(sample)))) + $(eval $(call cnvkit-tumor-cnr,$(sample)))) + + +define cnvkit-total-copy +cnvkit/totalcopy/$1.txt : cnvkit/segmented/$1.txt facets/cncf/$1_$2.out + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 4 \ + --sample_name $1_$2") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call cnvkit-total-copy,$(tumor.$(pair)),$(normal.$(pair))))) ..DUMMY := $(shell mkdir -p version; \ From 3d179d22070c30c1c28c18515041de0e4b43ee66 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:21:50 -0500 Subject: [PATCH 544/766] Update cnvkit.R --- scripts/cnvkit.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 59eadd06..8305163e 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -136,4 +136,16 @@ if (as.numeric(opt$option) == 1) { plot_log2_ratio(x = data) add_segmented(x = segmented) dev.off() +} else if (as.numeric(opt$option) == 4) { + tumor_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[1] + normal_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[2] + data = readr::read_tsv(file = paste0("cnvkit/segmented/", tumor_name, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_name, "_", normal_name, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + purity = as.numeric(gsub(pattern = "# Purity = ", replacement = "", x = facets %>% dplyr::slice(10) %>% .[["X1"]], fixed = TRUE)) + ploidy = as.numeric(gsub(pattern = "# Ploidy = ", replacement = "", x = facets %>% dplyr::slice(11) %>% .[["X1"]], fixed = TRUE)) + data = data %>% + dplyr::mutate(Total_Copy = ((2^(Log2_Ratio))*(purity*ploidy + (1-purity)*2) - (1-purity)*2)/purity) + readr::write_tsv(x = data, file = paste0("cnvkit/totalcopy/", tumor_name, ".txt"), col_names = TRUE, append = FALSE) } From b98a774cf0287a3e7106cbd7b8cd249ceb2becf9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:23:38 -0500 Subject: [PATCH 545/766] ++ --- copy_number/cnvkit.mk | 1 + scripts/cnvkit.R | 1 + 2 files changed, 2 insertions(+) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index b696997e..9e6ff28b 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -80,6 +80,7 @@ cnvkit/totalcopy/$1.txt : cnvkit/segmented/$1.txt facets/cncf/$1_$2.out $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 4 \ --sample_name $1_$2") + endef $(foreach pair,$(SAMPLE_PAIRS),\ diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 8305163e..1619b179 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -136,6 +136,7 @@ if (as.numeric(opt$option) == 1) { plot_log2_ratio(x = data) add_segmented(x = segmented) dev.off() + } else if (as.numeric(opt$option) == 4) { tumor_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[1] normal_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[2] From bbacb51801bfe87aab99b2e20c1d6bb52acae359 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:39:29 -0500 Subject: [PATCH 546/766] ++ --- copy_number/cnvkit.mk | 10 +++++-- scripts/cnvkit.R | 63 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 9e6ff28b..7e3aa501 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -12,7 +12,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) \ + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/totalcopy/$(sample).pdf) ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -80,7 +81,12 @@ cnvkit/totalcopy/$1.txt : cnvkit/segmented/$1.txt facets/cncf/$1_$2.out $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 4 \ --sample_name $1_$2") - + +cnvkit/totalcopy/$1.txt : cnvkit/cnr/$1.cnr cnvkit/totalcopy/$1.txt facets/cncf/$1_$2.out + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 5 \ + --sample_name $1_$2") endef $(foreach pair,$(SAMPLE_PAIRS),\ diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 1619b179..0b58f576 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -40,6 +40,14 @@ opt <- arguments$options } } +'add_totalcopies' <- function(purity, ploidy, xmin, xmax) +{ + for (i in c(1, 2, 4, 6, 10)) { + y = log2(((purity*i) + (1-purity)*2)/((purity*ploidy) + (1-purity)*2)) + points(x = c(xmin, xmax), y = rep(y, 2), type = "l", col = "goldenrod3", lty = 3, lwd = 1) + } +} + if (as.numeric(opt$option) == 1) { data = readr::read_tsv(file = paste0("cnvkit/cnr/", opt$sample_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% @@ -149,4 +157,57 @@ if (as.numeric(opt$option) == 1) { data = data %>% dplyr::mutate(Total_Copy = ((2^(Log2_Ratio))*(purity*ploidy + (1-purity)*2) - (1-purity)*2)/purity) readr::write_tsv(x = data, file = paste0("cnvkit/totalcopy/", tumor_name, ".txt"), col_names = TRUE, append = FALSE) -} + +} else if (as.numeric(opt$option) == 5) { + tumor_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[1] + normal_name = unlist(strsplit(x = opt$sample_name, split = "_", fixed = TRUE))[2] + data = readr::read_tsv(file = paste0("cnvkit/cnr/", tumor_name, ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(weight>.1) %>% + dplyr::filter(chromosome != "Y") + segmented = readr::read_tsv(file = paste0("cnvkit/totalcopy/", tumor_name, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + cytoband = data %>% + dplyr::group_by(chromosome) %>% + dplyr::summarize(start = min(start), + end = max(end)) %>% + dplyr::mutate(chromosome = factor(chromosome, levels = c(1:22, "X"), ordered = TRUE)) %>% + dplyr::arrange(chromosome) %>% + dplyr::mutate(end = cumsum(end)) + start = rep(0, nrow(cytoband)) + start[2:nrow(cytoband)] = cytoband$end[1:(nrow(cytoband)-1)] + cytoband$start[2:nrow(cytoband)] + cytoband$start = start + data = data %>% + dplyr::left_join(cytoband %>% + dplyr::rename(start_chr = start, + end_chr = end), + by = "chromosome") %>% + dplyr::mutate(start = start + start_chr, + end = end + start_chr) %>% + dplyr::mutate(position = start) %>% + dplyr::mutate(log2 = case_when( + log2 > 6 ~ 0, + log2 < (-4) ~ 0, + TRUE ~ log2 + )) + segmented = segmented %>% + dplyr::left_join(cytoband %>% + dplyr::rename(Chromosome = chromosome, + start_chr = start, + end_chr = end), + by = "Chromosome") %>% + dplyr::mutate(Start_Position = Start_Position + start_chr, + End_Position = End_Position + start_chr) + + facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_name, "_", normal_name, ".out"), col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + purity = as.numeric(gsub(pattern = "# Purity = ", replacement = "", x = facets %>% dplyr::slice(10) %>% .[["X1"]], fixed = TRUE)) + ploidy = as.numeric(gsub(pattern = "# Ploidy = ", replacement = "", x = facets %>% dplyr::slice(11) %>% .[["X1"]], fixed = TRUE)) + + pdf(file = paste0("cnvkit/plots/totalcopy/", tumor_name, ".pdf"), width = 8, height = 3.75) + plot_log2_ratio(x = data) + add_segmented(x = segmented) + add_totalcopies(purity, ploidy, cytoband[1,"start"]-1E9, cytoband[nrow(cyoband),"end"]) + dev.off() + +} \ No newline at end of file From 04df2b8cf50124fb1c3fe4db25bf8ccef79c4ee8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:40:25 -0500 Subject: [PATCH 547/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 7e3aa501..4b188018 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -82,7 +82,7 @@ cnvkit/totalcopy/$1.txt : cnvkit/segmented/$1.txt facets/cncf/$1_$2.out --option 4 \ --sample_name $1_$2") -cnvkit/totalcopy/$1.txt : cnvkit/cnr/$1.cnr cnvkit/totalcopy/$1.txt facets/cncf/$1_$2.out +cnvkit/plots/totalcopy/$1.pdf : cnvkit/cnr/$1.cnr cnvkit/totalcopy/$1.txt facets/cncf/$1_$2.out $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 5 \ From c53d24e21b47702f9cc6675cc5f7fec1281c3e4b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:42:04 -0500 Subject: [PATCH 548/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 0b58f576..c1d99c22 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -207,7 +207,7 @@ if (as.numeric(opt$option) == 1) { pdf(file = paste0("cnvkit/plots/totalcopy/", tumor_name, ".pdf"), width = 8, height = 3.75) plot_log2_ratio(x = data) add_segmented(x = segmented) - add_totalcopies(purity, ploidy, cytoband[1,"start"]-1E9, cytoband[nrow(cyoband),"end"]) + add_totalcopies(purity, ploidy, cytoband[1,"start"]-1E9, cytoband[nrow(cytoband),"end"]) dev.off() } \ No newline at end of file From 077340cf2ca7d19f74febc7f45dc711c8526a0c1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:44:05 -0500 Subject: [PATCH 549/766] Update cnvkit.R --- scripts/cnvkit.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index c1d99c22..9f5d07e0 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -44,7 +44,9 @@ opt <- arguments$options { for (i in c(1, 2, 4, 6, 10)) { y = log2(((purity*i) + (1-purity)*2)/((purity*ploidy) + (1-purity)*2)) - points(x = c(xmin, xmax), y = rep(y, 2), type = "l", col = "goldenrod3", lty = 3, lwd = 1) + if (y<2) { + points(x = c(xmin, xmax), y = rep(y, 2), type = "l", col = "goldenrod3", lty = 3, lwd = 1) + } } } From e4e6f38bb8f22e28c31b3a283a4355e42a1ce412 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 19:46:09 -0500 Subject: [PATCH 550/766] Update cnvkit.R --- scripts/cnvkit.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 9f5d07e0..1bc503c1 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -44,7 +44,7 @@ opt <- arguments$options { for (i in c(1, 2, 4, 6, 10)) { y = log2(((purity*i) + (1-purity)*2)/((purity*ploidy) + (1-purity)*2)) - if (y<2) { + if (!is.na(y) & y<2) { points(x = c(xmin, xmax), y = rep(y, 2), type = "l", col = "goldenrod3", lty = 3, lwd = 1) } } From 1290c8ad53c0946538f536e49d84a82e442fb606 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 20:02:30 -0500 Subject: [PATCH 551/766] ++ --- copy_number/cnvkit.mk | 9 ++++++++- scripts/cnvkit.R | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 4b188018..b40a512a 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -13,7 +13,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/totalcopy/$(sample).pdf) + $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/totalcopy/$(sample).pdf) \ + cnvkit/summary/totalcopy.txt ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -91,6 +92,12 @@ cnvkit/plots/totalcopy/$1.pdf : cnvkit/cnr/$1.cnr cnvkit/totalcopy/$1.txt facets endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call cnvkit-total-copy,$(tumor.$(pair)),$(normal.$(pair))))) + +cnvkit/summary/totalcopy.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) + $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ + --option 6 \ + --sample_name '$(TUMOR_SAMPLES)'") + ..DUMMY := $(shell mkdir -p version; \ diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 1bc503c1..1f122a04 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -212,4 +212,14 @@ if (as.numeric(opt$option) == 1) { add_totalcopies(purity, ploidy, cytoband[1,"start"]-1E9, cytoband[nrow(cytoband),"end"]) dev.off() +} else if (as.numeric(opt$option) == 6) { + sample_names = unlist(strsplit(x = opt$sample_name, split = " ", fixed = TRUE)) + data = list() + for (i in 1:length(sample_names)) { + data[[i]] = readr::read_tsv(file = paste0("cnvkit/totalcopy/", sample_names[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + data = do.call(bind_rows, data) + readr::write_tsv(x = data, file = "cnvkit/summary/totalcopy.txt", col_names = TRUE, append = FALSE) + } \ No newline at end of file From d024824c3e7d978b4854466ecd2e9d2e157618ac Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 20:03:47 -0500 Subject: [PATCH 552/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index b40a512a..eb9754ae 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -95,6 +95,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ cnvkit/summary/totalcopy.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 6 \ --sample_name '$(TUMOR_SAMPLES)'") From 4ea23a93891b4a5c758fb1808577bd5f20b17ea5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 25 Nov 2022 20:09:21 -0500 Subject: [PATCH 553/766] ++ --- copy_number/cnvkit.mk | 11 +++++++++-- scripts/cnvkit.R | 14 +++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index eb9754ae..1041b46f 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -14,7 +14,8 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/totalcopy/$(sample).pdf) \ - cnvkit/summary/totalcopy.txt + cnvkit/summary/total_copy.txt \ + cnvkit/summary/log2_ratio.txt ONTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v3_cnvkit_ontarget.bed OFFTARGET_FILE = $(HOME)/share/lib/bed_files/MSK-IMPACT-v4_cnvkit_offtarget.bed @@ -93,11 +94,17 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call cnvkit-total-copy,$(tumor.$(pair)),$(normal.$(pair))))) -cnvkit/summary/totalcopy.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) +cnvkit/summary/total_copy.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalcopy/$(sample).txt) $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 6 \ --sample_name '$(TUMOR_SAMPLES)'") + +cnvkit/summary/log2_ratio.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) + $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ + --option 7 \ + --sample_name '$(TUMOR_SAMPLES)'") diff --git a/scripts/cnvkit.R b/scripts/cnvkit.R index 1f122a04..1e7b0a6e 100644 --- a/scripts/cnvkit.R +++ b/scripts/cnvkit.R @@ -220,6 +220,18 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() } data = do.call(bind_rows, data) - readr::write_tsv(x = data, file = "cnvkit/summary/totalcopy.txt", col_names = TRUE, append = FALSE) + readr::write_tsv(x = data, file = "cnvkit/summary/total_copy.txt", col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option) == 7) { + sample_names = unlist(strsplit(x = opt$sample_name, split = " ", fixed = TRUE)) + data = list() + for (i in 1:length(sample_names)) { + data[[i]] = readr::read_tsv(file = paste0("cnvkit/cnr/", sample_names[i], ".cnr"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(chromosome, start, end, log2, weight) %>% + dplyr::mutate(sample_name = sample_names[i]) + } + data = do.call(bind_rows, data) + readr::write_tsv(x = data, file = "cnvkit/summary/log2_ratio.txt", col_names = TRUE, append = FALSE) } \ No newline at end of file From 0d5955b567ce58e3f952cd995cf0f2f076f9947a Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 29 Nov 2022 12:27:06 -0500 Subject: [PATCH 554/766] ++ --- clonality/pyclone_vi.mk | 11 ++++++++++- scripts/pyclone_vi.R | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index da78320d..2965da74 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -11,7 +11,8 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \ + pyclone_vi/summary.txt define r-sufam @@ -98,6 +99,14 @@ endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call r-pyclone,$(set)))) + +pyclone_vi/summary.txt : $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(PYCLONE_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + --option 4 \ + --sample_set '$(SAMPLE_SETS)'") + + ..DUMMY := $(shell mkdir -p version; \ R --version > version/pyclone_vi.txt) .DELETE_ON_ERROR: diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 0cbffb4e..a0a8cef7 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -214,4 +214,13 @@ if (as.numeric(opt$option) == 1) { height = unit(40, "cm"))) dev.off() +} else if (as.numeric(opt$option) == 4) { + sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ")) + pyclone = list() + for (i in 1:length(sample_set)) { + pyclone[[i]] = readr::read_tsv(file = paste0("pyclone_vi/", sample_set[i], "/summary/by_loci.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + pyclone = do.call(bind_rows, pyclone) + readr::write_tsv(x = pyclone, file = "pyclone_vi/summary.txt", append = FALSE, col_names = TRUE) } From 92ada9baeeea04d16254f1fc0dca5bfc877cba21 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:28:00 -0500 Subject: [PATCH 555/766] Update genomesummary.mk --- summary/genomesummary.mk | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 186f2059..42cb3ab8 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -3,24 +3,23 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).fga) \ - genome_stats/genome_altered.tsv \ - $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) \ - genome_stats/lst_score.tsv \ - $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ - genome_stats/ntai_score.tsv \ - $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ - genome_stats/myriad_score.tsv \ - summary/tsv/genome_summary.tsv \ - summary/genome_summary.xlsx +genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ + genome_summary/genome_altered/summary.txt +# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) \ +# genome_stats/lst_score.tsv \ +# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ +# genome_stats/ntai_score.tsv \ +# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ +# genome_stats/myriad_score.tsv \ +# summary/tsv/genome_summary.tsv \ +# summary/genome_summary.xlsx -GENOME_ALTERED = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).fga) LST_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst) NTAI_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai) MYRIAD_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) define fraction-genome-altered -genome_stats/$1_$2.fga : facets/cncf/$1_$2.Rdata +genome_summary/genome_altered/$1_$2.txt : facets/cncf/$1_$2.Rdata $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 1 --file_in $$(<) --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ @@ -47,7 +46,7 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) -genome_stats/genome_altered.tsv : $(GENOME_ALTERED) +genome_summary/genome_altered/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ mkdir -p genome_stats && \ cat $(GENOME_ALTERED) > $(@)") From e82b4edfbbbb91289ce29901e494060d97736012 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:35:51 -0500 Subject: [PATCH 556/766] ++ --- summary/genomesummary.R | 9 +++++++-- summary/genomesummary.mk | 18 +++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 6027ca26..6d39994f 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -1,12 +1,16 @@ #!/usr/bin/env Rscript suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("readr")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) } args_list <- list(make_option("--option", default = NA, type = 'character', help = "which analysis to do"), + make_option("--sample_name", default = NA, type = 'character', help = "sample name"), make_option("--file_in", default = NA, type = 'character', help = "input file name"), make_option("--file_out", default = NA, type = 'character', help = "output file name")) parser <- OptionParser(usage = "%prog", option_list = args_list) @@ -29,8 +33,9 @@ if (as.numeric(opt$option) == 1) { } else { genome_altered = 0 } - cat(paste0(gsub("facets/cncf/","", gsub(".Rdata", "", opt$file_in)), "\t", genome_altered), file = opt$file_out, append=FALSE) - cat("\n", file = opt$file_out, append=TRUE) + x = dplyr::tibble(sample_name = as.character(opt$sample_name), + genome_altered = genome_altered) + readr::write_stv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 2) { diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 42cb3ab8..c3bd3b6d 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -3,8 +3,8 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ - genome_summary/genome_altered/summary.txt +genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) +# genome_summary/genome_altered/summary.txt # $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) \ # genome_stats/lst_score.tsv \ # $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ @@ -13,14 +13,15 @@ genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$( # genome_stats/myriad_score.tsv \ # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx - -LST_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).lst) -NTAI_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).ntai) -MYRIAD_SCORE = $(foreach set,$(SAMPLE_PAIRS),genome_stats/$(set).mrs) define fraction-genome-altered genome_summary/genome_altered/$1_$2.txt : facets/cncf/$1_$2.Rdata - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 1 --file_in $$(<) --file_out $$(@)") + $$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/genomesummary.R \ + --option 1 \ + --sample_name $1_$2 \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) @@ -48,8 +49,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ genome_summary/genome_altered/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(GENOME_ALTERED) > $(@)") + ") genome_stats/lst_score.tsv : $(LST_SCORE) $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ From 8ec8df9875b88c60de64b902e6530c324d7905ee Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:37:48 -0500 Subject: [PATCH 557/766] Update genomesummary.R --- summary/genomesummary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 6d39994f..cd71753b 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -35,7 +35,7 @@ if (as.numeric(opt$option) == 1) { } x = dplyr::tibble(sample_name = as.character(opt$sample_name), genome_altered = genome_altered) - readr::write_stv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 2) { From 041d6aed677b378f7a3d682cc65312afec22ddf8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:41:44 -0500 Subject: [PATCH 558/766] ++ --- summary/genomesummary.R | 5 +++-- summary/genomesummary.mk | 18 +++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index cd71753b..ff05df41 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -197,8 +197,9 @@ if (as.numeric(opt$option) == 1) { segs = fix_facet_segs(dat) chromInfo = GetChrominfo() lst = score_LST(segs, chromInfo) - cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", lst$score), file = opt$file_out, append=FALSE) - cat("\n", file = opt$file_out, append=TRUE) + x = dplyr::tibble(sample_name = as.character(opt$sample_name), + lst = lst) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 3) { diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index c3bd3b6d..7a4011de 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -3,13 +3,12 @@ include modules/Makefile.inc LOGDIR ?= log/genome_summary.$(NOW) -genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) -# genome_summary/genome_altered/summary.txt -# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).lst) \ -# genome_stats/lst_score.tsv \ +genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) + # $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ -# genome_stats/ntai_score.tsv \ # $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ +# genome_summary/genome_altered/summary.txt # genome_stats/myriad_score.tsv \ # summary/tsv/genome_summary.tsv \ # summary/genome_summary.xlsx @@ -27,8 +26,13 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call fraction-genome-altered,$(tumor.$(pair)),$(normal.$(pair))))) define lst-score -genome_stats/$1_$2.lst : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 2 --file_in $$< --file_out $$(@)") +genome_summary/lst/$1_$2.txt : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/genomesummary.R \ + --option 2 \ + --sample_name $1_$2 \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) From fbc87a9e615a5a09d6e3ba021f3ad17684846271 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:44:33 -0500 Subject: [PATCH 559/766] Update genomesummary.R --- summary/genomesummary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index ff05df41..3a2ab9c0 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -198,7 +198,7 @@ if (as.numeric(opt$option) == 1) { chromInfo = GetChrominfo() lst = score_LST(segs, chromInfo) x = dplyr::tibble(sample_name = as.character(opt$sample_name), - lst = lst) + lst = lst$score) readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 3) { From 357f7336a03133895a89454618fc7a4412db1cc0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:47:52 -0500 Subject: [PATCH 560/766] ++ --- summary/genomesummary.R | 5 +++-- summary/genomesummary.mk | 13 +++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 3a2ab9c0..14a99cef 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -332,8 +332,9 @@ if (as.numeric(opt$option) == 1) { segs = fix_facet_segs(dat) chromInfo = GetChrominfo() ntai = score_ntAI(segs, chromInfo) - cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", ntai$score), file = opt$file_out, append=FALSE) - cat("\n", file = opt$file_out, append=TRUE) + x = dplyr::tibble(sample_name = as.character(opt$sample_name), + ntai = nati$score) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 4) { diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 7a4011de..7065a68e 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -4,9 +4,9 @@ LOGDIR ?= log/genome_summary.$(NOW) genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ - $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) \ + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) -# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).ntai) \ # $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ # genome_summary/genome_altered/summary.txt # genome_stats/myriad_score.tsv \ @@ -38,8 +38,13 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) define ntai-score -genome_stats/$1_$2.ntai : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 3 --file_in $$< --file_out $$(@)") +genome_summary/ntai/$1_$2.ntai : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/genomesummary.R \ + --option 3 \ + --sample_name $1_$2 \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) From 174e8393befcc91799e35236285b828b539726d2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:48:43 -0500 Subject: [PATCH 561/766] Update genomesummary.mk --- summary/genomesummary.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 7065a68e..19bae39a 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -38,7 +38,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call lst-score,$(tumor.$(pair)),$(normal.$(pair))))) define ntai-score -genome_summary/ntai/$1_$2.ntai : facets/cncf/$1_$2.txt +genome_summary/ntai/$1_$2.txt : facets/cncf/$1_$2.txt $$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \ $(RSCRIPT) modules/summary/genomesummary.R \ --option 3 \ From 7012cbd411dfba2cd2e6997a712befeb52e2d38b Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:50:01 -0500 Subject: [PATCH 562/766] Update genomesummary.R --- summary/genomesummary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 14a99cef..6c3d6b3f 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -333,7 +333,7 @@ if (as.numeric(opt$option) == 1) { chromInfo = GetChrominfo() ntai = score_ntAI(segs, chromInfo) x = dplyr::tibble(sample_name = as.character(opt$sample_name), - ntai = nati$score) + ntai = ntai$score) readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option) == 4) { From e7dd8aea1fd9f80c61f07fd2862c8a01798a06be Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 15:52:43 -0500 Subject: [PATCH 563/766] ++ --- summary/genomesummary.R | 5 +++-- summary/genomesummary.mk | 13 +++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 6c3d6b3f..b9374476 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -503,8 +503,9 @@ if (as.numeric(opt$option) == 1) { segs = fix_facet_segs(dat) chromInfo = GetChrominfo() mrs = score_myriad_HRD(segs) - cat(paste0(gsub("facets/cncf/","", gsub(".cncf.txt", "", opt$file_in)), "\t", mrs$score), file = opt$file_out, append=FALSE) - cat("\n", file = opt$file_out, append=TRUE) + x = dplyr::tibble(sample_name = as.character(opt$sample_name), + mrs = mrs$score) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) } else if (as.numeric(opt$option)==5) { diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 19bae39a..2f689e6a 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -5,9 +5,9 @@ LOGDIR ?= log/genome_summary.$(NOW) genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) \ - $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) \ + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) -# $(foreach pair,$(SAMPLE_PAIRS),genome_stats/$(pair).mrs) \ # genome_summary/genome_altered/summary.txt # genome_stats/myriad_score.tsv \ # summary/tsv/genome_summary.tsv \ @@ -50,8 +50,13 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call ntai-score,$(tumor.$(pair)),$(normal.$(pair))))) define myriad-score -genome_stats/$1_$2.mrs : facets/cncf/$1_$2.txt - $$(call RUN,-n 1 -s 3G -m 6G,"$(RSCRIPT) modules/summary/genomesummary.R --option 4 --file_in $$< --file_out $$(@)") +genome_summary/myriad_score/$1_$2.txt : facets/cncf/$1_$2.txt + $$(call RUN,-n 1 -s 3G -m 6G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/genomesummary.R \ + --option 4 \ + --sample_name $1_$2 \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) From 395e0888dc1b753e704d0819a6c370698bc6d75c Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 16:02:16 -0500 Subject: [PATCH 564/766] ++ --- summary/genomesummary.R | 25 ++++++++++++------------- summary/genomesummary.mk | 40 ++++++++-------------------------------- 2 files changed, 20 insertions(+), 45 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index b9374476..9da275ff 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -35,7 +35,7 @@ if (as.numeric(opt$option) == 1) { } x = dplyr::tibble(sample_name = as.character(opt$sample_name), genome_altered = genome_altered) - readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option) == 2) { @@ -199,7 +199,7 @@ if (as.numeric(opt$option) == 1) { lst = score_LST(segs, chromInfo) x = dplyr::tibble(sample_name = as.character(opt$sample_name), lst = lst$score) - readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option) == 3) { @@ -334,7 +334,7 @@ if (as.numeric(opt$option) == 1) { ntai = score_ntAI(segs, chromInfo) x = dplyr::tibble(sample_name = as.character(opt$sample_name), ntai = ntai$score) - readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option) == 4) { @@ -505,18 +505,17 @@ if (as.numeric(opt$option) == 1) { mrs = score_myriad_HRD(segs) x = dplyr::tibble(sample_name = as.character(opt$sample_name), mrs = mrs$score) - readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = FALSE) + readr::write_tsv(x = x, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option)==5) { - file_names = c("genome_altered.tsv", "lst_score.tsv", "myriad_score.tsv", "ntai_score.tsv") - summary_scores = NULL - for (i in 1:length(file_names)) { - data = read.csv(file=paste0("genome_stats/", file_names[i]), header=FALSE, sep="\t", stringsAsFactors=FALSE) - summary_scores = cbind(summary_scores, data[,2]) + sample_names = unlist(strsplit(opt$sample_name, split = " ", fixed = TRUE)) + data = list() + for (i in 1:length(sample_names)) { + data[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names, ".txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() } - summary_scores = cbind(data[,1], summary_scores) - colnames(summary_scores) = c("sample_names", gsub(".tsv", "", file_names)) - write.table(summary_scores, file="summary/tsv/genome_summary.tsv", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) - + data = do.call(bind_rows, data) + readr::write_tsv(x = data, file = as.character(opt$file_out), append = FALSE, col_names = TRUE) } diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 2f689e6a..917c155d 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -6,12 +6,8 @@ LOGDIR ?= log/genome_summary.$(NOW) genome_summary : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) \ $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) \ $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) \ - $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) - -# genome_summary/genome_altered/summary.txt -# genome_stats/myriad_score.tsv \ -# summary/tsv/genome_summary.tsv \ -# summary/genome_summary.xlsx + $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) \ + genome_summary/summary.txt define fraction-genome-altered genome_summary/genome_altered/$1_$2.txt : facets/cncf/$1_$2.Rdata @@ -61,33 +57,13 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call myriad-score,$(tumor.$(pair)),$(normal.$(pair))))) -genome_summary/genome_altered/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - ") +genome_summary/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genome_altered/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/lst/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/ntai/$(pair).txt) $(foreach pair,$(SAMPLE_PAIRS),genome_summary/myriad_score/$(pair).txt) + $(call RUN,-n 1 -s 4G -m 8G,"set -o pipefail && \ + $(RSCRIPT) modules/summary/genomesummary.R \ + --option 5 \ + --sample_name '$(SAMPLE_PAIRS)' \ + --file_out $$(@)") -genome_stats/lst_score.tsv : $(LST_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(LST_SCORE) > $(@)") - -genome_stats/ntai_score.tsv : $(NTAI_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(NTAI_SCORE) > $(@)") - -genome_stats/myriad_score.tsv : $(MYRIAD_SCORE) - $(call RUN,-n 1 -s 4G -m 4G,"set -o pipefail && \ - mkdir -p genome_stats && \ - cat $(MYRIAD_SCORE) > $(@)") - -summary/tsv/genome_summary.tsv : genome_stats/genome_altered.tsv genome_stats/lst_score.tsv genome_stats/ntai_score.tsv genome_stats/myriad_score.tsv - $(call RUN,-n 1 -s 6G -m 8G,"set -o pipefail && \ - mkdir -p genome_stats && \ - $(RSCRIPT) modules/summary/genomesummary.R --option 5") - -summary/genome_summary.xlsx : summary/tsv/genome_summary.tsv - $(call RUN,-n 1 -s 4G -m 4G,"python modules/summary/genome_summary_excel.py") - .DELETE_ON_ERROR: .SECONDARY: .PHONY: genome_summary From 5b1fc9b13f5abc94a2521753de22411f290752ff Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 16:04:16 -0500 Subject: [PATCH 565/766] ++ --- summary/genomesummary.R | 2 +- summary/genomesummary.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index 9da275ff..a7874b96 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -517,5 +517,5 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() } data = do.call(bind_rows, data) - readr::write_tsv(x = data, file = as.character(opt$file_out), append = FALSE, col_names = TRUE) + readr::write_tsv(x = data, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } diff --git a/summary/genomesummary.mk b/summary/genomesummary.mk index 917c155d..35424cc8 100644 --- a/summary/genomesummary.mk +++ b/summary/genomesummary.mk @@ -62,7 +62,7 @@ genome_summary/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),genome_summary/genom $(RSCRIPT) modules/summary/genomesummary.R \ --option 5 \ --sample_name '$(SAMPLE_PAIRS)' \ - --file_out $$(@)") + --file_out $(@)") .DELETE_ON_ERROR: .SECONDARY: From d84e00ced861607b31dd7b8d72b8c04ed201206d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 16:07:52 -0500 Subject: [PATCH 566/766] Update genomesummary.R --- summary/genomesummary.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index a7874b96..ede35eb4 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -512,7 +512,7 @@ if (as.numeric(opt$option) == 1) { sample_names = unlist(strsplit(opt$sample_name, split = " ", fixed = TRUE)) data = list() for (i in 1:length(sample_names)) { - data[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names, ".txt"), + data[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() } From b9ce5caf4067bc234eacc3c77dc8ab35c81de636 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 1 Dec 2022 16:11:42 -0500 Subject: [PATCH 567/766] Update genomesummary.R --- summary/genomesummary.R | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/summary/genomesummary.R b/summary/genomesummary.R index ede35eb4..be20985e 100644 --- a/summary/genomesummary.R +++ b/summary/genomesummary.R @@ -510,12 +510,42 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option)==5) { sample_names = unlist(strsplit(opt$sample_name, split = " ", fixed = TRUE)) - data = list() + x1 = list() for (i in 1:length(sample_names)) { - data[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names[i], ".txt"), + x1[[i]] = readr::read_tsv(file = paste0("genome_summary/genome_altered/", sample_names[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() } - data = do.call(bind_rows, data) + x1 = do.call(bind_rows, x1) + + x2 = list() + for (i in 1:length(sample_names)) { + x2[[i]] = readr::read_tsv(file = paste0("genome_summary/lst/", sample_names[i], ".txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + x2 = do.call(bind_rows, x2) + + x3 = list() + for (i in 1:length(sample_names)) { + x3[[i]] = readr::read_tsv(file = paste0("genome_summary/ntai/", sample_names[i], ".txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + x3 = do.call(bind_rows, x3) + + x4 = list() + for (i in 1:length(sample_names)) { + x4[[i]] = readr::read_tsv(file = paste0("genome_summary/myriad_score/", sample_names[i], ".txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + x4 = do.call(bind_rows, x4) + + data = x1 %>% + dplyr::full_join(x2, by = "sample_name") %>% + dplyr::full_join(x3, by = "sample_name") %>% + dplyr::full_join(x4, by = "sample_name") + readr::write_tsv(x = data, path = as.character(opt$file_out), append = FALSE, col_names = TRUE) } From cca2bf0d44441d6501d129a31b82d775de4b09be Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 19:02:35 -0500 Subject: [PATCH 568/766] ++ --- Makefile | 4 ++++ config.inc | 6 ++---- copy_number/medicc2.R | 34 ++++++++++++++++++++++++++++++++++ copy_number/medicc2.mk | 27 +++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 copy_number/medicc2.R create mode 100644 copy_number/medicc2.mk diff --git a/Makefile b/Makefile index 14b099ff..39b9377e 100644 --- a/Makefile +++ b/Makefile @@ -541,6 +541,10 @@ TARGETS += krona_classify krona_classify : $(call RUN_MAKE,modules/virus/krona_classify.mk) +TARGETS += medicc2 +medicc2 : + $(call RUN_MAKE,modules/copy_number/medicc2.mk) + #================================================== # reports diff --git a/config.inc b/config.inc index 41fbc52b..62263cef 100644 --- a/config.inc +++ b/config.inc @@ -21,7 +21,6 @@ JRFLAB_MODULES_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.4 ONCOTATOR_ENV = $(HOME)/share/usr/venv/oncotator-1.9.2.0 VEP_ENV = $(HOME)/share/usr/anaconda-envs/variant-effect-predictor-86 ASCAT_ENV = $(HOME)/share/usr/anaconda-envs/ascat -MEDICC_ENV = $(HOME)/share/usr/anaconda-envs/medicc INNOVATION_ENV = $(HOME)/share/usr/env/innovation-lab-0.0.1 PIGZ_ENV ?= $(HOME)/share/usr/env/pigz-2.6 KALLISTO_ENV ?= $(HOME)/share/usr/env/kallisto-0.46.2 @@ -37,6 +36,7 @@ VIOLA_ENV = $(HOME)/share/usr/env/viola-sv-1.0.2 SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9 STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11 +MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1 JARDIR ?= $(HOME)/share/usr/lib/java @@ -57,20 +57,18 @@ BGZIP ?= $(HOME)/share/usr/bin/bgzip IGVTOOLS ?= $(HOME)/share/usr/IGVTools/igvtools VCFTOOLS ?= $(HOME)/share/usr/bin/vcftools-0.1.10 VCF_SORT ?= $(PERL) $(HOME)/share/usr/bin/vcfsorter.pl - SNP_EFF_JAR ?= $(JARDIR)/snpEff-4.3.jar SNP_SIFT_JAR ?= $(JARDIR)/SnpSift-4.3.jar SNP_EFF_CONFIG ?= modules/config/snpEff.conf DB_NSFP ?= $(HOME)/share/reference/snpEff-4.1/dbNSFP3.0b1a.hg19.txt.gz NSFP_FIELDS ?= Uniprot_acc_Polyphen2 Uniprot_id_Polyphen2 Polyphen2_HVAR_score Polyphen2_HVAR_pred 1000Gp3_AF ESP6500_AA_AF ESP6500_EA_AF MutationAssessor_pred MutationAssessor_score MutationTaster_pred MutationTaster_score PROVEAN_pred ExAC_Adj_AF clinvar_rs clinvar_clnsig Interpro_domain - CUFFLINKS ?= cufflinks CUFFCMP ?= cuffcompare TOPHAT ?= tophat DEFUSE ?= $(PERL) $(HOME)/share/usr/defuse-0.6.1/scripts/defuse.pl - ONCOFUSE_JAR ?= $(HOME)/share/usr/oncofuse-1.0.9b2/Oncofuse.jar VARSCAN_JAR ?= $(JARDIR)/VarScan.v2.3.9.jar +MDEICC ?= medicc2 ## PICARD tools PICARD_DIR ?= $(JARDIR)/picard-1.92 diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R new file mode 100644 index 00000000..953aaf44 --- /dev/null +++ b/copy_number/medicc2.R @@ -0,0 +1,34 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("magrittr")) + + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--option", default = NA, type = 'character', help = "Which option?"), + make_option("--tumor_name", default = NA, type = 'character', help = "Tumor sample name"), + make_option("--normal_name", default = NA, type = 'character', help = "Normal sample name"), + make_option("--file_in", default = NA, type = 'character', help = "Input file name including path"), + make_option("--file_out", default = NA, type = 'character', help = "Output file name including path")) + +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +if (as.numeric(opt$option) == 1) { + load(as.character(opt$file_in)) + cn_df = out2$jointseg %>% + dplyr::as_tibble() %>% + dplyr::filter(het == 1) %>% + dplyr::select(Chromosome = chrom, + Position = maploc, + Log2_Ratio = cnlr, + B_Allele_Frequency = vafT) + readr::write_tsv(x = cn_df, file = as.character(opt$_file_out), col_names = TRUE, append = FALSE) + +} \ No newline at end of file diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk new file mode 100644 index 00000000..9025eabf --- /dev/null +++ b/copy_number/medicc2.mk @@ -0,0 +1,27 @@ +include modules/Makefile.inc + +LOGDIR ?= log/medicc2.$(NOW) + +medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) + +define aggregate-copynumber +medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata + $$(call RUN,-c -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/medicc.R \ + --option 1 \ + --tumor_sample $1 \ + --normal_sample $2 \ + --file_in $$(<) \ + --file_out $$(@)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call aggregate-copynumber,$(tumor),$(normal)))) + + +..DUMMY := $(shell mkdir -p version; \ + $(MEDICC_ENV)/bin/R --version > version/medicc2.txt; \ + $(MEDICC_ENV)/bin/medicc2 --help >> version/medicc2.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: medicc From 706345b33b89e64eee28a0ad5f4e37890164ae9e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 19:07:52 -0500 Subject: [PATCH 569/766] Update medicc2.mk --- copy_number/medicc2.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 9025eabf..62042512 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -4,7 +4,7 @@ LOGDIR ?= log/medicc2.$(NOW) medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) -define aggregate-copynumber +define aggregate-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata $$(call RUN,-c -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ $(RSCRIPT) modules/copy_number/medicc.R \ @@ -16,7 +16,8 @@ medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call aggregate-copynumber,$(tumor),$(normal)))) + $(eval $(call aggregate-copy-number,$(tumor.$(pair)),$(normal.$(pair))))) + ..DUMMY := $(shell mkdir -p version; \ From 587561f70b777673007914ce20509d3ad12c05cd Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 19:11:01 -0500 Subject: [PATCH 570/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 62042512..50a3cc5a 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -7,7 +7,7 @@ medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) define aggregate-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata $$(call RUN,-c -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/copy_number/medicc.R \ + $(RSCRIPT) modules/copy_number/medicc2.R \ --option 1 \ --tumor_sample $1 \ --normal_sample $2 \ From e85d11820df0acbb36f1f3e09b9203574b629eab Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 19:13:31 -0500 Subject: [PATCH 571/766] ++ --- copy_number/medicc2.R | 4 ++-- copy_number/medicc2.mk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index 953aaf44..fc1b8933 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -11,8 +11,8 @@ if (!interactive()) { } args_list <- list(make_option("--option", default = NA, type = 'character', help = "Which option?"), - make_option("--tumor_name", default = NA, type = 'character', help = "Tumor sample name"), - make_option("--normal_name", default = NA, type = 'character', help = "Normal sample name"), + make_option("--tumor_sample_name", default = NA, type = 'character', help = "Tumor sample name"), + make_option("--normal_sample_name", default = NA, type = 'character', help = "Normal sample name"), make_option("--file_in", default = NA, type = 'character', help = "Input file name including path"), make_option("--file_out", default = NA, type = 'character', help = "Output file name including path")) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 50a3cc5a..a604423d 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -9,8 +9,8 @@ medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata $$(call RUN,-c -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ $(RSCRIPT) modules/copy_number/medicc2.R \ --option 1 \ - --tumor_sample $1 \ - --normal_sample $2 \ + --tumor_sample_name $1 \ + --normal_sample_name $2 \ --file_in $$(<) \ --file_out $$(@)") From 67c8cdb9a43cc333152727db1450744d2ece4f61 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 19:16:06 -0500 Subject: [PATCH 572/766] Update medicc2.R --- copy_number/medicc2.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index fc1b8933..d470dba1 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -29,6 +29,6 @@ if (as.numeric(opt$option) == 1) { Position = maploc, Log2_Ratio = cnlr, B_Allele_Frequency = vafT) - readr::write_tsv(x = cn_df, file = as.character(opt$_file_out), col_names = TRUE, append = FALSE) + readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) -} \ No newline at end of file +} From c4debd2801372d34f8522f7a6927596006e4ed8a Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 20:01:08 -0500 Subject: [PATCH 573/766] ++ --- copy_number/medicc2.R | 20 ++++++++++++++++++-- copy_number/medicc2.mk | 35 +++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index d470dba1..cb33397d 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -4,7 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("magrittr")) - +suppressPackageStartupMessages(library("reshape2")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -28,7 +28,23 @@ if (as.numeric(opt$option) == 1) { dplyr::select(Chromosome = chrom, Position = maploc, Log2_Ratio = cnlr, - B_Allele_Frequency = vafT) + B_Allele_F = 1 - vafT) readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) +} else if (as.numeric(opt$option) == 2) { + tumor_sample_names = unlist(strsplit(x = as.character(opt$tumor_sample_name), split = " ", fixed = TRUE)) + normal_sample_name = unlist(strsplit(x = as.character(opt$normal_sample_name), split = " ", fixed = TRUE)) + cn_df = list() + for (i in 1:length(tumor_sample_names)) { + data_ = readr::read_tsv(file = paste0("medicc2/", tumor_sample_names[i], "/", tumor_sample_names[i], ".txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + colnames(data_) = c("Chromosome", "Position", paste0(tumor_sample_names[i], "_Log2_Ratio"), paste0(tumor_sample_names[i], "_B_Allele_F")) + cn_df[[i]] = data_ %>% + reshape2::melt(id.vars = c("Chromosome", "Position")) + } + cn_df = do.call(bind_rows, cn_df) %>% + reshape2::dcast(Chromosome + Position ~ variable, value.var = "value", fill = 0) + readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) + } diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index a604423d..caf9ef87 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -2,22 +2,37 @@ include modules/Makefile.inc LOGDIR ?= log/medicc2.$(NOW) -medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) +medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) \ + $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).txt) -define aggregate-copy-number +define collect-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata - $$(call RUN,-c -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/copy_number/medicc2.R \ - --option 1 \ - --tumor_sample_name $1 \ - --normal_sample_name $2 \ - --file_in $$(<) \ - --file_out $$(@)") + $$(call RUN,-c -n 1 -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/medicc2.R \ + --option 1 \ + --tumor_sample_name $1 \ + --normal_sample_name $2 \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call aggregate-copy-number,$(tumor.$(pair)),$(normal.$(pair))))) + $(eval $(call collect-copy-number,$(tumor.$(pair)),$(normal.$(pair))))) + + +define aggregate-copy-number +medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) + $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + --option 2 \ + --tumor_sample_name '$(tumors.$1)' \ + --normal_sample_name '$(normal.$1)' \ + --file_out $$(@)") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call aggregate-copy-number,$(set)))) ..DUMMY := $(shell mkdir -p version; \ From af493532a3c3a604ac4f6cadc4ad5a6ee4cf6fbb Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 20:03:27 -0500 Subject: [PATCH 574/766] Update medicc2.R --- copy_number/medicc2.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index cb33397d..2ace4249 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -25,10 +25,11 @@ if (as.numeric(opt$option) == 1) { cn_df = out2$jointseg %>% dplyr::as_tibble() %>% dplyr::filter(het == 1) %>% + dplyr::mutate(vafT = 1 - vafT) %>% dplyr::select(Chromosome = chrom, Position = maploc, Log2_Ratio = cnlr, - B_Allele_F = 1 - vafT) + B_Allele_F = vafT) readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option) == 2) { From d52040aee1f81fa730ce5f2d7ce008e27815ed2c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 20:06:09 -0500 Subject: [PATCH 575/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index caf9ef87..28479c8c 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -23,7 +23,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define aggregate-copy-number medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/pyclone_vi.R \ + $(RSCRIPT) modules/copy_number/medicc2.R \ --option 2 \ --tumor_sample_name '$(tumors.$1)' \ --normal_sample_name '$(normal.$1)' \ From 6c70622c401e4c8eb45718256b890a07158bfad3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 20:18:01 -0500 Subject: [PATCH 576/766] Update medicc2.mk --- copy_number/medicc2.mk | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 28479c8c..ca742efc 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -3,7 +3,8 @@ include modules/Makefile.inc LOGDIR ?= log/medicc2.$(NOW) medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) \ - $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).txt) + $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).txt) \ + $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).tsv) define collect-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata @@ -29,6 +30,14 @@ medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample --normal_sample_name '$(normal.$1)' \ --file_out $$(@)") +medicc2/$1/$1.tsv : medicc2/$1/$1.txt $(foreach sample,$(TUMOR_SAMPLES),facets/$(sample)/$(sample).txt) + $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/copy_number/medicc2.R \ + --option 3 \ + --tumor_sample_name '$(tumors.$1)' \ + --normal_sample_name '$(normal.$1)' \ + --file_in $$(<) \ + --file_out $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ From 8943dfa7c983e80800b9ba96c43f1bfd8c0746ec Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 20:55:09 -0500 Subject: [PATCH 577/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index ca742efc..240f398b 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -30,7 +30,7 @@ medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample --normal_sample_name '$(normal.$1)' \ --file_out $$(@)") -medicc2/$1/$1.tsv : medicc2/$1/$1.txt $(foreach sample,$(TUMOR_SAMPLES),facets/$(sample)/$(sample).txt) +medicc2/$1/$1.tsv : medicc2/$1/$1.txt $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ $(RSCRIPT) modules/copy_number/medicc2.R \ --option 3 \ From c737f2e6b9c18919eeedbdd802e3217842ed3628 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:10:37 -0500 Subject: [PATCH 578/766] Update medicc2.R --- copy_number/medicc2.R | 50 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index 2ace4249..a712347b 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -5,6 +5,7 @@ suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("magrittr")) suppressPackageStartupMessages(library("reshape2")) +suppressPackageStartupMessages(library("copynumber")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -48,4 +49,53 @@ if (as.numeric(opt$option) == 1) { reshape2::dcast(Chromosome + Position ~ variable, value.var = "value", fill = 0) readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) +} else if (as.numeric(opt$option) == 2) { + tumor_sample_names = unlist(strsplit(x = as.character(opt$tumor_sample_name), split = " ", fixed = TRUE)) + normal_sample_name = unlist(strsplit(x = as.character(opt$normal_sample_name), split = " ", fixed = TRUE)) + cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + as.data.frame() + cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", k = 40, verbose = FALSE) + cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 25, normalize = TRUE, fast = FALSE, verbose = FALSE) + + total_copies = cn_segmented %>% + dplyr::select(c("chrom", "start.pos", "end.pos", contains("Log2_Ratio"))) %>% + dplyr::rename(start = start.pos, end = end.pos) %>% + reshape2::melt(id.vars = c("chrom", "start", "end")) %>% + dplyr::select(sample_id = variable, + chrom, start, end, nAB = value) %>% + dplyr::mutate(sample_id = gsub(pattern = "_Log2_Ratio", replacement = "", x = sample_id, fixed = TRUE)) %>% + dplyr::left_join(readr::read_tsv(file = "facets/summary/summary.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::select(sample_id = tumorName, purity, ploidy), + by = "sample_id") %>% + readr::type_convert() %>% + dplyr::mutate(nAB = ((2^nAB)*((purity*ploidy) + (2*(1-purity))) - 2*(1-purity))/purity) %>% + dplyr::select(-purity, -ploidy) + + major_copies = cn_segmented %>% + dplyr::select(c("chrom", "start.pos", "end.pos", contains("B_Allele_F"))) %>% + dplyr::rename(start = start.pos, end = end.pos) %>% + reshape2::melt(id.vars = c("chrom", "start", "end")) %>% + dplyr::select(sample_id = variable, + chrom, start, end, nB = value) %>% + dplyr::mutate(sample_id = gsub(pattern = "_B_Allele_F", replacement = "", x = sample_id, fixed = TRUE)) %>% + dplyr::left_join(readr::read_tsv(file = "facets/summary/summary.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::select(sample_id = tumorName, purity, ploidy), + by = "sample_id") %>% + dplyr::left_join(total_copies, by = c("sample_id", "chrom", "start", "end")) %>% + readr::type_convert() %>% + dplyr::mutate(nB = (nB*(2 - 2*purity + purity*nAB) - 1 + purity)/purity) %>% + dplyr::mutate(nAB = round(nAB), + nB = round(nB)) %>% + dplyr::mutate(nAB = ifelse(is.na(nAB), 0, nAB), + nB = ifelse(is.na(nB), 0, nB)) %>% + dplyr::mutate(nAB = ifelse(nAB<0, 0, nAB), + nB = ifelse(nB<0, 0, nB)) %>% + dplyr::mutate(cn_a = nAB - nB) %>% + dplyr::mutate(cn_b = nB) %>% + dplyr::mutate(cn_a = ifelse(cn_a < 0, 0, cn_a), + cn_b = ifelse(cn_a < 0, 0, cn_b)) %>% + dplyr::select(-purity, -ploidy, -nAB, -nB) + + readr::write_tsv(x = major_copies, file = as.charcater(opt$file_out), col_names = TRUE, append = FALSE) } From bba6e1de7a4ccf1867f415fef4e18052bd858bec Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:15:19 -0500 Subject: [PATCH 579/766] Update medicc2.R --- copy_number/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index a712347b..8b3a7de0 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -97,5 +97,5 @@ if (as.numeric(opt$option) == 1) { cn_b = ifelse(cn_a < 0, 0, cn_b)) %>% dplyr::select(-purity, -ploidy, -nAB, -nB) - readr::write_tsv(x = major_copies, file = as.charcater(opt$file_out), col_names = TRUE, append = FALSE) + readr::write_tsv(x = major_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) } From a3301289433ba880912cbf99b5435bd275531a63 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:19:53 -0500 Subject: [PATCH 580/766] Update medicc2.R --- copy_number/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index 8b3a7de0..0bb8bbf1 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -49,7 +49,7 @@ if (as.numeric(opt$option) == 1) { reshape2::dcast(Chromosome + Position ~ variable, value.var = "value", fill = 0) readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) -} else if (as.numeric(opt$option) == 2) { +} else if (as.numeric(opt$option) == 3) { tumor_sample_names = unlist(strsplit(x = as.character(opt$tumor_sample_name), split = " ", fixed = TRUE)) normal_sample_name = unlist(strsplit(x = as.character(opt$normal_sample_name), split = " ", fixed = TRUE)) cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>% From 0a9c4033ede4dcc074a8297c0b282925f4157e65 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:33:04 -0500 Subject: [PATCH 581/766] ++ --- config.inc | 2 +- copy_number/medicc2.mk | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/config.inc b/config.inc index 62263cef..fb718e33 100644 --- a/config.inc +++ b/config.inc @@ -68,7 +68,7 @@ TOPHAT ?= tophat DEFUSE ?= $(PERL) $(HOME)/share/usr/defuse-0.6.1/scripts/defuse.pl ONCOFUSE_JAR ?= $(HOME)/share/usr/oncofuse-1.0.9b2/Oncofuse.jar VARSCAN_JAR ?= $(JARDIR)/VarScan.v2.3.9.jar -MDEICC ?= medicc2 +MEDICC ?= medicc2 ## PICARD tools PICARD_DIR ?= $(JARDIR)/picard-1.92 diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 240f398b..9918af7e 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -4,7 +4,8 @@ LOGDIR ?= log/medicc2.$(NOW) medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) \ $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).txt) \ - $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).tsv) + $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set).tsv) \ + $(foreach set,$(SAMPLE_SETS),medicc2/$(set)/$(set)_summary.tsv) define collect-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata @@ -42,6 +43,23 @@ medicc2/$1/$1.tsv : medicc2/$1/$1.txt endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call aggregate-copy-number,$(set)))) + + +define r-medicc-2 +medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv + $$(call RUN,-c -n 4 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ + $$(MEDICC) \ + $$(<) \ + medicc2/$1/ \ + --input-type tsv \ + --normal-name diploid \ + --plot both \ + --maxcn 10 \ + --n-cores 4") + +endef +$(foreach set,$(SAMPLE_SETS),\ + $(eval $(call aggregate-copy-number,$(set)))) ..DUMMY := $(shell mkdir -p version; \ From fea5f5d0a0f462cc0f24ebd09b50c1506de53bb2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:33:37 -0500 Subject: [PATCH 582/766] Update medicc2.mk --- copy_number/medicc2.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 9918af7e..7dfcd25c 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -45,7 +45,7 @@ $(foreach set,$(SAMPLE_SETS),\ $(eval $(call aggregate-copy-number,$(set)))) -define r-medicc-2 +define r-medicc2 medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv $$(call RUN,-c -n 4 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ $$(MEDICC) \ @@ -59,7 +59,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv endef $(foreach set,$(SAMPLE_SETS),\ - $(eval $(call aggregate-copy-number,$(set)))) + $(eval $(call r-medicc2,$(set)))) ..DUMMY := $(shell mkdir -p version; \ From d339a520d0ddf337133b4e2e207bd69532882160 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:39:49 -0500 Subject: [PATCH 583/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 7dfcd25c..cd22be72 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -54,7 +54,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv --input-type tsv \ --normal-name diploid \ --plot both \ - --maxcn 10 \ + --maxcn 8 \ --n-cores 4") endef From de5d7d6421268e0f7ab46693f016f2864a8c1792 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 22:56:16 -0500 Subject: [PATCH 584/766] Update medicc2.mk --- copy_number/medicc2.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index cd22be72..15ec4adb 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -53,6 +53,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv medicc2/$1/ \ --input-type tsv \ --normal-name diploid \ + --total-copy-numbers True \ --plot both \ --maxcn 8 \ --n-cores 4") From 58a97bc1b9d84a1732f86f66be17b82251c13184 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 23:03:20 -0500 Subject: [PATCH 585/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 15ec4adb..94e48189 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -53,7 +53,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv medicc2/$1/ \ --input-type tsv \ --normal-name diploid \ - --total-copy-numbers True \ + --total-copy-numbers \ --plot both \ --maxcn 8 \ --n-cores 4") From 912e92c88dd10863bb386daf23a9375f41ff363c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 9 Dec 2022 23:09:08 -0500 Subject: [PATCH 586/766] ++ --- copy_number/medicc2.R | 3 ++- copy_number/medicc2.mk | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/copy_number/medicc2.R b/copy_number/medicc2.R index 0bb8bbf1..4fee680e 100644 --- a/copy_number/medicc2.R +++ b/copy_number/medicc2.R @@ -94,7 +94,8 @@ if (as.numeric(opt$option) == 1) { dplyr::mutate(cn_a = nAB - nB) %>% dplyr::mutate(cn_b = nB) %>% dplyr::mutate(cn_a = ifelse(cn_a < 0, 0, cn_a), - cn_b = ifelse(cn_a < 0, 0, cn_b)) %>% + cn_b = ifelse(cn_a < 0, 0, cn_b), + cn_t = nAB) %>% dplyr::select(-purity, -ploidy, -nAB, -nB) readr::write_tsv(x = major_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 94e48189..3af7b557 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -54,6 +54,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv --input-type tsv \ --normal-name diploid \ --total-copy-numbers \ + --input-allele-columns 'cn_t' \ --plot both \ --maxcn 8 \ --n-cores 4") From 2fca098ceb6da23c8b4c215d06581c377b49ccde Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:02:08 -0500 Subject: [PATCH 587/766] ++ --- copy_number/medicc2.mk | 8 ++++---- {copy_number => scripts}/medicc2.R | 33 ++++-------------------------- 2 files changed, 8 insertions(+), 33 deletions(-) rename {copy_number => scripts}/medicc2.R (70%) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 3af7b557..13585253 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -10,7 +10,7 @@ medicc : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) \ define collect-copy-number medicc2/$1/$1.txt : facets/cncf/$1_$2.Rdata $$(call RUN,-c -n 1 -s 1G -m 2G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/copy_number/medicc2.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \ --option 1 \ --tumor_sample_name $1 \ --normal_sample_name $2 \ @@ -25,7 +25,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ define aggregate-copy-number medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample).txt) $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/copy_number/medicc2.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \ --option 2 \ --tumor_sample_name '$(tumors.$1)' \ --normal_sample_name '$(normal.$1)' \ @@ -33,7 +33,7 @@ medicc2/$1/$1.txt : $(foreach sample,$(TUMOR_SAMPLES),medicc2/$(sample)/$(sample medicc2/$1/$1.tsv : medicc2/$1/$1.txt $$(call RUN,-c -n 1 -s 2G -m 4G -v $(MEDICC_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/copy_number/medicc2.R \ + $(RSCRIPT) $(SCRIPTS_DIR)/medicc2.R \ --option 3 \ --tumor_sample_name '$(tumors.$1)' \ --normal_sample_name '$(normal.$1)' \ @@ -54,7 +54,7 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv --input-type tsv \ --normal-name diploid \ --total-copy-numbers \ - --input-allele-columns 'cn_t' \ + --input-allele-columns 'nAB' \ --plot both \ --maxcn 8 \ --n-cores 4") diff --git a/copy_number/medicc2.R b/scripts/medicc2.R similarity index 70% rename from copy_number/medicc2.R rename to scripts/medicc2.R index 4fee680e..deddb670 100644 --- a/copy_number/medicc2.R +++ b/scripts/medicc2.R @@ -55,8 +55,8 @@ if (as.numeric(opt$option) == 1) { cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% as.data.frame() - cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", k = 40, verbose = FALSE) - cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 25, normalize = TRUE, fast = FALSE, verbose = FALSE) + cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 1.5, k = 25, verbose = FALSE) + cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 15, normalize = FALSE, fast = FALSE, verbose = FALSE) total_copies = cn_segmented %>% dplyr::select(c("chrom", "start.pos", "end.pos", contains("Log2_Ratio"))) %>% @@ -70,33 +70,8 @@ if (as.numeric(opt$option) == 1) { by = "sample_id") %>% readr::type_convert() %>% dplyr::mutate(nAB = ((2^nAB)*((purity*ploidy) + (2*(1-purity))) - 2*(1-purity))/purity) %>% + dplyr::mutate(nAB = round(nAB)) %>% dplyr::select(-purity, -ploidy) - major_copies = cn_segmented %>% - dplyr::select(c("chrom", "start.pos", "end.pos", contains("B_Allele_F"))) %>% - dplyr::rename(start = start.pos, end = end.pos) %>% - reshape2::melt(id.vars = c("chrom", "start", "end")) %>% - dplyr::select(sample_id = variable, - chrom, start, end, nB = value) %>% - dplyr::mutate(sample_id = gsub(pattern = "_B_Allele_F", replacement = "", x = sample_id, fixed = TRUE)) %>% - dplyr::left_join(readr::read_tsv(file = "facets/summary/summary.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% - dplyr::select(sample_id = tumorName, purity, ploidy), - by = "sample_id") %>% - dplyr::left_join(total_copies, by = c("sample_id", "chrom", "start", "end")) %>% - readr::type_convert() %>% - dplyr::mutate(nB = (nB*(2 - 2*purity + purity*nAB) - 1 + purity)/purity) %>% - dplyr::mutate(nAB = round(nAB), - nB = round(nB)) %>% - dplyr::mutate(nAB = ifelse(is.na(nAB), 0, nAB), - nB = ifelse(is.na(nB), 0, nB)) %>% - dplyr::mutate(nAB = ifelse(nAB<0, 0, nAB), - nB = ifelse(nB<0, 0, nB)) %>% - dplyr::mutate(cn_a = nAB - nB) %>% - dplyr::mutate(cn_b = nB) %>% - dplyr::mutate(cn_a = ifelse(cn_a < 0, 0, cn_a), - cn_b = ifelse(cn_a < 0, 0, cn_b), - cn_t = nAB) %>% - dplyr::select(-purity, -ploidy, -nAB, -nB) - - readr::write_tsv(x = major_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) + readr::write_tsv(x = total_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) } From fdcb7233a838cc5c0bd900205764d44eea3ad213 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:12:17 -0500 Subject: [PATCH 588/766] Update medicc2.R --- scripts/medicc2.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index deddb670..613f673a 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -55,8 +55,8 @@ if (as.numeric(opt$option) == 1) { cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% as.data.frame() - cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 1.5, k = 25, verbose = FALSE) - cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 15, normalize = FALSE, fast = FALSE, verbose = FALSE) + cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 2.5, k = 25, verbose = FALSE) + cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 10, normalize = FALSE, fast = FALSE, verbose = FALSE) total_copies = cn_segmented %>% dplyr::select(c("chrom", "start.pos", "end.pos", contains("Log2_Ratio"))) %>% From 6f43052835335247bfa638f8f7e21bd070c4421f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:12:51 -0500 Subject: [PATCH 589/766] Update medicc2.R --- scripts/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index 613f673a..b1272742 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -70,7 +70,7 @@ if (as.numeric(opt$option) == 1) { by = "sample_id") %>% readr::type_convert() %>% dplyr::mutate(nAB = ((2^nAB)*((purity*ploidy) + (2*(1-purity))) - 2*(1-purity))/purity) %>% - dplyr::mutate(nAB = round(nAB)) %>% + dplyr::mutate(nAB = floor(nAB)) %>% dplyr::select(-purity, -ploidy) readr::write_tsv(x = total_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) From 14fd8bd8bbd0828f675219a510d12b8642ee7cd0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:15:03 -0500 Subject: [PATCH 590/766] Update medicc2.mk --- copy_number/medicc2.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/copy_number/medicc2.mk b/copy_number/medicc2.mk index 13585253..faa0c457 100644 --- a/copy_number/medicc2.mk +++ b/copy_number/medicc2.mk @@ -57,6 +57,8 @@ medicc2/$1/$1_summary.tsv : medicc2/$1/$1.tsv --input-allele-columns 'nAB' \ --plot both \ --maxcn 8 \ + --bootstrap-method 'segment-wise' \ + --bootstrap-nr 100 \ --n-cores 4") endef From 5bcd5fda93a128e6e672527a0bba3a57a2ea9043 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:33:17 -0500 Subject: [PATCH 591/766] Update medicc2.R --- scripts/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index b1272742..613f673a 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -70,7 +70,7 @@ if (as.numeric(opt$option) == 1) { by = "sample_id") %>% readr::type_convert() %>% dplyr::mutate(nAB = ((2^nAB)*((purity*ploidy) + (2*(1-purity))) - 2*(1-purity))/purity) %>% - dplyr::mutate(nAB = floor(nAB)) %>% + dplyr::mutate(nAB = round(nAB)) %>% dplyr::select(-purity, -ploidy) readr::write_tsv(x = total_copies, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) From 2014127b6c190a2bc3c40873d5b8b6ba551ded6d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 17:33:57 -0500 Subject: [PATCH 592/766] Update medicc2.R --- scripts/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index 613f673a..e7bf40f1 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -56,7 +56,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% as.data.frame() cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 2.5, k = 25, verbose = FALSE) - cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 10, normalize = FALSE, fast = FALSE, verbose = FALSE) + cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 40, normalize = FALSE, fast = FALSE, verbose = FALSE) total_copies = cn_segmented %>% dplyr::select(c("chrom", "start.pos", "end.pos", contains("Log2_Ratio"))) %>% From 298cecea615dc7e65eed2debb52568ba3da423b3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 20:25:04 -0500 Subject: [PATCH 593/766] Update medicc2.R --- scripts/medicc2.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index e7bf40f1..97971beb 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -25,12 +25,9 @@ if (as.numeric(opt$option) == 1) { load(as.character(opt$file_in)) cn_df = out2$jointseg %>% dplyr::as_tibble() %>% - dplyr::filter(het == 1) %>% - dplyr::mutate(vafT = 1 - vafT) %>% dplyr::select(Chromosome = chrom, Position = maploc, - Log2_Ratio = cnlr, - B_Allele_F = vafT) + Log2_Ratio = cnlr) readr::write_tsv(x = cn_df, file = as.character(opt$file_out), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option) == 2) { @@ -41,7 +38,7 @@ if (as.numeric(opt$option) == 1) { data_ = readr::read_tsv(file = paste0("medicc2/", tumor_sample_names[i], "/", tumor_sample_names[i], ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() - colnames(data_) = c("Chromosome", "Position", paste0(tumor_sample_names[i], "_Log2_Ratio"), paste0(tumor_sample_names[i], "_B_Allele_F")) + colnames(data_) = c("Chromosome", "Position", paste0(tumor_sample_names[i], "_Log2_Ratio")) cn_df[[i]] = data_ %>% reshape2::melt(id.vars = c("Chromosome", "Position")) } From 861d378850533a094d2d85d3aa1777d03005d2c6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 10 Dec 2022 20:25:36 -0500 Subject: [PATCH 594/766] Update medicc2.R --- scripts/medicc2.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/medicc2.R b/scripts/medicc2.R index 97971beb..4289f47c 100644 --- a/scripts/medicc2.R +++ b/scripts/medicc2.R @@ -52,7 +52,7 @@ if (as.numeric(opt$option) == 1) { cn_df = readr::read_tsv(file = as.character(opt$file_in), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% as.data.frame() - cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 2.5, k = 25, verbose = FALSE) + cn_smooth = copynumber::winsorize(data = cn_df, method = "mad", tau = 2.5, k = 40, verbose = FALSE) cn_segmented = copynumber::multipcf(data = cn_smooth, gamma = 40, normalize = FALSE, fast = FALSE, verbose = FALSE) total_copies = cn_segmented %>% From b5f76cdbe94ba84f29e8f7a4b757406ad94d377f Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Dec 2022 20:57:56 -0500 Subject: [PATCH 595/766] ++ --- scripts/get_basecounts.R | 29 +++++++++++++++++++++++++++++ variant_callers/getBaseCount.R | 24 ------------------------ variant_callers/get_basecounts.mk | 11 ++++++++++- 3 files changed, 39 insertions(+), 25 deletions(-) create mode 100644 scripts/get_basecounts.R delete mode 100644 variant_callers/getBaseCount.R diff --git a/scripts/get_basecounts.R b/scripts/get_basecounts.R new file mode 100644 index 00000000..6c8a42ee --- /dev/null +++ b/scripts/get_basecounts.R @@ -0,0 +1,29 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--option", default = NA, type = 'character', help = "Which option?"), + make_option("--sample_name", default = NA, type = 'character', help = "sample name")) +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +if (as.numeric(opt$option) == 1) { + sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + data = list() + for (i in 1:length(sample_names)) { + data[[i]] = readr::read_tsv(file = paste0("gbc/", sample_names[i], ".txt.gz"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_name = sample_names[i]) + } + data = do.call(bind_rows, data) + readr::write_tsv(x = data, path = "gbc/summary.txt", append = FALSE, col_names = TRUE) +} + diff --git a/variant_callers/getBaseCount.R b/variant_callers/getBaseCount.R deleted file mode 100644 index 9293c252..00000000 --- a/variant_callers/getBaseCount.R +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -args_list <- list(make_option("--file_name", default = NA, type = 'character', help = "sample names set")) -parser <- OptionParser(usage = "%prog", option_list = args_list) -arguments <- parse_args(parser, positional_arguments = T) -opt <- arguments$options - -genotype = readr::read_tsv(file = opt$file_name, col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(Chrom_N = gsub(pattern = "chr", replacement = "", x = Chrom, fixed = TRUE)) %>% - readr::type_convert() %>% - dplyr::arrange(Chrom_N, Pos) %>% - dplyr::select(-Chrom_N) - -write_tsv(genotype, path = gsub(pattern = ".txt", replacement = ".tsv", x = opt$file_name), append = FALSE, col_names = TRUE) diff --git a/variant_callers/get_basecounts.mk b/variant_callers/get_basecounts.mk index 9530a098..5d85b5cd 100644 --- a/variant_callers/get_basecounts.mk +++ b/variant_callers/get_basecounts.mk @@ -6,7 +6,8 @@ MAPQ := 0 BAQ := 0 COV := 0 -getbasecount : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) +getbasecount : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) \ + summary.txt define get-basecount gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf @@ -30,6 +31,14 @@ gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf endef $(foreach sample,$(SAMPLES),\ $(eval $(call get-basecount,$(sample)))) + + +gbc/summary.txt : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) + $(call RUN,-n 1 -s 24G -m 32G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/get_basecounts.R \ + --option 1 \ + --sample_name '$(SAMPLES)'") + ..DUMMY := $(shell mkdir -p version; \ ${GBC} &> version/get_basecount.txt;) From 4694599e49ba7863d65ab73488bee5d1141af8de Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Dec 2022 20:59:05 -0500 Subject: [PATCH 596/766] Update get_basecounts.mk --- variant_callers/get_basecounts.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/get_basecounts.mk b/variant_callers/get_basecounts.mk index 5d85b5cd..331d9aa2 100644 --- a/variant_callers/get_basecounts.mk +++ b/variant_callers/get_basecounts.mk @@ -7,7 +7,7 @@ BAQ := 0 COV := 0 getbasecount : $(foreach sample,$(SAMPLES),gbc/$(sample).txt.gz) \ - summary.txt + gbc/summary.txt define get-basecount gbc/$1.txt.gz : bam/$1.bam vcf/dataSilentNoPoleNotTertPromot.vcf From c29d11d4a560ceeb61d6aa3f20aa3d4166efe17d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 12 Dec 2022 21:02:30 -0500 Subject: [PATCH 597/766] Update get_basecounts.R --- scripts/get_basecounts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/get_basecounts.R b/scripts/get_basecounts.R index 6c8a42ee..f65fae14 100644 --- a/scripts/get_basecounts.R +++ b/scripts/get_basecounts.R @@ -16,7 +16,7 @@ arguments <- parse_args(parser, positional_arguments = T) opt <- arguments$options if (as.numeric(opt$option) == 1) { - sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) data = list() for (i in 1:length(sample_names)) { data[[i]] = readr::read_tsv(file = paste0("gbc/", sample_names[i], ".txt.gz"), col_names = TRUE, col_types = cols(.default = col_character())) %>% From 550327f4baedba34814cd1c58d13ef74975b6a35 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 13 Dec 2022 14:02:23 -0500 Subject: [PATCH 598/766] Update varscanTN.mk --- variant_callers/somatic/varscanTN.mk | 50 +++++++++++++--------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk index b22b0e1f..35ba9dfd 100644 --- a/variant_callers/somatic/varscanTN.mk +++ b/variant_callers/somatic/varscanTN.mk @@ -1,42 +1,39 @@ -# Run VarScan on tumour-normal matched pairs -# Detect point mutations -##### DEFAULTS ###### +include modules/Makefile.inc LOGDIR ?= log/varscanTN.$(NOW) -##### MAKE INCLUDES ##### -include modules/Makefile.inc IGNORE_FP_FILTER ?= true - +VALIDATION ?= false FP_FILTER = $(PERL) $(HOME)/share/usr/bin/fpfilter.pl BAM_READCOUNT = $(HOME)/share/usr/bin/bam-readcount - VARSCAN_TO_VCF = $(PERL) modules/variant_callers/somatic/varscanTNtoVcf.pl - MIN_MAP_QUAL ?= 1 -VALIDATION ?= false MIN_VAR_FREQ ?= $(if $(findstring false,$(VALIDATION)),0.05,0.000001) -#VARSCAN VARSCAN_MEM = $(JAVA7) -Xmx$1 -jar $(VARSCAN_JAR) VARSCAN = $(call VARSCAN_MEM,8G) VARSCAN_OPTS = $(if $(findstring true,$(VALIDATION)),--validation 1 --strand-filter 0) --min-var-freq $(MIN_VAR_FREQ) - VARSCAN_SOURCE_ANN_VCF = python modules/vcf_tools/annotate_source_vcf.py --source varscan - VPATH ?= bam - VARSCAN_VARIANT_TYPES = varscan_indels varscan_snps -PHONY += varscan varscan_vcfs varscan_mafs -varscan : varscan_vcfs #varscan_mafs -varscan_vcfs : $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf)) -varscan_mafs : $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),maf/$(pair).$(type).maf)) - +varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) \ + $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \ + $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) + +# $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ +# $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ +# $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf)) %.Somatic.txt : %.txt - $(call RUN,-s 5G -m 8G,"$(call VARSCAN_MEM,4G) somaticFilter $< && $(call VARSCAN_MEM,4G) processSomatic $< && rename .txt.Somatic .Somatic.txt $** && rename .txt.Germline .Germline.txt $** && rename .txt.LOH .LOH.txt $** && rename .txt.hc .hc.txt $**") + $(call RUN,-s 5G -m 8G,"set -o pipefail && \ + $(call VARSCAN_MEM,4G) somaticFilter $< && \ + $(call VARSCAN_MEM,4G) processSomatic $< && \ + rename .txt.Somatic .Somatic.txt $** && \ + rename .txt.Germline .Germline.txt $** && \ + rename .txt.LOH .LOH.txt $** && \ + rename .txt.hc .hc.txt $**") define varscan-somatic-tumor-normal-chr varscan/chr_tables/$1_$2.$3.varscan_timestamp : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai @@ -54,10 +51,12 @@ varscan/chr_tables/$1_$2.$3.varscan_timestamp : bam/$1.bam bam/$2.bam bam/$1.bam fi varscan/chr_tables/$1_$2.$3.indel.txt : varscan/chr_tables/$1_$2.$3.varscan_timestamp + varscan/chr_tables/$1_$2.$3.snp.txt : varscan/chr_tables/$1_$2.$3.varscan_timestamp varscan/chr_tables/$1_$2.$3.%.fp_pass.txt : varscan/chr_tables/$1_$2.$3.%.txt bamrc/$1.$3.bamrc.gz $$(call RUN,-s 8G -m 55G,"$$(VARSCAN) fpfilter $$< <(zcat $$(<<)) --output-file $$@") + endef $(foreach chr,$(CHROMOSOMES), \ $(foreach pair,$(SAMPLE_PAIRS), \ @@ -69,13 +68,16 @@ varscan/tables/$1.$2.txt : $$(foreach chr,$$(CHROMOSOMES),\ varscan/chr_tables/$1.$$(chr).$2.txt,\ varscan/chr_tables/$1.$$(chr).$2.fp_pass.txt)) $$(INIT) head -1 $$< > $$@ && for x in $$^; do sed 1d $$$$x >> $$@; done + endef $(foreach pair,$(SAMPLE_PAIRS), \ $(foreach type,snp indel,$(eval $(call merge-varscan-pair-type,$(pair),$(type))))) define convert-varscan-tumor-normal varscan/vcf/$1_$2.%.vcf : varscan/tables/$1_$2.%.txt - $$(call RUN,-s 4G -m 8G,"$$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$< | $$(VCF_SORT) $$(REF_DICT) - > $$@") + $$(call RUN,-s 4G -m 8G,"set -o pipefail && \ + $$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$< | $$(VCF_SORT) $$(REF_DICT) - > $$@") + endef $(foreach pair,$(SAMPLE_PAIRS), \ $(eval $(call convert-varscan-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) @@ -86,15 +88,9 @@ vcf/%.varscan_indels.vcf : varscan/vcf/%.indel.Somatic.vcf vcf/%.varscan_snps.vcf : varscan/vcf/%.snp.Somatic.vcf $(INIT) $(VARSCAN_SOURCE_ANN_VCF) < $< > $@ -define bamrc-chr -bamrc/%.$1.bamrc.gz : bam/%.bam - $$(call RUN,-s 8G -m 12G,"$$(BAM_READCOUNT) -f $$(REF_FASTA) $$< $1 | gzip > $$@ 2> /dev/null") -endef -$(foreach chr,$(CHROMOSOMES),$(eval $(call bamrc-chr,$(chr)))) - include modules/variant_callers/gatk.mk .DELETE_ON_ERROR: .SECONDARY: -.PHONY: $(PHONY) +.PHONY: varscan From 455418245c0490de3e765e9f4879b94da710c88f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 13 Dec 2022 14:05:22 -0500 Subject: [PATCH 599/766] Update varscanTN.mk --- variant_callers/somatic/varscanTN.mk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk index 35ba9dfd..f79eb81f 100644 --- a/variant_callers/somatic/varscanTN.mk +++ b/variant_callers/somatic/varscanTN.mk @@ -18,9 +18,10 @@ VARSCAN_SOURCE_ANN_VCF = python modules/vcf_tools/annotate_source_vcf.py --sourc VPATH ?= bam VARSCAN_VARIANT_TYPES = varscan_indels varscan_snps -varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) \ - $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \ - $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) +varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) + +# $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \ +# $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) # $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ # $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ From 251cd09243581d16450870a2e1a536a35a30be1a Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 13 Dec 2022 14:57:18 -0500 Subject: [PATCH 600/766] Update varscanTN.mk --- variant_callers/somatic/varscanTN.mk | 66 +++++++++++++++++----------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk index f79eb81f..99e463c1 100644 --- a/variant_callers/somatic/varscanTN.mk +++ b/variant_callers/somatic/varscanTN.mk @@ -2,7 +2,6 @@ include modules/Makefile.inc LOGDIR ?= log/varscanTN.$(NOW) - IGNORE_FP_FILTER ?= true VALIDATION ?= false FP_FILTER = $(PERL) $(HOME)/share/usr/bin/fpfilter.pl @@ -18,28 +17,24 @@ VARSCAN_SOURCE_ANN_VCF = python modules/vcf_tools/annotate_source_vcf.py --sourc VPATH ?= bam VARSCAN_VARIANT_TYPES = varscan_indels varscan_snps -varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) - -# $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \ -# $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) - -# $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ -# $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).txt) \ -# $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf)) - -%.Somatic.txt : %.txt - $(call RUN,-s 5G -m 8G,"set -o pipefail && \ - $(call VARSCAN_MEM,4G) somaticFilter $< && \ - $(call VARSCAN_MEM,4G) processSomatic $< && \ - rename .txt.Somatic .Somatic.txt $** && \ - rename .txt.Germline .Germline.txt $** && \ - rename .txt.LOH .LOH.txt $** && \ - rename .txt.hc .hc.txt $**") +varscan : $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).varscan_timestamp)) \ + $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).snp.txt)) \ + $(foreach chr,$(CHROMOSOMES),$(foreach pair,$(SAMPLE_PAIRS),varscan/chr_tables/$(pair).$(chr).indel.txt)) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).snp.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).indel.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).snp.Somatic.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/tables/$(pair).indel.Somatic.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/vcf/$(pair).snp.Somatic.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),varscan/vcf/$(pair).indel.Somatic.vcf) \ + $(foreach type,$(VARSCAN_VARIANT_TYPES),$(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).$(type).vcf)) define varscan-somatic-tumor-normal-chr varscan/chr_tables/$1_$2.$3.varscan_timestamp : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai if [[ $$$$($$(SAMTOOLS) view $$< $3 | head -1 | wc -l) -gt 0 ]]; then \ - $$(call RUN,-s 9G -m 12G,"$$(VARSCAN) somatic \ + $$(call RUN,-s 9G -m 12G -w 72:00:00,"set -o pipefail && \ + rm -rf varscan/chr_tables/$1_$2.$3.snp.txt && \ + rm -rf varscan/chr_tables/$1_$2.$3.indel.txt && \ + $$(VARSCAN) somatic \ <($$(SAMTOOLS) mpileup -A -r $3 -q $$(MIN_MAP_QUAL) -f $$(REF_FASTA) $$(word 2,$$^)) \ <($$(SAMTOOLS) mpileup -A -r $3 -q $$(MIN_MAP_QUAL) -f $$(REF_FASTA) $$<) \ $$(VARSCAN_OPTS) \ @@ -61,7 +56,7 @@ varscan/chr_tables/$1_$2.$3.%.fp_pass.txt : varscan/chr_tables/$1_$2.$3.%.txt ba endef $(foreach chr,$(CHROMOSOMES), \ $(foreach pair,$(SAMPLE_PAIRS), \ - $(eval $(call varscan-somatic-tumor-normal-chr,$(tumor.$(pair)),$(normal.$(pair)),$(chr))))) + $(eval $(call varscan-somatic-tumor-normal-chr,$(tumor.$(pair)),$(normal.$(pair)),$(chr))))) define merge-varscan-pair-type varscan/tables/$1.$2.txt : $$(foreach chr,$$(CHROMOSOMES),\ @@ -72,16 +67,38 @@ varscan/tables/$1.$2.txt : $$(foreach chr,$$(CHROMOSOMES),\ endef $(foreach pair,$(SAMPLE_PAIRS), \ - $(foreach type,snp indel,$(eval $(call merge-varscan-pair-type,$(pair),$(type))))) + $(foreach type,snp indel, \ + $(eval $(call merge-varscan-pair-type,$(pair),$(type))))) + +define filter-varscan-pair-type +varscan/tables/$1.$2.Somatic.txt : varscan/tables/$1.$2.txt + $$(call RUN,-s 5G -m 8G,"set -o pipefail && \ + $$(VARSCAN) somaticFilter $$(<) && \ + $$(VARSCAN) processSomatic $$(<) && \ + cp varscan/tables/$1.$2.txt.Somatic varscan/tables/$1.$2.Somatic.txt && \ + rm varscan/tables/$1.$2.txt.Somatic && \ + cp varscan/tables/$1.$2.txt.Germline varscan/tables/$1.$2.Germline.txt && \ + rm varscan/tables/$1.$2.txt.Germline && \ + cp varscan/tables/$1.$2.txt.LOH varscan/tables/$1.$2.LOH.txt && \ + rm varscan/tables/$1.$2.txt.LOH && \ + cp varscan/tables/$1.$2.txt.hc varscan/tables/$1.$2.hc.txt && \ + rm varscan/tables/$1.$2.txt.hc") + +endef +$(foreach pair,$(SAMPLE_PAIRS), \ + $(foreach type,snp indel, \ + $(eval $(call filter-varscan-pair-type,$(pair),$(type))))) define convert-varscan-tumor-normal -varscan/vcf/$1_$2.%.vcf : varscan/tables/$1_$2.%.txt +varscan/vcf/$1_$2.$3.Somatic.vcf : varscan/tables/$1_$2.$3.Somatic.txt $$(call RUN,-s 4G -m 8G,"set -o pipefail && \ - $$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$< | $$(VCF_SORT) $$(REF_DICT) - > $$@") + $$(VARSCAN_TO_VCF) -f $$(REF_FASTA) -t $1 -n $2 $$(<) | $$(VCF_SORT) $$(REF_DICT) - > $$(@)") + endef $(foreach pair,$(SAMPLE_PAIRS), \ - $(eval $(call convert-varscan-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) + $(foreach type,snp indel, \ + $(eval $(call convert-varscan-tumor-normal,$(tumor.$(pair)),$(normal.$(pair)),$(type))))) vcf/%.varscan_indels.vcf : varscan/vcf/%.indel.Somatic.vcf $(INIT) $(VARSCAN_SOURCE_ANN_VCF) < $< > $@ @@ -94,4 +111,3 @@ include modules/variant_callers/gatk.mk .DELETE_ON_ERROR: .SECONDARY: .PHONY: varscan - From 47f3ec47e95f416d7bbfb2d596c23dd8ef3a4446 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 12:51:34 -0500 Subject: [PATCH 601/766] Update varscanTN.mk --- variant_callers/somatic/varscanTN.mk | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk index 99e463c1..cbb070de 100644 --- a/variant_callers/somatic/varscanTN.mk +++ b/variant_callers/somatic/varscanTN.mk @@ -75,14 +75,7 @@ varscan/tables/$1.$2.Somatic.txt : varscan/tables/$1.$2.txt $$(call RUN,-s 5G -m 8G,"set -o pipefail && \ $$(VARSCAN) somaticFilter $$(<) && \ $$(VARSCAN) processSomatic $$(<) && \ - cp varscan/tables/$1.$2.txt.Somatic varscan/tables/$1.$2.Somatic.txt && \ - rm varscan/tables/$1.$2.txt.Somatic && \ - cp varscan/tables/$1.$2.txt.Germline varscan/tables/$1.$2.Germline.txt && \ - rm varscan/tables/$1.$2.txt.Germline && \ - cp varscan/tables/$1.$2.txt.LOH varscan/tables/$1.$2.LOH.txt && \ - rm varscan/tables/$1.$2.txt.LOH && \ - cp varscan/tables/$1.$2.txt.hc varscan/tables/$1.$2.hc.txt && \ - rm varscan/tables/$1.$2.txt.hc") + cp varscan/tables/$1.$2.txt.Somatic.hc varscan/tables/$1.$2.Somatic.txt") endef $(foreach pair,$(SAMPLE_PAIRS), \ From 63b480e7d2558620056d1968704a070b39ad4044 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 12:56:40 -0500 Subject: [PATCH 602/766] Update varscanTN.mk --- variant_callers/somatic/varscanTN.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/somatic/varscanTN.mk b/variant_callers/somatic/varscanTN.mk index cbb070de..560f0719 100644 --- a/variant_callers/somatic/varscanTN.mk +++ b/variant_callers/somatic/varscanTN.mk @@ -75,7 +75,7 @@ varscan/tables/$1.$2.Somatic.txt : varscan/tables/$1.$2.txt $$(call RUN,-s 5G -m 8G,"set -o pipefail && \ $$(VARSCAN) somaticFilter $$(<) && \ $$(VARSCAN) processSomatic $$(<) && \ - cp varscan/tables/$1.$2.txt.Somatic.hc varscan/tables/$1.$2.Somatic.txt") + cp varscan/tables/$1.$2.txt.Somatic varscan/tables/$1.$2.Somatic.txt") endef $(foreach pair,$(SAMPLE_PAIRS), \ From 3825abd57bbb0b0eba36fd091cd1afcf17639b2f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:31:21 -0500 Subject: [PATCH 603/766] ++ --- scripts/star_fish.R | 37 ++++++++++++++++++++++++++++++++++++- signatures/star_fish.mk | 7 ++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index a80b401a..81254492 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -77,6 +77,41 @@ if (as.numeric(opt$option)==1) { starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19") setwd(wd) cat("taskcomplete!!", file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) - + } + +} else if (as.numeric(opt$option)==4) { + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) + sv_df = cn_df = gd_df = list() + for (i in 1:length(sample_names)) { + sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name, ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + gd_df[[i]] = dplyr::tibble(sample = sample_name, gender = "unknown") %>% + readr::type_convert() + } + sv_df = do.call(bind_rows, sv_df) + cn_df = do.call(bind_rows, sn_df) + gd_df = do.call(bind_rows, gd_df) + starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/") + if (length(starfish_link_out)==1) { + cat(starfish_link_out, file = "star_fish/summary/taskcomplete", append = FALSE) + } else { + starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, + complex_sv = starfish_link_out$interleave_tra_complex_sv, + cnv_file = cn_df, + gender_file = gd_df, + prefix = "star_fish/summary/", + genome_v = "hg19", + cnv_factor = "auto", + arm_del_rm = TRUE) + starfish_sig_out = starfish_sig(cluster_feature = starfish_feature_out$cluster_feature, + prefix = "star_fish/summary/", + cmethod = "class") + wd = getwd() + setwd("star_fish/summary/") + starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19") + setwd(wd) + cat("taskcomplete!!", file = "star_fish/summary/taskcomplete", append = FALSE) } } diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 59fa574e..32f5ec74 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -8,7 +8,8 @@ MAX_SIZE = 10000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) \ + star_fish/summary/taskcomplete define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -44,6 +45,10 @@ star_fish/$1_$2/$1_$2.taskcomplete : star_fish/$1_$2/$1_$2.merged_sv.bedpe star_ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) + +star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R --option 4 --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) From 1bfa23982d430baf5e6bf670003aadb4f284a4b2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:34:45 -0500 Subject: [PATCH 604/766] Update star_fish.mk --- signatures/star_fish.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 32f5ec74..d392f372 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -47,8 +47,10 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) - $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R --option 4 --sample_name '$(SAMPLE_PAIRS)'") + $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 4 \ + --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) From e81b8c844ba1ad525e5cbade526da5bf88300546 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:36:46 -0500 Subject: [PATCH 605/766] Update star_fish.R --- scripts/star_fish.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 81254492..af1e90b4 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -85,9 +85,9 @@ if (as.numeric(opt$option)==1) { for (i in 1:length(sample_names)) { sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name, ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() - cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() - gd_df[[i]] = dplyr::tibble(sample = sample_name, gender = "unknown") %>% + gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% readr::type_convert() } sv_df = do.call(bind_rows, sv_df) From f7dbe807e641e2abe22759715e2407a74e8bb821 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:38:38 -0500 Subject: [PATCH 606/766] Update star_fish.R --- scripts/star_fish.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index af1e90b4..4b687a7f 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -83,9 +83,9 @@ if (as.numeric(opt$option)==1) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) sv_df = cn_df = gd_df = list() for (i in 1:length(sample_names)) { - sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name, ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() - cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% readr::type_convert() From 4644ac7616830759a5499c5c0acc42472cc531a4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:43:38 -0500 Subject: [PATCH 607/766] Update star_fish.R --- scripts/star_fish.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 4b687a7f..e2a7be69 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -83,9 +83,9 @@ if (as.numeric(opt$option)==1) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) sv_df = cn_df = gd_df = list() for (i in 1:length(sample_names)) { - sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() - cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_name[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% readr::type_convert() From 0b615fda4f05bc789a385a24e5a2cf0f72cf8404 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 15 Dec 2022 15:45:22 -0500 Subject: [PATCH 608/766] Update star_fish.R --- scripts/star_fish.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index e2a7be69..5d7165b0 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -91,7 +91,7 @@ if (as.numeric(opt$option)==1) { readr::type_convert() } sv_df = do.call(bind_rows, sv_df) - cn_df = do.call(bind_rows, sn_df) + cn_df = do.call(bind_rows, cn_df) gd_df = do.call(bind_rows, gd_df) starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/") if (length(starfish_link_out)==1) { From 728e3c36f34a7581ee6d83b4a603f90de85403ec Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Dec 2022 13:07:48 -0500 Subject: [PATCH 609/766] Update pyclone_vi.R --- scripts/pyclone_vi.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index a0a8cef7..09cfd685 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -50,9 +50,9 @@ if (as.numeric(opt$option) == 1) { End_Position = loc.end, minor_cn = ifelse(is.na(lcn.em), "0", lcn.em), major_cn = tcn.em) %>% - readr::type_convert() %>% - dplyr::mutate(major_cn = major_cn - minor_cn) %>% - dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn) + readr::type_convert() %>% + dplyr::mutate(major_cn = major_cn - minor_cn) %>% + dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn) pyclone[[i]] = sufam %>% dplyr::mutate(Chromosome = ifelse(Chromosome == "X", "23", Chromosome)) %>% @@ -80,7 +80,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(!is.na(ref_counts)) %>% dplyr::filter(!is.na(alt_counts)) %>% dplyr::mutate(alt_counts = ifelse(alt_counts<=1, 0, alt_counts)) %>% - dplyr::filter(!is.na(major_cn)) %>% + dplyr::mutate(major_cn = ifelse(is.na(major_cn), 1, major_cn)) %>% dplyr::filter(major_cn != 0) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) From da929b83bc56e33033c46341784aa595a27f06f0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Dec 2022 13:08:52 -0500 Subject: [PATCH 610/766] Update pyclone_vi.mk --- clonality/pyclone_vi.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index 2965da74..b2c8f6cd 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -7,12 +7,12 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \ - pyclone_vi/summary.txt + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ +# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \ +# pyclone_vi/summary.txt define r-sufam From 12ded0092b5cfc46d706a4ed3f64aec45d657dc1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 16 Dec 2022 13:51:44 -0500 Subject: [PATCH 611/766] ++ --- clonality/pyclone_vi.mk | 12 ++++++------ scripts/pyclone_vi.R | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index b2c8f6cd..2965da74 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -7,12 +7,12 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 50000' pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).txt) \ - $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ -# $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \ -# pyclone_vi/summary.txt + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).tsv) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/$(set).hd5) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/by_loci.txt) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/scatter_by_sample.pdf) \ + $(foreach set,$(SAMPLE_SETS),pyclone_vi/$(set)/summary/heatmap_by_sample.pdf) \ + pyclone_vi/summary.txt define r-sufam diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 09cfd685..35ed13c1 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -81,7 +81,7 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(!is.na(alt_counts)) %>% dplyr::mutate(alt_counts = ifelse(alt_counts<=1, 0, alt_counts)) %>% dplyr::mutate(major_cn = ifelse(is.na(major_cn), 1, major_cn)) %>% - dplyr::filter(major_cn != 0) %>% + dplyr::mutate(major_cn = ifelse(major_cn==0, 1, major_cn)) %>% dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), 0, minor_cn)) smry = pyclone %>% From 820aa3ac2507f4fa5b38da7fa72c7b03c92f11a2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 19:49:39 -0500 Subject: [PATCH 612/766] sv_signature -- --- scripts/sv_signature.R | 98 ++------------------------------------ signatures/sv_signature.mk | 54 ++------------------- 2 files changed, 9 insertions(+), 143 deletions(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 1e779257..9c033990 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -59,102 +59,12 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==2) { - sample_name = as.character(opt$sample_name) - bedpe_org = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - dplyr::filter(chrom1 != "Y") %>% - dplyr::filter(chrom2 != "Y") %>% - readr::type_convert() - bedpe_cli = readr::read_tsv(file = paste0("sv_signature/", sample_name, "/", sample_name, ".merged.sv_clusters_and_footprints.tsv"), col_names = FALSE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::select(chrom1 = X1, - start1 = X2, - end1 = X3, - chrom2 = X4, - start2 = X5, - end2 = X6, - n_svs = X12, - p_value = X17) %>% - dplyr::mutate(p_value = as.numeric(p_value)) %>% - dplyr::mutate(p_value = case_when( - is.na(p_value) ~ 1, - TRUE ~ p_value)) - bedpe_org = bedpe_org %>% - dplyr::left_join(bedpe_cli, by = c("chrom1", "start1", "end1", "chrom2", "start2", "end2")) %>% - dplyr::mutate(is_clustered = case_when( - p_value<.05 & n_svs>=100 ~ "c1", - TRUE ~ "non_clustered" - )) %>% - dplyr::mutate(is_clustered = case_when( - p_value<.01 & n_svs>=250 ~ "c2", - TRUE ~ is_clustered - )) %>% - dplyr::mutate(svclass = case_when( - svclass == "TRA" & is_clustered == "c1" ~ "c1TRA", - svclass == "TRA" & is_clustered == "c2" ~ "c2TRA", - svclass == "INV" & (is_clustered == "c1" | is_clustered == "c2") ~ "cINV", - TRUE ~ svclass - )) %>% - dplyr::select(chrom1, start1, end1, chrom2, start2, end2, sv_id, pe_support, strand1, strand2, svclass) - write_tsv(x = bedpe_org, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==3) { - sample_name = as.character(opt$sample_name) - catalogues = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - catalogues_mutations = data.frame(catalogues %>% dplyr::select(manual_sv_type)) - colnames(catalogues_mutations) = sample_name - rownames(catalogues_mutations) = catalogues %>% .[["...1"]] - - signatures = readr::read_tsv(file = "~/share/lib/resource_files/viola/NMF/signature_matrix.txt", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - signatures_mutations = data.frame(signatures %>% dplyr::select(-`SV Type`)) - colnames(signatures_mutations) = colnames(signatures)[-1] - rownames(signatures_mutations) = signatures %>% .[["SV Type"]] - exposureFilterType = "fixedThreshold" - threshold_percent = 5 - optimisation_method = "KLD" - useBootstrap = FALSE - nboot = 1000 - threshold_p.value = 0.05 - nparallel = 4 - randomSeed = 1 - fit = Fit(catalogues = catalogues_mutations, - signatures = signatures_mutations, - exposureFilterType = exposureFilterType, - threshold_percent = threshold_percent, - method = optimisation_method, - useBootstrap = useBootstrap, - nboot = nboot, - threshold_p.value = threshold_p.value, - nparallel = nparallel, - randomSeed = randomSeed, - verbose = TRUE) - x = dplyr::tibble(feature_name = rownames(fit$catalogues), - feature_count = as.vector(fit$catalogues[,1])) %>% - dplyr::mutate(sample_name = sample_name) - readr::write_tsv(x = x, file = paste0(opt$output_file, "_features.txt"), col_names = TRUE, append = FALSE) - - x = dplyr::tibble(signature_name = colnames(fit$exposures), - signature_exposure = as.vector(fit$exposures[1,])/sum(as.vector(fit$exposures[1,])) * 100) %>% - dplyr::mutate(sample_name = sample_name) - readr::write_tsv(x = x, file = paste0(opt$output_file, "_exposures.txt"), col_names = TRUE, append = FALSE) - -} else if (as.numeric(opt$option)==4) { sample_name = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - signature_x = list() - for (i in 1:length(sample_name)) { - signature_x[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(method = "signature.tools.lib") - } - signature_x = do.call(bind_rows, signature_x) - signature_y = list() + signature_df = list() for (i in 1:length(sample_name)) { - signature_y[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged.sv_clusters_and_footprints_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(method = "viola") + signature_df[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_exposures.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() } - signature_y = do.call(bind_rows, signature_y) - signature_df = dplyr::bind_rows(signature_x, signature_y) + signature_df = do.call(bind_rows, signature_df) readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index fe0e938d..763790b9 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -3,22 +3,12 @@ include modules/Makefile.inc LOGDIR ?= log/sv_signature.$(NOW) MIN_SIZE = 1 -MAX_SIZE = 10000000000000000 -FRAGILE_SITES = /data/reis-filho/lib/resource_files/viola/annotation/fragile_site.b37.bed -REPLICATION_TIMING = /data/reis-filho/lib/resource_files/viola/annotation/replication_timing.b37.bedgraph -SV_DEFINITIONS = /data/reis-filho/lib/resource_files/viola/definitions/sv_class_default.txt -CLUSTER_SV = $(VIOLA_ENV)/opt/ClusterSV/R -CHROM_SIZES = $(VIOLA_ENV)/opt/ClusterSV/references/hg19.chrom_sizes -CENTROMERE_TELOMERE = $(VIOLA_ENV)/opt/ClusterSV/references/hg19_centromere_and_telomere_coords.txt +MAX_SIZE = 100000000000000000000 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.tsv) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) \ - sv_signature/summary.txt + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +# sv_signature/.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -43,47 +33,13 @@ sv_signature/$1_$2/$1_$2.merged_exposures.txt : sv_signature/$1_$2/$1_$2.merged. --input_file $$(<) \ --output_file sv_signature/$1_$2/$1_$2.merged") -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv : sv_signature/$1_$2/$1_$2.merged.bedpe - $$(call RUN,-c -n 4 -s 2G -m 4G -v $(VIOLA_ENV),"set -o pipefail && \ - $(RSCRIPT) $(CLUSTER_SV)/run_cluster_sv.R \ - -bedpe $$(<) \ - -chr $(CHROM_SIZES) \ - -cen_telo $(CENTROMERE_TELOMERE) \ - -out sv_signature/$1_$2/$1_$2.merged \ - -n 4 \ - > sv_signature/$1_$2/$1_$2.merged.log") - -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe : sv_signature/$1_$2/$1_$2.merged.bedpe sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.tsv - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 2 \ - --sample_name $1_$2 \ - --output_file $$(@)") - -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.bedpe - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(VIOLA_ENV),"set -o pipefail && \ - python $(SCRIPTS_DIR)/sv_signature.py \ - --bedpe_infile $$(<) \ - --fragile_bed $(FRAGILE_SITES) \ - --timing_bedgraph $(REPLICATION_TIMING) \ - --sv_definitions $(SV_DEFINITIONS) \ - --text_outfile $$(@)") - -sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints_exposures.txt : sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints.txt - $$(call RUN,-c -n 4 -s 2G -m 4G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R \ - --option 3 \ - --sample_name $1_$2 \ - --input_file $$(<) \ - --output_file sv_signature/$1_$2/$1_$2.merged.sv_clusters_and_footprints") - endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) -sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.sv_clusters_and_footprints_exposures.txt) $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 4 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ From 706b1f430ec823fdba338e079d950615c598db9a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 20:04:18 -0500 Subject: [PATCH 613/766] Update sv_signature.mk --- signatures/sv_signature.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 763790b9..d2107ab7 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -7,8 +7,8 @@ MAX_SIZE = 100000000000000000000 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) -# sv_signature/.txt + $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ + sv_signature/exposures.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf From c6a3670bf58e717520a215cb98006798f1029105 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 20:04:41 -0500 Subject: [PATCH 614/766] Update sv_signature.mk --- signatures/sv_signature.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index d2107ab7..2148345a 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -37,7 +37,7 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) -sv_signature/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +sv_signature/exposures.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") From 75dc2b8bfe00d394cfd32720fed65b1e2a7e4533 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 20:10:36 -0500 Subject: [PATCH 615/766] sv signatures ++ --- scripts/sv_signature.R | 12 ++++++++++++ signatures/sv_signature.mk | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/sv_signature.R b/scripts/sv_signature.R index 9c033990..3ec46f58 100644 --- a/scripts/sv_signature.R +++ b/scripts/sv_signature.R @@ -67,4 +67,16 @@ if (as.numeric(opt$option)==1) { } signature_df = do.call(bind_rows, signature_df) readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==3) { + sample_name = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) + signature_df = list() + for (i in 1:length(sample_name)) { + signature_df[[i]] = readr::read_tsv(file = paste0("sv_signature/", sample_name[i], "/", sample_name[i], ".merged_features.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(feature_proportion = 100*feature_count / sum(feature_count)) %>% + dplyr::select(feature_name, feature_count, feature_proportion, sample_name) + } + signature_df = do.call(bind_rows, signature_df) + readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index 2148345a..e68f38d0 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -8,7 +8,8 @@ MAX_SIZE = 100000000000000000000 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ - sv_signature/exposures.txt + sv_signature/exposures.txt \ + sv_signature/features.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -41,6 +42,9 @@ sv_signature/exposures.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair) $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") +sv_signature/features.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 3 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/sv_signature.txt;) From fda7fde8bd2df3f0b54952358e72dfce57a8a61d Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 20:14:42 -0500 Subject: [PATCH 616/766] Delete sv_signature.py --- scripts/sv_signature.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 scripts/sv_signature.py diff --git a/scripts/sv_signature.py b/scripts/sv_signature.py deleted file mode 100644 index 4835cee3..00000000 --- a/scripts/sv_signature.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python - -""" extract structural variant signatures -""" - -import argparse -import viola -import numpy as np -import pandas as pd -import os -import sys - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog='sv_signature.py', - description='SV feature extraction') - parser.add_argument('--bedpe_infile', required=True) - parser.add_argument('--fragile_bed', required=True) - parser.add_argument('--timing_bedgraph', required=True) - parser.add_argument('--sv_definitions', required=True) - parser.add_argument('--text_outfile', required=True) - - args = parser.parse_args() - - sample_bedpe = viola.viola.read_bedpe(args.bedpe_infile) - bed_fragile = viola.read_bed(args.fragile_bed) - bedgraph_timing = viola.read_bed(args.timing_bedgraph) - - sample_bedpe.annotate_bed(bed=bed_fragile, annotation='fragile', how='flag') - sample_bedpe.annotate_bed(bed=bedgraph_timing, annotation='timing', how='value') - sample_bedpe.calculate_info('(${timingleft} + ${timingright}) / 2', 'timing') - - feature_matrix = sample_bedpe.classify_manual_svtype(definitions=args.sv_definitions) - feature_matrix.drop('others', inplace=True) - feature_matrix.to_csv(args.text_outfile, index=True, sep='\t') From cd17b5245425f3c2ecc4bcc2498ef0a661701ec8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 21:51:22 -0500 Subject: [PATCH 617/766] ++ --- scripts/star_fish.R | 35 +---------------------------------- signatures/star_fish.mk | 2 +- 2 files changed, 2 insertions(+), 35 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 5d7165b0..1a314369 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -80,38 +80,5 @@ if (as.numeric(opt$option)==1) { } } else if (as.numeric(opt$option)==4) { - sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - sv_df = cn_df = gd_df = list() - for (i in 1:length(sample_names)) { - sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% - readr::type_convert() - } - sv_df = do.call(bind_rows, sv_df) - cn_df = do.call(bind_rows, cn_df) - gd_df = do.call(bind_rows, gd_df) - starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/") - if (length(starfish_link_out)==1) { - cat(starfish_link_out, file = "star_fish/summary/taskcomplete", append = FALSE) - } else { - starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, - complex_sv = starfish_link_out$interleave_tra_complex_sv, - cnv_file = cn_df, - gender_file = gd_df, - prefix = "star_fish/summary/", - genome_v = "hg19", - cnv_factor = "auto", - arm_del_rm = TRUE) - starfish_sig_out = starfish_sig(cluster_feature = starfish_feature_out$cluster_feature, - prefix = "star_fish/summary/", - cmethod = "class") - wd = getwd() - setwd("star_fish/summary/") - starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19") - setwd(wd) - cat("taskcomplete!!", file = "star_fish/summary/taskcomplete", append = FALSE) - } + } diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index d392f372..bb926405 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -3,7 +3,7 @@ include modules/Makefile.inc LOGDIR ?= log/star_fish.$(NOW) MIN_SIZE = 1 -MAX_SIZE = 10000000000000000 +MAX_SIZE = 10000000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ From 352aac9807481daf2a09e0b04c84923ec85dd99a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 22:19:19 -0500 Subject: [PATCH 618/766] ++ --- scripts/star_fish.R | 30 +++++++++++++++++++++++++++++- signatures/star_fish.mk | 15 ++++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 1a314369..27da5ff6 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -80,5 +80,33 @@ if (as.numeric(opt$option)==1) { } } else if (as.numeric(opt$option)==4) { - + sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + signature_df = list() + ii = 1 + for (i in 1:length(sample_names)) { + if (file.exists(paste0("star_fish/", sample_names[i], "/", sample_names[i], "_pcawg_6signatures_class.csv"))) { + signature_df[[ii]] = readr::read_csv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], "_pcawg_6signatures_class.csv"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_name = sample_names[i]) + ii = ii + 1 + } + } + signature_df = do.call(bind_rows, signature_df) + readr::write_tsv(x = signature_df, path = as.character(opt$output_file), col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==5) { + sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + signature_df = list() + ii = 1 + for (i in 1:length(sample_names)) { + if (file.exists(paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"))) { + signature_df[[ii]] = readr::read_csv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(sample_name = sample) %>% + ii = ii + 1 + } + } + signature_df = do.call(bind_rows, signature_df) + readr::write_tsv(x = signature_df, path = as.character(opt$output_file), col_names = TRUE, append = FALSE) + } diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index bb926405..8a468c19 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -9,7 +9,8 @@ star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv. $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) \ - star_fish/summary/taskcomplete + star_fish/pcawg_6signatures_class.txt \ + star_fish/cgr_feature_matrix.txt define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -46,11 +47,19 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) -star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) +star_fish/pcawg_6signatures_class.txt : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ --option 4 \ - --sample_name '$(SAMPLE_PAIRS)'") + --sample_name '$(SAMPLE_PAIRS)' \ + --output_file $(@)") + +star_fish/cgr_feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 5 \ + --sample_name '$(SAMPLE_PAIRS)' \ + --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) From d3fa321917455944b79b6ddfe073a434513e1711 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 22:20:22 -0500 Subject: [PATCH 619/766] ++ --- scripts/star_fish.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 27da5ff6..e762bcf7 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -92,7 +92,7 @@ if (as.numeric(opt$option)==1) { } } signature_df = do.call(bind_rows, signature_df) - readr::write_tsv(x = signature_df, path = as.character(opt$output_file), col_names = TRUE, append = FALSE) + readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==5) { sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) @@ -107,6 +107,6 @@ if (as.numeric(opt$option)==1) { } } signature_df = do.call(bind_rows, signature_df) - readr::write_tsv(x = signature_df, path = as.character(opt$output_file), col_names = TRUE, append = FALSE) + readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } From f41535e53ef5381d90917e1d3ada7e75d082cc01 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 22:24:38 -0500 Subject: [PATCH 620/766] Update star_fish.R --- scripts/star_fish.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index e762bcf7..bf30c261 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -80,7 +80,7 @@ if (as.numeric(opt$option)==1) { } } else if (as.numeric(opt$option)==4) { - sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) signature_df = list() ii = 1 for (i in 1:length(sample_names)) { @@ -95,7 +95,7 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==5) { - sample_names = strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE) + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) signature_df = list() ii = 1 for (i in 1:length(sample_names)) { From dd35f81273778066e6a3dd3ef85366e13195a21f Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 22 Dec 2022 22:35:37 -0500 Subject: [PATCH 621/766] Update star_fish.R --- scripts/star_fish.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index bf30c261..9b0f8cd2 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -102,7 +102,7 @@ if (as.numeric(opt$option)==1) { if (file.exists(paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"))) { signature_df[[ii]] = readr::read_csv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% - dplyr::rename(sample_name = sample) %>% + dplyr::rename(sample_name = sample) ii = ii + 1 } } From 03acb7be09c09b0e87a4c405b171c540e1d2cfc5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:22:13 -0500 Subject: [PATCH 622/766] Update star_fish.mk --- signatures/star_fish.mk | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 8a468c19..b45ea0c1 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -7,10 +7,9 @@ MAX_SIZE = 10000000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) \ - star_fish/pcawg_6signatures_class.txt \ - star_fish/cgr_feature_matrix.txt + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) +# star_fish/exposures.txt +# star_fish/features.txt define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -36,13 +35,7 @@ star_fish/$1_$2/$1_$2.merged_cn.txt : facets/cncf/$1_$2.txt --sample_name $1_$2 \ --input_file $$(<) \ --output_file $$(@)") - -star_fish/$1_$2/$1_$2.taskcomplete : star_fish/$1_$2/$1_$2.merged_sv.bedpe star_fish/$1_$2/$1_$2.merged_cn.txt - $$(call RUN,-c -n 1 -s 4G -m 8G -v $(STARFISH_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ - --option 3 \ - --sample_name $1_$2") - + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) From 775c108b2bf0bafa3291c8c7d31c18877769b245 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:30:20 -0500 Subject: [PATCH 623/766] ++ --- scripts/star_fish.R | 73 ++++++++++++++++------------------------- signatures/star_fish.mk | 14 +++----- 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 9b0f8cd2..d18f4edf 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -54,59 +54,44 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = data, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option)==3) { - sample_name = as.character(opt$sample_name) - sv_df = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - cn_df = readr::read_tsv(file = paste0("star_fish/", sample_name, "/", sample_name, ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() - gd_df = dplyr::tibble(sample = sample_name, gender = "unknown") %>% - readr::type_convert() - - starfish_link_out = starfish_link(sv_file = sv_df, prefix = paste0("star_fish/", sample_name, "/", sample_name)) - if (length(starfish_link_out)==1) { - cat(starfish_link_out, file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) + sv_df = cn_df = gd_df = list() + for (i in 1:length(sample_names)) { + sv_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_sv.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + cn_df[[i]] = readr::read_tsv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], ".merged_cn.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% + readr::type_convert() + } + sv_df = do.call(bind_rows, sv_df) + cn_df = do.call(bind_rows, cn_df) + gd_df = do.call(bind_rows, gd_df) + starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/") + if (length(starfish_link_out) == 1) { + cat(starfish_link_out, file = as.character(opt$output_file), append = FALSE) } else { - starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, complex_sv = starfish_link_out$interleave_tra_complex_sv, - cnv_file = cn_df, gender_file = gd_df, prefix = paste0("star_fish/", sample_name, "/", sample_name), - genome_v = "hg19", cnv_factor = "auto", arm_del_rm = TRUE) + starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, + complex_sv = starfish_link_out$interleave_tra_complex_sv, + cnv_file = cn_df, + gender_file = gd_df, + prefix = "star_fish/summary/", + genome_v = "hg19", + cnv_factor = "auto", + arm_del_rm = TRUE) starfish_sig_out = starfish_sig(cluster_feature = starfish_feature_out$cluster_feature, - prefix = paste0("star_fish/", sample_name, "/", sample_name), + prefix = "star_fish/summary/", cmethod = "class") wd = getwd() - setwd(paste0("star_fish/", sample_name, "/")) + setwd("star_fish/summary/") starfish_plot(sv_file = sv_df, cnv_file = cn_df, cgr = starfish_link_out$starfish_call, genome_v = "hg19") setwd(wd) - cat("taskcomplete!!", file = paste0("star_fish/", sample_name, "/", sample_name, ".taskcomplete"), append = FALSE) + cat("taskcomplete!!", file = as.character(opt$output_file), append = FALSE) } - + } else if (as.numeric(opt$option)==4) { - sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - signature_df = list() - ii = 1 - for (i in 1:length(sample_names)) { - if (file.exists(paste0("star_fish/", sample_names[i], "/", sample_names[i], "_pcawg_6signatures_class.csv"))) { - signature_df[[ii]] = readr::read_csv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], "_pcawg_6signatures_class.csv"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(sample_name = sample_names[i]) - ii = ii + 1 - } - } - signature_df = do.call(bind_rows, signature_df) - readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) + } else if (as.numeric(opt$option)==5) { - sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - signature_df = list() - ii = 1 - for (i in 1:length(sample_names)) { - if (file.exists(paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"))) { - signature_df[[ii]] = readr::read_csv(file = paste0("star_fish/", sample_names[i], "/", sample_names[i], "_CGR_feature_matrix.csv"), col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::rename(sample_name = sample) - ii = ii + 1 - } - } - signature_df = do.call(bind_rows, signature_df) - readr::write_tsv(x = signature_df, file = as.character(opt$output_file), col_names = TRUE, append = FALSE) } diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index b45ea0c1..7788e8c7 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -7,7 +7,8 @@ MAX_SIZE = 10000000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) + $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ + star_fish/summary/taskcomplete # star_fish/exposures.txt # star_fish/features.txt @@ -40,20 +41,13 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call starfish-sv,$(tumor.$(pair)),$(normal.$(pair))))) -star_fish/pcawg_6signatures_class.txt : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) +star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ - --option 4 \ + --option 3 \ --sample_name '$(SAMPLE_PAIRS)' \ --output_file $(@)") -star_fish/cgr_feature_matrix.txt : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).taskcomplete) - $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ - --option 5 \ - --sample_name '$(SAMPLE_PAIRS)' \ - --output_file $(@)") - ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) .DELETE_ON_ERROR: From 8c3f358f70ec3c79c0aa742e6ed908510cea1b3e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:33:45 -0500 Subject: [PATCH 624/766] Update sv_signature.mk --- signatures/sv_signature.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/signatures/sv_signature.mk b/signatures/sv_signature.mk index e68f38d0..74656371 100644 --- a/signatures/sv_signature.mk +++ b/signatures/sv_signature.mk @@ -8,8 +8,8 @@ MAX_SIZE = 100000000000000000000 signature_sv : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) \ - sv_signature/exposures.txt \ - sv_signature/features.txt + sv_signature/summary/exposures.txt \ + sv_signature/summary/features.txt define signature-sv sv_signature/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -38,11 +38,11 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call signature-sv,$(tumor.$(pair)),$(normal.$(pair))))) -sv_signature/exposures.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +sv_signature/summary/exposures.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 2 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") -sv_signature/features.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) +sv_signature/summary/features.txt : $(foreach pair,$(SAMPLE_PAIRS),sv_signature/$(pair)/$(pair).merged_exposures.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sv_signature.R --option 3 --sample_name '$(SAMPLE_PAIRS)' --output_file $(@)") From caab167d3a911b9339bec4f55433da3a45a4012b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:43:26 -0500 Subject: [PATCH 625/766] ++ --- scripts/star_fish.R | 5 ++++- signatures/star_fish.mk | 12 +++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index d18f4edf..5fed1117 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -90,7 +90,10 @@ if (as.numeric(opt$option)==1) { } } else if (as.numeric(opt$option)==4) { - + df = readr::read_csv(file = "star_fish/summary/_pcawg_6signatures_class.csv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample_name = unlist(lapply(cluster_id, function(x) { paste0(unlist(strsplit(x, "_", fixed = TRUE))[1:2], collapse="_")}))) + readr::write_tsv(x = df, file = as.character(opt$output_file), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option)==5) { diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 7788e8c7..2308f9ae 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -8,9 +8,9 @@ MAX_SIZE = 10000000000000000000 star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bed) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ - star_fish/summary/taskcomplete -# star_fish/exposures.txt -# star_fish/features.txt + star_fish/summary/taskcomplete \ + star_fish/summary/exposures.txt + star_fish/summary/features.txt define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -48,6 +48,12 @@ star_fish/summary/taskcomplete : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair --sample_name '$(SAMPLE_PAIRS)' \ --output_file $(@)") +star_fish/summary/exposures.txt : star_fish/summary/taskcomplete + $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 4 \ + --output_file $(@)") + ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) .DELETE_ON_ERROR: From 88daacd3794a85fc2d247668d2e3e8cf907ce467 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:45:03 -0500 Subject: [PATCH 626/766] Update star_fish.mk --- signatures/star_fish.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 2308f9ae..66fe687f 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -10,7 +10,7 @@ star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv. $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ star_fish/summary/taskcomplete \ star_fish/summary/exposures.txt - star_fish/summary/features.txt +# star_fish/summary/features.txt define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf From 75ffb09e83603e22ea016a53a204f98c23ef3822 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 15:47:53 -0500 Subject: [PATCH 627/766] ++ --- scripts/star_fish.R | 5 ++++- signatures/star_fish.mk | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 5fed1117..1b58838f 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -96,5 +96,8 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = df, file = as.character(opt$output_file), append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option)==5) { - + df = readr::read_csv(file = "star_fish/summary/_CGR_feature_matrix.csv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(sample_name = sample) + readr::write_tsv(x = df, file = as.character(opt$output_file), append = FALSE, col_names = TRUE) } diff --git a/signatures/star_fish.mk b/signatures/star_fish.mk index 66fe687f..3d24b30b 100644 --- a/signatures/star_fish.mk +++ b/signatures/star_fish.mk @@ -9,8 +9,8 @@ star_fish : $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv. $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_sv.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),star_fish/$(pair)/$(pair).merged_cn.txt) \ star_fish/summary/taskcomplete \ - star_fish/summary/exposures.txt -# star_fish/summary/features.txt + star_fish/summary/exposures.txt \ + star_fish/summary/features.txt define starfish-sv star_fish/$1_$2/$1_$2.merged_sv.bed : vcf/$1_$2.merged_sv.vcf @@ -54,6 +54,12 @@ star_fish/summary/exposures.txt : star_fish/summary/taskcomplete --option 4 \ --output_file $(@)") +star_fish/summary/features.txt : star_fish/summary/taskcomplete + $(call RUN, -c -n 1 -s 8G -m 12G -v $(STARFISH_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/star_fish.R \ + --option 5 \ + --output_file $(@)") + ..DUMMY := $(shell mkdir -p version; \ $(STARFISH_ENV)/bin/R --version &> version/star_fish.txt;) .DELETE_ON_ERROR: From cdb0ba4f73003e20af9cdbc66227aa7628ed4232 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 16:02:13 -0500 Subject: [PATCH 628/766] Update star_fish.R --- scripts/star_fish.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 1b58838f..84f8dc46 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -64,9 +64,9 @@ if (as.numeric(opt$option)==1) { gd_df[[i]] = dplyr::tibble(sample = sample_names[i], gender = "unknown") %>% readr::type_convert() } - sv_df = do.call(bind_rows, sv_df) - cn_df = do.call(bind_rows, cn_df) - gd_df = do.call(bind_rows, gd_df) + sv_df = do.call(rbind, sv_df) + cn_df = do.call(rbind, cn_df) + gd_df = do.call(rbind, gd_df) starfish_link_out = starfish_link(sv_file = sv_df, prefix = "star_fish/summary/") if (length(starfish_link_out) == 1) { cat(starfish_link_out, file = as.character(opt$output_file), append = FALSE) From 88f28fc8fd4a79bf63abb74222ad046273eae3fc Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 23 Dec 2022 16:16:16 -0500 Subject: [PATCH 629/766] Update star_fish.R --- scripts/star_fish.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/star_fish.R b/scripts/star_fish.R index 84f8dc46..b7f53b36 100644 --- a/scripts/star_fish.R +++ b/scripts/star_fish.R @@ -73,7 +73,11 @@ if (as.numeric(opt$option)==1) { } else { starfish_feature_out = starfish_feature(cgr = starfish_link_out$starfish_call, complex_sv = starfish_link_out$interleave_tra_complex_sv, - cnv_file = cn_df, + cnv_file = cn_df %>% + dplyr::mutate(chromosome = as.character(chromosome)) %>% + dplyr::mutate(chromosome = case_when( + chromosome == "23" ~ "X", + TRUE ~ chromosome)), gender_file = gd_df, prefix = "star_fish/summary/", genome_v = "hg19", From 3f05cece340600acc3026c70775cd0ed27446a9f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 4 Jan 2023 14:06:56 -0500 Subject: [PATCH 630/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 78d340d7..5bccabbc 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -15,6 +15,7 @@ annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(ANNOTATE_SV_ENV),"set -o pipefail && \ mkdir -p annotate_sv/$1/$2 && \ $$(ANNOTATE_SV) \ + -benignAF \ -SVinputFile $$(<) \ -outputFile ./annotate_sv/$1/$2/$1.$2_sv.tsv \ -genomeBuild GRCh37") From 9d97c278924350183f4f313fe912fa6a881bc1da Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 5 Jan 2023 20:30:20 -0500 Subject: [PATCH 631/766] Update project_config.yaml --- default_yaml/project_config.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/default_yaml/project_config.yaml b/default_yaml/project_config.yaml index be2b012e..3be1e9e6 100644 --- a/default_yaml/project_config.yaml +++ b/default_yaml/project_config.yaml @@ -32,13 +32,6 @@ ann_pathogen: true # target panels targets_file: ~/share/reference/target_panels/ -# cnvkit default target panels -# ontarget_file: ~/share/reference/target_panels/ -# offtarget_file: ~/share/reference/target_panels/ - -# whole exome sequencing -# exome: false - # gatk options gatk_hard_filter_snps: true gatk_pool_snp_recal: false From 8be31912255dbcc5f9c9538593f866fffa03183b Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 6 Jan 2023 13:49:33 -0500 Subject: [PATCH 632/766] Update annotate_sv.mk --- vcf_tools/annotate_sv.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/vcf_tools/annotate_sv.mk b/vcf_tools/annotate_sv.mk index 5bccabbc..78d340d7 100644 --- a/vcf_tools/annotate_sv.mk +++ b/vcf_tools/annotate_sv.mk @@ -15,7 +15,6 @@ annotate_sv/$1/$2/$1.$2_sv.tsv : vcf/$1.$2_sv.vcf $$(call RUN,-c -n 1 -s 4G -m 8G -v $(ANNOTATE_SV_ENV),"set -o pipefail && \ mkdir -p annotate_sv/$1/$2 && \ $$(ANNOTATE_SV) \ - -benignAF \ -SVinputFile $$(<) \ -outputFile ./annotate_sv/$1/$2/$1.$2_sv.tsv \ -genomeBuild GRCh37") From f3f2e9392e68ea986215b5db6dfadd2e11df28b7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 30 Jan 2023 09:19:00 -0500 Subject: [PATCH 633/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index c876ca9b..9b55d90e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,7 +4,7 @@ LOGDIR ?= log/merge_sv.$(NOW) SV_CALLERS = svaba manta gridss MAX_DIST = 500 -NUM_CALLERS = 2 +NUM_CALLERS = 3 TYPE = 1 STRAND = 1 MIN_SIZE = 30 From b516bcd182cef36652a9313bf3d3e74ef66f100e Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 16 Feb 2023 17:33:06 -0500 Subject: [PATCH 634/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 9b55d90e..9fda196d 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,9 +4,9 @@ LOGDIR ?= log/merge_sv.$(NOW) SV_CALLERS = svaba manta gridss MAX_DIST = 500 -NUM_CALLERS = 3 -TYPE = 1 -STRAND = 1 +NUM_CALLERS = 2 +TYPE = 0 +STRAND = 0 MIN_SIZE = 30 merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) \ From f45e53757a69d6853689b04c622ce24e2bd0107c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 13:51:51 -0500 Subject: [PATCH 635/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 53251a05..eb71eb69 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -7,11 +7,11 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY, sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) \ - sufam/mutation_summary.maf \ - sufam/mutation_summary_ft.maf + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) +# $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ +# $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ann.maf) \ +# sufam/mutation_summary.maf \ +# sufam/mutation_summary_ft.maf define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv @@ -81,7 +81,7 @@ sufam/mutation_summary.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),su --output_file $(@)") -sufam/mutation_summary_ft.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) +sufam/mutation_summary_ann.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ --option 5 \ From c2c8d9dd04f6e208b4797e6d41476d983a974917 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:11:19 -0500 Subject: [PATCH 636/766] ++ --- scripts/sufam_gt.R | 25 +++++++++++++++++++++++++ variant_callers/sufam_gt.mk | 26 ++++++++++++++++++-------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 84634257..bcaf9d90 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -4,6 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("fuzzyjoin")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -11,6 +12,7 @@ if (!interactive()) { optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), make_option("--sample_set", default = NA, type = 'character', help = "sample set"), + make_option("--tumor_sample", default = NA, type = 'character', help = "tumor sample"), make_option("--normal_sample", default = NA, type = 'character', help = "normal sample"), make_option("--input_file", default = NA, type = 'character', help = "input file"), make_option("--output_file", default = NA, type = 'character', help = "output file")) @@ -50,6 +52,29 @@ if (as.numeric(opt$option)==1) { readr::write_tsv(x = smry, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) } else if (as.numeric(opt$option)==2) { + tumor_sample = unlist(strsplit(x = as.character(opt$tumor_sample), split = " ", fixed=TRUE)) + normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) + maf = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(chrom = Chromosome, + loc.start = Start_Position, + loc.end = End_Position) + facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_sample, "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::mutate(chrom = case_when( + chrom == "23" ~ "X", + TRUE ~ chrom + )) %>% + readr::type_convert() %>% + dplyr::mutate(qt = tcn.em, + q2 = tcn.em - lcn.em) %>% + dplyr::select(chrom, loc.start, loc.end, qt, q2) + maf = maf %>% + fuzzyjoin::genome_left_join(facets, by = c("chrom", "loc.start", "loc.end")) %>% + dplyr::select(-chrom.x, -loc.start.x, -loc.end.x, -chrom.y, -loc.start.y, -loc.end.y) + + write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==99) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) sample_set = setdiff(sample_set, normal_sample) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index eb71eb69..a47d9ab4 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -7,7 +7,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY, sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) # $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ # $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ann.maf) \ # sufam/mutation_summary.maf \ @@ -15,13 +16,13 @@ sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 1 \ - --sample_set '$(set.$1)' \ - --normal_sample '$(normal.$1)' \ - --input_file $$(<) \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") sufam/$1.txt : sufam/$1.vcf bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ @@ -44,6 +45,15 @@ sufam/$1.maf : sufam/$1.vcf --vep-data $$(VEP_DATA) \ --tmp-dir `mktemp -d` \ --output-maf $$(@)") + +sufam/$1_ann.maf : sufam/$1.maf + $$(call RUN,-c -n 1 -s 2G -m 3G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 2 \ + --tumor_sample $1 \ + --normal_sample '$(normal.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") endef From ea2c28ff4aef019b70d26b332ecc8735765bf35f Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:13:31 -0500 Subject: [PATCH 637/766] Update sufam_gt.R --- scripts/sufam_gt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index bcaf9d90..71776818 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -54,7 +54,7 @@ if (as.numeric(opt$option)==1) { } else if (as.numeric(opt$option)==2) { tumor_sample = unlist(strsplit(x = as.character(opt$tumor_sample), split = " ", fixed=TRUE)) normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) - maf = readr::read_tsv(file = opt$input_file, col_names = TRUE, col_types = cols(.default = col_character())) %>% + maf = readr::read_tsv(file = opt$input_file, comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::mutate(chrom = Chromosome, loc.start = Start_Position, From 26cb4c2eee35bdf9583dc77493c7fd7f34b87f32 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:17:14 -0500 Subject: [PATCH 638/766] Update sufam_gt.R --- scripts/sufam_gt.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 71776818..d35018be 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -58,7 +58,8 @@ if (as.numeric(opt$option)==1) { readr::type_convert() %>% dplyr::mutate(chrom = Chromosome, loc.start = Start_Position, - loc.end = End_Position) + loc.end = End_Position) %>% + dplyr::mutate(chrom = as.character(chrom)) facets = readr::read_tsv(file = paste0("facets/cncf/", tumor_sample, "_", normal_sample, ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% dplyr::mutate(chrom = case_when( chrom == "23" ~ "X", From dc84abadabbe31e2a8f3f5699ea8d0ebd5606c8c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:23:32 -0500 Subject: [PATCH 639/766] ++ --- scripts/sufam_gt.R | 10 +++++----- variant_callers/sufam_gt.mk | 17 ++++------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index d35018be..30f05b5b 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -75,7 +75,7 @@ if (as.numeric(opt$option)==1) { write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==99) { +} else if (as.numeric(opt$option)==3) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) normal_sample = unlist(strsplit(x = as.character(opt$normal_sample), split = " ", fixed=TRUE)) sample_set = setdiff(sample_set, normal_sample) @@ -91,7 +91,7 @@ if (as.numeric(opt$option)==1) { t_alt_count = val_al_count) %>% dplyr::mutate(t_ref_count = t_depth - t_alt_count) - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% + maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ann.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::select(-t_depth, -t_alt_count, -t_ref_count) %>% dplyr::bind_cols(sufam) @@ -99,13 +99,13 @@ if (as.numeric(opt$option)==1) { maf = do.call(bind_rows, maf) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==3) { +} else if (as.numeric(opt$option)==9) { maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% dplyr::filter(t_alt_count > 1) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==4) { +} else if (as.numeric(opt$option)==99) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) maf = list() for (i in 1:length(sample_set)) { @@ -157,7 +157,7 @@ if (as.numeric(opt$option)==1) { by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==5) { +} else if (as.numeric(opt$option)==999) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) maf = list() for (i in 1:length(sample_set)) { diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index a47d9ab4..cb6c2c95 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -8,9 +8,8 @@ SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY, sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ - $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) -# $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ -# $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ann.maf) \ + $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) \ + $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) # sufam/mutation_summary.maf \ # sufam/mutation_summary_ft.maf @@ -61,22 +60,14 @@ $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call sufam-gt,$(sample)))) define combine-maf -sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) +sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 2 \ + --option 3 \ --sample_set '$(set.$1)' \ --normal_sample '$(normal.$1)' \ --output_file $$(@)") -sufam/$1_ft.maf : sufam/$1.maf - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 3 \ - --input_file $$(<) \ - --output_file $$(@)") - - endef $(foreach set,$(SAMPLE_SETS),\ $(eval $(call combine-maf,$(set)))) From 63416f1de821815d2e8157123b89d07605ecba27 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:25:44 -0500 Subject: [PATCH 640/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index cb6c2c95..0f33aa37 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -61,12 +61,12 @@ $(foreach sample,$(TUMOR_SAMPLES),\ define combine-maf sufam/$1.maf : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 3 \ - --sample_set '$(set.$1)' \ - --normal_sample '$(normal.$1)' \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 3 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --output_file $$(@)") endef $(foreach set,$(SAMPLE_SETS),\ From 2f04e4879a3b2ab091a867f9ee18a0ad5a80c7b4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:35:28 -0500 Subject: [PATCH 641/766] ++ --- scripts/sufam_gt.R | 18 ++++++------------ variant_callers/sufam_gt.mk | 25 ++++++++----------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 30f05b5b..16e720ad 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -99,20 +99,14 @@ if (as.numeric(opt$option)==1) { maf = do.call(bind_rows, maf) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==9) { - maf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::filter(t_alt_count > 1) - write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==99) { +} else if (as.numeric(opt$option)==4) { sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) maf = list() for (i in 1:length(sample_set)) { maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], ".maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) } maf = do.call(bind_rows, maf) %>% - readr::type_convert() + readr::type_convert() smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% dplyr::mutate(HOTSPOT = case_when( is.na(HOTSPOT) ~ FALSE, @@ -135,7 +129,7 @@ if (as.numeric(opt$option)==1) { cmo_hotspot == "TRUE" ~ TRUE, cmo_hotspot == "FALSE" ~ FALSE )) %>% - dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% + dplyr::mutate(is_Hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% dplyr::mutate(facetsLOHCall = case_when( is.na(facetsLOHCall) ~ FALSE, facetsLOHCall == "True" ~ TRUE, @@ -143,17 +137,17 @@ if (as.numeric(opt$option)==1) { facetsLOHCall == "TRUE" ~ TRUE, facetsLOHCall == "FALSE" ~ FALSE )) %>% - dplyr::mutate(is_loh = facetsLOHCall) %>% + dplyr::mutate(is_LOH = facetsLOHCall) %>% readr::type_convert() maf = maf %>% dplyr::left_join(smry %>% dplyr::group_by(CHROM, POS, REF, ALT) %>% - dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% + dplyr::summarize(is_Hotspot = unique(is_Hotspot)) %>% dplyr::ungroup(), by = c("CHROM", "POS", "REF", "ALT")) maf = maf %>% dplyr::left_join(smry %>% - dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_LOH), by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 0f33aa37..79b559f4 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -9,8 +9,8 @@ sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) \ - $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) -# sufam/mutation_summary.maf \ + $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ + sufam/mutation_summary.maf # sufam/mutation_summary_ft.maf define sufam-gt @@ -74,21 +74,12 @@ $(foreach set,$(SAMPLE_SETS),\ sufam/mutation_summary.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) - $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 4 \ - --sample_set '$(SAMPLE_SETS)' \ - --input_file $(<) \ - --output_file $(@)") - - -sufam/mutation_summary_ann.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),sufam/$(set)_ft.maf) - $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 5 \ - --sample_set '$(SAMPLE_SETS)' \ - --input_file $(<) \ - --output_file $(@)") + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 4 \ + --sample_set '$(SAMPLE_SETS)' \ + --input_file $(<) \ + --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ R --version > version/sufam_gt.txt) From 744a74d4ed3da8d39c02d8d735165f706d0ef754 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 24 Feb 2023 14:42:43 -0500 Subject: [PATCH 642/766] ++ --- scripts/sufam_gt.R | 64 +++++++------------------------------ variant_callers/sufam_gt.mk | 11 +++++-- 2 files changed, 20 insertions(+), 55 deletions(-) diff --git a/scripts/sufam_gt.R b/scripts/sufam_gt.R index 16e720ad..98fa3726 100644 --- a/scripts/sufam_gt.R +++ b/scripts/sufam_gt.R @@ -147,61 +147,19 @@ if (as.numeric(opt$option)==1) { by = c("CHROM", "POS", "REF", "ALT")) maf = maf %>% dplyr::left_join(smry %>% - dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_LOH), - by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) + dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_LOH) %>% + dplyr::mutate(is_present = TRUE), + by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) %>% + dplyr::mutate(is_present = case_when( + is.na(is_present) ~ FALSE, + TRUE ~ is_present + )) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) -} else if (as.numeric(opt$option)==999) { - sample_set = unlist(strsplit(x = as.character(opt$sample_set), split = " ", fixed=TRUE)) - maf = list() - for (i in 1:length(sample_set)) { - maf[[i]] = readr::read_tsv(file = paste0("sufam/", sample_set[i], "_ft.maf"), comment = "#", col_names = TRUE, col_types = cols(.default = col_character())) - } - maf = do.call(bind_rows, maf) %>% - readr::type_convert() - smry = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% - dplyr::mutate(HOTSPOT = case_when( - is.na(HOTSPOT) ~ FALSE, - HOTSPOT == "True" ~ TRUE, - HOTSPOT == "False" ~ FALSE, - HOTSPOT == "TRUE" ~ TRUE, - HOTSPOT == "FALSE" ~ FALSE - )) %>% - dplyr::mutate(HOTSPOT_INTERNAL = case_when( - is.na(HOTSPOT_INTERNAL) ~ FALSE, - HOTSPOT_INTERNAL == "True" ~ TRUE, - HOTSPOT_INTERNAL == "False" ~ FALSE, - HOTSPOT_INTERNAL == "TRUE" ~ TRUE, - HOTSPOT_INTERNAL == "FALSE" ~ FALSE - )) %>% - dplyr::mutate(cmo_hotspot = case_when( - is.na(cmo_hotspot) ~ FALSE, - cmo_hotspot == "True" ~ TRUE, - cmo_hotspot == "False" ~ FALSE, - cmo_hotspot == "TRUE" ~ TRUE, - cmo_hotspot == "FALSE" ~ FALSE - )) %>% - dplyr::mutate(is_hotspot = HOTSPOT | HOTSPOT_INTERNAL | cmo_hotspot) %>% - dplyr::mutate(facetsLOHCall = case_when( - is.na(facetsLOHCall) ~ FALSE, - facetsLOHCall == "True" ~ TRUE, - facetsLOHCall == "False" ~ FALSE, - facetsLOHCall == "TRUE" ~ TRUE, - facetsLOHCall == "FALSE" ~ FALSE - )) %>% - dplyr::mutate(is_loh = facetsLOHCall) %>% - readr::type_convert() - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::group_by(CHROM, POS, REF, ALT) %>% - dplyr::summarize(is_hotspot = unique(is_hotspot)) %>% - dplyr::ungroup(), - by = c("CHROM", "POS", "REF", "ALT")) - maf = maf %>% - dplyr::left_join(smry %>% - dplyr::select(CHROM, POS, REF, ALT, Tumor_Sample_Barcode = TUMOR_SAMPLE, is_loh), - by = c("CHROM", "POS", "REF", "ALT", "Tumor_Sample_Barcode")) +} else if (as.numeric(opt$option)==5) { + maf = readr::read_tsv(file = as.character(opt$input_file), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(is_present) write_tsv(x = maf, path = as.character(opt$output_file), append = FALSE, col_names = TRUE) - } diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 79b559f4..210f8292 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -10,8 +10,8 @@ sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).maf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample)_ann.maf) \ $(foreach set,$(SAMPLE_SETS),sufam/$(set).maf) \ - sufam/mutation_summary.maf -# sufam/mutation_summary_ft.maf + sufam/mutation_summary.maf \ + sufam/mutation_summary_ft.maf define sufam-gt sufam/$1.vcf : summary/tsv/all.tsv @@ -80,6 +80,13 @@ sufam/mutation_summary.maf : summary/tsv/all.tsv $(foreach set,$(SAMPLE_SETS),su --sample_set '$(SAMPLE_SETS)' \ --input_file $(<) \ --output_file $(@)") + +sufam/mutation_summary_ft.maf : sufam/mutation_summary.maf + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 5 \ + --input_file $(<) \ + --output_file $(@)") ..DUMMY := $(shell mkdir -p version; \ R --version > version/sufam_gt.txt) From 30d969f1c1bea052025780b330f4d2685b32e90f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 12:47:25 -0500 Subject: [PATCH 643/766] Update clusterSamples.mk --- contamination/clusterSamples.mk | 50 +++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk index d3f953d5..064c1208 100644 --- a/contamination/clusterSamples.mk +++ b/contamination/clusterSamples.mk @@ -1,16 +1,9 @@ -# Run unified genotyper on snp positions and cluster samples using results -##### DEFAULTS ###### -LOGDIR = log/cluster_samples.$(NOW) - -##### MAKE INCLUDES ##### include modules/Makefile.inc include modules/variant_callers/gatk.inc -VPATH ?= bam -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY : all +LOGDIR = log/cluster_samples.$(NOW) +VPATH ?= bam ifeq ($(EXOME),true) DBSNP_SUBSET ?= $(HOME)/share/reference/dbsnp_137_exome.bed else @@ -19,21 +12,42 @@ endif CLUSTER_VCF = $(RSCRIPT) modules/contamination/clusterSampleVcf.R -all : snp_vcf/snps_filtered.clust.png +snp_cluster : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) \ + snp_vcf/snps.vcf \ + snp_vcf/snps_ft.vcf \ + snp_vcf/snps_filtered.clust.png -#snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),bam/$(sample).bam) -#$(call RUN,-s 4G -m 8G,"$(SAMTOOLS) mpileup -f $(REF_FASTA) -g -l <(sed '/^#/d' $(DBSNP) | cut -f 1,2) $^ | $(BCFTOOLS) view -g - > $@") +snp_vcf/%.snps.vcf : bam/%.bam + $(call RUN,-n 4 -s 2.5G -m 3G,"set -o pipefail && \ + $(call GATK_MEM,8G) \ + -T UnifiedGenotyper \ + -nt 4 \ + -R $(REF_FASTA) \ + --dbsnp $(DBSNP) \ + $(foreach bam,$(filter %.bam,$^),-I $(bam) ) \ + -L $(DBSNP_SUBSET) \ + -o $@ \ + --output_mode EMIT_ALL_SITES") -snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) - $(call RUN,-s 16G -m 20G,"$(call GATK_MEM,14G) -T CombineVariants $(foreach vcf,$^,--variant $(vcf) ) -o $@ --genotypemergeoption UNSORTED -R $(REF_FASTA)") -snp_vcf/snps_filtered.vcf : snp_vcf/snps.vcf +snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) + $(call RUN,-s 16G -m 20G,"set -o pipefail && \ + $(call GATK_MEM,14G) -T CombineVariants \ + $(foreach vcf,$^,--variant $(vcf) ) \ + -o $@ \ + --genotypemergeoption UNSORTED \ + -R $(REF_FASTA)") + +snp_vcf/snps_ft.vcf : snp_vcf/snps.vcf $(INIT) grep '^#' $< > $@ && grep -e '0/1' -e '1/1' $< >> $@ -snp_vcf/%.snps.vcf : bam/%.bam - $(call RUN,-n 4 -s 2.5G -m 3G,"$(call GATK_MEM,8G) -T UnifiedGenotyper -nt 4 -R $(REF_FASTA) --dbsnp $(DBSNP) $(foreach bam,$(filter %.bam,$^),-I $(bam) ) -L $(DBSNP_SUBSET) -o $@ --output_mode EMIT_ALL_SITES") - snp_vcf/%.clust.png : snp_vcf/%.vcf $(INIT) $(CLUSTER_VCF) --outPrefix snp_vcf/$* $< + +..DUMMY := $(shell mkdir -p version; \ + echo "GATK" > version/cluster_samples.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY : snp_cluster include modules/vcf_tools/vcftools.mk From 0e1a628bc7278d86b8fd4c570c377ff40c8401de Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 12:49:29 -0500 Subject: [PATCH 644/766] Update clusterSamples.mk --- contamination/clusterSamples.mk | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk index 064c1208..1d4d20fd 100644 --- a/contamination/clusterSamples.mk +++ b/contamination/clusterSamples.mk @@ -14,8 +14,7 @@ CLUSTER_VCF = $(RSCRIPT) modules/contamination/clusterSampleVcf.R snp_cluster : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) \ snp_vcf/snps.vcf \ - snp_vcf/snps_ft.vcf \ - snp_vcf/snps_filtered.clust.png + snp_vcf/snps_ft.vcf snp_vcf/%.snps.vcf : bam/%.bam $(call RUN,-n 4 -s 2.5G -m 3G,"set -o pipefail && \ @@ -41,8 +40,8 @@ snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) snp_vcf/snps_ft.vcf : snp_vcf/snps.vcf $(INIT) grep '^#' $< > $@ && grep -e '0/1' -e '1/1' $< >> $@ -snp_vcf/%.clust.png : snp_vcf/%.vcf - $(INIT) $(CLUSTER_VCF) --outPrefix snp_vcf/$* $< +#snp_vcf/%.clust.png : snp_vcf/%.vcf +# $(INIT) $(CLUSTER_VCF) --outPrefix snp_vcf/$* $< ..DUMMY := $(shell mkdir -p version; \ echo "GATK" > version/cluster_samples.txt;) From dbbfc1b01373c5a9bee5d11656184c7b9a66cee7 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 14:45:15 -0500 Subject: [PATCH 645/766] ++ --- config.inc | 1 + contamination/clusterSampleVcf.R | 112 ++++++++++++++++++++----------- contamination/clusterSamples.mk | 15 +++-- 3 files changed, 84 insertions(+), 44 deletions(-) diff --git a/config.inc b/config.inc index fb718e33..c8cc5f09 100644 --- a/config.inc +++ b/config.inc @@ -37,6 +37,7 @@ SIGNATURE_TOOLS_ENV = $(HOME)/share/usr/env/r-signature.tools.lib-2.2.0 CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9 STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11 MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1 +VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index ca114d7e..019a8db1 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -2,51 +2,83 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("VariantAnnotation")) -suppressPackageStartupMessages(library("gplots")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("ComplexHeatmap")) +suppressPackageStartupMessages(library("RColorBrewer")) options(error = quote(dump.frames("testdump", TRUE))) -optList <- list( - make_option("--genome", default = 'b37', help = "genome build [default %default]"), - make_option("--outPrefix", default = NULL, help = "output prefix [default %default]")) +optList <- list(make_option("--input_file", default = 'snp_vcf/snps_ft.vcf', help = "input file"), + make_option("--output_file", default = 'snp_vcf/snps_ft.pdf', help = "output file"), + make_option("--sample_pairs", default = NA, help = "sample pairs"), + make_option("--genome", default = 'b37', help = "genome build")) -parser <- OptionParser(usage = "%prog vcf.files", option_list = optList); -arguments <- parse_args(parser, positional_arguments = T); -opt <- arguments$options; +parser <- OptionParser(usage = "%prog vcf.files", option_list = optList) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options -if (is.null(opt$outPrefix)) { - cat("Need output prefix\n"); - print_help(parser); - stop(); -} else if (length(arguments$args) < 1) { - cat("Need vcf files\n"); - print_help(parser); - stop(); -} +vcf = readVcf(as.character(opt$input_file), as.character(opt$genome)) +gt = geno(vcf)$GT +ad = geno(vcf)$AD +af = structure(sapply(ad, function(x) x[2] / sum(x)), dim = dim(ad)) +X = matrix(0, nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) +X[is.na(af)] = NA +X[af > 0.15 & af < 0.95] = 1 +X[af >= 0.95] = 2 +X[!gt %in% c("0/0", "0/1", "1/1")] = NA -vcfFile <- arguments$args[1] - - -vcf <- readVcf(vcfFile, opt$genome) -gt <- geno(vcf)$GT -ad <- geno(vcf)$AD -af <- structure(sapply(ad, function(x) x[2] / sum(x)), dim = dim(ad)) -X <- matrix(0, nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) -X[is.na(af)] <- NA -X[af > 0.15 & af < 0.95] <- 1 -X[af >= 0.95] <- 2 -X[!gt %in% c("0/0", "0/1", "1/1")] <- NA -#plot(hclust(dist(t(X), method = 'manhattan'))) - -gt <- matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) - -fn <- paste(opt$outPrefix, ".clust.pdf", sep = '') -pdf(fn, height = 9, width = 15) -null <- plot(hclust(dist(t(gt)), method = 'ward')) -dev.off() - -fn <- paste(opt$outPrefix, ".heatmap.pdf", sep = '') -pdf(fn, height = 30, width = 30) -null <- heatmap.2(as.matrix(dist(t(gt))), scale = 'none', trace = 'none', keysize = 0.3, cexRow = 2, cexCol = 2, margins = c(20,20)) +gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) +dt = as.matrix(dist(t(gt))) + +tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) +normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) +sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE), + normal_samples = c(normal_samples, unique(normal_samples))) %>% + dplyr::arrange(tumor_samples) %>% + dplyr::mutate(normal_samples = factor(normal_samples, levels = unique(normal_samples), ordered = TRUE)) +cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) +names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) +row_annot = rowAnnotation( + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE +) +col_annot = columnAnnotation( + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE +) + +pdf(as.character(opt$output_file), height = 21, width = 22) +draw(Heatmap(matrix = dt, + name = " ", + rect_gp = gpar(col = "white"), + border = NA, + col = rev(brewer.pal(n = 9, name = "YlOrRd")), + cluster_rows = TRUE, + show_row_dend = TRUE, + row_dend_width = unit(3, "cm"), + row_names_side = "right", + row_names_gp = gpar(fontsize = 12), + show_row_names = TRUE, + left_annotation = row_annot, + + show_column_names = TRUE, + column_names_side = "bottom", + column_names_gp = gpar(fontsize = 12), + cluster_columns = TRUE, + show_column_dend = TRUE, + column_dend_height = unit(3, "cm"), + top_annotation = col_annot, + + use_raster = FALSE, + show_heatmap_legend = TRUE, + heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")))) dev.off() diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk index 1d4d20fd..0b503f27 100644 --- a/contamination/clusterSamples.mk +++ b/contamination/clusterSamples.mk @@ -10,11 +10,12 @@ else DBSNP_SUBSET = $(HOME)/share/reference/dbsnp_tseq_intersect.bed endif -CLUSTER_VCF = $(RSCRIPT) modules/contamination/clusterSampleVcf.R +CLUSTER_VCF = modules/contamination/clusterSampleVcf.R snp_cluster : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) \ snp_vcf/snps.vcf \ - snp_vcf/snps_ft.vcf + snp_vcf/snps_ft.vcf \ + snp_vcf/snps_ft.pdf snp_vcf/%.snps.vcf : bam/%.bam $(call RUN,-n 4 -s 2.5G -m 3G,"set -o pipefail && \ @@ -40,8 +41,14 @@ snp_vcf/snps.vcf : $(foreach sample,$(SAMPLES),snp_vcf/$(sample).snps.vcf) snp_vcf/snps_ft.vcf : snp_vcf/snps.vcf $(INIT) grep '^#' $< > $@ && grep -e '0/1' -e '1/1' $< >> $@ -#snp_vcf/%.clust.png : snp_vcf/%.vcf -# $(INIT) $(CLUSTER_VCF) --outPrefix snp_vcf/$* $< +snp_vcf/snps_ft.pdf : snp_vcf/snps_ft.vcf + $(call RUN,-n 1 -s 16G -m 20G -v $(VARIANT_ANNOTATION_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/contamination/clusterSampleVcf.R \ + --input_file $(<)\ + --output_file $(@)\ + --sample_pairs '$(SAMPLE_PAIRS)'\ + --genome b37") + ..DUMMY := $(shell mkdir -p version; \ echo "GATK" > version/cluster_samples.txt;) From e534df05ae2a2a7969bb192d34bb60e6d994902f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 14:52:05 -0500 Subject: [PATCH 646/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 019a8db1..dd6ded81 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -54,13 +54,16 @@ col_annot = columnAnnotation( simple_anno_size = unit(.5, "cm"), show_legend = FALSE ) - +col_pal = c(rep("#800026", 2), + rev(brewer.pal(n = 9, name = "YlOrRd")), + rep("#FFFFCC", 3)) + pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt, name = " ", rect_gp = gpar(col = "white"), border = NA, - col = rev(brewer.pal(n = 9, name = "YlOrRd")), + col = col_pal, cluster_rows = TRUE, show_row_dend = TRUE, row_dend_width = unit(3, "cm"), From 266fcaf359565c53262bead7cc6a9ae465e1f092 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 15:10:01 -0500 Subject: [PATCH 647/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index dd6ded81..d16b0b7a 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -54,9 +54,9 @@ col_annot = columnAnnotation( simple_anno_size = unit(.5, "cm"), show_legend = FALSE ) -col_pal = c(rep("#800026", 2), - rev(brewer.pal(n = 9, name = "YlOrRd")), - rep("#FFFFCC", 3)) +col_pal = c(rep("#662506", 3), + rev(brewer.pal(n = 7, name = "YlOrBr")), + rep("#fff7bc", 3)) pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt, @@ -84,4 +84,3 @@ draw(Heatmap(matrix = dt, show_heatmap_legend = TRUE, heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")))) dev.off() - From 7ca980e23817a06881cc1cf51770d8436ca82539 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 15:44:41 -0500 Subject: [PATCH 648/766] ++ --- contamination/clusterSampleVcf.R | 2 -- contamination/clusterSamples.mk | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index d16b0b7a..0b8c16aa 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -8,8 +8,6 @@ suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("ComplexHeatmap")) suppressPackageStartupMessages(library("RColorBrewer")) -options(error = quote(dump.frames("testdump", TRUE))) - optList <- list(make_option("--input_file", default = 'snp_vcf/snps_ft.vcf', help = "input file"), make_option("--output_file", default = 'snp_vcf/snps_ft.pdf', help = "output file"), make_option("--sample_pairs", default = NA, help = "sample pairs"), diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk index 0b503f27..c7b1d6f7 100644 --- a/contamination/clusterSamples.mk +++ b/contamination/clusterSamples.mk @@ -44,9 +44,9 @@ snp_vcf/snps_ft.vcf : snp_vcf/snps.vcf snp_vcf/snps_ft.pdf : snp_vcf/snps_ft.vcf $(call RUN,-n 1 -s 16G -m 20G -v $(VARIANT_ANNOTATION_ENV),"set -o pipefail && \ $(RSCRIPT) modules/contamination/clusterSampleVcf.R \ - --input_file $(<)\ - --output_file $(@)\ - --sample_pairs '$(SAMPLE_PAIRS)'\ + --input_file $(<) \ + --output_file $(@) \ + --sample_pairs '$(SAMPLE_PAIRS)' \ --genome b37") From 73c83399964f15b8fa931f5759d74127d37ce7de Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:27:39 -0500 Subject: [PATCH 649/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 0b8c16aa..343a7037 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -61,7 +61,7 @@ draw(Heatmap(matrix = dt, name = " ", rect_gp = gpar(col = "white"), border = NA, - col = col_pal, + #col = col_pal, cluster_rows = TRUE, show_row_dend = TRUE, row_dend_width = unit(3, "cm"), From e2149254f746ddba6016285b117a29e5ce1330ec Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:28:49 -0500 Subject: [PATCH 650/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 343a7037..f607db11 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -61,14 +61,14 @@ draw(Heatmap(matrix = dt, name = " ", rect_gp = gpar(col = "white"), border = NA, - #col = col_pal, + col = col_pal, cluster_rows = TRUE, show_row_dend = TRUE, row_dend_width = unit(3, "cm"), row_names_side = "right", row_names_gp = gpar(fontsize = 12), show_row_names = TRUE, - left_annotation = row_annot, + #left_annotation = row_annot, show_column_names = TRUE, column_names_side = "bottom", @@ -76,7 +76,7 @@ draw(Heatmap(matrix = dt, cluster_columns = TRUE, show_column_dend = TRUE, column_dend_height = unit(3, "cm"), - top_annotation = col_annot, + #top_annotation = col_annot, use_raster = FALSE, show_heatmap_legend = TRUE, From 3f90603daf9800f41ce9d79749be91200ca185d5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:29:48 -0500 Subject: [PATCH 651/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index f607db11..a6378e93 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -40,17 +40,17 @@ cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pai names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), + col = list(cluster_id = cluster_color, show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), - show_legend = FALSE + show_legend = FALSE) ) col_annot = columnAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), + col = list(cluster_id = cluster_color, show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), - show_legend = FALSE + show_legend = FALSE) ) col_pal = c(rep("#662506", 3), rev(brewer.pal(n = 7, name = "YlOrBr")), @@ -68,7 +68,7 @@ draw(Heatmap(matrix = dt, row_names_side = "right", row_names_gp = gpar(fontsize = 12), show_row_names = TRUE, - #left_annotation = row_annot, + left_annotation = row_annot, show_column_names = TRUE, column_names_side = "bottom", @@ -76,7 +76,7 @@ draw(Heatmap(matrix = dt, cluster_columns = TRUE, show_column_dend = TRUE, column_dend_height = unit(3, "cm"), - #top_annotation = col_annot, + top_annotation = col_annot, use_raster = FALSE, show_heatmap_legend = TRUE, From 2257d463221eecf355f22f2fca7d1a70aa0bf2be Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:33:37 -0500 Subject: [PATCH 652/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index a6378e93..5b64e7b8 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -40,22 +40,22 @@ cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pai names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color, - show_annotation_name = FALSE, - simple_anno_size = unit(.5, "cm"), - show_legend = FALSE) + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE ) col_annot = columnAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color, - show_annotation_name = FALSE, - simple_anno_size = unit(.5, "cm"), - show_legend = FALSE) + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE ) col_pal = c(rep("#662506", 3), rev(brewer.pal(n = 7, name = "YlOrBr")), rep("#fff7bc", 3)) - + pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt, name = " ", From ad99cada74bb3d4c5a43a1814b4ac23fb9503a3e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:34:16 -0500 Subject: [PATCH 653/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 5b64e7b8..dcb52640 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -68,7 +68,7 @@ draw(Heatmap(matrix = dt, row_names_side = "right", row_names_gp = gpar(fontsize = 12), show_row_names = TRUE, - left_annotation = row_annot, + #left_annotation = row_annot, show_column_names = TRUE, column_names_side = "bottom", @@ -76,7 +76,7 @@ draw(Heatmap(matrix = dt, cluster_columns = TRUE, show_column_dend = TRUE, column_dend_height = unit(3, "cm"), - top_annotation = col_annot, + #top_annotation = col_annot, use_raster = FALSE, show_heatmap_legend = TRUE, From d79fab9083c0ae7238dae3429f64dc68b36168e4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:36:03 -0500 Subject: [PATCH 654/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index dcb52640..ab51e207 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -38,6 +38,9 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm dplyr::mutate(normal_samples = factor(normal_samples, levels = unique(normal_samples), ordered = TRUE)) cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) + +print("ok") + row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], col = list(cluster_id = cluster_color), @@ -68,7 +71,7 @@ draw(Heatmap(matrix = dt, row_names_side = "right", row_names_gp = gpar(fontsize = 12), show_row_names = TRUE, - #left_annotation = row_annot, + left_annotation = row_annot, show_column_names = TRUE, column_names_side = "bottom", @@ -76,7 +79,7 @@ draw(Heatmap(matrix = dt, cluster_columns = TRUE, show_column_dend = TRUE, column_dend_height = unit(3, "cm"), - #top_annotation = col_annot, + top_annotation = col_annot, use_raster = FALSE, show_heatmap_legend = TRUE, From b188af66232b43a5d0534a3c5eec4e58053fa4fc Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:37:35 -0500 Subject: [PATCH 655/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index ab51e207..946d477a 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -39,8 +39,6 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) -print("ok") - row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], col = list(cluster_id = cluster_color), @@ -64,14 +62,14 @@ draw(Heatmap(matrix = dt, name = " ", rect_gp = gpar(col = "white"), border = NA, - col = col_pal, + #col = col_pal, cluster_rows = TRUE, show_row_dend = TRUE, row_dend_width = unit(3, "cm"), row_names_side = "right", row_names_gp = gpar(fontsize = 12), show_row_names = TRUE, - left_annotation = row_annot, + #left_annotation = row_annot, show_column_names = TRUE, column_names_side = "bottom", @@ -79,7 +77,7 @@ draw(Heatmap(matrix = dt, cluster_columns = TRUE, show_column_dend = TRUE, column_dend_height = unit(3, "cm"), - top_annotation = col_annot, + #top_annotation = col_annot, use_raster = FALSE, show_heatmap_legend = TRUE, From 57a99db546645145d2439d2e9ed3e87258d4c684 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:38:45 -0500 Subject: [PATCH 656/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 946d477a..d461ef60 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -40,15 +40,15 @@ cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pai names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) row_annot = rowAnnotation( - cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), + `cluster_id` = sample_pairs %>% .[["normal_samples"]], + col = list(`cluster_id` = cluster_color), show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), show_legend = FALSE ) col_annot = columnAnnotation( - cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), + `cluster_id` = sample_pairs %>% .[["normal_samples"]], + col = list(`cluster_id` = cluster_color), show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), show_legend = FALSE From 9fd6e86eb92c981752d2479b755d0a9158c96406 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:46:43 -0500 Subject: [PATCH 657/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 61 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index d461ef60..556a3cda 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -40,15 +40,15 @@ cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pai names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) row_annot = rowAnnotation( - `cluster_id` = sample_pairs %>% .[["normal_samples"]], - col = list(`cluster_id` = cluster_color), + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), show_legend = FALSE ) col_annot = columnAnnotation( - `cluster_id` = sample_pairs %>% .[["normal_samples"]], - col = list(`cluster_id` = cluster_color), + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), show_annotation_name = FALSE, simple_anno_size = unit(.5, "cm"), show_legend = FALSE @@ -56,30 +56,35 @@ col_annot = columnAnnotation( col_pal = c(rep("#662506", 3), rev(brewer.pal(n = 7, name = "YlOrBr")), rep("#fff7bc", 3)) + +print(row_annot) +print(col_annot) +print(col_pal) pdf(as.character(opt$output_file), height = 21, width = 22) -draw(Heatmap(matrix = dt, - name = " ", - rect_gp = gpar(col = "white"), - border = NA, - #col = col_pal, - cluster_rows = TRUE, - show_row_dend = TRUE, - row_dend_width = unit(3, "cm"), - row_names_side = "right", - row_names_gp = gpar(fontsize = 12), - show_row_names = TRUE, - #left_annotation = row_annot, - - show_column_names = TRUE, - column_names_side = "bottom", - column_names_gp = gpar(fontsize = 12), - cluster_columns = TRUE, - show_column_dend = TRUE, - column_dend_height = unit(3, "cm"), - #top_annotation = col_annot, - - use_raster = FALSE, - show_heatmap_legend = TRUE, - heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")))) +draw(Heatmap(matrix = dt +# name = " ", +# rect_gp = gpar(col = "white"), +# border = NA, +# col = col_pal, +# cluster_rows = TRUE, +# show_row_dend = TRUE, +# row_dend_width = unit(3, "cm"), +# row_names_side = "right", +# row_names_gp = gpar(fontsize = 12), +# show_row_names = TRUE, +# left_annotation = row_annot, +# +# show_column_names = TRUE, +# column_names_side = "bottom", +# column_names_gp = gpar(fontsize = 12), +# cluster_columns = TRUE, +# show_column_dend = TRUE, +# column_dend_height = unit(3, "cm"), +# top_annotation = col_annot, +# +# use_raster = FALSE, +# show_heatmap_legend = TRUE, +# heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm"))) +) dev.off() From 5ae6d952c2bfc643d532f2ed430f3f5eee1b67e2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:47:40 -0500 Subject: [PATCH 658/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 556a3cda..2fdfd162 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -39,27 +39,27 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) -row_annot = rowAnnotation( - cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), - show_annotation_name = FALSE, - simple_anno_size = unit(.5, "cm"), - show_legend = FALSE -) -col_annot = columnAnnotation( - cluster_id = sample_pairs %>% .[["normal_samples"]], - col = list(cluster_id = cluster_color), - show_annotation_name = FALSE, - simple_anno_size = unit(.5, "cm"), - show_legend = FALSE -) -col_pal = c(rep("#662506", 3), - rev(brewer.pal(n = 7, name = "YlOrBr")), - rep("#fff7bc", 3)) - -print(row_annot) -print(col_annot) -print(col_pal) +#row_annot = rowAnnotation( +# cluster_id = sample_pairs %>% .[["normal_samples"]], +# col = list(cluster_id = cluster_color), +# show_annotation_name = FALSE, +# simple_anno_size = unit(.5, "cm"), +# show_legend = FALSE +#) +#col_annot = columnAnnotation( +# cluster_id = sample_pairs %>% .[["normal_samples"]], +# col = list(cluster_id = cluster_color), +# show_annotation_name = FALSE, +# simple_anno_size = unit(.5, "cm"), +# show_legend = FALSE +#) +#col_pal = c(rep("#662506", 3), +# rev(brewer.pal(n = 7, name = "YlOrBr")), +# rep("#fff7bc", 3)) +# +#print(row_annot) +#print(col_annot) +#print(col_pal) pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt From 4775521e5f6d90518bb5d5f97b5eddb2b8f2e249 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:48:29 -0500 Subject: [PATCH 659/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 2fdfd162..1fdbe6a0 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -85,6 +85,6 @@ draw(Heatmap(matrix = dt # # use_raster = FALSE, # show_heatmap_legend = TRUE, -# heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm"))) -) +# heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")) +)) dev.off() From 3c8e8d6dcf9f19ad68e877c8801533414ca4484b Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:49:21 -0500 Subject: [PATCH 660/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 1fdbe6a0..826f161d 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -39,13 +39,13 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) -#row_annot = rowAnnotation( -# cluster_id = sample_pairs %>% .[["normal_samples"]], -# col = list(cluster_id = cluster_color), -# show_annotation_name = FALSE, -# simple_anno_size = unit(.5, "cm"), -# show_legend = FALSE -#) +row_annot = rowAnnotation( + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE +) #col_annot = columnAnnotation( # cluster_id = sample_pairs %>% .[["normal_samples"]], # col = list(cluster_id = cluster_color), @@ -56,10 +56,6 @@ names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) #col_pal = c(rep("#662506", 3), # rev(brewer.pal(n = 7, name = "YlOrBr")), # rep("#fff7bc", 3)) -# -#print(row_annot) -#print(col_annot) -#print(col_pal) pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt From 1eafddfd9c82895d67985655abfc6b9ef996a860 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:50:17 -0500 Subject: [PATCH 661/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 826f161d..10597ce2 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -39,6 +39,8 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) +print(cluster_color) + row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], col = list(cluster_id = cluster_color), From 73cac32e84dac4d2053731585cb30a6ddc9dc2ac Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:51:57 -0500 Subject: [PATCH 662/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 10597ce2..c52bf5c6 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -39,7 +39,7 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) -print(cluster_color) +print(sample_pairs) row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], From 2c505e0648ca61a00c150e14d88b2693b5d4c64f Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:53:12 -0500 Subject: [PATCH 663/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 70 ++++++++++++++++---------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index c52bf5c6..4cc19a8f 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -31,6 +31,9 @@ gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = dt = as.matrix(dist(t(gt))) tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) + +print(tumor_samples) + normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE), normal_samples = c(normal_samples, unique(normal_samples))) %>% @@ -48,41 +51,40 @@ row_annot = rowAnnotation( simple_anno_size = unit(.5, "cm"), show_legend = FALSE ) -#col_annot = columnAnnotation( -# cluster_id = sample_pairs %>% .[["normal_samples"]], -# col = list(cluster_id = cluster_color), -# show_annotation_name = FALSE, -# simple_anno_size = unit(.5, "cm"), -# show_legend = FALSE -#) -#col_pal = c(rep("#662506", 3), -# rev(brewer.pal(n = 7, name = "YlOrBr")), -# rep("#fff7bc", 3)) +col_annot = columnAnnotation( + cluster_id = sample_pairs %>% .[["normal_samples"]], + col = list(cluster_id = cluster_color), + show_annotation_name = FALSE, + simple_anno_size = unit(.5, "cm"), + show_legend = FALSE +) +col_pal = c(rep("#662506", 3), + rev(brewer.pal(n = 7, name = "YlOrBr")), + rep("#fff7bc", 3)) pdf(as.character(opt$output_file), height = 21, width = 22) draw(Heatmap(matrix = dt -# name = " ", -# rect_gp = gpar(col = "white"), -# border = NA, -# col = col_pal, -# cluster_rows = TRUE, -# show_row_dend = TRUE, -# row_dend_width = unit(3, "cm"), -# row_names_side = "right", -# row_names_gp = gpar(fontsize = 12), -# show_row_names = TRUE, -# left_annotation = row_annot, -# -# show_column_names = TRUE, -# column_names_side = "bottom", -# column_names_gp = gpar(fontsize = 12), -# cluster_columns = TRUE, -# show_column_dend = TRUE, -# column_dend_height = unit(3, "cm"), -# top_annotation = col_annot, -# -# use_raster = FALSE, -# show_heatmap_legend = TRUE, -# heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")) -)) + name = " ", + rect_gp = gpar(col = "white"), + border = NA, + col = col_pal, + cluster_rows = TRUE, + show_row_dend = TRUE, + row_dend_width = unit(3, "cm"), + row_names_side = "right", + row_names_gp = gpar(fontsize = 12), + show_row_names = TRUE, + left_annotation = row_annot, + + show_column_names = TRUE, + column_names_side = "bottom", + column_names_gp = gpar(fontsize = 12), + cluster_columns = TRUE, + show_column_dend = TRUE, + column_dend_height = unit(3, "cm"), + top_annotation = col_annot, + + use_raster = FALSE, + show_heatmap_legend = TRUE, + heatmap_legend_param = list(legend_height = unit(5, "cm"), legend_width = unit(5, "cm")))) dev.off() From 1c547b8f5d0c76af00cfb566fed16b16757e636d Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:54:14 -0500 Subject: [PATCH 664/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 4cc19a8f..592a736d 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -29,11 +29,9 @@ X[!gt %in% c("0/0", "0/1", "1/1")] = NA gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) dt = as.matrix(dist(t(gt))) - -tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) -print(tumor_samples) - +print(opt$sample_pairs) +tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE), normal_samples = c(normal_samples, unique(normal_samples))) %>% @@ -42,8 +40,6 @@ sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(norm cluster_color = colorRampPalette(brewer.pal(9, "Set1"))(length(unique(sample_pairs %>% .[["normal_samples"]]))) names(cluster_color) = sort(unique(sample_pairs %>% .[["normal_samples"]])) -print(sample_pairs) - row_annot = rowAnnotation( cluster_id = sample_pairs %>% .[["normal_samples"]], col = list(cluster_id = cluster_color), From d9424a659aa1c1cb7ace13509de0d77609d257e5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 18:58:56 -0500 Subject: [PATCH 665/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 592a736d..e997b746 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -8,9 +8,13 @@ suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("ComplexHeatmap")) suppressPackageStartupMessages(library("RColorBrewer")) +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + optList <- list(make_option("--input_file", default = 'snp_vcf/snps_ft.vcf', help = "input file"), make_option("--output_file", default = 'snp_vcf/snps_ft.pdf', help = "output file"), - make_option("--sample_pairs", default = NA, help = "sample pairs"), + make_option("--sample_pairs", default = NA, type = 'character', help = "sample pairs"), make_option("--genome", default = 'b37', help = "genome build")) parser <- OptionParser(usage = "%prog vcf.files", option_list = optList) @@ -30,7 +34,8 @@ X[!gt %in% c("0/0", "0/1", "1/1")] = NA gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) dt = as.matrix(dist(t(gt))) -print(opt$sample_pairs) +print(opt) + tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE), From 4bba0c48c0f190296d2f809f87db18a88bfe306e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 19:00:04 -0500 Subject: [PATCH 666/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index e997b746..7ffa6648 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -34,7 +34,7 @@ X[!gt %in% c("0/0", "0/1", "1/1")] = NA gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) dt = as.matrix(dist(t(gt))) -print(opt) +print(opt$sample_pairs) tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) From 75c92ba7b0ec054efa50a77230afb3b9f7312ccd Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 28 Feb 2023 19:00:42 -0500 Subject: [PATCH 667/766] Update clusterSampleVcf.R --- contamination/clusterSampleVcf.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/contamination/clusterSampleVcf.R b/contamination/clusterSampleVcf.R index 7ffa6648..35ee71cc 100644 --- a/contamination/clusterSampleVcf.R +++ b/contamination/clusterSampleVcf.R @@ -34,8 +34,6 @@ X[!gt %in% c("0/0", "0/1", "1/1")] = NA gt = matrix(as.integer(factor(X)), nrow = nrow(gt), ncol = ncol(gt), dimnames = list(rownames(gt), colnames(gt))) dt = as.matrix(dist(t(gt))) -print(opt$sample_pairs) - tumor_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[1] })) normal_samples = unlist(lapply(strsplit(x = unlist(strsplit(x = as.character(opt$sample_pairs), split = " ")), split = "_"), function(x) { x[2] })) sample_pairs = dplyr::tibble(tumor_samples = factor(c(tumor_samples, unique(normal_samples)), levels = rownames(dt), ordered = TRUE), @@ -64,7 +62,7 @@ col_pal = c(rep("#662506", 3), rep("#fff7bc", 3)) pdf(as.character(opt$output_file), height = 21, width = 22) -draw(Heatmap(matrix = dt +draw(Heatmap(matrix = dt, name = " ", rect_gp = gpar(col = "white"), border = NA, From 54f5c50880a25c175c07b5f6ed26f3d28832dafb Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 4 Mar 2023 16:16:19 -0500 Subject: [PATCH 668/766] Update sufam_gt.mk --- variant_callers/sufam_gt.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/sufam_gt.mk b/variant_callers/sufam_gt.mk index 210f8292..1b58f247 100644 --- a/variant_callers/sufam_gt.mk +++ b/variant_callers/sufam_gt.mk @@ -3,7 +3,7 @@ include modules/Makefile.inc LOGDIR ?= log/sufam_gt.$(NOW) SUFAM_ENV = $(HOME)/share/usr/anaconda-envs/sufam-dev -SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000 --ff UNMAP,SECONDARY,QCFAIL' +SUFAM_OPTS = --mpileup-parameters='-A -q 15 -Q 15 -d 15000' sufam_gt : $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).vcf) \ $(foreach sample,$(TUMOR_SAMPLES),sufam/$(sample).txt) \ From 9ae8b3d4402132a8b4dc96e06bb1c3d1dd80ba45 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 5 Mar 2023 16:45:17 -0500 Subject: [PATCH 669/766] Update fix_bam.mk --- bam_tools/fix_bam.mk | 108 +++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/bam_tools/fix_bam.mk b/bam_tools/fix_bam.mk index 42a08ead..343fbf86 100644 --- a/bam_tools/fix_bam.mk +++ b/bam_tools/fix_bam.mk @@ -8,62 +8,62 @@ PICARD_JAR = ~/share/usr/picard/bin/picard.jar fix_bam : $(foreach sample,$(SAMPLES),fixed_bam/$(sample).bam) define fix-bam -unprocessed_bam/%.ubam : unprocessed_bam/%.bam - $$(call RUN,-c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) RevertSam \ - I=$$(<) \ - O=unprocessed_bam/$$(*).ubam \ - SANITIZE=true \ - MAX_DISCARD_FRACTION=0.005 \ - ATTRIBUTE_TO_CLEAR=XT \ - ATTRIBUTE_TO_CLEAR=XN \ - ATTRIBUTE_TO_CLEAR=AS \ - ATTRIBUTE_TO_CLEAR=OC \ - ATTRIBUTE_TO_CLEAR=OP \ - SORT_ORDER=queryname \ - RESTORE_ORIGINAL_QUALITIES=true \ - REMOVE_DUPLICATE_INFORMATION=true \ - REMOVE_ALIGNMENT_INFORMATION=true \ - TMP_DIR=$(TMPDIR)") +unprocessed_bam/$1.ubam : unprocessed_bam/$1.bam + $$(call RUN,-c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) RevertSam \ + I=$$(<) \ + O=$$(@) \ + SANITIZE=true \ + MAX_DISCARD_FRACTION=0.005 \ + ATTRIBUTE_TO_CLEAR=XT \ + ATTRIBUTE_TO_CLEAR=XN \ + ATTRIBUTE_TO_CLEAR=AS \ + ATTRIBUTE_TO_CLEAR=OC \ + ATTRIBUTE_TO_CLEAR=OP \ + SORT_ORDER=queryname \ + RESTORE_ORIGINAL_QUALITIES=true \ + REMOVE_DUPLICATE_INFORMATION=true \ + REMOVE_ALIGNMENT_INFORMATION=true \ + TMP_DIR=$(TMPDIR)") -unprocessed_bam/%.fixed.bam : unprocessed_bam/%.ubam - $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \ - R=$$(DMP_FASTA) \ - UNMAPPED_BAM=$$(<) \ - ALIGNED_BAM=unprocessed_bam/$$(*).bam \ - O=unprocessed_bam/$$(*).fixed.bam \ - CREATE_INDEX=true \ - ADD_MATE_CIGAR=true \ - CLIP_ADAPTERS=true \ - CLIP_OVERLAPPING_READS=true \ - INCLUDE_SECONDARY_ALIGNMENTS=false \ - MAX_INSERTIONS_OR_DELETIONS=-1 \ - TMP_DIR=$(TMPDIR)") +unprocessed_bam/$1.fixed.bam : unprocessed_bam/$1.bam unprocessed_bam/$1.ubam + $$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \ + R=$$(DMP_FASTA) \ + ALIGNED_BAM=$$(<).bam \ + UNMAPPED_BAM=$$(<<) \ + O=$$(@).fixed.bam \ + CREATE_INDEX=true \ + ADD_MATE_CIGAR=true \ + CLIP_ADAPTERS=true \ + CLIP_OVERLAPPING_READS=true \ + INCLUDE_SECONDARY_ALIGNMENTS=false \ + MAX_INSERTIONS_OR_DELETIONS=-1 \ + TMP_DIR=$(TMPDIR)") -unprocessed_bam/%.dedup.bam : unprocessed_bam/%.fixed.bam - $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MarkDuplicates \ - I=$$(<) \ - O=unprocessed_bam/$$(*).dedup.bam \ - M=unprocessed_bam/$$(*).txt \ - TMP_DIR=$$(TMPDIR)") +unprocessed_bam/$1.dedup.bam : unprocessed_bam/$1.fixed.bam + $$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MarkDuplicates \ + I=$$(<) \ + O=$$(@) \ + M=unprocessed_bam/$1.txt \ + TMP_DIR=$$(TMPDIR)") -fixed_bam/%.bam : unprocessed_bam/%.dedup.bam - $$(call RUN, -c -n 1 -s 12G -m 18G -w 7200,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) AddOrReplaceReadGroups \ - I=$$(<) \ - O=fixed_bam/$$(*).bam \ - RGID=$$(*) \ - RGLB=$$(*) \ - RGPL=illumina \ - RGPU=NA \ - RGSM=$$(*) \ - TMP_DIR=$(TMPDIR) && \ - samtools index fixed_bam/$$(*).bam && \ - cp fixed_bam/$$(*).bam.bai fixed_bam/$$(*).bai && \ - rm -rf unprocessed_bam/$$(*).ubam && \ - rm -rf unprocessed_bam/$$(*).fixed.bam && \ - rm -rf unprocessed_bam/$$(*).dedup.bam && \ - rm -rf unprocessed_bam/$$(*).fixed.bai && \ - rm -rf unprocessed_bam/$$(*).dedup.bai && \ - rm -rf unprocessed_bam/$$(*).txt") +fixed_bam/$1.bam : unprocessed_bam/$1.dedup.bam + $$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) AddOrReplaceReadGroups \ + I=$$(<) \ + O=$$(@) \ + RGID=$1 \ + RGLB=$1 \ + RGPL=illumina \ + RGPU=NA \ + RGSM=$1 \ + TMP_DIR=$(TMPDIR) && \ + samtools index $$(@) && \ + cp fixed_bam/$1.bam.bai fixed_bam/$1.bai && \ + rm -rf unprocessed_bam/$1.ubam && \ + rm -rf unprocessed_bam/$1.fixed.bam && \ + rm -rf unprocessed_bam/$1.dedup.bam && \ + rm -rf unprocessed_bam/$1.fixed.bai && \ + rm -rf unprocessed_bam/$1.dedup.bai && \ + rm -rf unprocessed_bam/$1.txt") endef $(foreach sample,$(SAMPLES),\ $(eval $(call fix-bam,$(sample)))) @@ -72,4 +72,4 @@ endef echo "picard" > version/fix_bam.txt) .SECONDARY: .DELETE_ON_ERROR: -.PHONY: fix_bam \ No newline at end of file +.PHONY: fix_bam From 542fca39369f56915968ac4f77be093d742332ab Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 5 Mar 2023 17:23:18 -0500 Subject: [PATCH 670/766] Update fix_bam.mk --- bam_tools/fix_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/fix_bam.mk b/bam_tools/fix_bam.mk index 343fbf86..7ae89365 100644 --- a/bam_tools/fix_bam.mk +++ b/bam_tools/fix_bam.mk @@ -28,7 +28,7 @@ unprocessed_bam/$1.ubam : unprocessed_bam/$1.bam unprocessed_bam/$1.fixed.bam : unprocessed_bam/$1.bam unprocessed_bam/$1.ubam $$(call RUN, -c -n 1 -s 12G -m 18G -w 72:00:00,"java -Djava.io.tmpdir=$(TMPDIR) -Xmx16G -jar $$(PICARD_JAR) MergeBamAlignment \ R=$$(DMP_FASTA) \ - ALIGNED_BAM=$$(<).bam \ + ALIGNED_BAM=$$(<) \ UNMAPPED_BAM=$$(<<) \ O=$$(@).fixed.bam \ CREATE_INDEX=true \ From 0cbdc14586cbac465747107049e3f36843868022 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 5 Mar 2023 18:32:41 -0500 Subject: [PATCH 671/766] Update fix_bam.mk --- bam_tools/fix_bam.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/fix_bam.mk b/bam_tools/fix_bam.mk index 7ae89365..3e6ce446 100644 --- a/bam_tools/fix_bam.mk +++ b/bam_tools/fix_bam.mk @@ -30,7 +30,7 @@ unprocessed_bam/$1.fixed.bam : unprocessed_bam/$1.bam unprocessed_bam/$1.ubam R=$$(DMP_FASTA) \ ALIGNED_BAM=$$(<) \ UNMAPPED_BAM=$$(<<) \ - O=$$(@).fixed.bam \ + O=$$(@) \ CREATE_INDEX=true \ ADD_MATE_CIGAR=true \ CLIP_ADAPTERS=true \ From ab9c5061e34cf256b283aea9cb4d5a462c870b3a Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 7 Mar 2023 20:31:37 -0500 Subject: [PATCH 672/766] Update clusterSamples.mk --- contamination/clusterSamples.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/contamination/clusterSamples.mk b/contamination/clusterSamples.mk index c7b1d6f7..975898c6 100644 --- a/contamination/clusterSamples.mk +++ b/contamination/clusterSamples.mk @@ -21,6 +21,7 @@ snp_vcf/%.snps.vcf : bam/%.bam $(call RUN,-n 4 -s 2.5G -m 3G,"set -o pipefail && \ $(call GATK_MEM,8G) \ -T UnifiedGenotyper \ + -rf BadCigar \ -nt 4 \ -R $(REF_FASTA) \ --dbsnp $(DBSNP) \ From f9429b9d7e62eb1938b3a7f86a1e3e768e97f76e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 17 Mar 2023 20:41:42 -0400 Subject: [PATCH 673/766] ++ --- scripts/filter_sv.R | 50 +++++++++++++++++++++++++++++++++++++++++++ vcf_tools/merge_sv.mk | 15 ++++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 scripts/filter_sv.R diff --git a/scripts/filter_sv.R b/scripts/filter_sv.R new file mode 100644 index 00000000..bb9b87f6 --- /dev/null +++ b/scripts/filter_sv.R @@ -0,0 +1,50 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--input_file", default = NA, type = 'character', help = "Input VCF file"), + make_option("--output_file", default = NA, type = 'character', help = "Output VCF file")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +vcf = readr::read_tsv(file = as.character(opt$input_file), comment = "#", col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::filter(!grepl("SUPP_VEC=110", X8, fixed = TRUE)) %>% + dplyr::mutate(X3 = X12) %>% + dplyr::mutate(X3 = unlist(lapply(X3, function(x) { unlist(strsplit(x, split = ":", fixed = TRUE))[8] }))) %>% + dplyr::mutate(X3 = gsub(pattern = "_", replacement = ":", x = X3, fixed = TRUE)) %>% + dplyr::mutate(X5 = case_when( + grepl("DUP", X3, fixed = TRUE) ~ "", + grepl("DEL", X3, fixed = TRUE) ~ "", + grepl("INV", X3, fixed = TRUE) ~ "", + TRUE ~ X5 + )) %>% + dplyr::mutate(X8 = case_when( + grepl("DUP", X3, fixed = TRUE) ~ gsub("SVTYPE=INV", "SVTYPE=DUP", X8), + grepl("DEL", X3, fixed = TRUE) ~ gsub("SVTYPE=INV", "SVTYPE=DEL", X8), + TRUE ~ X8 + )) %>% + dplyr::rename(`#CHROM` = X1, + POS = X2, + ID = X3, + REF = X4, + ALT = X5, + QUAL = X6, + FILTER = X7, + INFO = X8, + FORMAT = X9, + SVABA = X10, + GRIDSS = X11, + MANTA = X12) + +readr::write_tsv(x = vcf, path = as.character(opt$output_file), append = TRUE, col_names = TRUE) + + diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 9fda196d..1c301488 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -2,7 +2,7 @@ include modules/Makefile.inc LOGDIR ?= log/merge_sv.$(NOW) -SV_CALLERS = svaba manta gridss +SV_CALLERS = svaba gridss manta MAX_DIST = 500 NUM_CALLERS = 2 TYPE = 0 @@ -11,6 +11,7 @@ MIN_SIZE = 30 merge_sv : $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/samples.txt) \ $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),merge_sv/$(pair)/$(pair).merged_sv_ft.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).merged_sv.vcf) define merge-sv @@ -23,15 +24,19 @@ merge_sv/$1_$2/$1_$2.merged_sv.vcf : merge_sv/$1_$2/samples.txt SURVIVOR merge $$(<) \ $(MAX_DIST) $(NUM_CALLERS) $(TYPE) $(STRAND) 0 $(MIN_SIZE) $$(@)") -vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/$1_$2.merged_sv.vcf +merge_sv/$1_$2/$1_$2.merged_sv_ft.vcf : merge_sv/$1_$2/$1_$2.merged_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \ + grep '##' $$(<) > $$(@) && \ + $$(RSCRIPT) modules/scripts/filter_sv.R --input_file $$(<) --output_file $$(@)") + + +vcf/$1_$2.merged_sv.vcf : merge_sv/$1_$2/$1_$2.merged_sv_ft.vcf $$(INIT) cat $$(<) > $$(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call merge-sv,$(tumor.$(pair)),$(normal.$(pair))))) - -..DUMMY := $(shell mkdir -p version; \ - $(SURVIVOR_ENV)/bin/SURVIVOR --version &> version/merge_sv.txt;) + .DELETE_ON_ERROR: .SECONDARY: .PHONY: merge_sv From 56a4c63812fee98273ba344710b438b68354725e Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 17 Mar 2023 22:00:22 -0400 Subject: [PATCH 674/766] Update configure.py --- scripts/configure.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/scripts/configure.py b/scripts/configure.py index 8f216041..2db2ce10 100755 --- a/scripts/configure.py +++ b/scripts/configure.py @@ -1,23 +1,18 @@ #!/usr/bin/env python -from __future__ import print_function +from __future__ import print_function import yaml import argparse import collections -""" convert yaml files to make include files -""" - - def lowerBool(x): if isinstance(x, bool): return str(x).lower() else: return x - def sample_yaml2mk(samples_file, out): - samples = yaml.load(open(args.samples_file, 'r')) + samples = yaml.full_load(open(args.samples_file, 'r')) tumors = set() normals = set() @@ -95,7 +90,7 @@ def sample_yaml2mk(samples_file, out): def sample_attr_yaml2mk(sample_attr_file, out): print("\n# sample_attr_file", file=out) - sample_attr = yaml.load(open(sample_attr_file, 'r')) + sample_attr = yaml.full_load(open(sample_attr_file, 'r')) for attr, m in sample_attr.items(): for k, v in m.items(): print("{}.{} = {}".format(attr, k, v), file=out) @@ -103,7 +98,7 @@ def sample_attr_yaml2mk(sample_attr_file, out): def sample_fastq_yaml2mk(sample_fastq_file, out): print("\n# sample_fastq_file", file=out) - sample_fastq = yaml.load(open(sample_fastq_file, 'r')) + sample_fastq = yaml.full_load(open(sample_fastq_file, 'r')) split_samples = set() for k, v in sample_fastq.items(): for idx, fastq in enumerate(v): @@ -122,17 +117,15 @@ def sample_fastq_yaml2mk(sample_fastq_file, out): def sample_merge_yaml2mk(sample_merge_file, out): print("\n# sample_merge_file", file=out) - sample_merge = yaml.load(open(args.sample_merge_file, 'r')) + sample_merge = yaml.full_load(open(args.sample_merge_file, 'r')) print("MERGE_SAMPLES = {}".format(" ".join(list(sample_merge.keys()))), file=out) for k, v in sample_merge.items(): print("merge.{} = {}".format(k, " ".join(v)), file=out) if __name__ == '__main__': - parser = argparse.ArgumentParser(prog='configure', - description='Convert project YAML file to Make') - parser.add_argument('--project_config_file', help='project yaml config file', - default='project_config.yaml') + parser = argparse.ArgumentParser(prog='configure', description='Convert project YAML file to Make') + parser.add_argument('--project_config_file', help='project yaml config file', default='project_config.yaml') parser.add_argument('--samples_file', help='yaml samples file', default='samples.yaml') parser.add_argument('--sample_attr_file', help='yaml sample attr file', default='sample_attr.yaml') parser.add_argument('--sample_fastq_file', help='yaml sample fastq file mappings', default='sample.fastq.yaml') From 929853d2efd0707128537e8e765b7b7f0d970bb8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 17 Mar 2023 22:01:19 -0400 Subject: [PATCH 675/766] Update configure.py --- scripts/configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/configure.py b/scripts/configure.py index 2db2ce10..2a226a61 100755 --- a/scripts/configure.py +++ b/scripts/configure.py @@ -135,7 +135,7 @@ def sample_merge_yaml2mk(sample_merge_file, out): of = open(args.out_file, 'w') - config = yaml.load(open(args.project_config_file, 'r')) + config = yaml.full_load(open(args.project_config_file, 'r')) for k, v in config.items(): print("{} = {}".format(k.upper(), lowerBool(v)), file=of) From f9d0a0554725258645d3949e6a1733fadc31af21 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 22 Mar 2023 14:31:34 -0400 Subject: [PATCH 676/766] Update pyclone_vi.mk --- clonality/pyclone_vi.mk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clonality/pyclone_vi.mk b/clonality/pyclone_vi.mk index 2965da74..746908b7 100644 --- a/clonality/pyclone_vi.mk +++ b/clonality/pyclone_vi.mk @@ -17,13 +17,13 @@ pyclone : $(foreach sample,$(TUMOR_SAMPLES),pyclone_vi/$(sample)/$(sample).vcf) define r-sufam pyclone_vi/$1/$1.vcf : summary/tsv/all.tsv - $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ - --option 1 \ - --sample_set '$(set.$1)' \ - --normal_sample '$(normal.$1)' \ - --input_file $$(<) \ - --output_file $$(@)") + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/sufam_gt.R \ + --option 1 \ + --sample_set '$(set.$1)' \ + --normal_sample '$(normal.$1)' \ + --input_file $$(<) \ + --output_file $$(@)") pyclone_vi/$1/$1.txt : pyclone_vi/$1/$1.vcf bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 3G -v $(SUFAM_ENV),"set -o pipefail && \ From 39612d4231382fc2d6c7c76f3ee65667de793d90 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 13:43:42 -0400 Subject: [PATCH 677/766] ++ --- Makefile | 2 +- qc/bamIntervalMetrics.mk | 102 ---------------------------- qc/bam_interval_metrics.mk | 136 +++++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 103 deletions(-) delete mode 100644 qc/bamIntervalMetrics.mk create mode 100644 qc/bam_interval_metrics.mk diff --git a/Makefile b/Makefile index 39b9377e..475c0857 100644 --- a/Makefile +++ b/Makefile @@ -431,7 +431,7 @@ bam_metrics : TARGETS += bam_interval_metrics bam_interval_metrics : - $(call RUN_MAKE,modules/qc/bamIntervalMetrics.mk) + $(call RUN_MAKE,modules/qc/bam_interval_metrics.mk) TARGETS += rnaseq_metrics rnaseq_metrics : diff --git a/qc/bamIntervalMetrics.mk b/qc/bamIntervalMetrics.mk deleted file mode 100644 index 88930e8a..00000000 --- a/qc/bamIntervalMetrics.mk +++ /dev/null @@ -1,102 +0,0 @@ -# generate bam interval metrics per sample - -#NO_RM := true - -include modules/Makefile.inc -include modules/variant_callers/gatk.inc -# picard format intervals file, needs requires sam format header - -VPATH ?= bam - -LOGDIR ?= log/metrics.$(NOW) - -PLOT_HS_METRICS = $(RSCRIPT) modules/qc/plotHsMetrics.R -NON_REF_FREQ = $(PERL) modules/qc/nonRefFreqFromPileup.pl -NON_REF_FREQ_BIN_SIZE = 0.01 - -SUMMARIZE_HS_METRICS = python modules/qc/summarize_hs_metrics.py -SUMMARIZE_IDXSTATS = python modules/qc/summarize_idxstats.py - -.DELETE_ON_ERROR: - -.SECONDARY: - -.PHONY: bam_interval_metrics hs_metrics amplicon_metrics interval_report #non_ref_metrics insert_size_metrics idxstats - -bam_interval_metrics : hs_metrics interval_report #non_ref_metrics idxstats - -#non_ref_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).interval_nonref_freq.tsv) - -hs_metrics : metrics/hs_metrics.tsv metrics/interval_hs_metrics.tsv metrics/hs_metrics.summary.tsv - -amplicon_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).amplicon_metrics.tsv) - -interval_report : metrics/interval_report/interval_report.timestamp - -#insert_size_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_size_metrics.tsv) - -#idxstats : metrics/idxstats_summary.tsv $(foreach sample,$(SAMPLES),metrics/$(sample).idxstats) - -# interval metrics per sample -metrics/%.hs_metrics.tsv metrics/%.interval_hs_metrics.tsv : bam/%.bam bam/%.bam.bai - $(call RUN,-s 10G -m 20G,"TMP=`mktemp`.intervals; \ - $(SAMTOOLS) view -H $< | grep '^@SQ' > \$$TMP && grep -P \"\t\" $(TARGETS_FILE) | awk 'BEGIN {OFS = \"\t\"} { print \$$1$(,)\$$2+1$(,)\$$3$(,)\"+\"$(,)NR }' >> \$$TMP; \ - $(CALC_HS_METRICS) INPUT=$< OUTPUT=metrics/$*.hs_metrics.tsv METRIC_ACCUMULATION_LEVEL=ALL_READS REFERENCE_SEQUENCE=$(REF_FASTA) PER_TARGET_COVERAGE=metrics/$*.interval_hs_metrics.tsv TARGET_INTERVALS=\$$TMP BAIT_SET_NAME=hs BAIT_INTERVALS=\$$TMP") - -# not sure how this differs from above, see picard doc -metrics/%.amplicon_metrics.tsv metrics/%.interval_amplicon_metrics.tsv : bam/%.bam bam/%.bam.bai - $(call RUN,-s 10G -m 20G,"TMP=`mktemp`.intervals; \ - $(SAMTOOLS) view -H $< | grep '^@SQ' > \$$TMP && grep -P \"\t\" $(TARGETS_FILE) | awk 'BEGIN {OFS = \"\t\"} { print \$$1$(,)\$$2+1$(,)\$$3$(,)\"+\"$(,)NR }' >> \$$TMP; \ - $(COLLECT_TARGETED_METRICS) INPUT=$< REFERENCE_SEQUENCE=$(REF_FASTA) OUTPUT=$@ AMPLICON_INTERVALS=\$$TMP TARGET_INTERVALS=\$$TMP METRIC_ACCUMULATION_LEVEL=ALL_READS PER_TARGET_COVERAGE=metrics/$*.interval_amplicon_metrics.tsv") - -# summarize interval metrics into one file -metrics/interval_hs_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).interval_hs_metrics.tsv) - $(INIT) \ - sed '/^#/d; /^$$/d' $< | cut -f 1-6 > $@.tmp; \ - for metrics in $^; do \ - samplename=$$(basename $${metrics%%.interval_hs_metrics.tsv}); \ - sed '/^#/d; /^$$/d' $$metrics | cut -f 7,8 | sed "s/mean_coverage/$${samplename}_mean_coverage/; s/normalized_coverage/$${samplename}_normalized_coverage/" | paste $@.tmp - > $@; \ - cp $@ $@.tmp; \ - done; \ - rm -f $@.tmp - -metrics/hs_metrics.summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.tsv) - $(INIT) $(SUMMARIZE_HS_METRICS) --excel_file $(@:.tsv=.xlsx) --project_name $(PROJECT_NAME) $^ > $@ 2> $(LOG) - -metrics/hs_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.tsv) - $(INIT) \ - { \ - sed '/^$$/d; /^#/d; s/SAMPLE.*//; s/BAIT_SET/SAMPLE/; s/\s$$//' $< | head -1; \ - for metrics in $^; do \ - samplename=$$(basename $${metrics%%.hs_metrics.tsv}); \ - sed "/^#/d; /^BAIT/d; /^\$$/d; s/^hs/$$samplename/; s/\t\+$$//" $$metrics; \ - done; \ - } > $@ - -metrics/interval_report/interval_report.timestamp : metrics/hs_metrics.tsv - $(call RUN,-s 7G -m 10G,"$(PLOT_HS_METRICS) --outDir $(@D) $< && touch $@") - -#metrics/%.interval_nonref_freq.tsv : bam/%.bam -# $(call RUN,-s 8G -m 10G,"$(SAMTOOLS) mpileup -l $(TARGETS_FILE) -f $(REF_FASTA) $< | $(NON_REF_FREQ) -b $(NON_REF_FREQ_BIN_SIZE) > $@") - -#metrics/%.insert_size_metrics.tsv : bam/%.bam -# $(call RUN,-s 8G -m 10G,"$(call PICARD,CollectInsertSizeMetrics,8G) INPUT=$< OUTPUT=$@ \ -# REFERENCE_SEQUENCE=$(REF_FASTA) HISTOGRAM_FILE=$(@:.tsv=.pdf)") - -#metrics/insert_size_metrics.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_size_metrics.tsv) -# $(INIT) \ -# { \ -# sed '/^$$/d; /^#/d; s/SAMPLE.*//; s/\s$$//; s/^/SAMPLE\t/' $< | head -1; \ -# for metrics in $^; do \ -# samplename=$$(basename $${metrics%%.insert_size_metrics.tsv}); \ -# grep -A1 '^MEDIAN_INSERT_SIZE' $$metrics | sed "1d; s/^/$$samplename\t/; s/\t\+$$//"; \ -# done; \ -# } > $@ - -#metrics/%.idxstats : bam/%.bam bam/%.bam.bai -# $(call RUN,,"samtools idxstats $< > $@") - -#metrics/idxstats_summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).idxstats) -# $(INIT) $(SUMMARIZE_IDXSTATS) --excel_file $(@:.tsv=.xlsx) --project_name $(PROJECT_NAME) --targets_file $(TARGETS_FILE) $^ > $@ 2> $(LOG) - -include modules/bam_tools/processBam.mk diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk new file mode 100644 index 00000000..491fbca9 --- /dev/null +++ b/qc/bam_interval_metrics.mk @@ -0,0 +1,136 @@ +include innovation-lab/Makefile.inc + +LOGDIR ?= log/bam_interval_metrics.$(NOW) + +bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) \ + summary/idx_metrics.txt \ + summary/aln_metrics.txt \ + summary/insert_metrics.txt \ + summary/oxog_metrics.txt \ + summary/hs_metrics.txt \ + summary/gc_metrics.txt \ + summary/gc_summary.txt + +TARGETS_LIST ?= $(HOME)/share/lib/resource_files/MSK-IMPACT-v4.sorted.list + +define idx-metrics +metrics/$1.idx_stats.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(BAM_INDEX) \ + INPUT=$$(<) \ + > $$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call idx-metrics,$(sample)))) + +define aln-metrics +metrics/$1.aln_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(COLLECT_ALIGNMENT_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call aln-metrics,$(sample)))) + +define insert-metrics +metrics/$1.insert_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(COLLECT_INSERT_METRICS) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \ + MINIMUM_PCT=0.5") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call insert-metrics,$(sample)))) + +define oxog-metrics +metrics/$1.oxog_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(COLLECT_OXOG_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call oxog-metrics,$(sample)))) + +define hs-metrics +metrics/$1.hs_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(CALC_HS_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + BAIT_INTERVALS=$$(TARGETS_LIST) \ + TARGET_INTERVALS=$$(TARGETS_LIST)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call hs-metrics,$(sample)))) + +define gc-metrics +metrics/$1.gc_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $$(COLLECT_GC_BIAS) \ + INPUT=$$(<) \ + OUTPUT=metrics/$1.gc_bias.txt \ + CHART_OUTPUT=metrics/$1.gc_metrics.pdf \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + SUMMARY_OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call gc-metrics,$(sample)))) + +summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'") + +summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'") + +summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'") + +summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'") + +summary/hs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'") + +summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'") + +summary/gc_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) + $(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'") + + +..DUMMY := $(shell mkdir -p version; \ + echo "picard" >> version/bam_interval_metrics.txt; \ + $(PICARD) CollectAlignmentSummaryMetrics --version &>> version/bam_interval_metrics.txt; \ + $(PICARD) CollectInsertSizeMetrics --version &>> version/bam_interval_metrics.txt; \ + $(PICARD) CollectOxoGMetrics --version &>> version/bam_interval_metrics.txt; \ + $(PICARD) CollectHsMetrics --version &>> version/bam_interval_metrics.txt; \ + $(PICARD) CollectGcBiasMetrics --version &>> version/bam_interval_metrics.txt; \ + R --version >> version/bam_interval_metrics.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: bam_metrics From f7bbbb290760672cffd38fae12fe4e76322bfcb9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 13:45:38 -0400 Subject: [PATCH 678/766] Create bam_metrics.R --- scripts/bam_metrics.R | 109 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100755 scripts/bam_metrics.R diff --git a/scripts/bam_metrics.R b/scripts/bam_metrics.R new file mode 100755 index 00000000..a08eefaa --- /dev/null +++ b/scripts/bam_metrics.R @@ -0,0 +1,109 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_names", default = NA, type = 'character', help = "sample names")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".idx_stats.txt"), + col_names = FALSE, col_types = cols(.default = col_character()))[-85,,drop=FALSE] %>% + readr::type_convert() %>% + dplyr::select(CHROMOSOME = X1, + LENGTH = X2, + ALIGNED_READS = X3) %>% + dplyr::mutate(CHROMOSOME = gsub(pattern=" length=", replacement="", x=CHROMOSOME), + ALIGNED_READS = gsub(pattern="Aligned= ", replacement="", x=ALIGNED_READS), + SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/idx_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==2) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".aln_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-SAMPLE, -READ_GROUP) %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/aln_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==3) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".insert_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-SAMPLE, -READ_GROUP) %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/insert_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==4) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".oxog_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(SAMPLE_NAME = SAMPLE_ALIAS) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/oxog_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==5) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".hs_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/hs_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==6) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".gc_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/gc_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==7) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + x = list() + for (i in 1:length(sample_names)) { + x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".gc_bias.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + x = do.call(rbind, x) + write_tsv(x, path="summary/gc_summary.txt", na = "NA", append = FALSE, col_names = TRUE) + +} From 2b677c99e3001f1fc27603cbdbb8529fec55fd1d Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 13:46:25 -0400 Subject: [PATCH 679/766] Update bam_interval_metrics.mk --- qc/bam_interval_metrics.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk index 491fbca9..d6f584fc 100644 --- a/qc/bam_interval_metrics.mk +++ b/qc/bam_interval_metrics.mk @@ -1,4 +1,4 @@ -include innovation-lab/Makefile.inc +include modules/Makefile.inc LOGDIR ?= log/bam_interval_metrics.$(NOW) From ecda0931a7e0399e3da9578d2917327337580bab Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 17:10:56 -0400 Subject: [PATCH 680/766] Update bam_interval_metrics.mk --- qc/bam_interval_metrics.mk | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk index d6f584fc..fc7b5519 100644 --- a/qc/bam_interval_metrics.mk +++ b/qc/bam_interval_metrics.mk @@ -15,7 +15,14 @@ bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \ summary/hs_metrics.txt \ summary/gc_metrics.txt \ summary/gc_summary.txt - + +CALC_HS_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectHsMetrics $(PICARD_OPTS) +COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryMetrics $(PICAD_OPTS) +COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICAD_OPTS) +COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICAD_OPTS) +COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICAD_OPTS) +BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICAD_OPTS) + TARGETS_LIST ?= $(HOME)/share/lib/resource_files/MSK-IMPACT-v4.sorted.list define idx-metrics From 26a010b8acf160e3b89f6968654a92166f96dc53 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 17:14:21 -0400 Subject: [PATCH 681/766] Update bam_interval_metrics.mk --- qc/bam_interval_metrics.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk index fc7b5519..8ab73a69 100644 --- a/qc/bam_interval_metrics.mk +++ b/qc/bam_interval_metrics.mk @@ -16,6 +16,9 @@ bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \ summary/gc_metrics.txt \ summary/gc_summary.txt +PICARD = picard +PICARD_MEM = 16G +PICARD_OPTS = VALIDATION_STRINGENCY=LENIENT MAX_RECORDS_IN_RAM=4000000 TMP_DIR=$(TMPDIR) CALC_HS_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectHsMetrics $(PICARD_OPTS) COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryMetrics $(PICAD_OPTS) COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICAD_OPTS) From 7d5ef94489176d656288934991d61e86f3c67bff Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 21:28:54 -0400 Subject: [PATCH 682/766] Update bam_interval_metrics.mk --- qc/bam_interval_metrics.mk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk index 8ab73a69..8aad89ce 100644 --- a/qc/bam_interval_metrics.mk +++ b/qc/bam_interval_metrics.mk @@ -106,31 +106,31 @@ $(foreach sample,$(SAMPLES),\ summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'") summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'") summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'") summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'") summary/hs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'") summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'") summary/gc_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) $(call RUN, -c -n 1 -s 12G -m 24G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'") + $(RSCRIPT) $(SCRIPTS_DIR)/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'") ..DUMMY := $(shell mkdir -p version; \ From 722506d3745a90417e267a2531d8dd3699727533 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:05:20 -0400 Subject: [PATCH 683/766] ++ --- Makefile | 5 ++++- scripts/hr_detect.R | 20 ++++++++++++++++++++ signatures/hr_detect.mk | 31 +++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 scripts/hr_detect.R create mode 100644 signatures/hr_detect.mk diff --git a/Makefile b/Makefile index 475c0857..126e560b 100644 --- a/Makefile +++ b/Makefile @@ -515,7 +515,10 @@ sv_signature : TARGETS += star_fish star_fish : $(call RUN_MAKE,modules/signatures/star_fish.mk) - + +TARGETS += hr_detect +hr_detect : + $(call RUN_MAKE,modules/signatures/hr_detect.mk) #================================================== # miscellaneous diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R new file mode 100644 index 00000000..83e8f75b --- /dev/null +++ b/scripts/hr_detect.R @@ -0,0 +1,20 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"), + make_option("--sample_name", default = NA, type = 'character', help = "sample name")) +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +if (as.numeric(opt$option) == 1) { + +} \ No newline at end of file diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk new file mode 100644 index 00000000..2e1512c5 --- /dev/null +++ b/signatures/hr_detect.mk @@ -0,0 +1,31 @@ +include modules/Makefile.inc + +LOGDIR ?= log/hr_detect.$(NOW) + +hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ + +define hr-detect +hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf + $$(call RUN,-c -n 1 -s 4G -m 8G -v $(SURVIVOR_ENV),"set -o pipefail && \ + SURVIVOR vcftobed \ + $$(<) \ + $(MIN_SIZE) \ + $(MAX_SIZE) \ + $$(@)") + +hr_detect/$1_$2/$1_$2.merged.bedpe : hr_detect/$1_$2/$1_$2.merged.bed + $$(call RUN,-c -n 1 -s 4G -m 8G,"set -o pipefail && \ + echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ + $$(@) && \ + cat $$(<) >> $$(@)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) + +..DUMMY := $(shell mkdir -p version; \ + R --version &> version/hr_detect.txt;) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: hr_detect From 35460b72e22c0a1b29a8dffcf878077efb3c120b Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:08:33 -0400 Subject: [PATCH 684/766] Update hr_detect.mk --- signatures/hr_detect.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 2e1512c5..d05b7fd4 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -2,6 +2,9 @@ include modules/Makefile.inc LOGDIR ?= log/hr_detect.$(NOW) +MIN_SIZE = 1 +MAX_SIZE = 100000000000000000000 + hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ From 52b6d2649e7ce9dcd603350743730d81d496b9e5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:28:33 -0400 Subject: [PATCH 685/766] ++ --- scripts/hr_detect.R | 15 ++++++++++++++- signatures/hr_detect.mk | 6 ++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 83e8f75b..bf30d4ee 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -16,5 +16,18 @@ arguments <- parse_args(parser, positional_arguments = T) opt <- arguments$options if (as.numeric(opt$option) == 1) { + vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% + dplyr::filter(variantCaller == "mutect") %>% + dplyr::filter(Variant_Classification == "Missense_Mutation") %>% + dplyr::select(`#CHROM` = CHROM, + POS = POS, + ID = ID, + REF = REF, + ALT = ALT) + cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), append = FALSE) + readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), col_names = TRUE, append = TRUE) -} \ No newline at end of file +} diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index d05b7fd4..99d8c04c 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -22,6 +22,12 @@ hr_detect/$1_$2/$1_$2.merged.bedpe : hr_detect/$1_$2/$1_$2.merged.bed echo \"chrom1 start1 end1 chrom2 start2 end2 sv_id pe_support strand1 strand2 svclass\" > \ $$(@) && \ cat $$(<) >> $$(@)") + +hr_detect/$1_$2/$1_$2.snv.vcf : summary/tsv/all.tsv + $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 1 \ + --sample_name $1_$2") endef $(foreach pair,$(SAMPLE_PAIRS),\ From f6d4def7caa6615732caa3a28e441c177cf63fac Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:29:20 -0400 Subject: [PATCH 686/766] Update hr_detect.mk --- signatures/hr_detect.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 99d8c04c..790f1d9c 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -7,6 +7,7 @@ MAX_SIZE = 100000000000000000000 hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf From 80db648c526dc621d2c6f72ba8390e15bf4b2ad9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:40:09 -0400 Subject: [PATCH 687/766] ++ --- scripts/hr_detect.R | 18 +++++++++++++++++- signatures/hr_detect.mk | 11 +++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index bf30d4ee..923e0021 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -21,7 +21,7 @@ if (as.numeric(opt$option) == 1) { dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% dplyr::filter(variantCaller == "mutect") %>% - dplyr::filter(Variant_Classification == "Missense_Mutation") %>% + dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% dplyr::select(`#CHROM` = CHROM, POS = POS, ID = ID, @@ -30,4 +30,20 @@ if (as.numeric(opt$option) == 1) { cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), append = FALSE) readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), col_names = TRUE, append = TRUE) +} else if (as.numeric(opt$option) == 2) { + vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% + dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% + dplyr::filter(grepl("varscan", variantCaller, fixed = TRUE)) %>% + dplyr::filter(grepl("strelka", variantCaller, fixed = TRUE)) %>% + dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% + dplyr::select(`#CHROM` = CHROM, + POS = POS, + ID = ID, + REF = REF, + ALT = ALT) + cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), append = FALSE) + readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), col_names = TRUE, append = TRUE) + } diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 790f1d9c..37178e45 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -7,7 +7,8 @@ MAX_SIZE = 100000000000000000000 hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -29,7 +30,13 @@ hr_detect/$1_$2/$1_$2.snv.vcf : summary/tsv/all.tsv $(RSCRIPT) modules/scripts/hr_detect.R \ --option 1 \ --sample_name $1_$2") - + +hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv + $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 2 \ + --sample_name $1_$2") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) From cd19e412d3dc85be1234e37b2e5bb73c62970f70 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 17 Apr 2023 22:56:08 -0400 Subject: [PATCH 688/766] ++ --- scripts/hr_detect.R | 27 +++++++++++++++++++++++++++ signatures/hr_detect.mk | 9 ++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 923e0021..95a38b9a 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -22,6 +22,8 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% dplyr::filter(variantCaller == "mutect") %>% dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% + dplyr::mutate(CHROM = as.character(CHROM)) %>% + dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>% dplyr::select(`#CHROM` = CHROM, POS = POS, ID = ID, @@ -38,6 +40,8 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(grepl("varscan", variantCaller, fixed = TRUE)) %>% dplyr::filter(grepl("strelka", variantCaller, fixed = TRUE)) %>% dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% + dplyr::mutate(CHROM = as.character(CHROM)) %>% + dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>% dplyr::select(`#CHROM` = CHROM, POS = POS, ID = ID, @@ -46,4 +50,27 @@ if (as.numeric(opt$option) == 1) { cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), append = FALSE) readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), col_names = TRUE, append = TRUE) +} else if (as.numeric(opt$option) == 3) { + cn = readr::read_tsv(file = paste0("facets/cncf/", as.character(opt$sample_name), ".txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(chrom = as.character(chrom)) %>% + dplyr::mutate(chrom = ifelse(chrom == "23", "X", chrom)) %>% + dplyr::mutate(seg_no = seg, + Chromosome = chrom, + chromStart = loc.start, + chromEnd = loc.end, + total.copy.number.inNormal = 2, + minor.copy.number.inNormal = 1, + total.copy.number.inTumour = tcn.em, + minor.copy.number.inTumour = lcn.em) %>% + dplyr::select(seg_no, + Chromosome, + chromStart, + chromEnd, + total.copy.number.inNormal, + minor.copy.number.inNormal, + total.copy.number.inTumour, + minor.copy.number.inTumour) + + readr::write_tsv(x = cn, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt"), col_names = TRUE, append = TRUE) } diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 37178e45..e122fac3 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -8,7 +8,8 @@ MAX_SIZE = 100000000000000000000 hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -37,6 +38,12 @@ hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv --option 2 \ --sample_name $1_$2") +hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt + $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 3 \ + --sample_name $1_$2") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) From 4c12ccbb9d984a156d1a8345807d1cb1e9ad4be0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 01:22:34 -0400 Subject: [PATCH 689/766] ++ --- scripts/hr_detect.R | 60 ++++++++++++++++++++++++++++++++++++++++- signatures/hr_detect.mk | 50 ++++++++++++++++++++++++---------- 2 files changed, 95 insertions(+), 15 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 95a38b9a..de9b8092 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -4,6 +4,7 @@ suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("dplyr")) suppressPackageStartupMessages(library("readr")) suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("signature.tools.lib")) if (!interactive()) { options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) @@ -72,5 +73,62 @@ if (as.numeric(opt$option) == 1) { total.copy.number.inTumour, minor.copy.number.inTumour) - readr::write_tsv(x = cn, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt"), col_names = TRUE, append = TRUE) + readr::write_tsv(x = cn, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt"), col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option) == 4) { + sv = readr::read_tsv(file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(sample = as.character(opt$sample_name)) + + readr::write_tsv(x = sv, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE) + + +} else if (as.numeric(opt$option) == 5) { + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))[21] + snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) + indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel.vcf.bgz") })) + cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) + sv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".sv.bedpe") })) + + names(snv_files) = names(indel_files) = names(cn_files) = names(sv_files) <- sample_names + + snv_cat_list = list() + for (i in 1:length(snv_files)) { + snv_tab = readr::read_tsv(file = snv_files[i], col_names = TRUE, comment = "##", col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(chr = `#CHROM`, + position = POS, + REF, + ALT) %>% + as.data.frame() + res = tabToSNVcatalogue(subs = snv_tab, genome.v = "hg19") + colnames(res$catalogue) = sample_names[i] + snv_cat_list[[i]] = res$catalogue + } + snv_catalogues = do.call(cbind, snv_cat_list) + + sigsToUse = c(1,2,3,5,6,8,13,17,18,20,26,30) + subs_fit_res = Fit(catalogues = snv_catalogues, + signatures = COSMIC30_subs_signatures[,sigsToUse], + useBootstrap = TRUE, + nboot = 100, + nparallel = 4) + snv_exp = subs_fit_res$exposures + + col_hrdetect = c("del.mh.prop", "SNV3", "SV3", "SV5", "hrd", "SNV8") + input_matrix = matrix(NA, nrow = length(sample_names), ncol = length(col_hrdetect), dimnames = list(sample_names, col_hrdetect)) + input_matrix[rownames(snv_exp),"SNV3"] = snv_exp[,"Signature3"] + input_matrix[rownames(snv_exp),"SNV8"] = snv_exp[,"Signature8"] + res = HRDetect_pipeline(input_matrix, + genome.v = "hg19", + SNV_signature_version = "COSMICv2", + SV_bedpe_files = sv_files, + Indels_tab_files = indel_files, + CNV_tab_files = cn_files, + nparallel = 4) + + readr::write_tsv(x = res$hrdetect_output %>% + dplyr::as_tibble() %>% + dplyr::mutate(sample_name = sample_names), + path = "hr_detect/summary.txt", append = FALSE, col_names = TRUE) } diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index e122fac3..b0ccd856 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -9,7 +9,10 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -27,27 +30,46 @@ hr_detect/$1_$2/$1_$2.merged.bedpe : hr_detect/$1_$2/$1_$2.merged.bed cat $$(<) >> $$(@)") hr_detect/$1_$2/$1_$2.snv.vcf : summary/tsv/all.tsv - $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R \ - --option 1 \ - --sample_name $1_$2") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 1 \ + --sample_name $1_$2") hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv - $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R \ - --option 2 \ - --sample_name $1_$2") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 2 \ + --sample_name $1_$2") + +hr_detect/$1_$2/$1_$2.indel.vcf.bgz : hr_detect/$1_$2/$1_$2.indel.vcf + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNNOVATION_ENV),"set -o pipefail && \ + bgzip -c $$(<) > $$(@)") + +hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + tabix -p vcf $$(<)") -hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt - $$(call RUN,-c -n 1 -s 12G -m 16G,"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R \ - --option 3 \ - --sample_name $1_$2") +hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 3 \ + --sample_name $1_$2") + +hr_detect/$1_$2/$1_$2.sv.bedpe : hr_detect/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 4 \ + --sample_name $1_$2") endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) +hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) + $(call RUN, -c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R --option 4 --sample_name '$(SAMPLE_PAIRS)'") + + ..DUMMY := $(shell mkdir -p version; \ R --version &> version/hr_detect.txt;) .DELETE_ON_ERROR: From c6e2938c764cf70bebbb30b395f7dc39173a4cf2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 01:25:51 -0400 Subject: [PATCH 690/766] Update hr_detect.mk --- signatures/hr_detect.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index b0ccd856..15e46bac 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -42,12 +42,12 @@ hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv --sample_name $1_$2") hr_detect/$1_$2/$1_$2.indel.vcf.bgz : hr_detect/$1_$2/$1_$2.indel.vcf - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNNOVATION_ENV),"set -o pipefail && \ - bgzip -c $$(<) > $$(@)") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bgzip -c $$(<) > $$(@)") hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - tabix -p vcf $$(<)") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + tabix -p vcf $$(<)") hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt From fdd14636ba5152b9a7740302ac3c0ae6f4ed91c1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 01:28:23 -0400 Subject: [PATCH 691/766] Update hr_detect.mk --- signatures/hr_detect.mk | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 15e46bac..e78f8732 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -12,7 +12,8 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ + hr_detect/summary.txt define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -65,9 +66,9 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) -hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) - $(call RUN, -c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R --option 4 --sample_name '$(SAMPLE_PAIRS)'") +hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) + $(call RUN, -c -n 1 -s 24G -m 36G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R --option 5 --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ From fb21a3294df323453adc5f6598713b59f6a775b3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 01:31:53 -0400 Subject: [PATCH 692/766] Update hr_detect.R --- scripts/hr_detect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index de9b8092..4edb5b0f 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -84,7 +84,7 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 5) { - sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE))[21] + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel.vcf.bgz") })) cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) From c8762ff830da2eea50a4907e3a588ffbbe4fd15e Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 02:41:53 -0400 Subject: [PATCH 693/766] Update hr_detect.R --- scripts/hr_detect.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 4edb5b0f..6c24c000 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -64,6 +64,14 @@ if (as.numeric(opt$option) == 1) { minor.copy.number.inNormal = 1, total.copy.number.inTumour = tcn.em, minor.copy.number.inTumour = lcn.em) %>% + dplyr::mutate(total.copy.number.inTumour = case_when( + is.na(total.copy.number.inTumour) ~ 2, + TRUE ~ total.copy.number.inTumour + )) %>% + dplyr::mutate(minor.copy.number.inTumour = case_when( + is.na(minor.copy.number.inTumour) ~ 2, + TRUE ~ minor.copy.number.inTumour + )) %>% dplyr::select(seg_no, Chromosome, chromStart, From d1c6b41e08ae506803f62e473c502d2aaa656ccf Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 02:47:06 -0400 Subject: [PATCH 694/766] Update hr_detect.mk --- signatures/hr_detect.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index e78f8732..e176e2cc 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -67,7 +67,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) - $(call RUN, -c -n 1 -s 24G -m 36G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R --option 5 --sample_name '$(SAMPLE_PAIRS)'") From 9c88c11d887d4e939caeb7ac3610a0a3a098d465 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 10:20:15 -0400 Subject: [PATCH 695/766] Update hr_detect.R --- scripts/hr_detect.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 6c24c000..1c02e42f 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -18,7 +18,13 @@ opt <- arguments$options if (as.numeric(opt$option) == 1) { vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::filter(CHROM %in% c(1:22, "X")) %>% + dplyr::mutate(CHROM = case_when( + CHROM == "X" ~ "23", + TRUE ~ CHROM + )) %>% readr::type_convert() %>% + dplyr::arrange(CHROM, POS) %>% dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% dplyr::filter(variantCaller == "mutect") %>% @@ -35,6 +41,13 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 2) { vcf = readr::read_tsv(file = "summary/tsv/all.tsv", col_names = TRUE, col_types = cols(.default = col_character())) %>% + dplyr::filter(CHROM %in% c(1:22, "X")) %>% + dplyr::mutate(CHROM = case_when( + CHROM == "X" ~ "23", + TRUE ~ CHROM + )) %>% + readr::type_convert() %>% + dplyr::arrange(CHROM, POS) %>% readr::type_convert() %>% dplyr::mutate(TUMOR_NORMAL = paste0(TUMOR_SAMPLE, "_", NORMAL_SAMPLE)) %>% dplyr::filter(TUMOR_NORMAL == as.character(opt$sample_name)) %>% @@ -86,6 +99,15 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 4) { sv = readr::read_tsv(file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% + dplyr::mutate(svclass = case_when( + svclass == "BND" ~ "translocation", + svclass == "DEL" ~ "deletion", + svclass == "DUP" ~ "tandem-duplication", + svclass == "INS" ~ "insertion", + svclass == "INV" ~ "inversion", + TRUE ~ svclass + )) %>% + dplyr::filter(svclass != "inversion") %>% dplyr::mutate(sample = as.character(opt$sample_name)) readr::write_tsv(x = sv, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE) From 4edfb27776f85181e12b07353a6c38b487ebd247 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 10:25:27 -0400 Subject: [PATCH 696/766] Update hr_detect.R --- scripts/hr_detect.R | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 1c02e42f..101fe012 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -101,6 +101,7 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% dplyr::mutate(svclass = case_when( svclass == "BND" ~ "translocation", + svclass == "TRA" ~ "translocation", svclass == "DEL" ~ "deletion", svclass == "DUP" ~ "tandem-duplication", svclass == "INS" ~ "insertion", From 1b91844a17e2192837d3ba9f597ac82ee7373062 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 13:23:54 -0400 Subject: [PATCH 697/766] ++ --- scripts/hr_detect.R | 35 +++++++++++++++++++++++++ signatures/hr_detect.mk | 58 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 101fe012..8e72bebb 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -109,12 +109,47 @@ if (as.numeric(opt$option) == 1) { TRUE ~ svclass )) %>% dplyr::filter(svclass != "inversion") %>% + dplyr::select(-strand1, -strand2) %>% dplyr::mutate(sample = as.character(opt$sample_name)) readr::write_tsv(x = sv, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE) } else if (as.numeric(opt$option) == 5) { + url_subs_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv_repaired.vcf.bgz") + url_indels_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel_repaired.vcf.bgz") + url_cn_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt") + url_sv_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe") + + genomePlot(subsVcf.file = url_subs_file, + indelsVcf.file = url_indels_file, + cnvsTab.file = url_cn_file, + rearrBedpe.file = url_sv_file, + sampleID = as.character(opt$sample_name), + genome.v = "hg19", file.ideogram = NULL, plot_title = NULL, + no_copynumber = FALSE, no_rearrangements = FALSE, no_indels = FALSE, + no_subs_legend = FALSE, out_format = "png", + out_path = paste0("hr_detect/", as.character(opt$sample_name), "/"), + rearr_only_assembled = FALSE, base.per.unit = NULL) + +} else if (as.numeric(opt$option) == 6) { + url_subs_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv_repaired.vcf.bgz") + url_indels_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel_repaired.vcf.bgz") + url_cn_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".cn.txt") + url_sv_file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe") + + genomePlot(subsVcf.file = url_subs_file, + indelsVcf.file = url_indels_file, + cnvsTab.file = url_cn_file, + rearrBedpe.file = url_sv_file, + sampleID = as.character(opt$sample_name), + genome.v = "hg19", file.ideogram = NULL, plot_title = NULL, + no_copynumber = FALSE, no_rearrangements = FALSE, no_indels = FALSE, + no_subs_legend = FALSE, out_format = "svg", + out_path = paste0("hr_detect/", as.character(opt$sample_name), "/"), + rearr_only_assembled = FALSE, base.per.unit = NULL) + +} else if (as.numeric(opt$option) == 7) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel.vcf.bgz") })) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index e176e2cc..cf6cb524 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -8,12 +8,22 @@ MAX_SIZE = 100000000000000000000 hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bedpe) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz.tbi) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ - hr_detect/summary.txt + hr_detect/summary.txt \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).genomePlot.png) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).genomePlot.svg) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -36,6 +46,14 @@ hr_detect/$1_$2/$1_$2.snv.vcf : summary/tsv/all.tsv --option 1 \ --sample_name $1_$2") +hr_detect/$1_$2/$1_$2.snv.vcf.bgz : hr_detect/$1_$2/$1_$2.snv.vcf + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bgzip -c $$(<) > $$(@)") + +hr_detect/$1_$2/$1_$2.snv.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.snv.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + tabix -p vcf $$(<)") + hr_detect/$1_$2/$1_$2.indel.vcf : summary/tsv/all.tsv $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R \ @@ -50,7 +68,6 @@ hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") - hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R \ @@ -62,13 +79,48 @@ hr_detect/$1_$2/$1_$2.sv.bedpe : hr_detect/$1_$2/$1_$2.merged.bedpe $(RSCRIPT) modules/scripts/hr_detect.R \ --option 4 \ --sample_name $1_$2") + +hr_detect/$1_$2/$1_$2.snv_repaired.vcf : hr_detect/$1_$2/$1_$2.snv.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bcftools view $$(<) > $$(@)") + + +hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz : hr_detect/$1_$2/$1_$2.snv_repaired.vcf + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bgzip -c $$(<) > $$(@)") + +hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + tabix -p vcf $$(<)") + +hr_detect/$1_$2/$1_$2.indel_repaired.vcf : hr_detect/$1_$2/$1_$2.indel.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bcftools view $$(<) > $$(@)") + + +hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz : hr_detect/$1_$2/$1_$2.indel_repaired.vcf + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + bgzip -c $$(<) > $$(@)") + +hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + tabix -p vcf $$(<)") + +hr_detect/$1_$2/$1_$2.genomePlot.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R --option 5 --sample_name $1_$2") + +hr_detect/$1_$2/$1_$2.genomePlot.svg : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R --option 6 --sample_name $1_$2") + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R --option 5 --sample_name '$(SAMPLE_PAIRS)'") + $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ From 3fa09ea762b7c26ff9b3d9b7a26cac0eec717ed5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 13:30:11 -0400 Subject: [PATCH 698/766] Update hr_detect.mk --- signatures/hr_detect.mk | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index cf6cb524..2b630259 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -107,12 +107,16 @@ hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel_r tabix -p vcf $$(<)") hr_detect/$1_$2/$1_$2.genomePlot.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R --option 5 --sample_name $1_$2") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 5 \ + --sample_name $1_$2") hr_detect/$1_$2/$1_$2.genomePlot.svg : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R --option 6 --sample_name $1_$2") + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 6 \ + --sample_name $1_$2") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 7f0b98ce6485b607cfb71d49bab57e535c4996eb Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 13:39:24 -0400 Subject: [PATCH 699/766] Update hr_detect.R --- scripts/hr_detect.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 8e72bebb..85648597 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -108,8 +108,7 @@ if (as.numeric(opt$option) == 1) { svclass == "INV" ~ "inversion", TRUE ~ svclass )) %>% - dplyr::filter(svclass != "inversion") %>% - dplyr::select(-strand1, -strand2) %>% + dplyr::select(-strand1, -strand2) %>% dplyr::mutate(sample = as.character(opt$sample_name)) readr::write_tsv(x = sv, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE) From 5880d4e0929f290b0a40cf5acb995c2d258c61db Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 17:14:48 -0400 Subject: [PATCH 700/766] Update hr_detect.mk --- signatures/hr_detect.mk | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 2b630259..7502f7eb 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -22,8 +22,8 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).genomePlot.png) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).genomePlot.svg) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).png) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).svg) define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -106,17 +106,19 @@ hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel_r $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") -hr_detect/$1_$2/$1_$2.genomePlot.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe +hr_detect/$1_$2/$1_$2.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R \ --option 5 \ - --sample_name $1_$2") + --sample_name $1_$2 && \ + mv hr_detect/$1_$2/$1_$2.genomePlot.png $$(@)") -hr_detect/$1_$2/$1_$2.genomePlot.svg : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe +hr_detect/$1_$2/$1_$2.svg : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R \ --option 6 \ - --sample_name $1_$2") + --sample_name $1_$2 && \ + mv hr_detect/$1_$2/$1_$2.genomePlot.svg $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 9145beefb72e88c9110e6ed894caf40918209675 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 17:45:33 -0400 Subject: [PATCH 701/766] Update hr_detect.R --- scripts/hr_detect.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 85648597..1affd42c 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -31,11 +31,17 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% dplyr::mutate(CHROM = as.character(CHROM)) %>% dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>% + dplyr::mutate(QUAL = 100, + FILTER = "PASS", + INFO = ".") %>% dplyr::select(`#CHROM` = CHROM, POS = POS, ID = ID, REF = REF, - ALT = ALT) + ALT = ALT, + QUAL = QUAL, + FILTER = FILTER, + INFO = INFO) cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), append = FALSE) readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".snv.vcf"), col_names = TRUE, append = TRUE) @@ -56,11 +62,17 @@ if (as.numeric(opt$option) == 1) { dplyr::filter(TUMOR_DP>=10 & NORMAL_DP>=10) %>% dplyr::mutate(CHROM = as.character(CHROM)) %>% dplyr::mutate(CHROM = ifelse(CHROM == "23", "X", CHROM)) %>% + dplyr::mutate(QUAL = 100, + FILTER = "PASS", + INFO = ".") %>% dplyr::select(`#CHROM` = CHROM, POS = POS, ID = ID, REF = REF, - ALT = ALT) + ALT = ALT, + QUAL = QUAL, + FILTER = FILTER, + INFO = INFO) cat("##fileformat=VCFv4.1\n", file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), append = FALSE) readr::write_tsv(x = vcf, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".indel.vcf"), col_names = TRUE, append = TRUE) From 4d2703d6ba4e89b9ac979b699496eadce1e64274 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 18:10:43 -0400 Subject: [PATCH 702/766] Update hr_detect.R --- scripts/hr_detect.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 1affd42c..92864cf1 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -111,6 +111,8 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 4) { sv = readr::read_tsv(file = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".merged.bedpe"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() %>% + dplyr::filter(chrom1 %in% c(1:22, "X")) %>% + dplyr::filter(chrom2 %in% c(1:22, "X")) %>% dplyr::mutate(svclass = case_when( svclass == "BND" ~ "translocation", svclass == "TRA" ~ "translocation", From d88d1f579c8db0db2a06cd090c4b56597cbecd38 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 18 Apr 2023 18:47:58 -0400 Subject: [PATCH 703/766] Update cnvkit.mk --- copy_number/cnvkit.mk | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/copy_number/cnvkit.mk b/copy_number/cnvkit.mk index 1041b46f..5e065c78 100644 --- a/copy_number/cnvkit.mk +++ b/copy_number/cnvkit.mk @@ -9,6 +9,7 @@ cnv_kit : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnn/tumor/$(sample).targetcov $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnn/normal/$(sample).antitargetcoverage.cnn) \ cnvkit/reference/combined_reference.cnr \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) \ + $(foreach sample,$(NORMAL_SAMPLES),cnvkit/cnr/$(sample).cnr) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/segmented/$(sample).txt) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/log2/$(sample).pdf) \ $(foreach sample,$(TUMOR_SAMPLES),cnvkit/plots/segmented/$(sample).pdf) \ @@ -75,6 +76,15 @@ cnvkit/plots/segmented/$1.pdf : cnvkit/cnr/$1.cnr endef $(foreach sample,$(TUMOR_SAMPLES),\ $(eval $(call cnvkit-tumor-cnr,$(sample)))) + +define cnvkit-normal-cnr +cnvkit/cnr/$1.cnr : cnvkit/cnn/normal/$1.targetcoverage.cnn cnvkit/cnn/normal/$1.antitargetcoverage.cnn cnvkit/reference/combined_reference.cnr + $$(call RUN,-c -s 6G -m 8G -v $(CNVKIT_ENV),"set -o pipefail && \ + cnvkit.py fix $$(<) $$(<<) $$(<<<) -o cnvkit/cnr/$1.cnr") + +endef + $(foreach sample,$(NORMAL_SAMPLES),\ + $(eval $(call cnvkit-normal-cnr,$(sample)))) define cnvkit-total-copy @@ -100,11 +110,11 @@ cnvkit/summary/total_copy.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/totalco --option 6 \ --sample_name '$(TUMOR_SAMPLES)'") -cnvkit/summary/log2_ratio.txt : $(foreach sample,$(TUMOR_SAMPLES),cnvkit/cnr/$(sample).cnr) +cnvkit/summary/log2_ratio.txt : $(foreach sample,$(SAMPLES),cnvkit/cnr/$(sample).cnr) $(call RUN,-n 1 -s 24G -m 32G -v $(CNVKIT_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/cnvkit.R \ --option 7 \ - --sample_name '$(TUMOR_SAMPLES)'") + --sample_name '$(SAMPLES)'") From 32d792bdb7c28784c43790141d487779287a5188 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 19 Apr 2023 11:32:36 -0400 Subject: [PATCH 704/766] ++ --- scripts/hr_detect.R | 7 +++++-- signatures/hr_detect.mk | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 92864cf1..f478687c 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -205,9 +205,12 @@ if (as.numeric(opt$option) == 1) { Indels_tab_files = indel_files, CNV_tab_files = cn_files, nparallel = 4) - + readr::write_tsv(x = snv_exp %>% + dplyr::as_tibble() %>% + dplyr::mutate(sample_name = sample_names), + path = "hr_detect/signatures.txt", append = FALSE, col_names = TRUE) readr::write_tsv(x = res$hrdetect_output %>% dplyr::as_tibble() %>% dplyr::mutate(sample_name = sample_names), - path = "hr_detect/summary.txt", append = FALSE, col_names = TRUE) + path = "hr_detect/hrdetect.txt", append = FALSE, col_names = TRUE) } diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 7502f7eb..b5627e5f 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -15,7 +15,7 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ - hr_detect/summary.txt \ + hr_detect/hrdetect.txt \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) \ From 895216eae789c3af26c9afd50f3d1698f697409f Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 19 Apr 2023 11:35:31 -0400 Subject: [PATCH 705/766] Update hr_detect.mk --- signatures/hr_detect.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index b5627e5f..05200f78 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -124,7 +124,7 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) -hr_detect/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) +hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") From 0273c66e2b4464c0fee16e38d6967f036c56cb49 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 19 Apr 2023 11:52:09 -0400 Subject: [PATCH 706/766] Update hr_detect.R --- scripts/hr_detect.R | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index f478687c..2e42e3a8 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -205,12 +205,33 @@ if (as.numeric(opt$option) == 1) { Indels_tab_files = indel_files, CNV_tab_files = cn_files, nparallel = 4) - readr::write_tsv(x = snv_exp %>% - dplyr::as_tibble() %>% - dplyr::mutate(sample_name = sample_names), - path = "hr_detect/signatures.txt", append = FALSE, col_names = TRUE) + + snv_exp = snv_exp %>% + dplyr::as_tibble() %>% + dplyr::mutate(sample_name = sample_names) %>% + reshape2::melt() %>% + dplyr::group_by(sample_name) %>% + dplyr::summarize(Sum_Exporsures = sum(value)) %>% + dplyr::left_join(snv_exp, by = "sample_name") %>% + dplyr::rename(Unassigned = unassigned) %>% + dplyr::mutate(Signature1 = Signature1/Sum_Exporsures, + Signature2 = Signature2/Sum_Exporsures, + Signature3 = Signature3/Sum_Exporsures, + Signature5 = Signature5/Sum_Exporsures, + Signature6 = Signature6/Sum_Exporsures, + Signature8 = Signature8/Sum_Exporsures, + Signature13 = Signature13/Sum_Exporsures, + Signature17 = Signature17/Sum_Exporsures, + Signature18 = Signature18/Sum_Exporsures, + Signature20 = Signature20/Sum_Exporsures, + Signature26 = Signature26/Sum_Exporsures, + Signature30 = Signature30/Sum_Exporsures, + Unassigned = Unassigned/Sum_Exporsures) + + readr::write_tsv(x = snv_exp, + path = "hr_detect/signatures.txt", append = FALSE, col_names = TRUE) readr::write_tsv(x = res$hrdetect_output %>% dplyr::as_tibble() %>% dplyr::mutate(sample_name = sample_names), - path = "hr_detect/hrdetect.txt", append = FALSE, col_names = TRUE) + path = "hr_detect/hrdetect.txt", append = FALSE, col_names = TRUE) } From 39fdf70ffb6a096c35c75c895318f514b767d48b Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 19 Apr 2023 11:55:53 -0400 Subject: [PATCH 707/766] Update hr_detect.R --- scripts/hr_detect.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 2e42e3a8..9eea6987 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -212,7 +212,9 @@ if (as.numeric(opt$option) == 1) { reshape2::melt() %>% dplyr::group_by(sample_name) %>% dplyr::summarize(Sum_Exporsures = sum(value)) %>% - dplyr::left_join(snv_exp, by = "sample_name") %>% + dplyr::left_join(snv_exp %>% + dplyr::as_tibble() %>% + dplyr::mutate(sample_name = sample_names), by = "sample_name") %>% dplyr::rename(Unassigned = unassigned) %>% dplyr::mutate(Signature1 = Signature1/Sum_Exporsures, Signature2 = Signature2/Sum_Exporsures, From b93fc006a0f5f463b00cf061572655df8cc55056 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 19:06:44 -0400 Subject: [PATCH 708/766] Update hr_detect.mk --- signatures/hr_detect.mk | 45 ++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 05200f78..25b75f61 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -10,18 +10,22 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ - hr_detect/hrdetect.txt \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) \ + + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) \ + + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ + hr_detect/hrdetect.txt \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).png) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).svg) @@ -67,24 +71,11 @@ hr_detect/$1_$2/$1_$2.indel.vcf.bgz : hr_detect/$1_$2/$1_$2.indel.vcf hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") - -hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R \ - --option 3 \ - --sample_name $1_$2") - -hr_detect/$1_$2/$1_$2.sv.bedpe : hr_detect/$1_$2/$1_$2.merged.bedpe - $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R \ - --option 4 \ - --sample_name $1_$2") - + hr_detect/$1_$2/$1_$2.snv_repaired.vcf : hr_detect/$1_$2/$1_$2.snv.vcf.bgz $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ bcftools view $$(<) > $$(@)") - hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz : hr_detect/$1_$2/$1_$2.snv_repaired.vcf $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ bgzip -c $$(<) > $$(@)") @@ -106,6 +97,18 @@ hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel_r $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") +hr_detect/$1_$2/$1_$2.cn.txt : facets/cncf/$1_$2.txt + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 3 \ + --sample_name $1_$2") + +hr_detect/$1_$2/$1_$2.sv.bedpe : hr_detect/$1_$2/$1_$2.merged.bedpe + $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R \ + --option 4 \ + --sample_name $1_$2") + hr_detect/$1_$2/$1_$2.png : hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.indel_repaired.vcf.bgz.tbi hr_detect/$1_$2/$1_$2.cn.txt hr_detect/$1_$2/$1_$2.sv.bedpe $$(call RUN,-c -n 1 -s 12G -m 16G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R \ @@ -124,7 +127,7 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) -hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) +hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") From df7ee45f1b85a1e1c8a5072e45d4fe228536f159 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 19:08:21 -0400 Subject: [PATCH 709/766] Update hr_detect.R --- scripts/hr_detect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 9eea6987..9fdac118 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -164,8 +164,8 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 7) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) - indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel.vcf.bgz") })) + snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv_repaired.vcf") })) + indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel_repaired.vcf.bgz") })) cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) sv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".sv.bedpe") })) From 151e8333518e814dc363f30c45ea7e037bb3763c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 19:11:44 -0400 Subject: [PATCH 710/766] Update hr_detect.mk --- signatures/hr_detect.mk | 4 ---- 1 file changed, 4 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 25b75f61..1c159f3c 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -10,19 +10,15 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ hr_detect/hrdetect.txt \ From aa2cd568bdfe15abc3f3fc5f25d31c7f5874b1e6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 19:16:49 -0400 Subject: [PATCH 711/766] Update hr_detect.R --- scripts/hr_detect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index 9fdac118..f9aaa56a 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -164,7 +164,7 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 7) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv_repaired.vcf") })) + snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel_repaired.vcf.bgz") })) cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) sv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".sv.bedpe") })) From 3da150696ccafc4082f1e70440451ee27f65b4ee Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 19:27:29 -0400 Subject: [PATCH 712/766] Update hr_detect.mk --- signatures/hr_detect.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index 1c159f3c..f328e442 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -123,9 +123,9 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) -hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) +hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ - $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") + $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ From 60957f8c2f053268b646b2b511db36e1c1a50025 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 23:16:00 -0400 Subject: [PATCH 713/766] Update hr_detect.mk --- signatures/hr_detect.mk | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/signatures/hr_detect.mk b/signatures/hr_detect.mk index f328e442..23f335b9 100644 --- a/signatures/hr_detect.mk +++ b/signatures/hr_detect.mk @@ -21,9 +21,10 @@ hr_detect : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).merged.bed $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) \ - hr_detect/hrdetect.txt \ $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).png) \ - $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).svg) + $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).svg) \ + hr_detect/hrdetect_smry.txt \ + hr_detect/signatures_smry.txt define hr-detect hr_detect/$1_$2/$1_$2.merged.bed : vcf/$1_$2.merged_sv.vcf @@ -68,7 +69,7 @@ hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.indel.vcf.bgz $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") -hr_detect/$1_$2/$1_$2.snv_repaired.vcf : hr_detect/$1_$2/$1_$2.snv.vcf.bgz +hr_detect/$1_$2/$1_$2.snv_repaired.vcf : hr_detect/$1_$2/$1_$2.snv.vcf.bgz hr_detect/$1_$2/$1_$2.snv.vcf.bgz.tbi $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ bcftools view $$(<) > $$(@)") @@ -80,7 +81,7 @@ hr_detect/$1_$2/$1_$2.snv_repaired.vcf.bgz.tbi : hr_detect/$1_$2/$1_$2.snv_repai $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ tabix -p vcf $$(<)") -hr_detect/$1_$2/$1_$2.indel_repaired.vcf : hr_detect/$1_$2/$1_$2.indel.vcf.bgz +hr_detect/$1_$2/$1_$2.indel_repaired.vcf : hr_detect/$1_$2/$1_$2.indel.vcf.bgz hr_detect/$1_$2/$1_$2.indel.vcf.bgz.tbi $$(call RUN,-c -n 1 -s 12G -m 16G -v $(INNOVATION_ENV),"set -o pipefail && \ bcftools view $$(<) > $$(@)") @@ -123,10 +124,13 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hr-detect,$(tumor.$(pair)),$(normal.$(pair))))) -hr_detect/hrdetect.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv.vcf) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) +hr_detect/hrdetect_smry.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ $(RSCRIPT) modules/scripts/hr_detect.R --option 7 --sample_name '$(SAMPLE_PAIRS)'") +hr_detect/signatures_smry.txt : $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).sv.bedpe) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).snv_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).indel_repaired.vcf.bgz.tbi) $(foreach pair,$(SAMPLE_PAIRS),hr_detect/$(pair)/$(pair).cn.txt) + $(call RUN, -c -n 4 -s 6G -m 9G -v $(SIGNATURE_TOOLS_ENV),"set -o pipefail && \ + $(RSCRIPT) modules/scripts/hr_detect.R --option 8 --sample_name '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ R --version &> version/hr_detect.txt;) From a3ebd713537522cc057cb49aed1866085be33d6c Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 21 Apr 2023 23:57:15 -0400 Subject: [PATCH 714/766] Update hr_detect.R --- scripts/hr_detect.R | 104 +++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index f9aaa56a..a3d00842 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -122,7 +122,6 @@ if (as.numeric(opt$option) == 1) { svclass == "INV" ~ "inversion", TRUE ~ svclass )) %>% - dplyr::select(-strand1, -strand2) %>% dplyr::mutate(sample = as.character(opt$sample_name)) readr::write_tsv(x = sv, path = paste0("hr_detect/", as.character(opt$sample_name), "/", as.character(opt$sample_name), ".sv.bedpe"), col_names = TRUE, append = FALSE) @@ -164,76 +163,61 @@ if (as.numeric(opt$option) == 1) { } else if (as.numeric(opt$option) == 7) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) - snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv.vcf") })) + snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv_repaired.vcf.bgz") })) indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel_repaired.vcf.bgz") })) cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) sv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".sv.bedpe") })) names(snv_files) = names(indel_files) = names(cn_files) = names(sv_files) <- sample_names - snv_cat_list = list() - for (i in 1:length(snv_files)) { - snv_tab = readr::read_tsv(file = snv_files[i], col_names = TRUE, comment = "##", col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::select(chr = `#CHROM`, - position = POS, - REF, - ALT) %>% - as.data.frame() - res = tabToSNVcatalogue(subs = snv_tab, genome.v = "hg19") - colnames(res$catalogue) = sample_names[i] - snv_cat_list[[i]] = res$catalogue - } - snv_catalogues = do.call(cbind, snv_cat_list) - - sigsToUse = c(1,2,3,5,6,8,13,17,18,20,26,30) - subs_fit_res = Fit(catalogues = snv_catalogues, - signatures = COSMIC30_subs_signatures[,sigsToUse], - useBootstrap = TRUE, - nboot = 100, - nparallel = 4) - snv_exp = subs_fit_res$exposures - - col_hrdetect = c("del.mh.prop", "SNV3", "SV3", "SV5", "hrd", "SNV8") - input_matrix = matrix(NA, nrow = length(sample_names), ncol = length(col_hrdetect), dimnames = list(sample_names, col_hrdetect)) - input_matrix[rownames(snv_exp),"SNV3"] = snv_exp[,"Signature3"] - input_matrix[rownames(snv_exp),"SNV8"] = snv_exp[,"Signature8"] - res = HRDetect_pipeline(input_matrix, - genome.v = "hg19", - SNV_signature_version = "COSMICv2", + res = HRDetect_pipeline(genome.v = "hg19", + SNV_vcf_files = snv_files, SV_bedpe_files = sv_files, - Indels_tab_files = indel_files, + Indels_vcf_files = indel_files, CNV_tab_files = cn_files, + SNV_signature_version = "COSMICv2", nparallel = 4) - snv_exp = snv_exp %>% - dplyr::as_tibble() %>% - dplyr::mutate(sample_name = sample_names) %>% - reshape2::melt() %>% - dplyr::group_by(sample_name) %>% - dplyr::summarize(Sum_Exporsures = sum(value)) %>% - dplyr::left_join(snv_exp %>% - dplyr::as_tibble() %>% - dplyr::mutate(sample_name = sample_names), by = "sample_name") %>% - dplyr::rename(Unassigned = unassigned) %>% - dplyr::mutate(Signature1 = Signature1/Sum_Exporsures, - Signature2 = Signature2/Sum_Exporsures, - Signature3 = Signature3/Sum_Exporsures, - Signature5 = Signature5/Sum_Exporsures, - Signature6 = Signature6/Sum_Exporsures, - Signature8 = Signature8/Sum_Exporsures, - Signature13 = Signature13/Sum_Exporsures, - Signature17 = Signature17/Sum_Exporsures, - Signature18 = Signature18/Sum_Exporsures, - Signature20 = Signature20/Sum_Exporsures, - Signature26 = Signature26/Sum_Exporsures, - Signature30 = Signature30/Sum_Exporsures, - Unassigned = Unassigned/Sum_Exporsures) - - readr::write_tsv(x = snv_exp, - path = "hr_detect/signatures.txt", append = FALSE, col_names = TRUE) readr::write_tsv(x = res$hrdetect_output %>% dplyr::as_tibble() %>% dplyr::mutate(sample_name = sample_names), - path = "hr_detect/hrdetect.txt", append = FALSE, col_names = TRUE) + path = "hr_detect/hrdetect_smry.txt", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option) == 8) { + sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) + snv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".snv_repaired.vcf.bgz") })) + indel_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".indel_repaired.vcf.bgz") })) + cn_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".cn.txt") })) + sv_files = unlist(lapply(sample_names, function(x) { paste0("hr_detect/", x, "/", x, ".sv.bedpe") })) + + names(snv_files) = names(indel_files) = names(cn_files) = names(sv_files) <- sample_names + + res = HRDetect_pipeline(genome.v = "hg19", + SNV_vcf_files = snv_files, + SV_bedpe_files = sv_files, + Indels_vcf_files = indel_files, + CNV_tab_files = cn_files, + SNV_signature_version = "COSMICv2", + nparallel = 4) + + signatures_to_use = paste0("Signature", c(1,2,3,5,6,8,13,17,18,20,26,30)) + + res = res$exposures_subs %>% + dplyr::as_tibble() %>% + dplyr::mutate(signatures = rownames(res$exposures_subs)) %>% + reshape2::melt(id.vars = "signatures", variable.name = "sample_name", value.name = "exposure") %>% + dplyr::filter(signatures %in% signatures_to_use) %>% + dplyr::group_by(sample_name) %>% + dplyr::summarize(sum_exposures = sum(exposure)) %>% + dplyr::right_join(res$exposures_subs %>% + dplyr::as_tibble() %>% + dplyr::mutate(signatures = rownames(res$exposures_subs)) %>% + reshape2::melt(id.vars = "signatures", variable.name = "sample_name", value.name = "exposure") %>% + dplyr::filter(signatures %in% signatures_to_use), by = "sample_name") %>% + dplyr::mutate(exposure = exposure/sum_exposures) %>% + reshape2::dcast(formula = sample_name ~ signatures, value.var = "exposure", fill = 0) %>% + dplyr::select(all_of(c("sample_name", signatures_to_use))) + + readr::write_tsv(x = res, path = "hr_detect/signatures_smry.txt", append = FALSE, col_names = TRUE) + } From 3463e99ea9b9b51d9c34f46e9acb06428b349a10 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 24 Apr 2023 20:36:28 -0400 Subject: [PATCH 715/766] Update pyclone_vi.R --- scripts/pyclone_vi.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/pyclone_vi.R b/scripts/pyclone_vi.R index 35ed13c1..831ce0da 100644 --- a/scripts/pyclone_vi.R +++ b/scripts/pyclone_vi.R @@ -48,9 +48,11 @@ if (as.numeric(opt$option) == 1) { dplyr::mutate(Chromosome = chrom, Start_Position = loc.start, End_Position = loc.end, - minor_cn = ifelse(is.na(lcn.em), "0", lcn.em), + minor_cn = lcn.em, major_cn = tcn.em) %>% readr::type_convert() %>% + dplyr::mutate(major_cn = ifelse(is.na(major_cn), 2, major_cn)) %>% + dplyr::mutate(minor_cn = ifelse(is.na(minor_cn), major_cn, minor_cn)) %>% dplyr::mutate(major_cn = major_cn - minor_cn) %>% dplyr::select(Chromosome, Start_Position, End_Position, minor_cn, major_cn) @@ -73,6 +75,8 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() %>% .[["X1"]] + parame = ifelse(is.na(params), .1, params) + pyclone[[i]] = pyclone[[i]] %>% dplyr::mutate(tumour_content = params) } From 38530a67101b00ff5a885f1180d077b386a3bd45 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 26 Apr 2023 16:22:16 -0400 Subject: [PATCH 716/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index 1c301488..e06e6d4e 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,7 +4,7 @@ LOGDIR ?= log/merge_sv.$(NOW) SV_CALLERS = svaba gridss manta MAX_DIST = 500 -NUM_CALLERS = 2 +NUM_CALLERS = 3 TYPE = 0 STRAND = 0 MIN_SIZE = 30 From 0e66dda39e266c21f3613f04aaddd99dba91268c Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 26 Apr 2023 18:53:06 -0400 Subject: [PATCH 717/766] Update merge_sv.mk --- vcf_tools/merge_sv.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf_tools/merge_sv.mk b/vcf_tools/merge_sv.mk index e06e6d4e..1c301488 100644 --- a/vcf_tools/merge_sv.mk +++ b/vcf_tools/merge_sv.mk @@ -4,7 +4,7 @@ LOGDIR ?= log/merge_sv.$(NOW) SV_CALLERS = svaba gridss manta MAX_DIST = 500 -NUM_CALLERS = 3 +NUM_CALLERS = 2 TYPE = 0 STRAND = 0 MIN_SIZE = 30 From 1333bcdf9a589755f53c4a44a3df505b4aeb6edd Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Apr 2023 20:42:21 -0400 Subject: [PATCH 718/766] Update bwamemAligner.mk --- aligners/bwamemAligner.mk | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/aligners/bwamemAligner.mk b/aligners/bwamemAligner.mk index b49510b5..c1e32194 100644 --- a/aligners/bwamemAligner.mk +++ b/aligners/bwamemAligner.mk @@ -23,10 +23,7 @@ BWAMEM_MEM_PER_THREAD = $(if $(findstring true,$(PDX)),4G,2G) BWA_BAMS = $(foreach sample,$(SAMPLES),bam/$(sample).bam) -bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) \ - $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) \ - metrics/dedup_metrics.txt \ - metrics/dedup_summary.txt \ +bwamem : $(BWA_BAMS) $(addsuffix .bai,$(BWA_BAMS)) bam/%.bam : bwamem/bam/%.bwamem.$(BAM_SUFFIX) $(call RUN,,"ln -f $(<) $(@)") @@ -48,31 +45,6 @@ bwamem/bam/%.bwamem.bam : fastq/%.fastq.gz fastq/%.fastq.gz : fastq/%.fastq $(call RUN,,"gzip -c $< > $(@) && $(RM) $<") -define dedup-metrics -metrics/$1.dedup_metrics.txt : bam/$1.bam - $$(call RUN, -c -n 1 -s 16G -m 24G -v $(INNOVATION_ENV) -w 24:00:00, "set -o pipefail && \ - picard \ - -Xmx16G \ - MarkDuplicates \ - VALIDATION_STRINGENCY=LENIENT \ - MAX_RECORDS_IN_RAM=4000000 \ - TMP_DIR=$(TMPDIR) \ - INPUT=$$(<) \ - OUTPUT=/dev/null \ - METRICS_FILE=$$(@)") - -endef -$(foreach sample,$(SAMPLES),\ - $(eval $(call dedup-metrics,$(sample)))) - -metrics/dedup_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) - $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/dedup_summary.R --option 1 --sample_names '$(SAMPLES)'") - -metrics/dedup_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dedup_metrics.txt) - $(call RUN, -c -n 1 -s 8G -m 12G -v $(INNOVATION_ENV),"set -o pipefail && \ - $(RSCRIPT) $(SCRIPTS_DIR)/dedup_summary.R --option 2 --sample_names '$(SAMPLES)'") - ..DUMMY := $(shell mkdir -p version; $(BWA) &> version/bwamem.txt; echo "options: $(BWA_ALN_OPTS)" >> version/bwamem.txt ) .SECONDARY: From 654119704c4209abc0c442827d12d01998d22ed5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 29 Apr 2023 20:43:18 -0400 Subject: [PATCH 719/766] Delete dedup_summary.R --- scripts/dedup_summary.R | 47 ----------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 scripts/dedup_summary.R diff --git a/scripts/dedup_summary.R b/scripts/dedup_summary.R deleted file mode 100644 index c4889638..00000000 --- a/scripts/dedup_summary.R +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env Rscript - -suppressPackageStartupMessages(library("optparse")) -suppressPackageStartupMessages(library("readr")) -suppressPackageStartupMessages(library("dplyr")) -suppressPackageStartupMessages(library("magrittr")) - -if (!interactive()) { - options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -} - -optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), - make_option("--sample_names", default = NA, type = 'character', help = "sample names")) -parser = OptionParser(usage = "%prog", option_list = optList) -arguments = parse_args(parser, positional_arguments = T) -opt = arguments$options - -if (as.numeric(opt$option)==1) { - sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) - x = list() - for (i in 1:length(sample_names)) { - x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".dedup_metrics.txt"), - skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::mutate(SAMPLE_NAME = sample_names[i]) - } - x = do.call(rbind, x) - write_tsv(x, path="metrics/dedup_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) - -} else if (as.numeric(opt$option)==2) { - sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) - x = list() - for (i in 1:length(sample_names)) { - x[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".dedup_metrics.txt"), - skip = 10, col_names = TRUE, col_types = cols(.default = col_character())) %>% - readr::type_convert() %>% - dplyr::select(family_size = BIN, - coverage_multiple = CoverageMult, - all_counts = all_sets, - optical_counts = optical_sets, - non_optical_counts = non_optical_sets) %>% - dplyr::mutate(sample_name = sample_names[i]) - } - x = do.call(rbind, x) - write_tsv(x, path="metrics/dedup_summary.txt", na = "NA", append = FALSE, col_names = TRUE) - -} From c3c8122ec9a455ec5a6806c6bc19d6e8496aa76a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:23:41 -0400 Subject: [PATCH 720/766] ++ --- Makefile | 6 +++++- config.inc | 1 + copy_number/facets_suite.mk | 41 +++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 copy_number/facets_suite.mk diff --git a/Makefile b/Makefile index 126e560b..0524aaa9 100644 --- a/Makefile +++ b/Makefile @@ -201,7 +201,11 @@ strelka_varscan_indels : TARGETS += facets facets : $(call RUN_MAKE,modules/copy_number/facets.mk) - + +TARGETS += facets_suite +facets_suite : + $(call RUN_MAKE,modules/copy_number/facets_suite.mk) + TARGETS += ascat ascat : $(call RUN_MAKE,modules/copy_number/ascat.mk) diff --git a/config.inc b/config.inc index c8cc5f09..733338a9 100644 --- a/config.inc +++ b/config.inc @@ -38,6 +38,7 @@ CNVKIT_ENV ?= $(HOME)/share/usr/env/cnvkit-0.9.9 STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11 MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1 VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 +FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8 JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk new file mode 100644 index 00000000..160f9a75 --- /dev/null +++ b/copy_number/facets_suite.mk @@ -0,0 +1,41 @@ +include modules/Makefile.inc + +LOGDIR ?= log/facets_suite.$(NOW) + +FACETS_MAX_DEPTH ?= 15000 +FACETS_PRE_CVAL ?= 50 +FACETS_CVAL1 ?= 150 +FACETS_CVAL2 ?= 50 +FACETS_MIN_NHET ?= 25 +FACETS_SNP_NBHD ?= 250 +FACETS_HET_THRESHOLD ?= 0.25 + +facets_suite : facets_suite/vcf/targets_dbsnp.vcf + +facets/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) + $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ + +define snp-pileup +facets/$1_$2/$1__$2.N.snp_pileup.gz : facets/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam + $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ + snp-pileup-wrapper.R --verbose \ + -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ + --vcf-file $$(<) \ + --tumor-bam $$(<<) \ + --normal-bam $$(<<<) \ + --output-prefix facets/$1_$2/$1__$2 \ + --pseudo-snps NULL \ + --max-depth $$(FACETS_MAX_DEPTH)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call snp-pileup,\ + $(tumor.$(pair)),$(normal.$(pair))))) + + + +..DUMMY := $(shell mkdir -p version; \ + $(FACETS_SUITE_ENV)/bin/R --version > version/facets_suite.txt) +.DELETE_ON_ERROR: +.SECONDARY: +.PHONY: facets_suite From 33c0735f9c7de4f4b64d3bd237d1247aa582c31e Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:31:46 -0400 Subject: [PATCH 721/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 160f9a75..0c686d36 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -12,11 +12,11 @@ FACETS_HET_THRESHOLD ?= 0.25 facets_suite : facets_suite/vcf/targets_dbsnp.vcf -facets/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) +facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ define snp-pileup -facets/$1_$2/$1__$2.N.snp_pileup.gz : facets/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam +facets/$1_$2/$1__$2.N.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ From bb24d4467d34387e534069a9d46aeb440d32ab0a Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:35:02 -0400 Subject: [PATCH 722/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 0c686d36..c19049f1 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -10,20 +10,21 @@ FACETS_MIN_NHET ?= 25 FACETS_SNP_NBHD ?= 250 FACETS_HET_THRESHOLD ?= 0.25 -facets_suite : facets_suite/vcf/targets_dbsnp.vcf +facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ + $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ define snp-pileup -facets/$1_$2/$1__$2.N.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam +facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ --vcf-file $$(<) \ --tumor-bam $$(<<) \ --normal-bam $$(<<<) \ - --output-prefix facets/$1_$2/$1__$2 \ + --output-prefix facets_suite/$1_$2/$1_$2 \ --pseudo-snps NULL \ --max-depth $$(FACETS_MAX_DEPTH)") From 02696c547862135d5ea0d3f4962a34e92546d23d Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:42:01 -0400 Subject: [PATCH 723/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index c19049f1..d58fb842 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -17,7 +17,7 @@ facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ define snp-pileup -facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam +facets_suite/$1_$2/$1_$2.snp_pileup.gz : bam/$1.bam bam/$2.bam $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ From 25e5a17850eb4854c2b3fc6def152402993a8d8f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:42:18 -0400 Subject: [PATCH 724/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index d58fb842..c19049f1 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -17,7 +17,7 @@ facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ define snp-pileup -facets_suite/$1_$2/$1_$2.snp_pileup.gz : bam/$1.bam bam/$2.bam +facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ From 47e0b5b5e3d2d194efe29454fd4d00911d2ea367 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:44:14 -0400 Subject: [PATCH 725/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index c19049f1..36f1e04f 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -16,6 +16,7 @@ facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ + define snp-pileup facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ From a06bfb84273a10353da95b1256c88573a99bd894 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:45:49 -0400 Subject: [PATCH 726/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 36f1e04f..8fab74e4 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -17,22 +17,22 @@ facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ -define snp-pileup -facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam - $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ - snp-pileup-wrapper.R --verbose \ - -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ - --vcf-file $$(<) \ - --tumor-bam $$(<<) \ - --normal-bam $$(<<<) \ - --output-prefix facets_suite/$1_$2/$1_$2 \ - --pseudo-snps NULL \ - --max-depth $$(FACETS_MAX_DEPTH)") - -endef -$(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call snp-pileup,\ - $(tumor.$(pair)),$(normal.$(pair))))) +#define snp-pileup +#facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam +# $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ +# snp-pileup-wrapper.R --verbose \ +# -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ +# --vcf-file $$(<) \ +# --tumor-bam $$(<<) \ +# --normal-bam $$(<<<) \ +# --output-prefix facets_suite/$1_$2/$1_$2 \ +# --pseudo-snps NULL \ +# --max-depth $$(FACETS_MAX_DEPTH)") +# +#endef +#$(foreach pair,$(SAMPLE_PAIRS),\ +# $(eval $(call snp-pileup,\ +# $(tumor.$(pair)),$(normal.$(pair))))) From c16dabf294a41ae8cc6c7255fb80182433c13529 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:46:06 -0400 Subject: [PATCH 727/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 8fab74e4..240d636c 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -10,8 +10,8 @@ FACETS_MIN_NHET ?= 25 FACETS_SNP_NBHD ?= 250 FACETS_HET_THRESHOLD ?= 0.25 -facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ - $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) +facets_suite : facets_suite/vcf/targets_dbsnp.vcf +# $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ From f6be220828929577963032672db2ac8725edb150 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:46:43 -0400 Subject: [PATCH 728/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 240d636c..78aa1d34 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -10,30 +10,29 @@ FACETS_MIN_NHET ?= 25 FACETS_SNP_NBHD ?= 250 FACETS_HET_THRESHOLD ?= 0.25 -facets_suite : facets_suite/vcf/targets_dbsnp.vcf -# $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) +facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ + $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ -#define snp-pileup -#facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam -# $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ -# snp-pileup-wrapper.R --verbose \ -# -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ -# --vcf-file $$(<) \ -# --tumor-bam $$(<<) \ -# --normal-bam $$(<<<) \ -# --output-prefix facets_suite/$1_$2/$1_$2 \ -# --pseudo-snps NULL \ -# --max-depth $$(FACETS_MAX_DEPTH)") -# -#endef -#$(foreach pair,$(SAMPLE_PAIRS),\ -# $(eval $(call snp-pileup,\ -# $(tumor.$(pair)),$(normal.$(pair))))) - +define snp-pileup +facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam + $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ + snp-pileup-wrapper.R --verbose \ + -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ + --vcf-file $$(<) \ + --tumor-bam $$(<<) \ + --normal-bam $$(<<<) \ + --output-prefix facets_suite/$1_$2/$1_$2 \ + --pseudo-snps NULL \ + --max-depth $$(FACETS_MAX_DEPTH)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call snp-pileup,\ + $(tumor.$(pair)),$(normal.$(pair))))) ..DUMMY := $(shell mkdir -p version; \ From 94286783d89891f51e0c19e094fee7bbd6f0be4f Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 19:47:45 -0400 Subject: [PATCH 729/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 78aa1d34..e6704b3b 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -31,9 +31,7 @@ facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/ endef $(foreach pair,$(SAMPLE_PAIRS),\ - $(eval $(call snp-pileup,\ - $(tumor.$(pair)),$(normal.$(pair))))) - + $(eval $(call snp-pileup,$(tumor.$(pair)),$(normal.$(pair))))) ..DUMMY := $(shell mkdir -p version; \ $(FACETS_SUITE_ENV)/bin/R --version > version/facets_suite.txt) From e832f55252df47deb22b86aed3d28228de313ae8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 20:04:18 -0400 Subject: [PATCH 730/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index e6704b3b..ac70fcf1 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -3,15 +3,16 @@ include modules/Makefile.inc LOGDIR ?= log/facets_suite.$(NOW) FACETS_MAX_DEPTH ?= 15000 -FACETS_PRE_CVAL ?= 50 -FACETS_CVAL1 ?= 150 -FACETS_CVAL2 ?= 50 -FACETS_MIN_NHET ?= 25 -FACETS_SNP_NBHD ?= 250 -FACETS_HET_THRESHOLD ?= 0.25 +FACETS_CVAL ?= 50 +FACETS_PURITY_CVAL ?= 30 +FACETS_MIN_NHET ?= 15 +FACETS_PURITY_MIN_NHET ?= 10 +SNP_WINDOW_SIZE ?= 250 +NORMAL_DEPTH ?= 25 facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ - $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) + $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) \ + $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ @@ -19,7 +20,7 @@ facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) define snp-pileup facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam - $$(call RUN,-c -s 1G -m 2G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ + $$(call RUN,-c -s 2G -m 4G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ --vcf-file $$(<) \ @@ -33,6 +34,30 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call snp-pileup,$(tumor.$(pair)),$(normal.$(pair))))) +define run-facets +facets_suite/$1_$2/taskcomplete : facets_suite/$1_$2/$1_$2.snp_pileup.gz + $$(call RUN,-c -s 4G -m 6G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ + run-facets-wrapper.R --verbose \ + --counts-file $$(<) \ + --sample-id $1_$2 \ + --directory facets_suite/$1_$2/ \ + --everything \ + --genome hg19 \ + --cval $$(FACETS_CVAL) \ + --purity-cval $$(FACETS_PURITY_CVAL) \ + --min-nhet $$(FACETS_MIN_NHET) \ + --purity-min-nhet $$(FACETS_PURITY_MIN_NHET) \ + --snp-window-size $$(SNP_WINDOW_SIZE) \ + --normal-depth $$(NORMAL_DEPTH) \ + --seed 0 \ + --legacy-output True \ + --facets-lib-path /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/lib/R/library/ && \ + touch $$(@)") + +endef +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call run-facets,$(tumor.$(pair)),$(normal.$(pair))))) + ..DUMMY := $(shell mkdir -p version; \ $(FACETS_SUITE_ENV)/bin/R --version > version/facets_suite.txt) .DELETE_ON_ERROR: From 09f1dcf86c8f3da456ce44c879c3e7cc10872c1b Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 20:06:05 -0400 Subject: [PATCH 731/766] Update facets_suite.mk --- copy_number/facets_suite.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index ac70fcf1..b09f203d 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -52,7 +52,7 @@ facets_suite/$1_$2/taskcomplete : facets_suite/$1_$2/$1_$2.snp_pileup.gz --seed 0 \ --legacy-output True \ --facets-lib-path /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/lib/R/library/ && \ - touch $$(@)") + echo 'finished!' > $$(@)") endef $(foreach pair,$(SAMPLE_PAIRS),\ From 643a1efca10e1836b9a1498cc69b4221c70456b3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 21:23:07 -0400 Subject: [PATCH 732/766] ++ --- copy_number/facets_suite.mk | 9 ++++++++- scripts/facets_suite.R | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 scripts/facets_suite.R diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index b09f203d..23d420d5 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -12,7 +12,8 @@ NORMAL_DEPTH ?= 25 facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) \ - $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) + $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) \ + facets_suite/summary/summary.txt facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ @@ -57,6 +58,12 @@ facets_suite/$1_$2/taskcomplete : facets_suite/$1_$2/$1_$2.snp_pileup.gz endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call run-facets,$(tumor.$(pair)),$(normal.$(pair))))) + + +facets_suite/summary/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) + $(call RUN, -c -n 1 -s 24G -m 48G -v $(INNOVATION_ENV),"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/facets_suite.R --option 1 --sample_pairs '$(SAMPLE_PAIRS)'") + ..DUMMY := $(shell mkdir -p version; \ $(FACETS_SUITE_ENV)/bin/R --version > version/facets_suite.txt) diff --git a/scripts/facets_suite.R b/scripts/facets_suite.R new file mode 100644 index 00000000..9e4934d9 --- /dev/null +++ b/scripts/facets_suite.R @@ -0,0 +1,29 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +args_list <- list(make_option("--option", default = NA, type = 'character', help = "type of analysis"), + make_option("--sample_pairs", default = NA, type = 'character', help = "sample pairs")) +parser <- OptionParser(usage = "%prog", option_list = args_list) +arguments <- parse_args(parser, positional_arguments = T) +opt <- arguments$options + +if (as.numeric(opt$option) == 1) { + sample_names = unlist(strsplit(as.character(opt$sample_pairs), split = " ", fixed = TRUE)) + CN = list() + for (i in 1:length(sample_pairs)) { + CN[[i]] = readr::read_tsv(file = paste0("facets_suite/", sample_names[i], "/", sample_names[i], ".gene_level.txt"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + CN = do.call(rbind, CN) + readr::write_tsv(x = CN, path = "facets_suite/summary/summary.txt", col_names = TRUE, append = FALSE) + +} From ffe8870bbca326515eb51beb12d54bbf35d7d1df Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 21:24:48 -0400 Subject: [PATCH 733/766] Update facets_suite.R --- scripts/facets_suite.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/facets_suite.R b/scripts/facets_suite.R index 9e4934d9..7b6e3728 100644 --- a/scripts/facets_suite.R +++ b/scripts/facets_suite.R @@ -18,7 +18,7 @@ opt <- arguments$options if (as.numeric(opt$option) == 1) { sample_names = unlist(strsplit(as.character(opt$sample_pairs), split = " ", fixed = TRUE)) CN = list() - for (i in 1:length(sample_pairs)) { + for (i in 1:length(sample_names)) { CN[[i]] = readr::read_tsv(file = paste0("facets_suite/", sample_names[i], "/", sample_names[i], ".gene_level.txt"), col_names = TRUE, col_types = cols(.default = col_character())) %>% readr::type_convert() From 351ebbcc10a8e139db31f0a43bb10c381fdfb722 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sun, 14 May 2023 21:26:43 -0400 Subject: [PATCH 734/766] ++ --- copy_number/facets_suite.mk | 10 +++++----- scripts/facets_suite.R | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/copy_number/facets_suite.mk b/copy_number/facets_suite.mk index 23d420d5..dfd9dfa3 100644 --- a/copy_number/facets_suite.mk +++ b/copy_number/facets_suite.mk @@ -10,17 +10,17 @@ FACETS_PURITY_MIN_NHET ?= 10 SNP_WINDOW_SIZE ?= 250 NORMAL_DEPTH ?= 25 -facets_suite : facets_suite/vcf/targets_dbsnp.vcf \ +facets_suite : facets_suite/targets_dbsnp.vcf \ $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/$(pair).snp_pileup.gz) \ $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) \ - facets_suite/summary/summary.txt + facets_suite/summary.txt -facets_suite/vcf/targets_dbsnp.vcf : $(TARGETS_FILE) +facets_suite/targets_dbsnp.vcf : $(TARGETS_FILE) $(INIT) $(BEDTOOLS) intersect -header -u -a $(DBSNP) -b $< > $@ define snp-pileup -facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/vcf/targets_dbsnp.vcf bam/$1.bam bam/$2.bam +facets_suite/$1_$2/$1_$2.snp_pileup.gz : facets_suite/targets_dbsnp.vcf bam/$1.bam bam/$2.bam $$(call RUN,-c -s 2G -m 4G -v $(FACETS_SUITE_ENV),"set -o pipefail && \ snp-pileup-wrapper.R --verbose \ -sp /home/$(USER)/share/usr/env/r-facets-suite-2.0.8/bin/snp-pileup \ @@ -60,7 +60,7 @@ $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call run-facets,$(tumor.$(pair)),$(normal.$(pair))))) -facets_suite/summary/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) +facets_suite/summary.txt : $(foreach pair,$(SAMPLE_PAIRS),facets_suite/$(pair)/taskcomplete) $(call RUN, -c -n 1 -s 24G -m 48G -v $(INNOVATION_ENV),"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/facets_suite.R --option 1 --sample_pairs '$(SAMPLE_PAIRS)'") diff --git a/scripts/facets_suite.R b/scripts/facets_suite.R index 7b6e3728..11d45470 100644 --- a/scripts/facets_suite.R +++ b/scripts/facets_suite.R @@ -24,6 +24,6 @@ if (as.numeric(opt$option) == 1) { readr::type_convert() } CN = do.call(rbind, CN) - readr::write_tsv(x = CN, path = "facets_suite/summary/summary.txt", col_names = TRUE, append = FALSE) + readr::write_tsv(x = CN, path = "facets_suite/summary.txt", col_names = TRUE, append = FALSE) } From 8715d68dfee9f346c90222506b5d9c869d07de14 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 19 May 2023 15:29:11 -0400 Subject: [PATCH 735/766] ++ --- Makefile | 16 ++++----- config.inc | 1 + vcf_tools/cravat_annotation.mk | 60 ++++++++++++++++++++++++---------- 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/Makefile b/Makefile index 0524aaa9..20b410a5 100644 --- a/Makefile +++ b/Makefile @@ -510,7 +510,6 @@ pyclone_vi : TARGETS += deconstruct_sigs deconstruct_sigs : $(call RUN_MAKE,modules/signatures/deconstruct_sigs.mk) - TARGETS += sv_signature sv_signature : @@ -524,6 +523,7 @@ TARGETS += hr_detect hr_detect : $(call RUN_MAKE,modules/signatures/hr_detect.mk) + #================================================== # miscellaneous #================================================== @@ -551,7 +551,7 @@ krona_classify : TARGETS += medicc2 medicc2 : $(call RUN_MAKE,modules/copy_number/medicc2.mk) - + #================================================== # reports @@ -569,10 +569,6 @@ TARGETS += mutation_summary mutation_summary : $(call RUN_MAKE,modules/summary/mutationsummary.mk) -TARGETS += cravat_summary -cravat_summary : - $(call RUN_MAKE,modules/summary/cravat_summary.mk) - TARGETS += delmh_summary delmh_summary : $(call RUN_MAKE,modules/summary/delmh_summary.mk) @@ -594,14 +590,14 @@ TARGETS += ann_vcf ann_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateVcf.mk) -TARGETS += cravat_annotation -cravat_annotation : - $(call RUN_MAKE,modules/test/workflows/cravat_annotation.mk) - TARGETS += cravat_annotate cravat_annotate : $(call RUN_MAKE,modules/vcf_tools/cravat_annotation.mk) +TARGETS += cravat_summary +cravat_summary : + $(call RUN_MAKE,modules/summary/cravat_summary.mk) + TARGETS += ann_summary_vcf ann_summary_vcf : $(call RUN_MAKE,modules/vcf_tools/annotateSummaryVcf.mk) diff --git a/config.inc b/config.inc index 733338a9..a95a1a37 100644 --- a/config.inc +++ b/config.inc @@ -39,6 +39,7 @@ STARFISH_ENV ?= $(HOME)/share/usr/env/r-starfish-0.11 MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1 VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8 +CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/vcf_tools/cravat_annotation.mk b/vcf_tools/cravat_annotation.mk index f504ac26..d8ae4961 100644 --- a/vcf_tools/cravat_annotation.mk +++ b/vcf_tools/cravat_annotation.mk @@ -1,33 +1,57 @@ include modules/Makefile.inc LOGDIR ?= log/cravat_annotate.$(NOW) -PHONY += cravat -cravat_annotate : $(foreach sample,$(SAMPLES),cravat/$(sample).vcf cravat/$(sample).maf cravat/$(sample).cravat.vcf cravat/$(sample).tsv cravat/$(sample).txt) - -DEFAULT_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.5 -CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat +cravat_annotate : $(foreach sample,$(SAMPLES),cravat/$(sample).vcf) \ + $(foreach sample,$(SAMPLES),cravat/$(sample).maf) \ + $(foreach sample,$(SAMPLES),cravat/$(sample).cravat.vcf) \ + $(foreach sample,$(SAMPLES),cravat/$(sample).tsv) \ + $(foreach sample,$(SAMPLES),cravat/$(sample).txt) define cravat-annotation -cravat/%.vcf : vcf_ann/%.gatk_snps.vcf vcf_ann/%.gatk_indels.vcf - $$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/combine_vcf.R --sample_name $$(*)") +cravat/$1.vcf : vcf_ann/$1.gatk_snps.vcf vcf_ann/$1.gatk_indels.vcf + $$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \ + $(RSCRIPT) modules/vcf_tools/combine_vcf.R \ + --sample_name $$(*)") -cravat/%.maf : cravat/%.vcf - $$(call RUN,-s 9G -m 12G -v $$(VEP_ENV) -w 7200,"$$(VCF2MAF) --input-vcf $$< --tumor-id $$(*) $$(if $$(EXAC_NONTCGA),--filter-vcf $$(EXAC_NONTCGA)) --ref-fasta $$(REF_FASTA) --vep-path $$(VEP_PATH) --vep-data $$(VEP_DATA) --tmp-dir `mktemp -d` --output-maf $$@") +cravat/$1.maf : cravat/$1.vcf + $$(call RUN,-c -s 9G -m 12G -v $(VEP_ENV) -w 24:00:00,"set -o pipefail && \ + $$(VCF2MAF) \ + --input-vcf $$(<) \ + --tumor-id $1 \ + $$(if $$(EXAC_NONTCGA),--filter-vcf $$(EXAC_NONTCGA)) \ + --ref-fasta $$(REF_FASTA) \ + --vep-path $$(VEP_PATH) \ + --vep-data $$(VEP_DATA) \ + --tmp-dir `mktemp -d` \ + --output-maf $$(@)") -cravat/%.cravat.vcf : cravat/%.vcf cravat/%.maf - $$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/filter_vcf.R --sample_name $$(*)") +cravat/$1.cravat.vcf : cravat/$1.vcf cravat/$1.maf + $$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \ + $(RSCRIPT) modules/vcf_tools/filter_vcf.R \ + --sample_name $1") -cravat/%.tsv: cravat/%.cravat.vcf - $$(call RUN,-c -s 9G -m 12G -v $$(DEFAULT_ENV) -w 7200,"source activate $$(CRAVAT_ENV) && \ - cravat cravat/$$(*).cravat.vcf -n $$(*) -d cravat -a clinvar cosmic dbsnp gnomad hgvs -v -l hg19 -t text") +cravat/$1.tsv: cravat/$1.cravat.vcf + $$(call RUN,-c -s 9G -m 12G -v $(CRAVAT_ENV) -w 24:00:00,"set -o pipefail && \ + cravat $$(<) \ + -n $1 \ + -d cravat \ + -a clinvar cosmic dbsnp gnomad hgvs \ + -v \ + -l hg19 \ + -t text") -cravat/%.txt : cravat/%.tsv - $$(call RUN,-c -s 9G -m 12G -w 7200,"$(RSCRIPT) modules/vcf_tools/summary_vcf.R --sample_name $$(*)") +cravat/$1.txt : cravat/$1.tsv + $$(call RUN,-c -s 9G -m 12G -w 24:00:00,"set -o pipefail && \ + $(RSCRIPT) modules/vcf_tools/summary_vcf.R \ + --sample_name $1") endef $(foreach sample,$(SAMPLES),\ $(eval $(call cravat-annotation,$(sample)))) -.PHONY: $(PHONY) - +..DUMMY := $(shell mkdir -p version; \ + echo "cravat" > version/cravat_annotate.txt;) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY : cravat_annotate From 302cb8d6953cfef9ced7b11f1f4b04167f4b88b3 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 19 May 2023 18:30:31 -0400 Subject: [PATCH 736/766] ++ --- config.inc | 1 + variant_callers/somatic/polysolver.mk | 74 +++++++++++++++++---------- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/config.inc b/config.inc index a95a1a37..1818a0bd 100644 --- a/config.inc +++ b/config.inc @@ -40,6 +40,7 @@ MEDICC_ENV = $(HOME)/share/usr/env/medicc2-0.8.1 VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8 CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat +POLYSOLVER_ENV = $(HOME)/share/usr/anaconda-envs/hla-polysolver JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk index 3c4d3dd8..da048e0a 100644 --- a/variant_callers/somatic/polysolver.mk +++ b/variant_callers/somatic/polysolver.mk @@ -1,44 +1,64 @@ include modules/Makefile.inc LOGDIR ?= log/hla_polysolver.$(NOW) -PHONY += hla_polysolver hla_polysolver/summary -hla_polysolver : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).taskcomplete) hla_polysolver/summary/genotype_summary.txt + +hla_polysolver : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/winners.hla.txt) \ + $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/hla.intervals) \ + $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) \ + $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) \ + hla_polysolver/summary/genotype_summary.txt define hla-polysolver hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam - $$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \ - /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \ - if [ ! -d hla_polysolver/$1_$2 ]; then mkdir hla_polysolver/$1_$2; fi && \ - shell_call_hla_type bam/$2.bam Unknown 1 hg19 STDFQ 0 hla_polysolver/$1_$2") + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ + export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ + shell_call_hla_type \ + -bam $$(<<) \ + -race Unknown \ + -includeFreq 1 \ + -build hg19 \ + -format STDFQ \ + -insertCalc 1 \ + -outDir hla_polysolver/$1_$2") -hla_polysolver/$1_$2/hla.intervals : hla_polysolver/$1_$2/winners.hla.txt - $$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \ - /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \ - shell_call_hla_mutations_from_type bam/$2.bam bam/$1.bam hla_polysolver/$1_$2/winners.hla.txt hg19 STDFQ hla_polysolver/$1_$2") +hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/winners.hla.txt + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ + export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ + shell_call_hla_mutations_from_type \ + -normal_bam_hla $$(<<) \ + -tumor_bam_hla $$(<) \ + -hla $$(<<<) \ + -build hg19 \ + -format STDFQ \ + -outDir hla_polysolver/$1_$2") -hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals - $$(call RUN,-n 8 -s 2G -m 4G, "source /home/${USER}/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate \ - /home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export CONDA_PREFIX=/home/${USER}/share/usr/anaconda-envs/hla-polysolver && \ - export PERL5LIB=/home/${USER}/share/usr/anaconda-envs/hla-polysolver/lib/perl5/5.22.0 && \ - shell_annotate_hla_mutations $1_$2 hla_polysolver/$1_$2") +hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ + export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ + shell_annotate_hla_mutations \ + -indiv $1_$2 \ + -dir hla_polysolver/$1_$2") -hla_polysolver/$1_$2/$1_$2.taskcomplete : hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated - $$(call RUN,-n 1 -s 1G -m 1G,"touch hla_polysolver/$1_$2/$1_$2.taskcomplete") +hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated : hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated + endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hla-polysolver,$(tumor.$(pair)),$(normal.$(pair))))) -hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).taskcomplete) - $(call RUN,-c -s 12G -m 24G,"mkdir -p hla_polysolver/summary && \ - $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --sample_names '$(SAMPLE_PAIRS)'") +hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) + $(call RUN,-c -s 12G -m 24G,"set -o pipefail && \ + $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --sample_names '$(SAMPLE_PAIRS)'") -.DELETE_ON_ERROR: + +..DUMMY := $(shell mkdir -p version; \ + $(POLYSOLVER_ENV)/shell_call_hla_type --help &> version/hla_polysolver.txt; \ + $(POLYSOLVER_ENV)/shell_call_hla_mutations_from_type --help &> version/hla_polysolver.txt; \ + $(POLYSOLVER_ENV)/shell_annotate_hla_mutations --help &> version/hla_polysolver.txt) .SECONDARY: -.PHONY: $(PHONY) +.DELETE_ON_ERROR: +.PHONY: hla_polysolver From 6c082f89c771d5bb56f1db2254f5e71889725b89 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 19 May 2023 18:41:18 -0400 Subject: [PATCH 737/766] Update polysolver.mk --- variant_callers/somatic/polysolver.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk index da048e0a..258e7838 100644 --- a/variant_callers/somatic/polysolver.mk +++ b/variant_callers/somatic/polysolver.mk @@ -50,15 +50,15 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hla-polysolver,$(tumor.$(pair)),$(normal.$(pair))))) -hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) +hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) $(call RUN,-c -s 12G -m 24G,"set -o pipefail && \ $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --sample_names '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ - $(POLYSOLVER_ENV)/shell_call_hla_type --help &> version/hla_polysolver.txt; \ - $(POLYSOLVER_ENV)/shell_call_hla_mutations_from_type --help &> version/hla_polysolver.txt; \ - $(POLYSOLVER_ENV)/shell_annotate_hla_mutations --help &> version/hla_polysolver.txt) + $(POLYSOLVER_ENV)/bin/shell_call_hla_type --help &> version/hla_polysolver.txt; \ + $(POLYSOLVER_ENV)/bin/shell_call_hla_mutations_from_type --help &>> version/hla_polysolver.txt; \ + $(POLYSOLVER_ENV)/bin/shell_annotate_hla_mutations --help &>> version/hla_polysolver.txt) .SECONDARY: .DELETE_ON_ERROR: .PHONY: hla_polysolver From 9cdea08cf00d8032f5e247912c44749d144ae83d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 19 May 2023 18:47:23 -0400 Subject: [PATCH 738/766] Update polysolver.mk --- variant_callers/somatic/polysolver.mk | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk index 258e7838..bff7c260 100644 --- a/variant_callers/somatic/polysolver.mk +++ b/variant_callers/somatic/polysolver.mk @@ -15,33 +15,33 @@ hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_call_hla_type \ - -bam $$(<<) \ - -race Unknown \ - -includeFreq 1 \ - -build hg19 \ - -format STDFQ \ - -insertCalc 1 \ - -outDir hla_polysolver/$1_$2") + $$(<<) \ + Unknown \ + 1 \ + hg19 \ + STDFQ \ + 1 \ + hla_polysolver/$1_$2") hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/winners.hla.txt $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_call_hla_mutations_from_type \ - -normal_bam_hla $$(<<) \ - -tumor_bam_hla $$(<) \ - -hla $$(<<<) \ - -build hg19 \ - -format STDFQ \ - -outDir hla_polysolver/$1_$2") + $$(<<) \ + $$(<) \ + $$(<<<) \ + hg19 \ + STDFQ \ + hla_polysolver/$1_$2") hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_annotate_hla_mutations \ - -indiv $1_$2 \ - -dir hla_polysolver/$1_$2") + $1_$2 \ + hla_polysolver/$1_$2") hla_polysolver/$1_$2/$1_$2.strelka_indels.unfiltered.annotated : hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated From fa64884b2806ba9975e333aac62a894c5d14d418 Mon Sep 17 00:00:00 2001 From: David Brown Date: Sat, 20 May 2023 19:03:40 -0400 Subject: [PATCH 739/766] Update polysolver.mk --- variant_callers/somatic/polysolver.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk index bff7c260..4743354f 100644 --- a/variant_callers/somatic/polysolver.mk +++ b/variant_callers/somatic/polysolver.mk @@ -20,7 +20,7 @@ hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam 1 \ hg19 \ STDFQ \ - 1 \ + 0 \ hla_polysolver/$1_$2") hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/winners.hla.txt From e41328de21787aa9946324c7dcdf66b1d837ae36 Mon Sep 17 00:00:00 2001 From: David Brown Date: Mon, 22 May 2023 11:57:26 -0400 Subject: [PATCH 740/766] ++ --- variant_callers/somatic/hla_summary.R | 62 +++++++++++++++++++++------ variant_callers/somatic/polysolver.mk | 23 +++++++--- 2 files changed, 65 insertions(+), 20 deletions(-) diff --git a/variant_callers/somatic/hla_summary.R b/variant_callers/somatic/hla_summary.R index 5d3ef8f0..e08590db 100644 --- a/variant_callers/somatic/hla_summary.R +++ b/variant_callers/somatic/hla_summary.R @@ -1,25 +1,59 @@ suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("magrittr")) options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) -optList <- list( - make_option("--sample_names", default = "NA", help = "tumor normal sample pair names") - ) +optList <- list(make_option("--option", default = "NA", help = "which option?"), + make_option("--sample_names", default = "NA", help = "sample names")) parser <- OptionParser(usage = "%prog [options]", option_list = optList) arguments <- parse_args(parser, positional_arguments = T) opt <- arguments$options sample_names = unlist(strsplit(opt$sample_names, split=" ", fixed=TRUE)) -hla_genotypes = list() -for (i in 1:length(sample_names)) { - data = read.csv(file=paste0("hla_polysolver/", sample_names[i], "/winners.hla.txt"), header=FALSE, sep="\t", stringsAsFactors=FALSE) - gen_1 = t(data[,2,drop=FALSE]) - gen_2 = t(data[,3,drop=FALSE]) - colnames(gen_1) = paste0(c("HLA-A", "HLA-B", "HLA-C"), "_1") - colnames(gen_2) = paste0(c("HLA-A", "HLA-B", "HLA-C"), "_2") - hla_genotypes[[i]] = cbind(gen_1, gen_2) + +if (as.numeric(opt$option)==1) { + hla_genotypes = list() + for (i in 1:length(sample_names)) { + hla_genotypes[[i]] = readr::read_tsv(file = paste0("hla_polysolver/", sample_names[i], "/winners.hla.txt"), + col_names = FALSE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(hla = X1, major_allele = X2, minor_allele = X3) %>% + dplyr::mutate(sample_name = sample_names[i]) + } + hla_genotypes = do.call(rbind, hla_genotypes) + readr::write_tsv(x = hla_genotypes, path = "hla_polysolver/summary/hla_summary.txt", col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==2) { + somatic_vars = list() + for (i in 1:length(sample_names)) { + somatic_vars[[i]] = readr::read_tsv(file = paste0("hla_polysolver/", sample_names[i], "/", sample_names[i], ".mutect.unfiltered.annotated"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + somatic_vars = do.call(rbind, somatic_vars) + if (nrow(somatic_vars)>0) { + somatic_vars = somatic_vars %>% + dplyr::mutate(tumor_name = unlist(lapply(individual, function(x) { unlist(strsplit(x, split = "_", fixed = TRUE))[1] }))) %>% + dplyr::mutate(normal_name = unlist(lapply(individual, function(x) { unlist(strsplit(x, split = "_", fixed = TRUE))[2] }))) + } + readr::write_tsv(x = somatic_vars, path = "hla_polysolver/summary/mutect_summary.txt", col_names = TRUE, append = FALSE) + +} else if (as.numeric(opt$option)==3) { + somatic_vars = list() + for (i in 1:length(sample_names)) { + somatic_vars[[i]] = readr::read_tsv(file = paste0("hla_polysolver/", sample_names[i], "/", sample_names[i], ".strelka_indels.unfiltered.annotated"), + col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() + } + somatic_vars = do.call(rbind, somatic_vars) + if (nrow(somatic_vars)>0) { + somatic_vars = somatic_vars %>% + dplyr::mutate(tumor_name = unlist(lapply(individual, function(x) { unlist(strsplit(x, split = "_", fixed = TRUE))[1] }))) %>% + dplyr::mutate(normal_name = unlist(lapply(individual, function(x) { unlist(strsplit(x, split = "_", fixed = TRUE))[2] }))) + } + readr::write_tsv(x = somatic_vars, path = "hla_polysolver/summary/strelka_summary.txt", col_names = TRUE, append = FALSE) + } -hla_genotypes = do.call(rbind, hla_genotypes) -hla_genotypes = cbind("SAMPLE_NAMES"=sample_names, hla_genotypes) -write.table(hla_genotypes, file="hla_polysolver/summary/genotype_summary.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) diff --git a/variant_callers/somatic/polysolver.mk b/variant_callers/somatic/polysolver.mk index 4743354f..ce302f5e 100644 --- a/variant_callers/somatic/polysolver.mk +++ b/variant_callers/somatic/polysolver.mk @@ -7,11 +7,14 @@ hla_polysolver : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/winners.h $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/hla.intervals) \ $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) \ $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) \ - hla_polysolver/summary/genotype_summary.txt + hla_polysolver/summary/hla_summary.txt \ + hla_polysolver/summary/mutect_summary.txt \ + hla_polysolver/summary/strelka_summary.txt + define hla-polysolver hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam - $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \ export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_call_hla_type \ @@ -24,7 +27,7 @@ hla_polysolver/$1_$2/winners.hla.txt : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2") hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/winners.hla.txt - $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \ export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_call_hla_mutations_from_type \ @@ -36,7 +39,7 @@ hla_polysolver/$1_$2/hla.intervals : bam/$1.bam bam/$2.bam hla_polysolver/$1_$2/ hla_polysolver/$1_$2") hla_polysolver/$1_$2/$1_$2.mutect.unfiltered.annotated : hla_polysolver/$1_$2/hla.intervals - $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 24:00:00, "set -o pipefail && \ + $$(call RUN,-c -n 8 -s 2G -m 4G -v $(POLYSOLVER_ENV) -w 72:00:00, "set -o pipefail && \ export CONDA_PREFIX=$$(POLYSOLVER_ENV) && \ export PERL5LIB=$$(POLYSOLVER_ENV)/lib/perl5/5.22.0 && \ shell_annotate_hla_mutations \ @@ -50,9 +53,17 @@ endef $(foreach pair,$(SAMPLE_PAIRS),\ $(eval $(call hla-polysolver,$(tumor.$(pair)),$(normal.$(pair))))) -hla_polysolver/summary/genotype_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) +hla_polysolver/summary/hla_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) $(call RUN,-c -s 12G -m 24G,"set -o pipefail && \ - $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --sample_names '$(SAMPLE_PAIRS)'") + $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 1 --sample_names '$(SAMPLE_PAIRS)'") + +hla_polysolver/summary/mutect_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) + $(call RUN,-c -s 12G -m 24G,"set -o pipefail && \ + $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 2 --sample_names '$(SAMPLE_PAIRS)'") + +hla_polysolver/summary/strelka_summary.txt : $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).mutect.unfiltered.annotated) $(foreach pair,$(SAMPLE_PAIRS),hla_polysolver/$(pair)/$(pair).strelka_indels.unfiltered.annotated) + $(call RUN,-c -s 12G -m 24G,"set -o pipefail && \ + $(RSCRIPT) modules/variant_callers/somatic/hla_summary.R --option 3 --sample_names '$(SAMPLE_PAIRS)'") ..DUMMY := $(shell mkdir -p version; \ From 1833fa85e1f15c798eb0962953c43c873fe6fd42 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 23 May 2023 20:52:26 -0400 Subject: [PATCH 741/766] Update hr_detect.R --- scripts/hr_detect.R | 64 +++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index a3d00842..e8e72981 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -179,9 +179,10 @@ if (as.numeric(opt$option) == 1) { nparallel = 4) readr::write_tsv(x = res$hrdetect_output %>% - dplyr::as_tibble() %>% - dplyr::mutate(sample_name = sample_names), - path = "hr_detect/hrdetect_smry.txt", append = FALSE, col_names = TRUE) + as.data.frame() %>% + tibble::rownames_to_column(var = "sample_name") %>% + dplyr::as_tibble(), + file = "hr_detect/hrdetect_smry.txt", append = FALSE, col_names = TRUE) } else if (as.numeric(opt$option) == 8) { sample_names = unlist(strsplit(x = as.character(opt$sample_name), split = " ", fixed = TRUE)) @@ -192,32 +193,39 @@ if (as.numeric(opt$option) == 1) { names(snv_files) = names(indel_files) = names(cn_files) = names(sv_files) <- sample_names - res = HRDetect_pipeline(genome.v = "hg19", - SNV_vcf_files = snv_files, - SV_bedpe_files = sv_files, - Indels_vcf_files = indel_files, - CNV_tab_files = cn_files, - SNV_signature_version = "COSMICv2", - nparallel = 4) - - signatures_to_use = paste0("Signature", c(1,2,3,5,6,8,13,17,18,20,26,30)) - - res = res$exposures_subs %>% - dplyr::as_tibble() %>% - dplyr::mutate(signatures = rownames(res$exposures_subs)) %>% - reshape2::melt(id.vars = "signatures", variable.name = "sample_name", value.name = "exposure") %>% - dplyr::filter(signatures %in% signatures_to_use) %>% + res = signatureFit_pipeline(genome.v = "hg19", + SNV_vcf_files = snv_files, + nparallel = 4) + signatures_to_use = c("SBS1", "SBS2", "SBS3", "SBS4", "SBS6", "SBS7a", "SBS7c", "SBS8", + "SBS9", "SBS10a", "SBS10d", "SBS11", "SBS13", "SBS14", "SBS15", + "SBS18", "SBS20", "SBS22", "SBS24", "SBS26", "SBS30", "SBS31", + "SBS32", "SBS35", "SBS38", "SBS44", "SBS84", "SBS87", "SBS88", + "SBS90", "SBS94", "SBS95", "SBS96", "SBS97", "SBS104", "SBS105", + "SBS107", "SBS108", "SBS109", "SBS110", "SBS111", "SBS112", + "SBS113", "SBS119", "SBS129", "SBS137") + tags_to_use = c("Deamination (Age)", "Deamination (APOBEC)", "HR deficiency", "Tobacco", "MMR deficiency", + "UV exposure", "UV exposure", "HR deficiency", "Lymphoma", "POLE deficiency", "POLD deficiency", + "Temozolomide-1,2-DMH", "Deamination (APOBEC)", "MMR deficiency (POLE deficiency)", "MMR deficiency", + "BER deficiency", "MMR deficiency (POLD deficiency)", "AAI", "Aflatoxin", "MMR deficiency", + "BER deficiency", "Platinum", "Azathioprine", "Platinum", "Similar to UV", "MMR deficiency", + "AID", "Deamination (Thiopurine)", "Colibactin", "Duocarmycin", "Similar to tobacco", "Deamination", + "Deamination", "MMR deficiency", "Platinum-related", "Deamination", "Similar to tobacco", "BER deficiency", + "Similar to tobacco", "Similar to AAI", "Platinum-related", "Platinum-related", "AAI", "Temozolomide-1,2-DMH", + "Similar to UV", "Similar to UV") + + res = res$fitResults$exposures %>% + as.data.frame() %>% + tibble::rownames_to_column(var = "sample_name") %>% + reshape2::melt(id.vars = "sample_name", variable.name = "signature", value.name = "exposure") %>% + dplyr::filter(signature %in% signatures_to_use) %>% dplyr::group_by(sample_name) %>% - dplyr::summarize(sum_exposures = sum(exposure)) %>% - dplyr::right_join(res$exposures_subs %>% - dplyr::as_tibble() %>% - dplyr::mutate(signatures = rownames(res$exposures_subs)) %>% - reshape2::melt(id.vars = "signatures", variable.name = "sample_name", value.name = "exposure") %>% - dplyr::filter(signatures %in% signatures_to_use), by = "sample_name") %>% - dplyr::mutate(exposure = exposure/sum_exposures) %>% - reshape2::dcast(formula = sample_name ~ signatures, value.var = "exposure", fill = 0) %>% - dplyr::select(all_of(c("sample_name", signatures_to_use))) + dplyr::summarize(signature = signature, + exposure = exposure/sum(exposure)) %>% + dplyr::ungroup() %>% + dplyr::left_join(dplyr::tibble(signature = signatures_to_use, + description = tags_to_use), by = "signature") + - readr::write_tsv(x = res, path = "hr_detect/signatures_smry.txt", append = FALSE, col_names = TRUE) + readr::write_tsv(x = res, file = "hr_detect/signatures_smry.txt", append = FALSE, col_names = TRUE) } From 495db40d7e83101a00b9663a0e0b6ea0bbec4251 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 23 May 2023 21:12:04 -0400 Subject: [PATCH 742/766] Update hr_detect.R --- scripts/hr_detect.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/hr_detect.R b/scripts/hr_detect.R index e8e72981..d0ff2e03 100644 --- a/scripts/hr_detect.R +++ b/scripts/hr_detect.R @@ -196,6 +196,7 @@ if (as.numeric(opt$option) == 1) { res = signatureFit_pipeline(genome.v = "hg19", SNV_vcf_files = snv_files, nparallel = 4) + signatures_to_use = c("SBS1", "SBS2", "SBS3", "SBS4", "SBS6", "SBS7a", "SBS7c", "SBS8", "SBS9", "SBS10a", "SBS10d", "SBS11", "SBS13", "SBS14", "SBS15", "SBS18", "SBS20", "SBS22", "SBS24", "SBS26", "SBS30", "SBS31", @@ -203,6 +204,7 @@ if (as.numeric(opt$option) == 1) { "SBS90", "SBS94", "SBS95", "SBS96", "SBS97", "SBS104", "SBS105", "SBS107", "SBS108", "SBS109", "SBS110", "SBS111", "SBS112", "SBS113", "SBS119", "SBS129", "SBS137") + tags_to_use = c("Deamination (Age)", "Deamination (APOBEC)", "HR deficiency", "Tobacco", "MMR deficiency", "UV exposure", "UV exposure", "HR deficiency", "Lymphoma", "POLE deficiency", "POLD deficiency", "Temozolomide-1,2-DMH", "Deamination (APOBEC)", "MMR deficiency (POLE deficiency)", "MMR deficiency", @@ -218,6 +220,10 @@ if (as.numeric(opt$option) == 1) { tibble::rownames_to_column(var = "sample_name") %>% reshape2::melt(id.vars = "sample_name", variable.name = "signature", value.name = "exposure") %>% dplyr::filter(signature %in% signatures_to_use) %>% + dplyr::mutate(exposure = case_when( + is.na(exposure) ~ 0, + TRUE ~ exposure + )) %>% dplyr::group_by(sample_name) %>% dplyr::summarize(signature = signature, exposure = exposure/sum(exposure)) %>% From c9a3cfa3e11941908f6997920bb6e80431652f08 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 27 Jun 2023 13:01:33 -0400 Subject: [PATCH 743/766] Update gridss_tumor_normal.mk --- sv_callers/gridss_tumor_normal.mk | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sv_callers/gridss_tumor_normal.mk b/sv_callers/gridss_tumor_normal.mk index 29707a0f..a1f5a470 100644 --- a/sv_callers/gridss_tumor_normal.mk +++ b/sv_callers/gridss_tumor_normal.mk @@ -12,7 +12,8 @@ GRIDSS_PON_DIR ?= $(HOME)/share/lib/resource_files/gridss/pon/ gridss : $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv.vcf) \ $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/$(pair).gridss_sv_ft.vcf.bgz) \ - $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).gridss_sv.vcf) + $(foreach pair,$(SAMPLE_PAIRS),vcf/$(pair).gridss_sv.vcf) \ + $(foreach pair,$(SAMPLE_PAIRS),gridss/$(pair)/taskcomplete) define gridss-tumor-normal gridss/$1_$2/$1_$2.gridss_sv.vcf : bam/$1.bam bam/$2.bam @@ -40,6 +41,15 @@ gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz : gridss/$1_$2/$1_$2.gridss_sv.vcf vcf/$1_$2.gridss_sv.vcf : gridss/$1_$2/$1_$2.gridss_sv_ft.vcf.bgz $$(INIT) zcat $$(<) > $$(@) + +gridss/$1_$2/taskcomplete : vcf/$1_$2.gridss_sv.vcf + $$(INIT) rm -f gridss/$1_$2/$1.bam.gridss.working/$1.bam.sv.bam && \ + rm -f gridss/$1_$2/$1.bam.gridss.working/$1.bam.sv.bam.bai && \ + rm -f gridss/$1_$2/$2.bam.gridss.working/$2.bam.sv.bam && \ + rm -f gridss/$1_$2/$2.bam.gridss.working/$2.bam.sv.bam.bai && \ + rm -f gridss/$1_$2/$1_$2.gridss_sv.vcf.assembly.bam.gridss.working/FL001-101CD_FL001-101NL.gridss_sv.vcf.assembly.bam.sv.bam && \ + rm -f gridss/$1_$2/$1_$2.gridss_sv.vcf.assembly.bam.gridss.working/FL001-101CD_FL001-101NL.gridss_sv.vcf.assembly.bam.sv.bam.bai && \ + echo 'complete!' > $$(@) endef $(foreach pair,$(SAMPLE_PAIRS),\ From dfc6133817ae26b035bf742fc840868249196489 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 11 Jul 2023 12:03:43 -0400 Subject: [PATCH 744/766] Update bam_interval_metrics.mk --- qc/bam_interval_metrics.mk | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qc/bam_interval_metrics.mk b/qc/bam_interval_metrics.mk index 8aad89ce..b7d049ec 100644 --- a/qc/bam_interval_metrics.mk +++ b/qc/bam_interval_metrics.mk @@ -26,7 +26,8 @@ COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICAD_OP COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICAD_OPTS) BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICAD_OPTS) -TARGETS_LIST ?= $(HOME)/share/lib/resource_files/MSK-IMPACT-v4.sorted.list +BAITS_LIST = $(HOME)/share/lib/bed_files/targets/IMPACT505/b37/IMPACT505_b37_baits.list +TARGETS_LIST ?= $(HOME)/share/lib/bed_files/targets/IMPACT505/b37/IMPACT505_b37_targets.list define idx-metrics metrics/$1.idx_stats.txt : bam/$1.bam @@ -83,7 +84,7 @@ metrics/$1.hs_metrics.txt : bam/$1.bam REFERENCE_SEQUENCE=$$(REF_FASTA) \ INPUT=$$(<) \ OUTPUT=$$(@) \ - BAIT_INTERVALS=$$(TARGETS_LIST) \ + BAIT_INTERVALS=$$(BAITS_LIST) \ TARGET_INTERVALS=$$(TARGETS_LIST)") endef From 1bb9b05cee7fa08f180562b8bc9eec6adefa05e8 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 13 Sep 2023 12:03:23 -0400 Subject: [PATCH 745/766] ++ bam_metrics --- Makefile | 2 +- Makefile.inc | 6 +- qc/bamMetrics.mk | 50 ----------------- qc/bam_metrics.mk | 136 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 52 deletions(-) delete mode 100644 qc/bamMetrics.mk create mode 100644 qc/bam_metrics.mk diff --git a/Makefile b/Makefile index 20b410a5..74905fef 100644 --- a/Makefile +++ b/Makefile @@ -431,7 +431,7 @@ bam_to_fasta : TARGETS += bam_metrics bam_metrics : - $(call RUN_MAKE,modules/qc/bamMetrics.mk) + $(call RUN_MAKE,modules/qc/bam_metrics.mk) TARGETS += bam_interval_metrics bam_interval_metrics : diff --git a/Makefile.inc b/Makefile.inc index 7458fffd..fc95ff4a 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -99,7 +99,11 @@ CREATE_SEQ_DICT = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CreateSequenceDic CALC_HS_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CalculateHsMetrics.jar $(PICARD_OPTS) COLLECT_MULT_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CollectMultipleMetrics.jar $(PICARD_OPTS) COLLECT_TARGETED_METRICS = $(JAVA) -Xmx$(PICARD_MEM) -jar $(PICARD_DIR)/CollectTargetedPcrMetrics.jar $(PICARD_OPTS) - +COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryMetrics $(PICARD_OPTS) +COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICARD_OPTS) +COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICAD_OPTS) +COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICARD_OPTS) +BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICARD_OPTS) FIX_MATE = $(call FIX_MATE_MEM,$(PICARD_MEM)) FIX_MATE_MEM = $(JAVA) -Xmx$(1) -jar $(PICARD_DIR)/FixMateInformation.jar $(PICARD_OPTS) TMP_DIR=$(TMPDIR) SAM_TO_FASTQ = $(call SAM_TO_FASTQ_MEM,$(PICARD_MEM)) diff --git a/qc/bamMetrics.mk b/qc/bamMetrics.mk deleted file mode 100644 index be2f4fa3..00000000 --- a/qc/bamMetrics.mk +++ /dev/null @@ -1,50 +0,0 @@ -include modules/Makefile.inc -include modules/variant_callers/gatk.inc - -LOGDIR ?= log/bam_metrics.$(NOW) -PHONY += metrics - -COLLECT_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_DIR)/CollectMultipleMetrics.jar VALIDATION_STRINGENCY=LENIENT -COLLECT_WGS_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_JAR) CollectWgsMetrics VALIDATION_STRINGENCY=LENIENT -COLLECT_GC_METRICS = $(JAVA) -Xmx12G -jar $(PICARD_DIR)/CollectGcBiasMetrics.jar VALIDATION_STRINGENCY=LENIENT - -SUMMARIZE_IDXSTATS = python modules/qc/summarize_idxstats.py - -bam_metrics : summary_metrics gc flagstats wgs_metrics - -PHONY += flagstats -flagstats : $(foreach sample,$(SAMPLES),metrics/$(sample).flagstats) -PHONY += summary_metrics -summary_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).alignment_summary_metrics) -PHONY += wgs_metrics -wgs_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics) metrics/wgs_metrics_summary.tsv -PHONY += dup -dup : $(foreach sample,$(SAMPLES),metrics/$(sample).dup_metrics) -PHONY += gc -gc : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_bias_metrics) - -metrics/%.alignment_summary_metrics : bam/%.bam - $(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_METRICS) I=$< O=metrics/$(*).alignment_summary_metrics REFERENCE_SEQUENCE=$(REF_FASTA)") - -metrics/wgs_metrics_summary.tsv : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics) - $(INIT) (grep GENOME_TERRITORY $< | sed 's/^/SAMPLE\t/'; for x in $(SAMPLES); do grep -A1 GENOME_TERRITORY metrics/$$x.wgs_metrics | sed 1d | sed "s/^/$$x\t/" ; done) > $@ - -metrics/%.wgs_metrics : bam/%.bam - $(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_WGS_METRICS) I=$< O=$@ REFERENCE_SEQUENCE=$(REF_FASTA)") - -metrics/%.gc_bias_metrics : bam/%.bam - $(call RUN,-s 18G -m 24G -w 7200,"$(COLLECT_GC_METRICS) I=$< O=$@ CHART_OUTPUT=$(addsuffix .pdf,$@) REFERENCE_SEQUENCE=$(REF_FASTA)") - -metrics/%.flagstats : bam/%.bam - $(call RUN,-s 18G -m 24G -w 7200,"$(SAMTOOLS) flagstat $< > $@") - -bam/%.markdup.bam metrics/%.dup_metrics : bam/%.bam - $(call RUN,-s 18G -m 24G -w 7200,"$(MARK_DUP) I=$< O=bam/$*.markdup.bam METRICS_FILE=metrics/$*.dup_metrics") - -metrics/dup_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).dup_metrics.txt) - $(INIT) grep '^LIBRARY' $< > $@ && \ - for metrics in $^; do \ - grep -A1 '^LIBRARY' $$metrics | sed '1d' >> $@; \ - done - -.PHONY: $(PHONY) diff --git a/qc/bam_metrics.mk b/qc/bam_metrics.mk new file mode 100644 index 00000000..00377c43 --- /dev/null +++ b/qc/bam_metrics.mk @@ -0,0 +1,136 @@ +include modules/Makefile.inc + +LOGDIR ?= log/bam_metrics.$(NOW) + +bam_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) \ + summary/idx_metrics.txt \ + summary/aln_metrics.txt \ + summary/insert_metrics.txt \ + summary/oxog_metrics.txt \ + summary/hs_metrics.txt \ + summary/gc_metrics.txt \ + summary/gc_summary.txt + +TARGETS_LIST ?= $(HOME)/share/lib/resource_files/MSK-IMPACT-v4.sorted.list + +define idx-metrics +metrics/$1.idx_stats.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(BAM_INDEX) \ + INPUT=$$(<) \ + > $$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call idx-metrics,$(sample)))) + +define aln-metrics +metrics/$1.aln_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(COLLECT_ALIGNMENT_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call aln-metrics,$(sample)))) + +define insert-metrics +metrics/$1.insert_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(COLLECT_INSERT_METRICS) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \ + MINIMUM_PCT=0.5") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call insert-metrics,$(sample)))) + +define oxog-metrics +metrics/$1.oxog_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(COLLECT_OXOG_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call oxog-metrics,$(sample)))) + +define hs-metrics +metrics/$1.hs_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(CALC_HS_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + BAIT_INTERVALS=$$(TARGETS_LIST) \ + TARGET_INTERVALS=$$(TARGETS_LIST)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call hs-metrics,$(sample)))) + +define gc-metrics +metrics/$1.gc_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $$(COLLECT_GC_BIAS) \ + INPUT=$$(<) \ + OUTPUT=metrics/$1.gc_bias.txt \ + CHART_OUTPUT=metrics/$1.gc_metrics.pdf \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + SUMMARY_OUTPUT=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call gc-metrics,$(sample)))) + +summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 1 --sample_names '$(SAMPLES)'") + +summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 2 --sample_names '$(SAMPLES)'") + +summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 3 --sample_names '$(SAMPLES)'") + +summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 4 --sample_names '$(SAMPLES)'") + +summary/hs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).hs_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 5 --sample_names '$(SAMPLES)'") + +summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 6 --sample_names '$(SAMPLES)'") + +summary/gc_summary.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics.txt) + $(call RUN, -c -n 1 -s 12G -m 24G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/qc/bam_metrics.R --option 7 --sample_names '$(SAMPLES)'") + + +..DUMMY := $(shell mkdir -p version; \ + echo "picard" >> version/bam_metrics.txt; \ + $(PICARD) CollectAlignmentSummaryMetrics --version &>> version/bam_metrics.txt; \ + $(PICARD) CollectInsertSizeMetrics --version &>> version/bam_metrics.txt; \ + $(PICARD) CollectOxoGMetrics --version &>> version/bam_metrics.txt; \ + $(PICARD) CollectHsMetrics --version &>> version/bam_metrics.txt; \ + $(PICARD) CollectGcBiasMetrics --version &>> version/bam_metrics.txt; \ + R --version >> version/bam_metrics.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: bam_metrics From 5c6b7cb15d29d28dc210a25812e6c45da73172b2 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 13 Sep 2023 12:11:17 -0400 Subject: [PATCH 746/766] WGS metrics --- Makefile | 4 ++ Makefile.inc | 2 + qc/wgs_metrics.mk | 116 ++++++++++++++++++++++++++++++++++++++++++ scripts/wgs_metrics.R | 109 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 231 insertions(+) create mode 100644 qc/wgs_metrics.mk create mode 100755 scripts/wgs_metrics.R diff --git a/Makefile b/Makefile index 74905fef..ce8c4077 100644 --- a/Makefile +++ b/Makefile @@ -437,6 +437,10 @@ TARGETS += bam_interval_metrics bam_interval_metrics : $(call RUN_MAKE,modules/qc/bam_interval_metrics.mk) +TARGETS += wgs_metrics +wgs_metrics : + $(call RUN_MAKE,modules/qc/wgs_metrics.mk) + TARGETS += rnaseq_metrics rnaseq_metrics : $(call RUN_MAKE,modules/qc/rnaseqMetrics.mk) diff --git a/Makefile.inc b/Makefile.inc index fc95ff4a..2a63beec 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -103,6 +103,8 @@ COLLECT_ALIGNMENT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectAlignmentSummaryM COLLECT_INSERT_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectInsertSizeMetrics $(PICARD_OPTS) COLLECT_OXOG_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectOxoGMetrics $(PICAD_OPTS) COLLECT_GC_BIAS = $(PICARD) -Xmx$(PICARD_MEM) CollectGcBiasMetrics $(PICARD_OPTS) +COLLECT_WGS_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectWgsMetrics $(PICARD_OPTS) +COLLECT_DUP_METRICS = $(PICARD) -Xmx$(PICARD_MEM) CollectDuplicateMetrics $(PICARD_OPTS) BAM_INDEX = $(PICARD) -Xmx$(PICARD_MEM) BamIndexStats $(PICARD_OPTS) FIX_MATE = $(call FIX_MATE_MEM,$(PICARD_MEM)) FIX_MATE_MEM = $(JAVA) -Xmx$(1) -jar $(PICARD_DIR)/FixMateInformation.jar $(PICARD_OPTS) TMP_DIR=$(TMPDIR) diff --git a/qc/wgs_metrics.mk b/qc/wgs_metrics.mk new file mode 100644 index 00000000..bcc963a3 --- /dev/null +++ b/qc/wgs_metrics.mk @@ -0,0 +1,116 @@ +include modules/Makefile.inc + +LOGDIR ?= log/wgs_metrics.$(NOW) + +wgs_metrics : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics_summary.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics.txt) \ + $(foreach sample,$(SAMPLES),metrics/$(sample).duplicate_metrics.txt) \ + summary/idx_metrics.txt \ + summary/aln_metrics.txt \ + summary/insert_metrics.txt \ + summary/oxog_metrics.txt \ + summary/gc_metrics.txt \ + summary/wgs_metrics.txt \ + summary/duplicate_metrics.txt + +SAMTOOLS_THREADS = 4 +SAMTOOLS_MEM_THREAD = 1G + +GATK_THREADS = 4 +GATK_MEM_THREAD = 2G + +define picard-metrics +metrics/$1.idx_stats.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(BAM_INDEX) \ + INPUT=$$(<) \ + > $$(@)") + +metrics/$1.aln_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_ALIGNMENT_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +metrics/$1.insert_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_INSERT_METRICS) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + HISTOGRAM_FILE=metrics/$1.insert_metrics.pdf \ + MINIMUM_PCT=0.05") + +metrics/$1.oxog_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_OXOG_METRICS) \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + INPUT=$$(<) \ + OUTPUT=$$(@)") + +metrics/$1.gc_metrics_summary.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_GC_BIAS) \ + INPUT=$$(<) \ + OUTPUT=metrics/$1.gc_metrics.txt \ + CHART_OUTPUT=metrics/$1.gc_metrics.pdf \ + REFERENCE_SEQUENCE=$$(REF_FASTA) \ + SUMMARY_OUTPUT=$$(@)") + +metrics/$1.wgs_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_WGS_METRICS) \ + INPUT=$$(<) \ + OUTPUT=$$(@) \ + REFERENCE_SEQUENCE=$$(REF_FASTA)") + +metrics/$1.duplicate_metrics.txt : bam/$1.bam + $$(call RUN, -c -n 1 -s 12G -m 24G -w 24:00:00,"set -o pipefail && \ + $$(COLLECT_DUP_METRICS) \ + INPUT=$$(<) \ + METRICS_FILE=$$(@)") + +endef +$(foreach sample,$(SAMPLES),\ + $(eval $(call picard-metrics,$(sample)))) + +summary/idx_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).idx_stats.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 1 --sample_names '$(SAMPLES)'") + +summary/aln_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).aln_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 2 --sample_names '$(SAMPLES)'") + +summary/insert_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).insert_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 3 --sample_names '$(SAMPLES)'") + +summary/oxog_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).oxog_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 4 --sample_names '$(SAMPLES)'") + +summary/gc_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).gc_metrics_summary.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 5 --sample_names '$(SAMPLES)'") + +summary/wgs_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).wgs_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 6 --sample_names '$(SAMPLES)'") + +summary/duplicate_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).duplicate_metrics.txt) + $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ + $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 7 --sample_names '$(SAMPLES)'") + +..DUMMY := $(shell mkdir -p version; \ + $(SAMTOOLS) --version >> version/wgs_metrics.txt; \ + echo "gatk3" >> version/wgs_metrics.txt; \ + $(GATK) --version >> version/wgs_metrics.txt; \ + echo "picard" >> version/wgs_metrics.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: wgs_metrics diff --git a/scripts/wgs_metrics.R b/scripts/wgs_metrics.R new file mode 100755 index 00000000..48c35cb6 --- /dev/null +++ b/scripts/wgs_metrics.R @@ -0,0 +1,109 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("optparse")) +suppressPackageStartupMessages(library("readr")) +suppressPackageStartupMessages(library("dplyr")) +suppressPackageStartupMessages(library("magrittr")) + +if (!interactive()) { + options(warn = -1, error = quote({ traceback(); q('no', status = 1) })) +} + +optList = list(make_option("--option", default = NA, type = 'character', help = "analysis type"), + make_option("--sample_names", default = NA, type = 'character', help = "sample names")) +parser = OptionParser(usage = "%prog", option_list = optList) +arguments = parse_args(parser, positional_arguments = T) +opt = arguments$options + +if (as.numeric(opt$option)==1) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".idx_stats.txt"), + col_names = FALSE, col_types = cols(.default = col_character()))[-85,,drop=FALSE] %>% + readr::type_convert() %>% + dplyr::select(CHROMOSOME = X1, + LENGTH = X2, + ALIGNED_READS = X3) %>% + dplyr::mutate(CHROMOSOME = gsub(pattern=" length=", replacement="", x=CHROMOSOME), + ALIGNED_READS = gsub(pattern="Aligned= ", replacement="", x=ALIGNED_READS), + SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/idx_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==2) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".aln_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-SAMPLE, -READ_GROUP) %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/aln_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==3) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".insert_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::select(-SAMPLE, -READ_GROUP) %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/insert_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==4) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".oxog_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::rename(SAMPLE_NAME = SAMPLE_ALIAS) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/oxog_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==5) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".gc_metrics.txt"), + skip = 6, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/gc_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==6) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".wgs_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/wgs_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} else if (as.numeric(opt$option)==7) { + sample_names = unlist(strsplit(x=as.character(opt$sample_names), split=" ", fixed=TRUE)) + metrics = list() + for (i in 1:length(sample_names)) { + metrics[[i]] = readr::read_tsv(file = paste0("metrics/", sample_names[i], ".duplicate_metrics.txt"), + skip = 6, n_max = 1, col_names = TRUE, col_types = cols(.default = col_character())) %>% + readr::type_convert() %>% + dplyr::mutate(SAMPLE_NAME = sample_names[i]) + } + metrics = do.call(rbind, metrics) + write_tsv(metrics, path="summary/duplicate_metrics.txt", na = "NA", append = FALSE, col_names = TRUE) + +} From aadc15c121b47e98477af2cb0386a2f4eca08c48 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 13 Sep 2023 12:13:43 -0400 Subject: [PATCH 747/766] Update wgs_metrics.mk --- qc/wgs_metrics.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qc/wgs_metrics.mk b/qc/wgs_metrics.mk index bcc963a3..ded23e69 100644 --- a/qc/wgs_metrics.mk +++ b/qc/wgs_metrics.mk @@ -106,11 +106,11 @@ summary/duplicate_metrics.txt : $(foreach sample,$(SAMPLES),metrics/$(sample).du $(call RUN, -c -n 1 -s 8G -m 12G,"set -o pipefail && \ $(RSCRIPT) $(SCRIPTS_DIR)/wgs_metrics.R --option 7 --sample_names '$(SAMPLES)'") -..DUMMY := $(shell mkdir -p version; \ - $(SAMTOOLS) --version >> version/wgs_metrics.txt; \ - echo "gatk3" >> version/wgs_metrics.txt; \ - $(GATK) --version >> version/wgs_metrics.txt; \ - echo "picard" >> version/wgs_metrics.txt) +#..DUMMY := $(shell mkdir -p version; \ +# $(SAMTOOLS) --version >> version/wgs_metrics.txt; \ +# echo "gatk3" >> version/wgs_metrics.txt; \ +# $(GATK) --version >> version/wgs_metrics.txt; \ +# echo "picard" >> version/wgs_metrics.txt) .SECONDARY: .DELETE_ON_ERROR: .PHONY: wgs_metrics From a7753f6cafabfcc42a2df7dbcdbda5e946eff3fd Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 17 Oct 2023 18:22:38 -0400 Subject: [PATCH 748/766] ++ --- Makefile | 6 +++--- bam_tools/{get_bam.mk => getbam_irb_mirror.mk} | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename bam_tools/{get_bam.mk => getbam_irb_mirror.mk} (93%) diff --git a/Makefile b/Makefile index ce8c4077..bdcfbc6f 100644 --- a/Makefile +++ b/Makefile @@ -378,9 +378,9 @@ TARGETS += process_bam process_bam : $(call RUN_MAKE,modules/bam_tools/processBam.mk) -TARGETS += get_bam -get_bam : - $(call RUN_MAKE,modules/bam_tools/get_bam.mk) +TARGETS += getbam_irb_mirror +getbam_irb_mirror : + $(call RUN_MAKE,modules/bam_tools/getbam_irb_mirror.mk) #================================================== diff --git a/bam_tools/get_bam.mk b/bam_tools/getbam_irb_mirror.mk similarity index 93% rename from bam_tools/get_bam.mk rename to bam_tools/getbam_irb_mirror.mk index 9bff77cd..f4ac67ae 100644 --- a/bam_tools/get_bam.mk +++ b/bam_tools/getbam_irb_mirror.mk @@ -26,7 +26,7 @@ endef $(eval $(call get-bam,$(sample)))) ..DUMMY := $(shell mkdir -p version; \ - which scp > version/get_bam.txt) + which scp > version/getbam_irb_mirror.txt) .SECONDARY: .DELETE_ON_ERROR: .PHONY: get_bam \ No newline at end of file From fe5d5be7b3edd26fb24b968ed9b5e93b78e96afc Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 18 Oct 2023 17:23:29 -0400 Subject: [PATCH 749/766] ++ --- Makefile | 7 +++++++ bam_tools/getbam_data_mirror.mk | 34 +++++++++++++++++++++++++++++++++ bam_tools/getbam_irb_mirror.mk | 2 +- 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 bam_tools/getbam_data_mirror.mk diff --git a/Makefile b/Makefile index bdcfbc6f..b252edfc 100644 --- a/Makefile +++ b/Makefile @@ -382,6 +382,13 @@ TARGETS += getbam_irb_mirror getbam_irb_mirror : $(call RUN_MAKE,modules/bam_tools/getbam_irb_mirror.mk) +TARGETS += getbam_data_mirror +getbam_data_mirror : + $(call RUN_MAKE,modules/bam_tools/getbam_data_mirror.mk) + +TARGETS += putbam_data_mirror +putbam_data_mirror : + $(call RUN_MAKE,modules/bam_tools/putbam_data_mirror.mk) #================================================== # VCF tools diff --git a/bam_tools/getbam_data_mirror.mk b/bam_tools/getbam_data_mirror.mk new file mode 100644 index 00000000..77654342 --- /dev/null +++ b/bam_tools/getbam_data_mirror.mk @@ -0,0 +1,34 @@ +include modules/Makefile.inc + +LOGDIR = log/getbam_data_mirror.$(NOW) + +get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ + $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ + $(foreach sample,$(SAMPLES),bam/$(sample).bai) + +define get-bam +bam/$1.bam : + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam \ + bam/") + +bam/$1.bam.bai : + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai \ + bam/") + +bam/$1.bai : + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai \ + bam/") + + +endef + $(foreach sample,$(SAMPLES),\ + $(eval $(call get-bam,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + which scp > version/getbam_data_mirror.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: get_bam \ No newline at end of file diff --git a/bam_tools/getbam_irb_mirror.mk b/bam_tools/getbam_irb_mirror.mk index f4ac67ae..ca90c7e3 100644 --- a/bam_tools/getbam_irb_mirror.mk +++ b/bam_tools/getbam_irb_mirror.mk @@ -1,6 +1,6 @@ include modules/Makefile.inc -LOGDIR = log/get_bam.$(NOW) +LOGDIR = log/getbam_irb_mirror.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ From 12bd171377710533f0313ee97ac98c1325a812b4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 18 Oct 2023 17:30:18 -0400 Subject: [PATCH 750/766] Update getbam_data_mirror.mk --- bam_tools/getbam_data_mirror.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bam_tools/getbam_data_mirror.mk b/bam_tools/getbam_data_mirror.mk index 77654342..a990c663 100644 --- a/bam_tools/getbam_data_mirror.mk +++ b/bam_tools/getbam_data_mirror.mk @@ -5,6 +5,8 @@ LOGDIR = log/getbam_data_mirror.$(NOW) get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ $(foreach sample,$(SAMPLES),bam/$(sample).bai) + +PROJECT_NAME = (basename $(PWD)) define get-bam bam/$1.bam : From 49c9d478bcfb6de72b0d2effa1e28ca5a6e5e12a Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 18 Oct 2023 17:30:57 -0400 Subject: [PATCH 751/766] Update getbam_data_mirror.mk --- bam_tools/getbam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/getbam_data_mirror.mk b/bam_tools/getbam_data_mirror.mk index a990c663..90713e87 100644 --- a/bam_tools/getbam_data_mirror.mk +++ b/bam_tools/getbam_data_mirror.mk @@ -6,7 +6,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ $(foreach sample,$(SAMPLES),bam/$(sample).bai) -PROJECT_NAME = (basename $(PWD)) +PROJECT_NAME = basename $(PWD) define get-bam bam/$1.bam : From df03ceb6ec7ec33cac75703946f5d12ad5f4b8e9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 18 Oct 2023 17:32:00 -0400 Subject: [PATCH 752/766] Update Makefile --- Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Makefile b/Makefile index b252edfc..7452500f 100644 --- a/Makefile +++ b/Makefile @@ -386,9 +386,6 @@ TARGETS += getbam_data_mirror getbam_data_mirror : $(call RUN_MAKE,modules/bam_tools/getbam_data_mirror.mk) -TARGETS += putbam_data_mirror -putbam_data_mirror : - $(call RUN_MAKE,modules/bam_tools/putbam_data_mirror.mk) #================================================== # VCF tools From d4e72c7084fb2f007c8c953ea75d2c4608cf36e0 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:05:02 -0400 Subject: [PATCH 753/766] +- --- Makefile | 8 ++++-- ..._data_mirror.mk => get_bam_data_mirror.mk} | 0 ...am_irb_mirror.mk => get_bam_irb_mirror.mk} | 0 bam_tools/put_bam_data_mirror.mk | 25 +++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) rename bam_tools/{getbam_data_mirror.mk => get_bam_data_mirror.mk} (100%) rename bam_tools/{getbam_irb_mirror.mk => get_bam_irb_mirror.mk} (100%) create mode 100644 bam_tools/put_bam_data_mirror.mk diff --git a/Makefile b/Makefile index 7452500f..5daa43ab 100644 --- a/Makefile +++ b/Makefile @@ -380,11 +380,15 @@ process_bam : TARGETS += getbam_irb_mirror getbam_irb_mirror : - $(call RUN_MAKE,modules/bam_tools/getbam_irb_mirror.mk) + $(call RUN_MAKE,modules/bam_tools/get_bam_irb_mirror.mk) TARGETS += getbam_data_mirror getbam_data_mirror : - $(call RUN_MAKE,modules/bam_tools/getbam_data_mirror.mk) + $(call RUN_MAKE,modules/bam_tools/get_bam_data_mirror.mk) + +TARGETS += putbam_data_mirror +putbam_data_mirror : + $(call RUN_MAKE,modules/bam_tools/put_bam_data_mirror.mk) #================================================== diff --git a/bam_tools/getbam_data_mirror.mk b/bam_tools/get_bam_data_mirror.mk similarity index 100% rename from bam_tools/getbam_data_mirror.mk rename to bam_tools/get_bam_data_mirror.mk diff --git a/bam_tools/getbam_irb_mirror.mk b/bam_tools/get_bam_irb_mirror.mk similarity index 100% rename from bam_tools/getbam_irb_mirror.mk rename to bam_tools/get_bam_irb_mirror.mk diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk new file mode 100644 index 00000000..41155ece --- /dev/null +++ b/bam_tools/put_bam_data_mirror.mk @@ -0,0 +1,25 @@ +include modules/Makefile.inc + +LOGDIR = log/putbam_data_mirror.$(NOW) + +put_bam : $(foreach sample,$(SAMPLES),bam/$(sample).taskcomplete) + +PROJECT_NAME = basename $(PWD) + +define put-bam +bam/$1.taskcomplete : bam/$1.bam + $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ + rsync -aP -e ssh bam/$1.bam $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam && \ + rsync -aP -e ssh bam/$1.bam.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ + rsync -aP -e ssh bam/$1.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ + echo 'finished!' > $$(@)") + +endef + $(foreach sample,$(SAMPLES),\ + $(eval $(call put-bam,$(sample)))) + +..DUMMY := $(shell mkdir -p version; \ + which scp > version/putbam_data_mirror.txt) +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: put_bam \ No newline at end of file From a5c1236957afd571114332c232f17b3723a360f1 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:08:02 -0400 Subject: [PATCH 754/766] Update put_bam_data_mirror.mk --- bam_tools/put_bam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk index 41155ece..2d5281eb 100644 --- a/bam_tools/put_bam_data_mirror.mk +++ b/bam_tools/put_bam_data_mirror.mk @@ -4,7 +4,7 @@ LOGDIR = log/putbam_data_mirror.$(NOW) put_bam : $(foreach sample,$(SAMPLES),bam/$(sample).taskcomplete) -PROJECT_NAME = basename $(PWD) +PROJECT_NAME = $(basename $(PWD)) define put-bam bam/$1.taskcomplete : bam/$1.bam From 20807df46c8c09a84032e55391b1591025441431 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:08:11 -0400 Subject: [PATCH 755/766] Update put_bam_data_mirror.mk --- bam_tools/put_bam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk index 2d5281eb..d69506a3 100644 --- a/bam_tools/put_bam_data_mirror.mk +++ b/bam_tools/put_bam_data_mirror.mk @@ -4,7 +4,7 @@ LOGDIR = log/putbam_data_mirror.$(NOW) put_bam : $(foreach sample,$(SAMPLES),bam/$(sample).taskcomplete) -PROJECT_NAME = $(basename $(PWD)) +PROJECT_NAME = $(eval basename $(PWD)) define put-bam bam/$1.taskcomplete : bam/$1.bam From eda28759bc3c1937e5df45c0baa59f7f031b45c5 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:08:43 -0400 Subject: [PATCH 756/766] Update put_bam_data_mirror.mk --- bam_tools/put_bam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk index d69506a3..56825fef 100644 --- a/bam_tools/put_bam_data_mirror.mk +++ b/bam_tools/put_bam_data_mirror.mk @@ -4,7 +4,7 @@ LOGDIR = log/putbam_data_mirror.$(NOW) put_bam : $(foreach sample,$(SAMPLES),bam/$(sample).taskcomplete) -PROJECT_NAME = $(eval basename $(PWD)) +PROJECT_NAME = $(shell basename $(PWD)) define put-bam bam/$1.taskcomplete : bam/$1.bam From 9b5912ce7761c8b319f42fe42b5bc3abffb80b71 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:09:36 -0400 Subject: [PATCH 757/766] Update get_bam_data_mirror.mk --- bam_tools/get_bam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam_data_mirror.mk b/bam_tools/get_bam_data_mirror.mk index 90713e87..ce0d0a8f 100644 --- a/bam_tools/get_bam_data_mirror.mk +++ b/bam_tools/get_bam_data_mirror.mk @@ -6,7 +6,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ $(foreach sample,$(SAMPLES),bam/$(sample).bam.bai) \ $(foreach sample,$(SAMPLES),bam/$(sample).bai) -PROJECT_NAME = basename $(PWD) +PROJECT_NAME = $(shell basename $(PWD)) define get-bam bam/$1.bam : From d3d7ec42d71526ae33623970be979633544a658a Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 19 Oct 2023 19:21:47 -0400 Subject: [PATCH 758/766] Update put_bam_data_mirror.mk --- bam_tools/put_bam_data_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk index 56825fef..d273f871 100644 --- a/bam_tools/put_bam_data_mirror.mk +++ b/bam_tools/put_bam_data_mirror.mk @@ -11,7 +11,7 @@ bam/$1.taskcomplete : bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ rsync -aP -e ssh bam/$1.bam $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam && \ rsync -aP -e ssh bam/$1.bam.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ - rsync -aP -e ssh bam/$1.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ + rsync -aP -e ssh bam/$1.bam.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai && \ echo 'finished!' > $$(@)") endef From 48b70fd0abf6e8148a85c48db38ba7f5dfb1fbf4 Mon Sep 17 00:00:00 2001 From: David Brown Date: Thu, 16 May 2024 14:46:15 -0400 Subject: [PATCH 759/766] Update qmake.pl --- scripts/qmake.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/qmake.pl b/scripts/qmake.pl index a95593f9..20461017 100755 --- a/scripts/qmake.pl +++ b/scripts/qmake.pl @@ -17,7 +17,8 @@ zhuy1 => "W013UH382P9", peix => "W0147TPN3E1", issabhas => "U01V8R1RKQU", - xiaoy => "U01C8MPBSH5" + xiaoy => "U01C8MPBSH5", + giacomf1 => "U06SW7W6D44" ); sub HELP_MESSAGE { From 9b6b94b44b30abdb7499f164b2da31f43b232518 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 12 Jun 2024 10:05:20 -0400 Subject: [PATCH 760/766] Update get_bam_data_mirror.mk --- bam_tools/get_bam_data_mirror.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bam_tools/get_bam_data_mirror.mk b/bam_tools/get_bam_data_mirror.mk index ce0d0a8f..1f616d50 100644 --- a/bam_tools/get_bam_data_mirror.mk +++ b/bam_tools/get_bam_data_mirror.mk @@ -11,17 +11,17 @@ PROJECT_NAME = $(shell basename $(PWD)) define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam \ + rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam \ bam/") bam/$1.bam.bai : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai \ + rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai \ bam/") bam/$1.bai : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - rsync -aP -e ssh $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai \ + rsync -aP -e ssh $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai \ bam/") From 5ae0e5724ccd1a48b606b20440329eb073b5f218 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 12 Jun 2024 10:10:05 -0400 Subject: [PATCH 761/766] Update get_bam_irb_mirror.mk --- bam_tools/get_bam_irb_mirror.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam_tools/get_bam_irb_mirror.mk b/bam_tools/get_bam_irb_mirror.mk index ca90c7e3..02a78b4b 100644 --- a/bam_tools/get_bam_irb_mirror.mk +++ b/bam_tools/get_bam_irb_mirror.mk @@ -9,7 +9,7 @@ get_bam : $(foreach sample,$(SAMPLES),bam/$(sample).bam) \ define get-bam bam/$1.bam : $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - scp $(USER)@selene.mskcc.org:/res/dmpcollab/dmpshare/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ + scp $(USER)@juno-xfer01.mskcc.org:/juno/dmp/share/irb12_245/`echo $1 | cut -c 1-1`/`echo $1 | cut -c 2-2`/$1.bam \ bam/") bam/$1.bam.bai : bam/$1.bam From 471c00c4062a8d6b573c8f00a505f522944f3788 Mon Sep 17 00:00:00 2001 From: David Brown Date: Wed, 12 Jun 2024 10:11:12 -0400 Subject: [PATCH 762/766] Update put_bam_data_mirror.mk --- bam_tools/put_bam_data_mirror.mk | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bam_tools/put_bam_data_mirror.mk b/bam_tools/put_bam_data_mirror.mk index d273f871..8bc19bdc 100644 --- a/bam_tools/put_bam_data_mirror.mk +++ b/bam_tools/put_bam_data_mirror.mk @@ -9,9 +9,9 @@ PROJECT_NAME = $(shell basename $(PWD)) define put-bam bam/$1.taskcomplete : bam/$1.bam $$(call RUN,-c -n 1 -s 2G -m 4G, "set -o pipefail && \ - rsync -aP -e ssh bam/$1.bam $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam && \ - rsync -aP -e ssh bam/$1.bam.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ - rsync -aP -e ssh bam/$1.bam.bai $(USER)@swan.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai && \ + rsync -aP -e ssh bam/$1.bam $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam && \ + rsync -aP -e ssh bam/$1.bam.bai $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bam.bai && \ + rsync -aP -e ssh bam/$1.bam.bai $(USER)@lilac-xfer01.mskcc.org:/oscar/warm/reis-filho/by_user/$(USER)/$(PROJECT_NAME)/$1.bai && \ echo 'finished!' > $$(@)") endef From b4c01fdcc9a15e88c986838bd37d4dbb84b928d9 Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 30 Jul 2024 11:09:56 -0400 Subject: [PATCH 763/766] ++ --- config.inc | 1 + variant_callers/somatic/msisensor.mk | 32 ++++++++++++---------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/config.inc b/config.inc index 1818a0bd..901b1402 100644 --- a/config.inc +++ b/config.inc @@ -41,6 +41,7 @@ VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8 CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat POLYSOLVER_ENV = $(HOME)/share/usr/anaconda-envs/hla-polysolver +MSISENSOR_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.5/ JARDIR ?= $(HOME)/share/usr/lib/java diff --git a/variant_callers/somatic/msisensor.mk b/variant_callers/somatic/msisensor.mk index b61e0298..6f93723d 100644 --- a/variant_callers/somatic/msisensor.mk +++ b/variant_callers/somatic/msisensor.mk @@ -2,30 +2,26 @@ include modules/Makefile.inc LOGDIR ?= log/msisensor.$(NOW) -MSISENSOR_OPTS ?= -d $(REF_MSI) $(if $(TARGETS_FILE),-e $(TARGETS_FILE)) - -PHONY += msisensor - -.DELETE_ON_ERROR: -.SECONDARY: -.PHONY : $(PHONY) +msisensor: $(foreach pair,$(SAMPLE_PAIRS),msisensor/$(pair).msi) \ + msisensor/msi.tsv -msisensor: msisensor/msi.tsv +MSISENSOR_OPTS ?= -d $(REF_MSI) $(if $(TARGETS_FILE),-e $(TARGETS_FILE)) define msisensor-tumor-normal msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai - $$(call RUN,-c -n 8 -s 1G -m 1.2G,"source ~/share/usr/anaconda-envs/jrflab-modules-0.1.5/bin/activate ~/share/usr/anaconda-envs/msisensor && \ - msisensor msi $$(MSISENSOR_OPTS) -n $$(<<) -t $$< -b 8 -o $$@") + $$(call RUN,-c -n 8 -s 1G -m 2G -v $(MSISENSOR_ENV),"set -o pipefail && \ + msisensor msi $$(MSISENSOR_OPTS) \ + -n $$(<<) \ + -t $$(<) \ + -b 8 \ + -o $$(@)") endef -$(foreach pair,$(SAMPLE_PAIRS),$(eval $(call msisensor-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) +$(foreach pair,$(SAMPLE_PAIRS),\ + $(eval $(call msisensor-tumor-normal,$(tumor.$(pair)),$(normal.$(pair))))) msisensor/msi.tsv : $(foreach pair,$(SAMPLE_PAIRS),msisensor/$(pair).msi) $(INIT) (head -1 $< | sed 's/^/sample\t/'; for x in $^; do sed "1d; s/^/$$(basename $$x)\t/" $$x; done | sed 's/_.*msi//' ) > $@ -bam/%.ds.bam : metrics/hs_metrics.tsv bam/%.bam - $(call RUN,-s 4G -m 6G,\ - "ds=\`py 'round(500 / pandas.read_table(\"$<\", index_col=0).ix[\"$*\", \"MEAN_TARGET_COVERAGE\"], 2)'\`; \ - if [ \$$(echo \"\$$ds >= 1\" | bc) -eq 1 ]; then ln -s \$$(readlink -f $(<<)) $@; else \ - samtools view -hb -s \$$ds $(<<) > $@; fi") - -include modules/bam_tools/processBam.mk +.SECONDARY: +.DELETE_ON_ERROR: +.PHONY: msisensor From f6a432982b3e9222f4910f05211f93819995185d Mon Sep 17 00:00:00 2001 From: David Brown Date: Tue, 30 Jul 2024 11:15:18 -0400 Subject: [PATCH 764/766] Update config.inc --- config.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.inc b/config.inc index 901b1402..57692023 100644 --- a/config.inc +++ b/config.inc @@ -41,7 +41,7 @@ VARIANT_ANNOTATION_ENV = $(HOME)/share/usr/env/r-variantannotation-1.44.0 FACETS_SUITE_ENV = $(HOME)/share/usr/env/r-facets-suite-2.0.8 CRAVAT_ENV = $(HOME)/share/usr/anaconda-envs/open-cravat POLYSOLVER_ENV = $(HOME)/share/usr/anaconda-envs/hla-polysolver -MSISENSOR_ENV = $(HOME)/share/usr/anaconda-envs/jrflab-modules-0.1.5/ +MSISENSOR_ENV = $(HOME)/share/usr/anaconda-envs/msisensor JARDIR ?= $(HOME)/share/usr/lib/java From e55d8d6e752db1eb25603fe329a93be677f4e65d Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 2 Aug 2024 16:05:46 -0400 Subject: [PATCH 765/766] ++ --- variant_callers/somatic/msisensor.mk | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/variant_callers/somatic/msisensor.mk b/variant_callers/somatic/msisensor.mk index 6f93723d..153dcbfe 100644 --- a/variant_callers/somatic/msisensor.mk +++ b/variant_callers/somatic/msisensor.mk @@ -5,12 +5,15 @@ LOGDIR ?= log/msisensor.$(NOW) msisensor: $(foreach pair,$(SAMPLE_PAIRS),msisensor/$(pair).msi) \ msisensor/msi.tsv -MSISENSOR_OPTS ?= -d $(REF_MSI) $(if $(TARGETS_FILE),-e $(TARGETS_FILE)) +MICROSATELLITES_LIST = $(HOME)/share/lib/resource_files/MSIsensor/microsatellites.list +MSI_REGIONS = $(HOME)/share/lib/resource_files/MSIsensor/msiregions.bed define msisensor-tumor-normal -msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam bam/$1.bam.bai bam/$2.bam.bai +msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam $$(call RUN,-c -n 8 -s 1G -m 2G -v $(MSISENSOR_ENV),"set -o pipefail && \ msisensor msi $$(MSISENSOR_OPTS) \ + -d $$(MICROSATELLITES_LIST) \ + -d $$(MSI_REGIONS) \ -n $$(<<) \ -t $$(<) \ -b 8 \ From 3aa1e2077d2e5c3bbe7a29956f8e6b0224ef5ec6 Mon Sep 17 00:00:00 2001 From: David Brown Date: Fri, 2 Aug 2024 16:43:20 -0400 Subject: [PATCH 766/766] Update msisensor.mk --- variant_callers/somatic/msisensor.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/variant_callers/somatic/msisensor.mk b/variant_callers/somatic/msisensor.mk index 153dcbfe..42f5f108 100644 --- a/variant_callers/somatic/msisensor.mk +++ b/variant_callers/somatic/msisensor.mk @@ -13,7 +13,7 @@ msisensor/$1_$2.msi : bam/$1.bam bam/$2.bam $$(call RUN,-c -n 8 -s 1G -m 2G -v $(MSISENSOR_ENV),"set -o pipefail && \ msisensor msi $$(MSISENSOR_OPTS) \ -d $$(MICROSATELLITES_LIST) \ - -d $$(MSI_REGIONS) \ + -e $$(MSI_REGIONS) \ -n $$(<<) \ -t $$(<) \ -b 8 \