File tree Expand file tree Collapse file tree 2 files changed +86
-0
lines changed Expand file tree Collapse file tree 2 files changed +86
-0
lines changed Original file line number Diff line number Diff line change @@ -321,3 +321,46 @@ task DeduplicateBam {
321321 docker : select_first ([runtime_attr .docker , default_attr .docker ])
322322 }
323323}
324+
task GetBasecallModel {
    meta {
        description: "Getting the basecall model string of an ONT BAM"
    }
    parameter_meta {
        bam: {
            description: "BAM to operate on",
            localization_optional: true
        }
        runid_2_model: "The basecall model for each run."
    }
    input {
        File bam
    }
    output {
        # One entry per distinct (runid, basecall_model) pair found in the @RG header lines.
        Map[String, String] runid_2_model = read_map("results.tsv")
    }

    command <<<
        # NOTE: pipefail is intentionally not enabled; the per-read-group grep calls below
        # are allowed to find nothing, in which case the corresponding field is left empty.
        set -eux

        # The BAM path is passed to samtools as given (localization is optional),
        # so samtools needs a token to read the header when the path is remote.
        export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)

        # A header without any @RG line makes grep exit non-zero, which fails the task here.
        samtools view -H "~{bam}" | grep "^@RG" > one_rg_per_line.txt

        # Start from an empty scratch file so the appends below never pick up stale content.
        : > results.raw.tsv
        while IFS= read -r line
        do
            # Split the read group's DS tag into one key=value token per line.
            echo "$line" | tr '\t' '\n' | grep "^DS:" | sed "s/^DS://" | tr ' ' '\n' > tmp.txt
            runid=$(grep "^runid=" tmp.txt | awk -F '=' '{print $2}')
            model=$(grep "^basecall_model=" tmp.txt | awk -F '=' '{print $2}')
            # printf rather than 'echo -e' so backslashes inside values are written verbatim.
            printf '%s\t%s\n' "${runid}" "${model}" >> results.raw.tsv
        done < one_rg_per_line.txt

        # Several read groups can carry the same runid; collapse identical rows (keeping
        # first-seen order) so read_map does not see the same key more than once.
        # A runid paired with two different models is left as two rows on purpose:
        # that ambiguity should surface as an error rather than be silently resolved.
        awk '!seen[$0]++' results.raw.tsv > results.tsv
    >>>

    runtime {
        cpu: 1
        memory: "4 GiB"
        disks: "local-disk 10 HDD"
        preemptible: 2
        maxRetries: 1
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
    }
}
Original file line number Diff line number Diff line change @@ -1280,3 +1280,46 @@ task SummarizePBI {
12801280 docker : select_first ([runtime_attr .docker , default_attr .docker ])
12811281 }
12821282}
1283+
# todo: primrose is rebranded as jasmine, take care of that later
task VerifyPacBioBamHasAppropriatePrimroseRuns {
    meta {
        description: "Verify that a PacBio's BAM has primrose run on all its read groups"
    }
    input {
        # Declared as String, so the path is handed to samtools as-is rather than localized.
        String bam
    }

    output {
        # Movie names (taken from the @RG PU tags) that do not appear in any primrose
        # command line recorded in the header; empty when every movie is covered.
        Array[String] readgroups_missing_primrose = read_lines("movies_without_primrose.txt")
    }

    command <<<
        set -eux

        export GCS_OAUTH_TOKEN=$(gcloud auth application-default print-access-token)
        samtools view -H "~{bam}" > header.txt

        # get read groups' movies
        grep "^@RG" header.txt | tr '\t' '\n' | grep "^PU:" | awk -F ':' '{print $2}' | sort > readgroup.movies.txt
        cat readgroup.movies.txt

        # get primrose PG lines
        # A header with no primrose @PG record is exactly the situation this task reports on,
        # so the final grep finding nothing must not abort the script under 'set -e';
        # '|| true' keeps going with an empty primrose.pg.lines.txt.
        grep "^@PG" header.txt | grep -F 'ID:primrose' | tr '\t' '\n' | grep '^CL:' > primrose.pg.lines.txt || true
        # Echo the recorded command lines one token per line, for the task log only.
        tr ' ' '\n' < primrose.pg.lines.txt

        # Make sure the output file exists even when nothing is missing.
        touch movies_without_primrose.txt
        while IFS= read -r readgroup; do
            # -F: match the movie name literally, not as a regular expression;
            # --: a name starting with '-' is not mistaken for an option.
            if ! grep -qF -- "${readgroup}" primrose.pg.lines.txt; then echo "${readgroup}" >> movies_without_primrose.txt; fi
        done < readgroup.movies.txt
    >>>

    runtime {
        cpu: 1
        memory: "4 GiB"
        disks: "local-disk 10 HDD"
        preemptible: 2
        maxRetries: 1
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.3"
    }
}
You can’t perform that action at this time.
0 commit comments