From 1a04afc328f90d75b09928361f2793084e9a6db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benjamin=20Schuster-Bo=CC=88ckler?= Date: Wed, 22 Jun 2022 15:48:56 +0100 Subject: [PATCH] Report alignment position for pileup --- project.clj | 2 +- src/cljam/algo/pileup.clj | 17 ++++++++++------- src/cljam/io/pileup.clj | 5 +++-- test/cljam/io/pileup_test.clj | 25 +++++++++++++------------ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/project.clj b/project.clj index 6bfe1c50..653e96d4 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject cljam "0.8.1-SNAPSHOT" +(defproject cljam "0.8.2-SNAPSHOT" :description "A DNA Sequence Alignment/Map (SAM) library for Clojure" :url "https://github.com/chrovis/cljam" :license {:name "Apache License, Version 2.0" diff --git a/src/cljam/algo/pileup.clj b/src/cljam/algo/pileup.clj index 9e7a3f53..83de7c29 100644 --- a/src/cljam/algo/pileup.clj +++ b/src/cljam/algo/pileup.clj @@ -48,17 +48,18 @@ 93))) short-array)))) -(deftype PosBase [^char base indel]) +(deftype PosBase [^char base indel ^int pos]) (defn- seqs-at-ref [idx ^String s] (->> idx (map (fn [[op x xs]] - (let [c (if (number? x) (.charAt s x) x)] + (let [c (if (number? x) (.charAt s x) x) + rel-pos (if (number? x) (inc x) -1)] (case op - :m (->PosBase c nil) - :d (->PosBase c xs) - :i (->PosBase c (subs s (first xs) (last xs))))))) + :m (->PosBase c nil rel-pos) + :d (->PosBase c xs rel-pos) + :i (->PosBase c (subs s (first xs) (last xs)) rel-pos))))) object-array)) (defn index-cigar @@ -92,7 +93,8 @@ ^PosBase pb (aget ^objects (:seqs-at-ref aln) relative-pos) base (.base pb) indel (.indel pb) - deletion? (number? indel)] + qpos (.pos pb) + deletion? (= \* base)] (plpio/->PileupBase (zero? relative-pos) (.mapq aln) @@ -101,7 +103,8 @@ (flag/reversed? (.flag aln)) (= ref-pos (.end aln)) (when-not deletion? indel) - (when deletion? indel) + (when deletion? indel) ;; TODO this is incorrect IMO - D in the cigar refers to the base _after_ this in the ref, so this field is set for the wrong column! + qpos (.qname aln) (dissoc aln :seqs-at-ref :quals-at-ref)))) diff --git a/src/cljam/io/pileup.clj b/src/cljam/io/pileup.clj index 01bde034..0e52e461 100644 --- a/src/cljam/io/pileup.clj +++ b/src/cljam/io/pileup.clj @@ -17,6 +17,7 @@ ^boolean end? insertion ;; String? deletion ;; int? + ^int pos qname ;; String? alignment]) ;; SAMAlignment? @@ -68,7 +69,7 @@ (when (zero? upper-base-int) (throw (ex-info (format "Invalid character %s in %s" base column) {:column column :base base}))) (if (= len (+ i (if mapq 3 1))) - (persistent! (conj! results (PileupBase. start? mapq upper-base -1 reverse? false nil nil nil nil))) + (persistent! (conj! results (PileupBase. start? mapq upper-base -1 reverse? false nil nil -1 nil nil))) (let [x (.charAt column (+ i (if mapq 3 1))) ins? (= x \+) del? (= x \-) @@ -91,7 +92,7 @@ (if indel-num (+ 1 indel-num-chars indel-num) 0) (if end? 1 0)))] (recur next-pos - (conj! results (PileupBase. start? mapq upper-base -1 reverse? end? (when ins? indel-seq) (when del? indel-num) nil nil)))))) + (conj! results (PileupBase. start? mapq upper-base -1 reverse? end? (when ins? indel-seq) (when del? indel-num) -1 nil nil)))))) (persistent! results))))) (defn- parse-pileup-line diff --git a/test/cljam/io/pileup_test.clj b/test/cljam/io/pileup_test.clj index 8bf837e8..71ac55d6 100644 --- a/test/cljam/io/pileup_test.clj +++ b/test/cljam/io/pileup_test.clj @@ -41,6 +41,7 @@ :end? true, :insertion nil, :deletion nil, + :pos -1, :qname nil, :alignment nil}]}) @@ -50,7 +51,7 @@ (deftest parse-bases-col (testing "without-ref" (are [?in ?out] - (= (mapv #(into {:qname nil :alignment nil} %) ?out) + (= (mapv #(into {:pos -1, :qname nil :alignment nil} %) ?out) (mapv rec->map (#'plpio/parse-bases-col nil ?in))) "" [] "A" [{:start? false, :mapq nil, :base \A, :qual -1, :reverse? false, :end? false, :insertion nil, :deletion nil}] @@ -78,7 +79,7 @@ {:start? true, :mapq 34, :base \A, :qual -1, :reverse? true, :end? false, :insertion nil, :deletion nil}])) (testing "with-ref" (are [?in ?out] - (= (mapv #(into {:qname nil :alignment nil} %) ?out) + (= (mapv #(into {:pos -1, :qname nil :alignment nil} %) ?out) (mapv rec->map (#'plpio/parse-bases-col \T ?in))) "" [] "A" [{:start? false, :mapq nil, :base \A, :qual -1, :reverse? false, :end? false, :insertion nil, :deletion nil}] @@ -117,26 +118,26 @@ "chr1\t10\tN\t0\t\t" {:rname "chr1", :pos 10, :ref \N, :count 0, :pile []} "chr1\t10\tN\t1\tA\tI" {:rname "chr1", :pos 10, :ref \N, :count 1, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? false, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil}]} + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? false, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil}]} "chr1\t10\tN\t2\taA\tIB" {:rname "chr1", :pos 10, :ref \N, :count 2, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil} - {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil}]} + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil} + {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil}]} "chr1\t10\tN\t2\taA-2NN$\tIB" {:rname "chr1", :pos 10, :ref \N, :count 2, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil} - {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? true, :insertion nil, :deletion 2, :qname nil, :alignment nil}]})) + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil} + {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? true, :insertion nil, :deletion 2, :pos -1, :qname nil, :alignment nil}]})) (testing "with-ref" (are [?in ?out] (= ?out (rec->map (#'plpio/parse-pileup-line ?in))) "chr1\t10\tA\t0\t\t" {:rname "chr1", :pos 10, :ref \A, :count 0, :pile []} "chr1\t10\ta\t1\t.\tI" {:rname "chr1", :pos 10, :ref \a, :count 1, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? false, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil}]} + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? false, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil}]} "chr1\t10\tA\t2\t,.\tIB" {:rname "chr1", :pos 10, :ref \A, :count 2, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil} - {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil}]} + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil} + {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil}]} "chr1\t10\tA\t2\t,.-2CA$\tIB" {:rname "chr1", :pos 10, :ref \A, :count 2, - :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :qname nil, :alignment nil} - {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? true, :insertion nil, :deletion 2, :qname nil, :alignment nil}]}))) + :pile [{:start? false, :mapq nil, :base \A, :qual 40, :reverse? true, :end? false, :insertion nil, :deletion nil, :pos -1, :qname nil, :alignment nil} + {:start? false, :mapq nil, :base \A, :qual 33, :reverse? false, :end? true, :insertion nil, :deletion 2, :pos -1, :qname nil, :alignment nil}]}))) (deftest reader (with-open [r (plpio/reader test-pileup-file)]