From 6d04ff41163023114e4c4ed4c2cca9b32fe24b7e Mon Sep 17 00:00:00 2001 From: chapmanb Date: Sun, 18 May 2014 13:09:14 -0400 Subject: [PATCH] Avoid trying to re-align and normalize very long indels which cause null pointer errors. These are dealt with by structural variation comparisons. Thanks to Severine Catreux --- project.clj | 6 +++--- src/bcbio/variation/complex.clj | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/project.clj b/project.clj index 6012271..23bd282 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject bcbio.variation "0.1.6" +(defproject bcbio.variation "0.1.7-SNAPSHOT" :description "Toolkit to analyze genomic variation data, built on the GATK with Clojure" :license {:name "MIT" :url "http://www.opensource.org/licenses/mit-license.html"} :dependencies [[org.clojure/clojure "1.5.1"] @@ -21,8 +21,8 @@ [org.simpleframework/simple-xml "2.0.4"] [org.apache.servicemix.bundles/org.apache.servicemix.bundles.jets3t "0.8.1_1"] ;; - [org.biojava/biojava3-core "3.0.4"] - [org.biojava/biojava3-alignment "3.0.4"] + [org.biojava/biojava3-core "3.0.8"] + [org.biojava/biojava3-alignment "3.0.8"] [org.clojars.chapmanb/circdesigna "0.0.2" :exclusions [net.sf.beaver/beaver-ant]] [nz.ac.waikato.cms.weka/weka-stable "3.6.6"] [org.clojars.chapmanb/fast-random-forest "0.98"] diff --git a/src/bcbio/variation/complex.clj b/src/bcbio/variation/complex.clj index 407dd60..3ef604d 100644 --- a/src/bcbio/variation/complex.clj +++ b/src/bcbio/variation/complex.clj @@ -30,8 +30,9 @@ (defn- get-vc-alleles [vc] (vec (map #(.getDisplayString %) (cons (:ref-allele vc) (:alt-alleles vc))))) -(defn is-multi-indel? - "Identify complex indels that can be split into multiple calls." +(defn- is-multi-indel? + "Identify complex indels that can be split into multiple calls. + Caps indels we can operate on at 5000bp to avoid realignment errors for longer." [vc] (letfn [(monomorphic-alleles? 
[vc] (= 1 (->> (get-vc-alleles vc) @@ -44,8 +45,12 @@ (not (monomorphic-alleles? vc)))) (has-ref-padding-mismatch? [vc] (let [alleles (get-vc-alleles vc)] - (not= (nth (first alleles) 0) (nth (second alleles) 0))))] + (not= (nth (first alleles) 0) (nth (second alleles) 0)))) + (splittable-size? [vc] + (< (apply max (map count (get-vc-alleles vc))) + 5000))] (and (= "INDEL" (:type vc)) + (splittable-size? vc) (or (has-multiple-nonref-alleles? vc) (has-ref-padding-mismatch? vc)))))