diff --git a/HISTORY.md b/HISTORY.md index b80a3b1..c69d5b9 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,8 +1,12 @@ -## 0.1.5 (In progress) +## 0.1.5 (15 March 2014) - Move to MIT licensed GATK 3.0 framework. +- Support bgzipped inputs for variant assessment. Thanks to Severine Catreux. - Support lightweight loading options for gemini integration to avoid large load times with new gene tables in gemini 0.6.5. +- Avoid running GATK VariantEval which causes intermittent java core dumps. +- Avoid re-runs of callable regions when already prepared using non-GATK chanjo-based + methods. ## 0.1.4 (5 March 2014) diff --git a/README.md b/README.md index 0b7ebdf..8cbc308 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ associated with different variant representations. ### Download -The latest release is 0.1.4 (5 March 2014): [bcbio.variation-0.1.4-standalone.jar][dl]. +The latest release is 0.1.5 (15 March 2014): [bcbio.variation-0.1.5-standalone.jar][dl]. Run from the command line: $ java -jar bcbio.variation-VERSION-standalone.jar [arguments] @@ -44,7 +44,7 @@ the library for variant comparison, normalization and ensemble calling. Note that bcbio.variation requires Java 1.7 since the underlying GATK libraries are not compatible with earlier versions. -[dl]: https://github.com/chapmanb/bcbio.variation/releases/download/v0.1.4/bcbio.variation-0.1.4-standalone.jar +[dl]: https://github.com/chapmanb/bcbio.variation/releases/download/v0.1.5/bcbio.variation-0.1.5-standalone.jar ### As a library diff --git a/project.clj b/project.clj index 438d996..d07d1ba 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject bcbio.variation "0.1.5-SNAPSHOT" +(defproject bcbio.variation "0.1.5" :description "Toolkit to analyze genomic variation data, built on the GATK with Clojure" :license {:name "MIT" :url "http://www.opensource.org/licenses/mit-license.html"} :dependencies [[org.clojure/clojure "1.5.1"] diff --git a/src/bcbio/variation/callable.clj b/src/bcbio/variation/callable.clj index 940c60b..92eaa5a 100644 --- a/src/bcbio/variation/callable.clj +++ b/src/bcbio/variation/callable.clj @@ -31,7 +31,8 @@ (if-not (fs/exists? base-dir) (fs/mkdirs base-dir)) (broad/index-bam align-bam) - (broad/run-gatk "CallableLoci" args file-info {:out [:out-bed :out-summary]}) + (when (itx/needs-run? (:out-bed file-info)) + (broad/run-gatk "CallableLoci" args file-info {:out [:out-bed :out-summary]})) (:out-bed file-info))) (defn features-in-region [source space start end] diff --git a/src/bcbio/variation/compare.clj b/src/bcbio/variation/compare.clj index a577d0a..941f4f2 100644 --- a/src/bcbio/variation/compare.clj +++ b/src/bcbio/variation/compare.clj @@ -157,18 +157,20 @@ (let [c-files (select-by-concordance (:sample exp) c1 c2 (:ref exp) :out-dir (get-in config [:dir :out]) :intervals (:intervals exp)) - eval (calc-variant-eval-metrics (:file c1) (:file c2) (:ref exp) - :out-base (first c-files) - :intervals (:intervals exp)) - c-eval (calc-variant-eval-metrics (:file c1) (:file c2) (:ref exp) - :out-base (fsp/add-file-part (first c-files) "callable") - :intervals (callable-intervals exp c1 c2))] + ;; eval (calc-variant-eval-metrics (:file c1) (:file c2) (:ref exp) + ;; :out-base (first c-files) + ;; :intervals (:intervals exp)) + ;; c-eval (calc-variant-eval-metrics (:file c1) (:file c2) (:ref exp) + ;; :out-base (fsp/add-file-part (first c-files) "callable") + ;; :intervals (callable-intervals exp c1 c2)) + ] {:c-files (zipmap-ordered (map keyword ["concordant" (discordant-name c1) (discordant-name c2)]) c-files) :c1 c1 :c2 c2 :exp exp :dir (config :dir) - :metrics (report/concordance-report-metrics (:sample exp) eval) - :callable-metrics (report/concordance-report-metrics (:sample exp) c-eval)}))) + ;; :metrics (report/concordance-report-metrics (:sample exp) eval) + ;; :callable-metrics (report/concordance-report-metrics (:sample exp) c-eval) + }))) (defn compare-two-vcf "Compare two VCF files, handling standard and haploid specific comparisons." diff --git a/src/bcbio/variation/normalize.clj b/src/bcbio/variation/normalize.clj index f651f39..bcb0af2 100644 --- a/src/bcbio/variation/normalize.clj +++ b/src/bcbio/variation/normalize.clj @@ -551,6 +551,14 @@ (neg-qual? xs) [] :else xs))) +(defn zipsafe-reader + "Provide a reader, handling gzipped or plain text inputs." + [f] + (reader + (cond + (.endsWith f ".gz") (-> f input-stream java.util.zip.GZIPInputStream.) + :else f))) + (defn clean-problem-vcf "Clean VCF file which GATK parsers cannot handle due to illegal characters. Fixes: @@ -559,7 +567,7 @@ - Removes spaces in INFO fields." [in-vcf-file ref-file sample call & {:keys [out-dir]}] (let [get-ref-base (ref-base-getter ref-file) - out-file (fsp/add-file-part in-vcf-file "preclean" out-dir)] + out-file (string/replace (fsp/add-file-part in-vcf-file "preclean" out-dir) ".vcf.gz" ".vcf")] (letfn [(remove-gap [n xs] (assoc xs n (-> (nth xs n) @@ -584,7 +592,7 @@ (string/join "\t"))))] (when (itx/needs-run? out-file) (itx/with-tx-file [tx-out-file out-file] - (with-open [rdr (reader in-vcf-file) + (with-open [rdr (zipsafe-reader in-vcf-file) wtr (writer tx-out-file)] (doall (map #(.write wtr (str % "\n"))