Skip to content

Commit 6dabe0b

Browse files
committed
Ensure BED files passed to evaluations are converted to reference material coordinates when inputs differ (hg19->GRCh37). Thanks to Severine Catreux.
1 parent cb7cc75 commit 6dabe0b

File tree

11 files changed

+228
-20
lines changed

11 files changed

+228
-20
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ test/data/phased/*-cmp*.vcf
5454
test/data/phased/work/
5555
test/data/digrade/*.idx
5656
test/data/digrade/*.csv
57+
test/data/digrade/*-sorted.bed
58+
test/data/digrade/hg19/*-sorted.bed
5759
test/data/digrade/work/
5860
test/data/svs/*.idx
5961
test/data/svs/*-sorted.bed

HISTORY.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
- Avoid errors on converting hg19 to GRCh37 where hg19 variants contain hg19 hap
44
contigs with no equivalent in GRCh37. It now drops these variants instead
55
of generating an error. Thanks to Severine Catreux.
6+
- Ensure BED files passed to evaluations are converted to reference material
7+
coordinates when inputs differ (hg19->GRCh37). Thanks to Severine Catreux.
68
- Avoid issues with running LeftAlignVariants on indels with END tags. Thanks to
79
Justin Johnson.
810
- Move to external bcbio.run tool to help abstract out some core functionality

src/bcbio/align/interval.clj

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,21 @@
3030
(map (partial update-contig-name name-map))
3131
(remove nil?)))))))
3232
out-file))
33+
34+
(defn- maybe-remap-bed
35+
[interval int-ref base-ref out-dir]
36+
(when interval
37+
(if (and (not (nil? int-ref)) (not= base-ref int-ref))
38+
(rename-bed interval base-ref :out-dir out-dir)
39+
interval)))
40+
41+
(defn prep-multi
42+
"Prepare multiple input intervals, remapping chromosome names as needed."
43+
([to-prep ref-file orig out-dir]
44+
(->> to-prep
45+
(map (juxt :intervals :ref))
46+
(map (fn [[bed ref]] (maybe-remap-bed bed ref ref-file out-dir)))
47+
(concat (flatten [orig]))
48+
(remove nil?)))
49+
([to-prep ref-file out-dir]
50+
(prep-multi to-prep ref-file [] out-dir)))

src/bcbio/variation/compare.clj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
[bcbio.variation.evaluate :only [calc-variant-eval-metrics]]
2020
[bcbio.variation.filter :only [variant-filter variant-format-filter
2121
pipeline-recalibration]]
22-
[bcbio.variation.filter.intervals :only [combine-multiple-intervals]]
2322
[bcbio.variation.multiple :only [prep-cmp-name-lookup pipeline-compare-multiple]]
2423
[bcbio.variation.multisample :only [compare-two-vcf-flexible
2524
multiple-samples?]]
@@ -32,9 +31,11 @@
3231
[clj-yaml.core :as yaml]
3332
[me.raynes.fs :as fs]
3433
[lonocloud.synthread :as ->]
34+
[bcbio.align.interval :as ainterval]
3535
[bcbio.run.fsp :as fsp]
3636
[bcbio.run.itx :as itx]
3737
[bcbio.run.broad :as broad]
38+
[bcbio.variation.filter.intervals :as fintervals]
3839
[bcbio.variation.grade :as grade]
3940
[bcbio.variation.phasing :as phasing]
4041
[bcbio.variation.report :as report]
@@ -46,8 +47,7 @@
4647
"Variant comparison producing 3 files: concordant and both directions discordant"
4748
[sample call1 call2 ref & {:keys [out-dir intervals]}]
4849
(let [base-dir (if (nil? out-dir) (fs/parent (:file call1)) out-dir)
49-
ready-intervals (remove nil? (flatten [intervals (:intervals call1)
50-
(:intervals call2)]))
50+
ready-intervals (ainterval/prep-multi [call1 call2] ref intervals out-dir)
5151
sample (or sample (-> call1 :file gvc/get-vcf-header .getGenotypeSamples first))]
5252
(if-not (fs/exists? base-dir)
5353
(fs/mkdirs base-dir))
@@ -116,7 +116,7 @@
116116
(let [out-dir (get-in config [:dir :prep] (get-in config [:dir :out]))
117117
transition (partial do-transition config)
118118
align-bams (prepare-input-bams exp out-dir)
119-
all-intervals (remove nil? (map :intervals (cons exp (:calls exp))))
119+
all-intervals (ainterval/prep-multi (cons exp (:calls exp)) (:ref exp) out-dir)
120120
start-vcfs (vec (map #(gatk-normalize % exp all-intervals out-dir transition)
121121
(:calls exp)))
122122
_ (transition :combine "Creating merged VCF files for all comparisons")
@@ -148,8 +148,8 @@
148148
(let [out-dir (get-in config [:dir :prep] (get-in config [:dir :out]))
149149
align-bams (remove nil? (map :align [c1 c2]))]
150150
(when (and (:intervals exp) (seq align-bams))
151-
(combine-multiple-intervals (:intervals exp) align-bams (:ref exp)
152-
:out-dir out-dir :name (:sample exp)))))
151+
(fintervals/combine-multiple (:intervals exp) align-bams (:ref exp)
152+
:out-dir out-dir :name (:sample exp)))))
153153
(discordant-name [x]
154154
(format "%s-discordant" (:name x)))
155155
(zipmap-ordered [xs1 xs2]

src/bcbio/variation/filter/intervals.clj

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@
8080
(catch UserException$BadInput e []))
8181
(rest intervals)))))
8282

83+
;; ## Merge BED files
84+
8385
(defn- prep-intervals-by-contig
8486
"Intersect and exclude intervals on a contig."
8587
[start-intervals exclude-intervals loc-parser combine-rule]
@@ -109,7 +111,7 @@
109111
combine-rule)
110112
contigs))))
111113

112-
(defn combine-multiple-intervals
114+
(defn combine-multiple
113115
"Combine intervals from an initial BED and coverage BAM files."
114116
[initial-bed align-bams ref & {:keys [out-dir name exclude-intervals combine-rule
115117
more-beds]}]
@@ -137,9 +139,8 @@
137139
(let [base-intervals (:intervals exp)
138140
all-aligns (set (remove nil? (map :align (cons exp (:calls exp)))))]
139141
(when (and base-intervals (seq all-aligns))
140-
(combine-multiple-intervals base-intervals all-aligns
141-
(:ref exp)
142-
:exclude-intervals (:exclude-intervals exp)
143-
:name (:sample exp)
144-
:out-dir (get-in config [:dir :prep] (get-in config [:dir :out]))))))
145-
142+
(combine-multiple base-intervals all-aligns
143+
(:ref exp)
144+
:exclude-intervals (:exclude-intervals exp)
145+
:name (:sample exp)
146+
:out-dir (get-in config [:dir :prep] (get-in config [:dir :out]))))))

src/bcbio/variation/utils/svmerge.clj

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,8 @@
4545
(-> (combine/combine-variants [call-safesv svready-file] ref-file
4646
:merge-type :full)
4747
(fs/rename (:calls out-files)))
48-
(-> (intervals/combine-multiple-intervals region-file [] ref-file
49-
:combine-rule :union
50-
:more-beds [sv-bed])
48+
(-> (intervals/combine-multiple region-file [] ref-file
49+
:combine-rule :union :more-beds [sv-bed])
5150
(fs/rename (:regions out-files)))
5251
))
5352
out-files))

test/bcbio/variation/test/grade.clj

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,24 @@
6767
(-> cmp :grade-breakdown :discordant :snp :shared :hethom) => 1
6868
(-> cmp :c-files :eval-discordant) => out-file)))
6969

70+
(facts "Comparisons where inputs have alternative reference sequence: hg19->GRCh37"
71+
(let [base-dir (fs/file data-dir "digrade")
72+
c1o {:file (str (fs/file base-dir "NA12878-cmp-r1.vcf"))
73+
:prep true
74+
:name "ref" :type "grading-ref"}
75+
c2o {:file (str (fs/file base-dir "hg19" "NA12878-cmp-r3.vcf"))
76+
:name "eval"
77+
:prep true
78+
:intervals (str (fs/file base-dir "hg19" "NA12878-cmp-r3-intervals.bed"))
79+
:ref (str (fs/file data-dir "hg19.fa"))}
80+
exp {:ref ref-file :sample "NA12878" :approach "grade"
81+
:intervals (str (fs/file base-dir "NA12878-cmp-r1-intervals.bed"))}
82+
config {:dir {:out (str (fs/file base-dir "work"))}}]
83+
(fsp/remove-path (get-in config [:dir :out]))
84+
(fs/mkdirs (get-in config [:dir :out]))
85+
(let [[c1 c2] (#'compare/prepare-vcf-calls (assoc exp :calls [c1o c2o]) config)]
86+
(compare-two-vcf c1 c2 exp config))))
87+
7088
(facts "Normalize input VCFs containing END tags"
7189
(let [base-dir (fs/file data-dir "digrade")
7290
c1 {:file (str (fs/file base-dir "NA12878-cmp-r1.vcf"))

test/bcbio/variation/test/validate.clj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
[bcbio.variation.haploid :exclude [-main]]
55
[bcbio.variation.filter.attr]
66
[bcbio.variation.filter.classify]
7-
[bcbio.variation.filter.intervals]
87
[bcbio.variation.filter]
98
[bcbio.variation.validate]
109
[bcbio.variation.variantcontext :exclude [-main]])
1110
(:require [me.raynes.fs :as fs]
1211
[bcbio.run.fsp :as fsp]
1312
[bcbio.run.itx :as itx]
14-
[bcbio.variation.filter.custom :as cf]))
13+
[bcbio.variation.filter.custom :as cf]
14+
[bcbio.variation.filter.intervals :as fintervals]))
1515

1616
(background
1717
(around :facts
@@ -95,5 +95,5 @@
9595
(cf/freebayes-filter fb-vcf ref) => fb-filter-out)
9696

9797
(facts "Prepare combined interval lists based on filtering criteria"
98-
(combine-multiple-intervals region-bed [align-bam] ref
99-
:exclude-intervals exclude-bed) => region-multi-out)
98+
(fintervals/combine-multiple region-bed [align-bam] ref
99+
:exclude-intervals exclude-bed) => region-multi-out)

test/data/digrade/NA12878-cmp-r1-intervals.bed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
22 1 15000

test/data/digrade/hg19/NA12878-cmp-r3-intervals.bed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
chr22 1 15000

0 commit comments

Comments
 (0)