Skip to content

Commit 8a78790

Browse files
authored
HaplotypeCaller: fix -contamination argument, and add tests (#4455)
The -contamination argument was not hooked up properly in the HaplotypeCaller. This patch fixes the tool argument, and adds tests on artificially contaminated data to demonstrate that the feature works as intended. Resolves #4312
1 parent 4e87321 commit 8a78790

File tree

15 files changed

+1954
-27
lines changed

15 files changed

+1954
-27
lines changed

src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollection.java

+52 -14
Original file line number | Diff line number | Diff line change
@@ -72,37 +72,75 @@ public void copyStandardCallerArgsFrom( final StandardCallerArgumentCollection o
7272
@Argument(fullName = "contamination-fraction-per-sample-file", shortName = "contamination-file", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", optional = true)
7373
public File CONTAMINATION_FRACTION_FILE = null;
7474

75+
private DefaultedMap<String,Double> sampleContamination;
76+
private boolean mapHasContaminationSet = false;
77+
7578
/**
7679
* Returns true if there is some sample contamination present, false otherwise.
7780
* @return {@code true} iff there is some sample contamination
7881
*/
7982
public boolean isSampleContaminationPresent() {
80-
return (!Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0) || (sampleContamination != null && !sampleContamination.isEmpty());
83+
return contaminationFractionIsSet(CONTAMINATION_FRACTION) || mapHasContaminationSet;
8184
}
82-
83-
private DefaultedMap<String,Double> sampleContamination;
84-
85+
8586
/**
8687
* Returns an unmodifiable view of the map of SampleId -> contamination.
88+
*
89+
* The returned map will return a default value equal to the configured
90+
* {@link #CONTAMINATION_FRACTION} for samples whose contamination is not
91+
* explicitly set.
8792
*/
8893
public Map<String,Double> getSampleContamination() {
94+
if (sampleContamination == null) {
95+
setSampleContamination(Collections.emptyMap()); // default to empty map
96+
}
8997
return Collections.unmodifiableMap(sampleContamination);
9098
}
9199

92100
/**
93-
* Returns the sample contamination or CONTAMINATION_FRACTION if no contamination level was specified for this sample.
101+
* Set the sample contamination map using the provided map. The resulting map will have
102+
* its default value for unknown keys set equal to {@link #CONTAMINATION_FRACTION}, regardless
103+
* of any default value set in the provided map (if it's a DefaultedMap).
104+
*
105+
* @param sampleContamination Map of sample to contamination fraction with which to initialize our
106+
* sample contamination map. Replaces any existing values in our map.
107+
* The resulting map will have {@link #CONTAMINATION_FRACTION} as the default
108+
* value for unknown keys, regardless of any default set in the provided map.
94109
*/
95-
public Double getSampleContamination(final String sampleId){
96-
Utils.nonNull(sampleId);
97-
if (sampleContamination == null){
98-
setSampleContamination(new DefaultedMap<>(CONTAMINATION_FRACTION));//default to empty map
99-
}
100-
return sampleContamination.get(sampleId);
101-
}
102-
103-
public void setSampleContamination(final DefaultedMap<String, Double> sampleContamination) {
110+
public void setSampleContamination(final Map<String, Double> sampleContamination) {
104111
this.sampleContamination = new DefaultedMap<>(CONTAMINATION_FRACTION); //NOTE: a bit weird because it ignores the default from the argument and uses ours
105112
this.sampleContamination.putAll(sampleContamination); //make a copy to be safe
113+
114+
this.mapHasContaminationSet = contaminationIsPresentInMap(this.sampleContamination);
115+
}
116+
117+
/**
118+
* @param fraction double value to test
119+
* @return True if fraction represents non-zero contamination, otherwise false
120+
*/
121+
private boolean contaminationFractionIsSet(final double fraction) {
122+
return ! Double.isNaN(fraction) && fraction > 0.0;
123+
}
124+
125+
/**
126+
* Given a map of sample to contamination fraction, determines whether any samples have
127+
* a non-zero contamination fraction set.
128+
*
129+
* @param contaminationMap sample -> contamination fraction map to test
130+
* @return true if at least one sample has a non-zero contamination fraction, otherwise false
131+
*/
132+
private boolean contaminationIsPresentInMap(final Map<String, Double> contaminationMap) {
133+
if ( contaminationMap == null ) {
134+
return false;
135+
}
136+
137+
for ( final Map.Entry<String,Double> mapEntry : contaminationMap.entrySet() ) {
138+
if ( contaminationFractionIsSet(mapEntry.getValue()) ) {
139+
return true;
140+
}
141+
}
142+
143+
return false;
106144
}
107145

108146
/**

src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollectionUnitTest.java

+91 -5
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,21 @@
11
package org.broadinstitute.hellbender.tools.walkers.genotyper;
22

3+
import org.broadinstitute.hellbender.GATKBaseTest;
34
import org.broadinstitute.hellbender.utils.Utils;
45
import org.testng.Assert;
56
import org.testng.annotations.DataProvider;
67
import org.testng.annotations.Test;
78

89
import java.lang.reflect.Field;
910
import java.lang.reflect.Modifier;
10-
import java.util.ArrayList;
11-
import java.util.List;
12-
import java.util.Random;
11+
import java.util.*;
1312

1413
/**
15-
* Checks on Caller argument collection cloning.
14+
* Checks on Caller argument collection cloning and getting/setting of values.
1615
*
1716
* @author Valentin Ruano-Rubio &lt;[email protected]&gt;
1817
*/
19-
public final class StandardCallerArgumentCollectionUnitTest {
18+
public final class StandardCallerArgumentCollectionUnitTest extends GATKBaseTest {
2019

2120
public final static List<Class<? extends StandardCallerArgumentCollection>> COLLECTION_CLASSES;
2221

@@ -113,4 +112,91 @@ else if (type == Long.class)
113112
else
114113
return type;
115114
}
115+
116+
@Test
117+
public void testGetSampleContaminationUninitializedArgs() {
118+
final StandardCallerArgumentCollection args = new StandardCallerArgumentCollection();
119+
120+
Assert.assertFalse(args.isSampleContaminationPresent());
121+
Map<String, Double> returnedContaminationMap = args.getSampleContamination();
122+
123+
// The returned map should be officially empty, but return 0.0 on query for any sample (it's a DefaultedMap)
124+
Assert.assertTrue(returnedContaminationMap.isEmpty());
125+
Assert.assertEquals(returnedContaminationMap.get("MySample"), 0.0);
126+
}
127+
128+
@Test
129+
public void testGetSampleContaminationInitializedFraction() {
130+
final StandardCallerArgumentCollection args = new StandardCallerArgumentCollection();
131+
132+
args.CONTAMINATION_FRACTION = 0.1;
133+
Assert.assertTrue(args.isSampleContaminationPresent());
134+
Map<String, Double> returnedContaminationMap = args.getSampleContamination();
135+
136+
// The returned map should be officially empty, but return 0.1 on query for any sample (it's a DefaultedMap)
137+
Assert.assertTrue(returnedContaminationMap.isEmpty());
138+
Assert.assertEquals(returnedContaminationMap.get("MySample"), 0.1);
139+
}
140+
141+
@Test
142+
public void testGetSampleContaminationInitializedMap() {
143+
final StandardCallerArgumentCollection args = new StandardCallerArgumentCollection();
144+
145+
Map<String,Double> contaminationMap = new HashMap<>();
146+
contaminationMap.put("Sample1", 0.1);
147+
contaminationMap.put("Sample2", 0.2);
148+
args.setSampleContamination(contaminationMap);
149+
150+
Assert.assertTrue(args.isSampleContaminationPresent());
151+
Map<String, Double> returnedContaminationMap = args.getSampleContamination();
152+
153+
// The returned map should be of size 2, and return 0.0 on query for unknown samples (it's a DefaultedMap)
154+
Assert.assertEquals(returnedContaminationMap.size(), 2);
155+
Assert.assertEquals(returnedContaminationMap.get("Sample1"), 0.1);
156+
Assert.assertEquals(returnedContaminationMap.get("Sample2"), 0.2);
157+
Assert.assertEquals(returnedContaminationMap.get("Sample3"), 0.0);
158+
Assert.assertEquals(returnedContaminationMap.get("Sample4"), 0.0);
159+
}
160+
161+
@Test
162+
public void testGetSampleContaminationInitializedMapAndFraction() {
163+
final StandardCallerArgumentCollection args = new StandardCallerArgumentCollection();
164+
165+
args.CONTAMINATION_FRACTION = 0.05;
166+
Map<String,Double> contaminationMap = new HashMap<>();
167+
contaminationMap.put("Sample1", 0.1);
168+
contaminationMap.put("Sample2", 0.2);
169+
args.setSampleContamination(contaminationMap);
170+
171+
Assert.assertTrue(args.isSampleContaminationPresent());
172+
Map<String, Double> returnedContaminationMap = args.getSampleContamination();
173+
174+
// The returned map should be of size 2, and return 0.05 on query for unknown samples (it's a DefaultedMap)
175+
Assert.assertEquals(returnedContaminationMap.size(), 2);
176+
Assert.assertEquals(returnedContaminationMap.get("Sample1"), 0.1);
177+
Assert.assertEquals(returnedContaminationMap.get("Sample2"), 0.2);
178+
Assert.assertEquals(returnedContaminationMap.get("Sample3"), 0.05);
179+
Assert.assertEquals(returnedContaminationMap.get("Sample4"), 0.05);
180+
}
181+
182+
@Test
183+
public void testGetSampleContaminationMapWithNoContamination() {
184+
final StandardCallerArgumentCollection args = new StandardCallerArgumentCollection();
185+
186+
// Create a map that doesn't actually have any contamination set
187+
Map<String,Double> contaminationMap = new HashMap<>();
188+
contaminationMap.put("Sample1", 0.0);
189+
contaminationMap.put("Sample2", 0.0);
190+
args.setSampleContamination(contaminationMap);
191+
192+
Assert.assertFalse(args.isSampleContaminationPresent());
193+
Map<String, Double> returnedContaminationMap = args.getSampleContamination();
194+
195+
// The returned map should be of size 2, and return 0.0 on queries for any sample (it's a DefaultedMap)
196+
Assert.assertEquals(returnedContaminationMap.size(), 2);
197+
Assert.assertEquals(returnedContaminationMap.get("Sample1"), 0.0);
198+
Assert.assertEquals(returnedContaminationMap.get("Sample2"), 0.0);
199+
Assert.assertEquals(returnedContaminationMap.get("Sample3"), 0.0);
200+
Assert.assertEquals(returnedContaminationMap.get("Sample4"), 0.0);
201+
}
116202
}

0 commit comments

Comments
 (0)