Skip to content

Commit 971114d

Browse files
authored
Remove duplicate position filter (#9)
* Fix issue with multi-allelic SNPs in reference panel
* Remove duplicate position filter
1 parent 37080e8 commit 971114d

File tree

2 files changed

+13
-26
lines changed

2 files changed

+13
-26
lines changed

src/main/java/genepi/imputationserver/steps/fastqc/StatisticsTask.java

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -348,21 +348,6 @@ private void processLine(MinimalVariantContext snp, List<SitesEntry> refSnps, in
348348
return;
349349
}
350350

351-
// count duplicates
352-
353-
if ((chunk.lastPos == snp.getStart() && chunk.lastPos > 0)) {
354-
355-
if (insideChunk) {
356-
duplicates++;
357-
excludedSnpsWriter.write(snp, "Duplicate");
358-
filtered++;
359-
}
360-
361-
chunk.lastPos = snp.getStart();
362-
return;
363-
364-
}
365-
366351
// update last pos only when not filtered
367352
if (!snp.isFiltered()) {
368353
chunk.lastPos = snp.getStart();

src/test/java/genepi/imputationserver/steps/QualityControlCommandTest.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ public void testQcStatisticsAllChunksFailed() throws Exception {
135135
OutputReader log = new OutputReader(CLOUDGENE_LOG);
136136

137137
// check statistics
138-
assertTrue(log.hasInMemory("Alternative allele frequency > 0.5 sites: 37,503"));
139-
assertTrue(log.hasInMemory("Duplicated sites: 618"));
138+
assertTrue(log.hasInMemory("Alternative allele frequency > 0.5 sites: 37,610"));
139+
assertTrue(log.hasInMemory("Duplicated sites: 0"));
140140
assertTrue(log.hasInMemory("36 Chunk(s) excluded"));
141141
assertTrue(log.hasInMemory("No chunks passed the QC step"));
142142

@@ -183,8 +183,9 @@ public void testQcStatisticsAllChunksPassed() throws Exception {
183183
OutputReader log = new OutputReader(CLOUDGENE_LOG);
184184

185185
// check statistics
186-
assertTrue(log.hasInMemory("Excluded sites in total: 3,058"));
187-
assertTrue(log.hasInMemory("Remaining sites in total: 117,498"));
186+
log.view();
187+
assertTrue(log.hasInMemory("Excluded sites in total: 2,744"));
188+
assertTrue(log.hasInMemory("Remaining sites in total: 117,785"));
188189

189190
}
190191

@@ -201,7 +202,7 @@ public void testCountSitesForOneChunkedContig() throws Exception {
201202
File[] files = new File(TEST_DATA_TMP).listFiles();
202203
Arrays.sort(files);
203204
// baseline from an earlier job execution
204-
int[] array = { 4750, 5174, 5106, 5832, 5318, 4588, 968, 3002, 5781, 5116, 5699, 6334, 3188 };
205+
int[] array = { 4763, 5189, 5123, 5845, 5327, 4595, 970, 3007, 5792, 5129, 5711, 6345, 3195 };
205206
int pos = 0;
206207

207208
for (File file : files) {
@@ -426,8 +427,9 @@ public void testQcStatisticsAllowStrandFlips() throws Exception {
426427
OutputReader log = new OutputReader(CLOUDGENE_LOG);
427428

428429
// check statistics
429-
assertTrue(log.hasInMemory("Excluded sites in total: 3,058"));
430-
assertTrue(log.hasInMemory("Remaining sites in total: 117,498"));
430+
log.view();
431+
assertTrue(log.hasInMemory("Excluded sites in total: 2,744"));
432+
assertTrue(log.hasInMemory("Remaining sites in total: 117,785"));
431433

432434
}
433435

@@ -443,8 +445,8 @@ public void testQcStatisticsDontAllowStrandFlips() throws Exception {
443445
OutputReader log = new OutputReader(CLOUDGENE_LOG);
444446

445447
// check statistics
446-
assertTrue(log.hasInMemory("Excluded sites in total: 3,058"));
447-
assertTrue(log.hasInMemory("Remaining sites in total: 117,498"));
448+
assertTrue(log.hasInMemory("Excluded sites in total: 2,744"));
449+
assertTrue(log.hasInMemory("Remaining sites in total: 117,785"));
448450
assertTrue(log.hasInMemory(
449451
"<b>Error:</b> More than -1 obvious strand flips have been detected. Please check strand. Imputation cannot be started!"));
450452

@@ -462,8 +464,8 @@ public void testQcStatisticsDontAllowAlleleSwitches() throws Exception {
462464
OutputReader log = new OutputReader(CLOUDGENE_LOG);
463465

464466
// check statistics
465-
assertTrue(log.hasInMemory("Excluded sites in total: 121,176"));
466-
assertTrue(log.hasInMemory("Allele switch: 118,209"));
467+
assertTrue(log.hasInMemory("Excluded sites in total: 121,140"));
468+
assertTrue(log.hasInMemory("Allele switch: 118,443"));
467469
assertTrue(log.hasInMemory("No chunks passed the QC step. Imputation cannot be started!"));
468470
}
469471

0 commit comments

Comments
 (0)