Skip to content

Commit a82cfe0

Browse files
clintvalnh13
andauthored
Make PileupBuilder.includeMapPositionsOutsideFrInsert intuitively correct (#981)
* Make PileupBuilder.includeMapPositionsOutsideFrInsert intuitively correct * Revert back to the original isFrPair filtering logic and document better * chore: update test name * fix: codecov try specifying the environment (#982) --------- Co-authored-by: Nils Homer <[email protected]>
1 parent 3a74fd2 commit a82cfe0

File tree

4 files changed

+42
-14
lines changed

4 files changed

+42
-14
lines changed

.github/workflows/unittests.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ on: [push, pull_request]
55
jobs:
66
test:
77
runs-on: ubuntu-latest
8+
environment: github-actions
89
steps:
910
- name: Checkout
1011
uses: actions/checkout@v2

src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala

+14-9
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,15 @@ object PileupBuilder {
6161
/** Allow records flagged as supplementary alignments to contribute to a pileup by default. */
6262
val includeSupplementalAlignments: Boolean = false
6363

64-
/** Exclude any record of an FR pair where the site requested is outside the insert by default. */
64+
/** For FR pairs only, determine if we should keep the pair of reads if the site requested is outside of the
65+
* insert. By default this filter is set to <false> which means for FR pairs where R1 and R2 overlap each other
66+
* and *extend* past the start coordinates of their mate, the pair will be filtered out if the position requested
67+
* overlaps the end of either read in the span that is beyond the start coordinate of the read's mate.
68+
*
69+
* See the following GitHub issue comment for a visualization of when this filter applies:
70+
*
71+
* - https://github.com/fulcrumgenomics/fgbio/issues/980#issuecomment-2075049301
72+
*/
6573
val includeMapPositionsOutsideFrInsert: Boolean = false
6674
}
6775

@@ -116,14 +124,11 @@ object PileupBuilder {
116124
)
117125
}
118126

119-
/** Returns true if <rec> is in a mapped FR pair but the position <pos> is outside the insert coordinates of <rec>.
120-
* Returns false if <rec> is in a mapped FR pair and the position <pos> is inside the insert coordinates of <rec> or
121-
* <rec> is not in a mapped FR pair.
122-
*/
123-
private def positionIsOutsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = {
124-
rec.isFrPair && {
127+
/** Returns true if <rec> is in a mapped FR pair and the position <pos> is inside the insert coordinates of <rec>. */
128+
private def positionIsInsideFrInsert(rec: SamRecord, refIndex: Int, pos: Int): Boolean = {
129+
refIndex == rec.refIndex && rec.isFrPair && {
125130
val (start, end) = Bams.insertCoordinates(rec)
126-
rec.refIndex == refIndex && pos >= start && pos <= end
131+
pos >= start && pos <= end
127132
}
128133
}
129134

@@ -201,7 +206,7 @@ trait PileupBuilder extends PileupParameters {
201206
if (compare) compare = rec.end >= pos
202207
if (compare) compare = rec.start <= pos || PileupBuilder.startsWithInsertion(rec)
203208
if (compare) compare = if (!includeMapPositionsOutsideFrInsert && rec.isFrPair) {
204-
PileupBuilder.positionIsOutsideFrInsert(rec, refIndex = refIndex, pos = pos)
209+
PileupBuilder.positionIsInsideFrInsert(rec, refIndex = refIndex, pos = pos)
205210
} else { compare }
206211
compare
207212
}

src/main/scala/com/fulcrumgenomics/testing/SamBuilder.scala

+7-5
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,19 @@
2424

2525
package com.fulcrumgenomics.testing
2626

27-
import java.nio.file.Files
28-
import java.text.DecimalFormat
29-
import java.util.concurrent.atomic.AtomicLong
30-
3127
import com.fulcrumgenomics.FgBioDef._
3228
import com.fulcrumgenomics.alignment.Cigar
3329
import com.fulcrumgenomics.bam.api.SamRecord.{MissingBases, MissingQuals}
3430
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord, SamSource, SamWriter}
3531
import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata}
3632
import com.fulcrumgenomics.testing.SamBuilder._
33+
import htsjdk.samtools.SamPairUtil.PairOrientation
3734
import htsjdk.samtools._
3835

36+
import java.nio.file.Files
37+
import java.text.DecimalFormat
38+
import java.util
39+
import java.util.concurrent.atomic.AtomicLong
3940
import scala.collection.mutable.ArrayBuffer
4041
import scala.util.Random
4142

@@ -173,7 +174,8 @@ class SamBuilder(val readLength: Int=100,
173174
r2("RG") = this.rg.getId
174175
attrs.foreach { case (key, value) => r2(key) = value }
175176

176-
SamPairUtil.setMateInfo(r1.asSam, r2.asSam, true)
177+
val orientations = util.Arrays.asList(PairOrientation.values(): _*)
178+
SamPairUtil.setProperPairAndMateInfo(r1.asSam, r2.asSam, orientations, true)
177179
val recs = Seq(r1, r2)
178180
this.records ++= recs
179181
recs

src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala

+20
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,26 @@ class PileupBuilderTest extends UnitSpec {
289289
piler.safelyClose()
290290
}
291291

292+
it should "not filter out single-end records when we are not filter for mapped pairs only and we are not removing records where the position is outside the insert of FR pairs" in {
293+
val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate))
294+
295+
builder.addFrag(name = "q1", contig = Chr1Index, start = 100)
296+
297+
val source = builder.toSource
298+
val piler = PileupBuilder(
299+
source,
300+
accessPattern = accessPattern,
301+
mappedPairsOnly = false,
302+
includeMapPositionsOutsideFrInsert = true
303+
)
304+
305+
piler.pileup(Chr1, 100).depth shouldBe 1
306+
piler.pileup(Chr1, 100 + ReadLength - 1).depth shouldBe 1
307+
308+
source.safelyClose()
309+
piler.safelyClose()
310+
}
311+
292312
it should "filter out records where a position is outside the insert for an FR pair" in {
293313
val builder = new SamBuilder(readLength = ReadLength, sd = Some(TestSequenceDictionary), sort = Some(Coordinate))
294314

0 commit comments

Comments
 (0)