Skip to content

Commit db7a512

Browse files
authored
feat: honor original algorithm, increase test coverage (#7)
1 parent 62b9da7 commit db7a512

8 files changed

Lines changed: 111 additions & 24 deletions

File tree

.github/workflows/unit-tests.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@ jobs:
88
strategy:
99
matrix:
1010
suite: [ neodisambiguate ]
11-
java-version: [ 1.8, 11, 17, 21 ]
11+
java-version: [ 8, 11, 17, 21 ]
1212
steps:
13-
- uses: actions/checkout@v2
14-
- uses: actions/setup-java@v1
13+
- uses: actions/checkout@v4
14+
- uses: actions/setup-java@v4
1515
with:
16+
distribution: 'temurin'
1617
java-version: ${{ matrix.java-version }}
1718
- name: Cache for Scala Dependencies
18-
uses: actions/cache@v2
19+
uses: actions/cache@v4
1920
with:
2021
path: |
2122
~/.mill/download
@@ -33,5 +34,10 @@ jobs:
3334
- name: Create Code Coverage Report
3435
if: matrix.java-version == '11'
3536
run: |
36-
./mill --no-server --disable-ticker ${{ matrix.suite }}.scoverage.xmlReport
37-
# TODO: build an HTML report and upload as an artifact
37+
./mill --no-server --disable-ticker ${{ matrix.suite }}.scoverage.htmlReport
38+
- name: Upload Code Coverage Report
39+
uses: actions/upload-artifact@v4
40+
if: matrix.java-version == '11'
41+
with:
42+
name: code-coverage
43+
path: out/neodisambiguate/scoverage/htmlReport.dest/

neodisambiguate/src/io/cvbio/neodisambiguate/Disambiguate.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ object Disambiguate {
8787
}
8888

8989
/** Command line configuration. */
90-
private class NeodisambiguateConf(args: Seq[String]) extends ScallopConf(args) {
90+
private[neodisambiguate] class NeodisambiguateConf(args: Seq[String]) extends ScallopConf(args) {
9191
private val packageName: String = Option(this.getClass.getPackage.getImplementationTitle).getOrElse("neodisambiguate")
9292
private val version: String = Option(this.getClass.getPackage.getImplementationVersion).getOrElse("UNKNOWN")
9393
version(s"$packageName $version\n")
@@ -168,10 +168,12 @@ object Disambiguate {
168168
IOUtil.setCompressionLevel(conf.compression())
169169
Io.compressionLevel = conf.compression()
170170

171+
// $COVERAGE-OFF$
171172
if (IntelCompressionLibrarySupported) {
172173
BlockCompressedOutputStream.setDefaultDeflaterFactory(new IntelDeflaterFactory)
173174
BlockGunzipper.setDefaultInflaterFactory(new IntelInflaterFactory)
174175
}
176+
// $COVERAGE-ON$
175177

176178
Io.tmpDir = conf.tmpDir()
177179
System.setProperty("java.io.tmpdir", conf.tmpDir().toAbsolutePath.toString)

neodisambiguate/src/io/cvbio/neodisambiguate/MathUtil.scala

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,6 @@ object MathUtil {
1313
def countMinBy[B](fn: A => B)(implicit cmp: Ordering[B]): Int = MathUtil.countMin(self.map(fn))
1414
}
1515

16-
/** Implicits for optionally finding the maximum and minimum items in a collection. */
17-
implicit class WithMaxMinOption[A](self: IterableOnce[A]) {
18-
19-
/** Optionally return the maximum item from a container. */
20-
def maxOption(implicit cmp: Ordering[A]): Option[A] = if (self.iterator.isEmpty) None else Some(self.iterator.max)
21-
22-
/** Optionally return the minimum item from a container. */
23-
def minOption(implicit cmp: Ordering[A]): Option[A] = if (self.iterator.isEmpty) None else Some(self.iterator.min)
24-
}
25-
2616
/** Implicits picking the single maximum or single minimum item in a collection. */
2717
implicit class WithPickMaxMinBy[A](self: Seq[A]) {
2818

neodisambiguate/src/io/cvbio/neodisambiguate/TemplateOrdering.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,19 @@ object TemplateOrdering extends FgBioEnum[TemplateOrdering] {
2727
* If neither template is clearly better, then the templates are equivalent.
2828
*/
2929
case object ClassicOrdering extends TemplateOrdering {
30+
// $COVERAGE-OFF$
31+
private def bestAlignmentScore(template: Template): MetricPair[Int] = MetricPair[Int](template, AS)(_ max _)
32+
private def worstAlignmentScore(template: Template): MetricPair[Int] = MetricPair[Int](template, AS)(_ min _)
33+
private def bestNumMismatches(template: Template): MetricPair[Int] = MetricPair[Int](template, NM)(_ min _)
34+
private def worstNumMismatches(template: Template): MetricPair[Int] = MetricPair[Int](template, NM)(_ max _)
35+
// $COVERAGE-ON$
3036

3137
/** Compare two templates using the original published algorithm. */
3238
override def compare(x: Template, y: Template): Int = {
33-
val alignmentScore = (template: Template) => MetricPair[Int](template, AS.toString)(_ max _)
34-
val numMismatches = (template: Template) => MetricPair[Int](template, NM.toString)(_ min _)
35-
36-
var compare = alignmentScore(x).compare(alignmentScore(y))
37-
if (compare == 0) compare = -numMismatches(x).compare(numMismatches(y)) // Negate because less is better.
39+
var compare = bestAlignmentScore(x).compare(bestAlignmentScore(y))
40+
if (compare == 0) worstAlignmentScore(x).compare(worstAlignmentScore(y))
41+
if (compare == 0) compare = -bestNumMismatches(x).compare(bestNumMismatches(y)) // Negate because less is better.
42+
if (compare == 0) compare = -worstNumMismatches(x).compare(worstNumMismatches(y)) // Negate because less is better.
3843
compare
3944
}
4045
}

neodisambiguate/src/io/cvbio/neodisambiguate/bam/Bams.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ object Bams {
6464

6565
/** Advance to the next sequence of templates. */
6666
override def next(): Seq[Template] = {
67-
require(hasNext, "next() called on empty iterator")
6867
val templates = iterators.map(_.next())
6968
require(
7069
templates.map(_.name).distinct.length <= 1,

neodisambiguate/src/io/cvbio/neodisambiguate/metric/MetricPair.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package io.cvbio.neodisambiguate.metric
22

33
import com.fulcrumgenomics.bam.Template
4+
import htsjdk.samtools.SAMTag
45
import io.cvbio.neodisambiguate.CommonsDef.SamTag
56
import io.cvbio.neodisambiguate.bam.Bams.ReadOrdinal.{Read1, Read2}
67
import io.cvbio.neodisambiguate.bam.Bams.TemplateUtil
@@ -33,6 +34,11 @@ object MetricPair {
3334
)
3435
}
3536

37+
/** Build a [[MetricPair]] from a [[Template]]. A function is required to reduce the tag values to one canonical value. */
38+
def apply[T](template: Template, tag: SAMTag)(fn: (T, T) => T)(implicit cmp: Ordering[T]): MetricPair[T] = {
39+
apply(template = template, tag = tag.toString)(fn = fn)(cmp = cmp)
40+
}
41+
3642
/** Build an empty [[MetricPair]]. */
3743
def empty[T](implicit cmp: Ordering[T]): MetricPair[T] = new MetricPair[T](read1 = None, read2 = None)
3844
}

neodisambiguate/test/src/io/cvbio/neodisambiguate/DisambiguateTest.scala

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import htsjdk.samtools.SAMSequenceRecord
99
import htsjdk.samtools.SAMTag.{AS, NM}
1010
import io.cvbio.neodisambiguate.CommonsDef.BamExtension
1111
import io.cvbio.neodisambiguate.DisambiguationStrategy.Classic
12+
import io.cvbio.neodisambiguate.Disambiguate.NeodisambiguateConf
1213
import io.cvbio.neodisambiguate.testing.{TemplateBuilder, UnitSpec}
1314

1415
class DisambiguateTest extends UnitSpec {
@@ -113,7 +114,7 @@ class DisambiguateTest extends UnitSpec {
113114
val input = builder.toTempFile(deleteOnExit = true)
114115
val prefix = PathUtil.pathTo(dir.toString, more = "insilico")
115116

116-
val disambiguate = new Disambiguate(input = Seq(input), prefix = prefix)
117+
val disambiguate = new Disambiguate(input = Seq(input), prefix = prefix, referenceNames = Seq("hg38"))
117118
disambiguate.execute()
118119

119120
val source = SamSource(PathUtil.pathTo(Seq(prefix, assembly).mkString(".") + BamExtension))
@@ -152,4 +153,76 @@ class DisambiguateTest extends UnitSpec {
152153

153154
Io.assertWritableDirectory(PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")) // will raise exception if non-existent.
154155
}
156+
157+
it should "write out ambiguous templates to ambiguous-specific BAM files" in {
158+
val humanAssembly: String = "hg38"
159+
val mouseAssembly: String = "mm10"
160+
val dir: DirPath = Io.makeTempDir("disambiguate")
161+
//dir.toFile.deleteOnExit()
162+
163+
val humanBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
164+
val mouseBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
165+
166+
val humanPair = humanBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)
167+
val mousePair = mouseBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)
168+
169+
humanBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(humanAssembly) }
170+
mouseBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(mouseAssembly) }
171+
172+
val prefix = PathUtil.pathTo(dir.toString, more = "insilico")
173+
174+
val humanBam = humanBuilder.toTempFile(deleteOnExit = false)
175+
val mouseBam = mouseBuilder.toTempFile(deleteOnExit = false)
176+
177+
val disambiguate = new Disambiguate(input = Seq(mouseBam, humanBam), prefix = prefix)
178+
disambiguate.execute()
179+
180+
val humanSource = SamSource(PathUtil.pathTo(Seq(prefix, humanAssembly).mkString(".") + BamExtension))
181+
val mouseSource = SamSource(PathUtil.pathTo(Seq(prefix, mouseAssembly).mkString(".") + BamExtension))
182+
183+
humanSource.iterator.toSeq shouldBe empty
184+
mouseSource.iterator.toSeq shouldBe empty
185+
186+
val ambiguousAlignments = PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")
187+
188+
Io.assertWritableDirectory(ambiguousAlignments) // will raise exception if non-existent.
189+
190+
val ambiguousHumanSource = SamSource(ambiguousAlignments.resolve(PathUtil.replaceExtension(humanBam.getFileName, ".ambiguous" + BamExtension)))
191+
val ambiguousMouseSource = SamSource(ambiguousAlignments.resolve(PathUtil.replaceExtension(mouseBam.getFileName, ".ambiguous" + BamExtension)))
192+
193+
ambiguousHumanSource.map(_.name).toSeq should contain theSameElementsInOrderAs humanPair.map(_.name)
194+
ambiguousMouseSource.map(_.name).toSeq should contain theSameElementsInOrderAs mousePair.map(_.name)
195+
}
196+
197+
"NeoNeodisambiguate" should "run end-to-end via the CLI entrypoint" in {
198+
val humanAssembly: String = "hg38"
199+
val mouseAssembly: String = "mm10"
200+
val dir: DirPath = Io.makeTempDir("disambiguate")
201+
dir.toFile.deleteOnExit()
202+
203+
val humanBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
204+
val mouseBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
205+
206+
val _ = humanBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 6, AS.toString -> 32), start1 = 2, start2 = 101)
207+
val mousePair = mouseBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)
208+
209+
humanBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(humanAssembly) }
210+
mouseBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(mouseAssembly) }
211+
212+
val prefix = PathUtil.pathTo(dir.toString, more = "insilico")
213+
214+
val mouseBam = mouseBuilder.toTempFile(deleteOnExit = true)
215+
val humanBam = humanBuilder.toTempFile(deleteOnExit = true)
216+
217+
val conf = new NeodisambiguateConf(args = Seq("--input", mouseBam.toString, humanBam.toString, "--output", prefix.toString))
218+
Disambiguate.main(Array.from(conf.args))
219+
220+
val humanSource = SamSource(PathUtil.pathTo(Seq(prefix, humanAssembly).mkString(".") + BamExtension))
221+
val mouseSource = SamSource(PathUtil.pathTo(Seq(prefix, mouseAssembly).mkString(".") + BamExtension))
222+
223+
humanSource.iterator.toSeq shouldBe empty
224+
mouseSource.map(_.name).toSeq should contain theSameElementsInOrderAs mousePair.map(_.name)
225+
226+
Io.assertWritableDirectory(PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")) // will raise exception if non-existent.
227+
}
155228
}

neodisambiguate/test/src/io/cvbio/neodisambiguate/bam/BamsTest.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ class BamsTest extends UnitSpec {
3333
iterator.hasNext shouldBe false
3434
}
3535

36+
it should "raise an exception when empty and next() is called" in {
37+
val builder = new SamBuilder(sort = Some(SamOrder.Queryname))
38+
val iterator = Bams.templatesIterator(builder.toSource)
39+
an[NoSuchElementException] shouldBe thrownBy { iterator.next() }
40+
}
41+
3642
it should "accept a single valid SamSource with more than zero alignments" in {
3743
val builder = new SamBuilder(sort = Some(SamOrder.Queryname))
3844
builder.addPair(name = "pair1", start1 = 1, start2 = 2)

0 commit comments

Comments
 (0)