Skip to content

Commit 06b498f

Browse files
committed
Add support for collections in Metric.
Collections will be comma-separated lists, so values in the list should not contain commas.
1 parent c7ec240 commit 06b498f

File tree

3 files changed

+111
-8
lines changed

3 files changed

+111
-8
lines changed

src/main/scala/com/fulcrumgenomics/util/Metric.scala

+55-7
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import scala.util.{Failure, Success}
4545
object Metric extends LazyLogging {
4646
val Delimiter: Char = '\t'
4747
val DelimiterAsString: String = s"$Delimiter"
48+
val DefaultCollectionDelimiter: Char = ','
4849

4950
/** A typedef for [[scala.Long]] to be used when representing counts. */
5051
type Count = Long
@@ -101,7 +102,9 @@ object Metric extends LazyLogging {
101102

102103
/** Reads metrics from a set of lines. The first line should be the header with the field names. Each subsequent
103104
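* line should be a single metric. Fields with a collection type are split into their elements on
* `collectionDelimiter` (a comma by default). */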
104-
def iterator[T <: Metric](lines: Iterator[String], source: Option[String] = None)(implicit tt: ru.TypeTag[T]): Iterator[T] = {
105+
def iterator[T <: Metric](lines: Iterator[String],
106+
source: Option[String] = None,
107+
collectionDelimiter: Char = DefaultCollectionDelimiter)(implicit tt: ru.TypeTag[T]): Iterator[T] = {
105108
val clazz: Class[T] = ReflectionUtil.typeTagToClass[T]
106109

107110
def fail(lineNumber: Int,
@@ -127,15 +130,28 @@ object Metric extends LazyLogging {
127130

128131
parser.zipWithIndex.map { case (row, rowIndex) =>
129132
forloop(from = 0, until = names.length) { i =>
133+
130134
reflectiveBuilder.argumentLookup.forField(names(i)) match {
131135
case Some(arg) =>
132-
val value = {
136+
val value: String = {
133137
val tmp = row[String](i)
134138
if (tmp.isEmpty && arg.argumentType == classOf[Option[_]]) ReflectionUtil.SpecialEmptyOrNoneToken else tmp
135139
}
136-
137-
val argumentValue = ReflectionUtil.constructFromString(arg.argumentType, arg.unitType, value) match {
138-
case Success(v) => v
140+
val values: Seq[String] = {
141+
142+
// If we have a collection, then we need to check for the delimiter to rebuild it
143+
if (value != ReflectionUtil.SpecialEmptyOrNoneToken &&
144+
ReflectionUtil.isCollectionClass(arg.argumentType)) {
145+
// If the argument type is equal to the unit type, then we need to return a single string value,
146+
// otherwise, one value per unit
147+
if (arg.argumentType == arg.unitType) Seq(value) else value.split(collectionDelimiter)
148+
}
149+
else {
150+
Seq(value)
151+
}
152+
}
153+
val argumentValue = ReflectionUtil.constructFromString(arg.argumentType, arg.unitType, values:_*) match {
154+
case Success(v) => v
139155
case Failure(thr) =>
140156
fail(lineNumber=rowIndex+2, message=s"Could not construct value for column '${arg.name}' of type '${arg.typeDescription}' from '$value'", Some(thr))
141157
}
@@ -152,16 +168,24 @@ object Metric extends LazyLogging {
152168
}
153169
}
154170

171+
/** Reads metrics from the given path. The first line should be the header with the field names. Each subsequent
172+
* line should be a single metric. Fields with a collection type are split into their elements on
* `collectionDelimiter`. */
173+
def iterator[T <: Metric](path: Path, collectionDelimiter: Char)(implicit tt: ru.TypeTag[T]): Iterator[T] = iterator[T](Io.readLines(path), Some(path.toString), collectionDelimiter)
174+
155175
/** Reads metrics from the given path. The first line should be the header with the field names. Each subsequent
156176
* line should be a single metric. */
157177
def iterator[T <: Metric](path: Path)(implicit tt: ru.TypeTag[T]): Iterator[T] = iterator[T](Io.readLines(path), Some(path.toString))
158178

159179
/** Reads metrics from a set of lines. The first line should be the header with the field names. Each subsequent
160180
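* line should be a single metric. Fields with a collection type are split into their elements on
* `collectionDelimiter` (a comma by default). */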
161-
def read[T <: Metric](lines: Iterator[String], source: Option[String] = None)(implicit tt: ru.TypeTag[T]): Seq[T] = {
162-
iterator(lines, source).toSeq
181+
def read[T <: Metric](lines: Iterator[String], source: Option[String] = None, collectionDelimiter: Char = DefaultCollectionDelimiter)(implicit tt: ru.TypeTag[T]): Seq[T] = {
182+
iterator(lines, source, collectionDelimiter).toSeq
163183
}
164184

185+
/** Reads metrics from the given path. The first line should be the header with the field names. Each subsequent
186+
* line should be a single metric. Fields with a collection type are split into their elements on
* `collectionDelimiter`. */
187+
def read[T <: Metric](path: Path, collectionDelimiter: Char)(implicit tt: ru.TypeTag[T]): Seq[T] = read[T](Io.readLines(path), Some(path.toString), collectionDelimiter)
188+
165189
/** Reads metrics from the given path. The first line should be the header with the field names. Each subsequent
166190
* line should be a single metric. */
167191
def read[T <: Metric](path: Path)(implicit tt: ru.TypeTag[T]): Seq[T] = read[T](Io.readLines(path), Some(path.toString))
@@ -234,6 +258,9 @@ trait Metric extends Product with Iterable[(String,String)] {
234258
/** Gets an iterator over the fields of this metric in the order they were defined. Returns tuples of names and values */
235259
override def iterator: Iterator[(String,String)] = this.names.zip(this.values).iterator
236260

261+
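/** The delimiter placed between elements when a collection-valued field is formatted. If this is overridden, the
* same delimiter must be passed to `Metric.read`/`Metric.iterator` so the values can be split apart again. */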
262+
protected def collectionDelimiter: Char = Metric.DefaultCollectionDelimiter
263+
237264
/** @deprecated use [[formatValue]] instead. */
238265
@deprecated(message="Use formatValue instead.", since="0.5.0")
239266
protected def formatValues(value: Any): String = formatValue(value)
@@ -251,6 +278,27 @@ trait Metric extends Product with Iterable[(String,String)] {
251278
case d: Double if d.isNaN || d.isInfinity => d.toString
252279
case d: Double => Metric.BigDoubleFormat.synchronized { Metric.BigDoubleFormat.format(d) }
253280
case e: EnumEntry => e.entryName
281+
case other if ReflectionUtil.isCollectionClass(other.getClass) =>
282+
val resultType = other.getClass
283+
// Format every element individually; Java collections and Scala Seq/Set types are handled separately.
284+
val collection: Seq[String] = if (ReflectionUtil.isJavaCollectionClass(resultType)) {
285+
other.asInstanceOf[java.util.Collection[AnyRef]].map(formatValue).toSeq
286+
}
287+
else if (ReflectionUtil.isSeqClass(resultType) || ReflectionUtil.isSetClass(resultType)) {
288+
other.asInstanceOf[Iterable[_]].map(formatValue).toList
289+
}
290+
else {
291+
throw new IllegalArgumentException(s"Unknown collection type '${resultType.getSimpleName}'")
292+
}
293+
// No element may contain the collection delimiter, otherwise the list could not be split back apart on read.
294+
if (collection.exists(_.contains(collectionDelimiter))) {
295+
throw new IllegalArgumentException(s"Metric collection value contained a comma: $value")
296+
}
297+
if (collection.isEmpty) {
298+
ReflectionUtil.SpecialEmptyOrNoneToken
299+
} else {
300+
collection.mkString(collectionDelimiter.toString)
301+
}
254302
case other => other.toString
255303
}
256304

src/test/scala/com/fulcrumgenomics/basecalling/ExtractIlluminaRunInfoTest.scala

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
package com.fulcrumgenomics.basecalling
2727

28+
import com.fulcrumgenomics.commons.io.Io
2829
import com.fulcrumgenomics.illumina.RunInfo
2930
import com.fulcrumgenomics.testing.UnitSpec
3031
import com.fulcrumgenomics.util.{Metric, ReadStructure}

src/test/scala/com/fulcrumgenomics/util/MetricTest.scala

+55-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ private case class TestDoubleMetric(d: Double) extends Metric
7272
private case class TestFloatMetric(f: Float) extends Metric
7373
private case class TestCharMetric(c: Char) extends Metric
7474

75+
private case class TestScalaCollection(list: List[String]) extends Metric
76+
private case class TestJavaCollection(list: java.util.List[String]) extends Metric
77+
7578
/**
7679
* Tests for Metric.
7780
*/
@@ -284,7 +287,7 @@ class MetricTest extends UnitSpec with OptionValues with TimeLimits {
284287
it should "read and write a char" in {
285288
val path = makeTempFile("char_test", ".txt")
286289

287-
Seq('X', '$', 'a').foreach { c =>
290+
Seq('X', '$', 'a').foreach { c =>
288291
val expected = TestCharMetric(c=c)
289292
Metric.write(path, expected)
290293
val actual = Metric.read[TestCharMetric](path)
@@ -322,4 +325,55 @@ class MetricTest extends UnitSpec with OptionValues with TimeLimits {
322325
metricMixin.formatted(metricMixin.foo) shouldBe "TESTUPPERCASE" // Serialization
323326
TestEnumMixin.withName(metricMixin.foo.entryName) shouldBe TestEnumMixin.TestUpperCase // De-serialization
324327
}
328+
329+
it should "write and read scala collections" in {
330+
val path = makeTempFile("test.", ".txt")
331+
332+
// empty
333+
{
334+
val expected = TestScalaCollection(list=List.empty)
335+
Metric.write(path, expected)
336+
val actual = Metric.read[TestScalaCollection](path)
337+
actual should have size 1
338+
actual.head.list should contain theSameElementsInOrderAs expected.list
339+
}
340+
341+
// non-empty
342+
{
343+
val expected = TestScalaCollection(list = List("A", "B", "C"))
344+
Metric.write(path, expected)
345+
val actual = Metric.read[TestScalaCollection](path)
346+
actual should have size 1
347+
actual.head.list should contain theSameElementsInOrderAs expected.list
348+
}
349+
}
350+
351+
it should "write and read java collections" in {
352+
import com.fulcrumgenomics.commons.CommonsDef.javaIterableToIterator
353+
val path = makeTempFile("test.", ".txt")
354+
355+
// empty
356+
{
357+
val expected = TestJavaCollection(list=java.util.Collections.emptyList())
358+
Metric.write(path, expected)
359+
val actual = Metric.read[TestJavaCollection](path)
360+
actual should have size 1
361+
actual.head.list should contain theSameElementsInOrderAs expected.list.toSeq
362+
}
363+
364+
// non-empty
365+
{
366+
val expected = TestJavaCollection(list = java.util.Arrays.asList("A", "B", "C"))
367+
Metric.write(path, expected)
368+
val actual = Metric.read[TestJavaCollection](path)
369+
actual should have size 1
370+
actual.head.list should contain theSameElementsInOrderAs expected.list.toSeq
371+
}
372+
}
373+
374+
it should "not allow commas in collections" in {
375+
val path = makeTempFile("test.", ".txt")
376+
val expected = TestScalaCollection(list=List("a", "comma,comma"))
377+
an[Exception] should be thrownBy Metric.write(path, expected)
378+
}
325379
}

0 commit comments

Comments
 (0)