Skip to content

Commit a86eec3

Browse files
committed
Update bad rows sent by ES loader for enriched events (close #161)
1 parent b5c3714 commit a86eec3

File tree

9 files changed

+35
-79
lines changed

9 files changed

+35
-79
lines changed

Diff for: build.sbt

+2 -4
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,6 @@ lazy val commonDependencies = Seq(
2121
Dependencies.Libraries.kinesisConnector,
2222
Dependencies.Libraries.nsqClient,
2323
// Scala
24-
Dependencies.Libraries.cats,
25-
Dependencies.Libraries.catsEffect,
2624
Dependencies.Libraries.catsRetry,
2725
Dependencies.Libraries.circeOptics,
2826
Dependencies.Libraries.decline,
@@ -32,6 +30,7 @@ lazy val commonDependencies = Seq(
3230
Dependencies.Libraries.awsSigner,
3331
Dependencies.Libraries.pureconfig,
3432
Dependencies.Libraries.pureconfigEnum,
33+
Dependencies.Libraries.badRows,
3534
// Scala (test only)
3635
Dependencies.Libraries.specs2,
3736
Dependencies.Libraries.circeLiteral
@@ -63,9 +62,8 @@ lazy val root = project
6362

6463
lazy val core = project
6564
.settings(moduleName := "snowplow-elasticsearch-loader-core")
66-
.settings(buildSettings)
65+
.settings(allSettings)
6766
.settings(BuildSettings.scalifySettings)
68-
.settings(libraryDependencies ++= commonDependencies)
6967

7068
// project dealing with the ES
7169
lazy val elasticsearch = project

Diff for: core/src/main/scala/com.snowplowanalytics.stream/loader/Emitter.scala

+3 -4
Original file line number | Diff line number | Diff line change
@@ -166,11 +166,10 @@ class Emitter(
166166
*/
167167
override def fail(records: JList[EmitterJsonInput]): Unit = {
168168
records.asScala.foreach {
169-
case (r: String, Validated.Invalid(fs)) =>
170-
val output = EsLoaderBadRow(r, fs).toCompactJson
171-
badSink.store(output, None, false)
169+
case (r, Validated.Invalid(fs)) =>
170+
val badRow = createBadRow(r, fs)
171+
badSink.store(badRow.compact, None, false)
172172
case (_, Validated.Valid(_)) => ()
173173
}
174174
}
175-
176175
}

Diff for: core/src/main/scala/com.snowplowanalytics.stream/loader/EsLoaderBadRow.scala

-57
This file was deleted.

Diff for: core/src/main/scala/com.snowplowanalytics.stream/loader/clients/BulkSender.scala

+5 -2
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,8 @@ import scala.concurrent.duration._
3030
import scala.util.Random
3131

3232
// cats
33-
import cats.effect.IO
33+
import cats.effect.{ContextShift, IO}
34+
import scala.concurrent.ExecutionContext
3435
import cats.{Applicative, Id}
3536
import cats.syntax.functor._
3637

@@ -74,13 +75,15 @@ trait BulkSender[A] {
7475
}
7576

7677
object BulkSender {
78+
implicit val contextShift: ContextShift[IO] = IO.contextShift(ExecutionContext.global)
79+
7780
def delayPolicy[M[_]: Applicative](
7881
maxAttempts: Int,
7982
maxConnectionWaitTimeMs: Long): RetryPolicy[M] =
8083
RetryPolicy.lift { status =>
8184
if (status.retriesSoFar >= maxAttempts) PolicyDecision.GiveUp
8285
else {
83-
val maxDelay = 2.milliseconds * Math.pow(2, status.retriesSoFar).toLong
86+
val maxDelay = 2.milliseconds * Math.pow(2, status.retriesSoFar.toDouble).toLong
8487
val randomDelayNanos = (maxDelay.toNanos * Random.nextDouble()).toLong
8588
val maxConnectionWaitTimeNano = maxConnectionWaitTimeMs * 1000
8689
val delayNanos =

Diff for: core/src/main/scala/com.snowplowanalytics.stream/loader/executors/StdinExecutor.scala

+3 -2
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ package com.snowplowanalytics.stream.loader.executors
1414

1515
import cats.syntax.validated._
1616

17-
import com.snowplowanalytics.stream.loader.{EmitterJsonInput, EsLoaderBadRow}
17+
import com.snowplowanalytics.stream.loader.EmitterJsonInput
1818
import com.snowplowanalytics.stream.loader.Config.{StreamLoaderConfig, StreamType}
1919
import com.snowplowanalytics.stream.loader.clients.BulkSender
2020
import com.snowplowanalytics.stream.loader.sinks.ISink
@@ -23,6 +23,7 @@ import com.snowplowanalytics.stream.loader.transformers.{
2323
EnrichedEventJsonTransformer,
2424
PlainJsonTransformer
2525
}
26+
import com.snowplowanalytics.stream.loader.createBadRow
2627

2728
class StdinExecutor(
2829
config: StreamLoaderConfig,
@@ -43,7 +44,7 @@ class StdinExecutor(
4344
def run = for (ln <- scala.io.Source.stdin.getLines) {
4445
val (line, result) = transformer.consumeLine(ln)
4546
result.bimap(
46-
f => badSink.store(EsLoaderBadRow(line, f).toCompactJson, None, false),
47+
f => badSink.store(createBadRow(line, f).compact, None, false),
4748
s =>
4849
goodSink match {
4950
case Some(gs) => gs.store(s.json.toString, None, true)

Diff for: core/src/main/scala/com.snowplowanalytics.stream/loader/transformers/BadEventTransformer.scala

+2 -1
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ import cats.data.ValidatedNel
3434
import cats.syntax.validated._
3535

3636
import com.snowplowanalytics.iglu.core.SelfDescribingData
37-
import com.snowplowanalytics.iglu.core.circe.instances._
37+
import com.snowplowanalytics.iglu.core.circe.implicits._
3838

3939
/**
4040
* Class to convert bad events to ElasticsearchObjects
@@ -73,6 +73,7 @@ object BadEventTransformer {
7373
root.obj.modify(renameField("payload")),
7474
root.payload.raw.obj.modify(serializeField("parameters")),
7575
root.failure.obj.modify(renameField("error")),
76+
root.failure.obj.modify(renameField("errors")),
7677
root.failure.obj.modify(renameField("message")),
7778
root.failure.messages.each.obj.modify(renameField("error")),
7879
root.failure.messages.each.obj.modify(serializeField("expectedMapping")),

Diff for: core/src/main/scala/com.snowplowanalytics.stream/package.scala

+15 -1
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,12 @@
1818
*/
1919
package com.snowplowanalytics.stream
2020

21-
// cats
21+
import java.time.Instant
22+
2223
import cats.data.ValidatedNel
24+
import cats.data.NonEmptyList
25+
26+
import com.snowplowanalytics.snowplow.badrows._
2327

2428
package object loader {
2529

@@ -34,4 +38,14 @@ package object loader {
3438
* The input type for the ElasticsearchSender objects
3539
*/
3640
type EmitterJsonInput = (String, ValidatedNel[String, JsonRecord])
41+
42+
val processor = Processor(generated.Settings.name, generated.Settings.version)
43+
44+
/** Create a generic bad row. */
45+
def createBadRow(line: String, errors: NonEmptyList[String]): BadRow.GenericError = {
46+
val payload = Payload.RawPayload(line)
47+
val timestamp = Instant.now()
48+
val failure = Failure.GenericFailure(timestamp, errors)
49+
BadRow.GenericError(processor, failure, payload)
50+
}
3751
}

Diff for: project/BuildSettings.scala

+1
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ object BuildSettings {
7676
assemblyJarName in assembly := { s"${moduleName.value}-${version.value}.jar" },
7777
test in assembly := {},
7878
assemblyMergeStrategy in assembly := {
79+
case x if x.endsWith("module-info.class") => MergeStrategy.discard // not used by JDK8
7980
case "META-INF/io.netty.versions.properties" => MergeStrategy.first
8081
case PathList("org", "joda", "time", "base", "BaseDateTime.class") => MergeStrategy.first
8182
case x =>

Diff for: project/Dependencies.scala

+4 -8
Original file line number | Diff line number | Diff line change
@@ -24,19 +24,16 @@ object Dependencies {
2424
val elasticsearch = "6.3.2"
2525
val nsqClient = "1.1.0-rc1"
2626
val jackson = "2.9.6"
27-
2827
// Scala
29-
val cats = "1.6.1"
30-
val catsEffect = "1.3.1"
3128
val catsRetry = "0.2.5"
32-
val circe = "0.11.2"
33-
val circeOptics = "0.11.0"
29+
val circe = "0.13.0"
3430
val decline = "0.6.2"
3531
val snowplowTracker = "0.6.1"
3632
val analyticsSDK = "2.0.1"
3733
val awsSigner = "0.5.0"
3834
val elastic4s = "6.3.6"
3935
val pureconfig = "0.9.1"
36+
val badRows = "2.1.0"
4037
// Scala (test only)
4138
val specs2 = "4.1.0"
4239
}
@@ -53,10 +50,8 @@ object Dependencies {
5350
val elasticsearch = "org.elasticsearch" % "elasticsearch" % V.elasticsearch
5451
val nsqClient = "com.snowplowanalytics" % "nsq-java-client_2.10" % V.nsqClient
5552
// Scala
56-
val cats = "org.typelevel" %% "cats-core" % V.cats
57-
val catsEffect = "org.typelevel" %% "cats-effect" % V.catsEffect
5853
val catsRetry = "com.github.cb372" %% "cats-retry-cats-effect" % V.catsRetry
59-
val circeOptics = "io.circe" %% "circe-optics" % V.circeOptics
54+
val circeOptics = "io.circe" %% "circe-optics" % V.circe
6055
val decline = "com.monovore" %% "decline" % V.decline
6156
val snowplowTracker = "com.snowplowanalytics" %% "snowplow-scala-tracker-core" % V.snowplowTracker
6257
val snowplowTrackerId = "com.snowplowanalytics" %% "snowplow-scala-tracker-emitter-id" % V.snowplowTracker
@@ -65,6 +60,7 @@ object Dependencies {
6560
val pureconfig = "com.github.pureconfig" %% "pureconfig" % V.pureconfig
6661
val pureconfigEnum = "com.github.pureconfig" %% "pureconfig-enumeratum" % V.pureconfig
6762
val elastic4sHttp = "com.sksamuel.elastic4s" %% "elastic4s-http" % V.elastic4s
63+
val badRows = "com.snowplowanalytics" %% "snowplow-badrows" % V.badRows
6864
// Scala (test only)
6965
val circeLiteral = "io.circe" %% "circe-literal" % V.circe % Test
7066
val specs2 = "org.specs2" %% "specs2-core" % V.specs2 % Test

0 commit comments

Comments
 (0)