Skip to content

Commit 1a75ab2

Browse files
authored
Merge pull request #1026 from ivantopo/delay-span-reporting
introduce Span Reporting delay
2 parents 3734b8d + bb7f0f1 commit 1a75ab2

File tree

4 files changed

+131
-15
lines changed

4 files changed

+131
-15
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/* =========================================================================================
2+
* Copyright © 2013-2017 the kamon project <http://kamon.io/>
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
5+
* except in compliance with the License. You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the
10+
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
11+
* either express or implied. See the License for the specific language governing permissions
12+
* and limitations under the License.
13+
* =========================================================================================
14+
*/
15+
16+
package kamon.trace
17+
18+
import kamon.Kamon
19+
import kamon.testkit.{Reconfigure, SpanInspection, TestSpanReporter}
20+
import org.scalactic.TimesOnInt.convertIntToRepeater
21+
import org.scalatest.concurrent.Eventually
22+
import org.scalatest.time.SpanSugar
23+
import org.scalatest.{Matchers, OptionValues, WordSpec}
24+
25+
26+
class SpanReportingDelaySpec extends WordSpec with Matchers with OptionValues with SpanInspection.Syntax with Eventually
27+
with SpanSugar with TestSpanReporter with Reconfigure {
28+
29+
"the Kamon tracer" when {
30+
"has span reporting delay disabled" should {
31+
"keep spans with a positive sampling decision" in {
32+
val span = Kamon.spanBuilder("positive-span-without-delay").start()
33+
span.trace.keep()
34+
span.finish()
35+
36+
eventually(timeout(5 seconds)) {
37+
val reportedSpan = testSpanReporter().nextSpan().value
38+
reportedSpan.operationName shouldBe span.operationName()
39+
}
40+
}
41+
42+
"not report spans with a negative sampling decision" in {
43+
val span = Kamon.spanBuilder("negative-span-without-delay").start()
44+
span.trace.drop()
45+
span.finish()
46+
span.trace.keep() // Should not have any effect
47+
48+
5 times {
49+
val allSpans = testSpanReporter().spans()
50+
allSpans.find(_.operationName == span.operationName()) shouldBe empty
51+
52+
Thread.sleep(100) // Should be enough because Spans are reported every millisecond in tests
53+
}
54+
}
55+
}
56+
57+
"has span reporting delay enabled" should {
58+
"keep spans with a positive sampling decision" in {
59+
applyConfig("kamon.trace.span-reporting-delay = 2 seconds")
60+
val span = Kamon.spanBuilder("overwrite-to-positive-with-delay").start()
61+
span.trace.drop()
62+
span.finish()
63+
span.trace.keep() // Should force the Span to be reported, even though it was dropped before finising
64+
65+
eventually(timeout(5 seconds)) {
66+
val reportedSpan = testSpanReporter().nextSpan().value
67+
reportedSpan.operationName shouldBe span.operationName()
68+
}
69+
}
70+
71+
"not report spans with a negative sampling decision" in {
72+
val span = Kamon.spanBuilder("negative-span-without-delay").start()
73+
span.trace.keep()
74+
span.finish()
75+
span.trace.drop() // Should force the Span to be dropped, even though it was sampled before finishing
76+
77+
5 times {
78+
val allSpans = testSpanReporter().spans()
79+
allSpans.find(_.operationName == span.operationName()) shouldBe empty
80+
81+
Thread.sleep(100) // Should be enough because Spans are reported every millisecond in tests
82+
}
83+
}
84+
}
85+
}
86+
}

core/kamon-core/src/main/resources/reference.conf

+10
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,16 @@ kamon {
145145
# reporter has a separate queue.
146146
reporter-queue-size = 4096
147147

148+
# Decide for how long should the tracer wait to report Spans after they are finished. This setting helps keeping
149+
# Spans in memory for a few seconds after they finished, in case users want to manually override the sampling
150+
# decision for the trace later on. Keep in mind that:
151+
# - Overriding the sampling decision is a local-only action, any Spans from other connected services will not be
152+
# retained or affected in any way.
153+
# - This will introduce additional memory consumption proportional to the reporting delay and the Spans volume.
154+
#
155+
# Setting this value to "0 seconds" disables the reporting delay.
156+
span-reporting-delay = 0 seconds
157+
148158
# Decide whether a new, locally created Span should have the same Span Identifier as it's remote parent (if any) or
149159
# get a new local identifier. Certain tracing systems use the same Span Identifier to represent both sides (client
150160
# and server) of a RPC call, if you are reporting data to such systems then this option should be enabled. This

core/kamon-core/src/main/scala/kamon/trace/Span.scala

+29-12
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,14 @@ package kamon
1818
package trace
1919

2020
import java.time.{Duration, Instant}
21-
2221
import kamon.context.Context
2322
import kamon.tag.TagSet
2423
import kamon.trace.Span.Link
2524
import kamon.trace.Trace.SamplingDecision
2625
import kamon.util.Clock
2726
import org.slf4j.LoggerFactory
2827

28+
import java.util.concurrent.{ScheduledExecutorService, TimeUnit}
2929
import scala.compat.Platform.EOL
3030

3131
/**
@@ -410,7 +410,7 @@ object Span {
410410
val kind: Kind, localParent: Option[Span], initialOperationName: String, spanTags: TagSet.Builder, metricTags: TagSet.Builder,
411411
createdAt: Instant, initialMarks: List[Mark], initialLinks: List[Link], initialTrackMetrics: Boolean, tagWithParentOperation: Boolean,
412412
includeErrorStacktrace: Boolean, isDelayed: Boolean, clock: Clock, preFinishHooks: Array[Tracer.PreFinishHook],
413-
onFinish: Span.Finished => Unit, sampler: Sampler) extends Span.Delayed {
413+
onFinish: Span.Finished => Unit, sampler: Sampler, scheduler: ScheduledExecutorService, reportingDelay: Duration) extends Span.Delayed {
414414

415415
private val _metricTags = metricTags
416416
private val _spanTags = spanTags
@@ -440,25 +440,25 @@ object Span {
440440
}
441441

442442
override def tag(key: String, value: String): Span = synchronized {
443-
if(isSampled && _isOpen)
443+
if((isSampled || !reportingDelay.isZero) && _isOpen)
444444
_spanTags.add(key, value)
445445
this
446446
}
447447

448448
override def tag(key: String, value: Long): Span = synchronized {
449-
if(isSampled && _isOpen)
449+
if((isSampled || !reportingDelay.isZero) && _isOpen)
450450
_spanTags.add(key, value)
451451
this
452452
}
453453

454454
override def tag(key: String, value: Boolean): Span = synchronized {
455-
if(isSampled && _isOpen)
455+
if((isSampled || !reportingDelay.isZero) && _isOpen)
456456
_spanTags.add(key, value)
457457
this
458458
}
459459

460460
override def tag(tags: TagSet): Span = synchronized {
461-
if(isSampled && _isOpen)
461+
if((isSampled || !reportingDelay.isZero) && _isOpen)
462462
_spanTags.add(tags)
463463
this
464464
}
@@ -507,7 +507,7 @@ object Span {
507507
if(_isOpen) {
508508
_hasError = true
509509

510-
if(isSampled)
510+
if((isSampled || !reportingDelay.isZero))
511511
_spanTags.add(TagKeys.ErrorMessage, message)
512512
}
513513
this
@@ -517,7 +517,7 @@ object Span {
517517
if(_isOpen) {
518518
_hasError = true
519519

520-
if(isSampled) {
520+
if((isSampled || !reportingDelay.isZero)) {
521521
_spanTags.add(TagKeys.ErrorMessage, throwable.getMessage)
522522

523523
if(includeErrorStacktrace)
@@ -531,7 +531,7 @@ object Span {
531531
if(_isOpen) {
532532
_hasError = true
533533

534-
if(isSampled) {
534+
if((isSampled || !reportingDelay.isZero)) {
535535
_spanTags.add(TagKeys.ErrorMessage, message)
536536

537537
if(includeErrorStacktrace)
@@ -617,7 +617,7 @@ object Span {
617617
private def toStackTraceString(throwable: Throwable): String =
618618
throwable.getStackTrace().mkString("", EOL, EOL)
619619

620-
private def toFinishedSpan(to: Instant, metricTags: TagSet): Span.Finished =
620+
protected def toFinishedSpan(to: Instant, metricTags: TagSet): Span.Finished =
621621
Span.Finished(id, trace, parentId, _operationName, _hasError, isDelayed, createdAt, to, kind, position, _spanTags.build(),
622622
metricTags, _marks, _links)
623623

@@ -643,10 +643,20 @@ object Span {
643643
}
644644

645645
private def reportSpan(finishedAt: Instant, metricTags: TagSet): Unit = {
646-
if(isSampled)
647-
onFinish(toFinishedSpan(finishedAt, metricTags))
646+
if(reportingDelay.isZero) {
647+
if(isSampled)
648+
onFinish(toFinishedSpan(finishedAt, metricTags))
649+
}
650+
else {
651+
scheduler.schedule(
652+
new DelayedReportingRunnable(finishedAt, metricTags),
653+
reportingDelay.toMillis,
654+
TimeUnit.MILLISECONDS
655+
)
656+
}
648657
}
649658

659+
650660
private def createMetricTags(): TagSet = {
651661
_metricTags.add(TagKeys.OperationName, _operationName)
652662
_metricTags.add(TagKeys.Error, _hasError)
@@ -662,6 +672,13 @@ object Span {
662672

663673
_metricTags.build()
664674
}
675+
676+
private class DelayedReportingRunnable(finishedAt: Instant, metricTags: TagSet) extends Runnable {
677+
override def run(): Unit = {
678+
if (isSampled)
679+
onFinish(toFinishedSpan(finishedAt, metricTags))
680+
}
681+
}
665682
}
666683

667684
object Local {

core/kamon-core/src/main/scala/kamon/trace/Tracer.scala

+6-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
package kamon
1818
package trace
1919

20-
import java.time.Instant
20+
import java.time.{Duration, Instant}
2121
import java.util.concurrent.{ScheduledExecutorService, ScheduledFuture, TimeUnit}
22-
2322
import com.typesafe.config.Config
2423
import kamon.context.Context
2524
import kamon.tag.TagSet
@@ -53,6 +52,7 @@ class Tracer(initialConfig: Config, clock: Clock, contextStorage: ContextStorage
5352
@volatile private var _adaptiveSamplerSchedule: Option[ScheduledFuture[_]] = None
5453
@volatile private var _preStartHooks: Array[Tracer.PreStartHook] = Array.empty
5554
@volatile private var _preFinishHooks: Array[Tracer.PreFinishHook] = Array.empty
55+
@volatile private var _delayedSpanReportingDelay: Duration = Duration.ZERO
5656
private val _onSpanFinish: Span.Finished => Unit = _spanBuffer.offer
5757

5858
reconfigure(initialConfig)
@@ -340,7 +340,8 @@ class Tracer(initialConfig: Config, clock: Clock, contextStorage: ContextStorage
340340
}
341341

342342
new Span.Local(id, parentId, trace, position, _kind, localParent, _name, _spanTags, _metricTags, at, _marks, _links,
343-
_trackMetrics, _tagWithParentOperation, _includeErrorStacktrace, isDelayed, clock, _preFinishHooks, _onSpanFinish, _sampler)
343+
_trackMetrics, _tagWithParentOperation, _includeErrorStacktrace, isDelayed, clock, _preFinishHooks, _onSpanFinish,
344+
_sampler, scheduler, _delayedSpanReportingDelay)
344345
}
345346

346347
private def suggestedOrSamplerDecision(): SamplingDecision =
@@ -408,6 +409,7 @@ class Tracer(initialConfig: Config, clock: Clock, contextStorage: ContextStorage
408409
val tagWithUpstreamService = traceConfig.getBoolean("span-metric-tags.upstream-service")
409410
val tagWithParentOperation = traceConfig.getBoolean("span-metric-tags.parent-operation")
410411
val includeErrorStacktrace = traceConfig.getBoolean("include-error-stacktrace")
412+
val delayedSpanReportingDelay = traceConfig.getDuration("span-reporting-delay")
411413

412414
if(_traceReporterQueueSize != traceReporterQueueSize) {
413415
// By simply changing the buffer we might be dropping Spans that have not been collected yet by the reporters.
@@ -424,6 +426,7 @@ class Tracer(initialConfig: Config, clock: Clock, contextStorage: ContextStorage
424426
_tagWithUpstreamService = tagWithUpstreamService
425427
_tagWithParentOperation = tagWithParentOperation
426428
_traceReporterQueueSize = traceReporterQueueSize
429+
_delayedSpanReportingDelay = delayedSpanReportingDelay
427430
_preStartHooks = preStartHooks
428431
_preFinishHooks = preFinishHooks
429432

0 commit comments

Comments
 (0)