Skip to content

Commit e702f27

Browse files
author
YongGang
authored
Add simple latency histogram metrics (#89)
* Add simple latency histogram metrics
1 parent f243762 commit e702f27

File tree

4 files changed

+159
-25
lines changed

4 files changed

+159
-25
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ jobs:
1616
runs-on: ubuntu-latest
1717
strategy:
1818
matrix:
19-
java: [ '8', '11' ]
19+
java: ['11']
2020
steps:
2121
- uses: actions/checkout@v2
2222
- name: Set up JDK

pom.xml

Lines changed: 16 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
<properties>
1313
<dist.dir>${project.build.directory}/dist</dist.dir>
1414
<jackson.version>2.10.0</jackson.version>
15-
<java.numeric.version>1.8</java.numeric.version>
15+
<java.numeric.version>11</java.numeric.version>
1616
<kafka.version>2.6.2</kafka.version>
1717
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
1818
</properties>
@@ -204,23 +204,16 @@
204204
</executions>
205205
</plugin>
206206
<plugin>
207-
<groupId>org.codehaus.mojo</groupId>
208-
<artifactId>findbugs-maven-plugin</artifactId>
209-
<version>3.0.5</version>
210-
<configuration>
211-
<effort>Max</effort>
212-
<threshold>Low</threshold>
213-
<failOnError>true</failOnError>
214-
</configuration>
215-
<executions>
216-
<execution>
217-
<id>analyze-compile</id>
218-
<goals>
219-
<goal>check</goal>
220-
</goals>
221-
<phase>compile</phase>
222-
</execution>
223-
</executions>
207+
<groupId>com.github.spotbugs</groupId>
208+
<artifactId>spotbugs-maven-plugin</artifactId>
209+
<version>4.5.2.0</version>
210+
<dependencies>
211+
<dependency>
212+
<groupId>com.github.spotbugs</groupId>
213+
<artifactId>spotbugs</artifactId>
214+
<version>4.5.3</version>
215+
</dependency>
216+
</dependencies>
224217
</plugin>
225218
<plugin>
226219
<groupId>com.github.ekryd.sortpom</groupId>
@@ -292,7 +285,7 @@
292285
<configuration>
293286
<target>
294287
<chmod dir="${dist.dir}" perm="700">
295-
<include name="**/*.sh"/>
288+
<include name="**/*.sh" />
296289
</chmod>
297290
</target>
298291
</configuration>
@@ -392,7 +385,7 @@
392385
</goals>
393386
</pluginExecutionFilter>
394387
<action>
395-
<ignore/>
388+
<ignore />
396389
</action>
397390
</pluginExecution>
398391
<pluginExecution>
@@ -405,7 +398,7 @@
405398
</goals>
406399
</pluginExecutionFilter>
407400
<action>
408-
<ignore/>
401+
<ignore />
409402
</action>
410403
</pluginExecution>
411404
</pluginExecutions>
@@ -459,10 +452,10 @@
459452
<phase>package</phase>
460453
<configuration>
461454
<target>
462-
<copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar"/>
455+
<copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar" />
463456
<!-- Grant execute permission to all shell scripts in package dir by setting it to 755 -->
464457
<chmod dir="${dist.dir}" perm="755">
465-
<include name="**/*.sh"/>
458+
<include name="**/*.sh" />
466459
</chmod>
467460
</target>
468461
</configuration>

src/main/java/com/salesforce/mirus/metrics/MirrorJmxReporter.java

Lines changed: 83 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,45 @@
11
package com.salesforce.mirus.metrics;
22

3+
import com.google.common.collect.Sets;
34
import com.salesforce.mirus.MirusSourceConnector;
45
import java.util.*;
6+
import java.util.concurrent.TimeUnit;
57
import java.util.stream.Collectors;
68
import org.apache.kafka.common.MetricNameTemplate;
79
import org.apache.kafka.common.TopicPartition;
10+
import org.apache.kafka.common.metrics.MetricConfig;
811
import org.apache.kafka.common.metrics.Metrics;
912
import org.apache.kafka.common.metrics.Sensor;
1013
import org.apache.kafka.common.metrics.stats.*;
14+
import org.apache.kafka.common.utils.Time;
1115
import org.slf4j.Logger;
1216
import org.slf4j.LoggerFactory;
1317

1418
public class MirrorJmxReporter extends AbstractMirusJmxReporter {
1519

1620
private static final Logger logger = LoggerFactory.getLogger(MirrorJmxReporter.class);
1721

22+
/**
 * Latency histogram buckets: each key is a bucket's lower-bound threshold in milliseconds and
 * each value is the label used for that bucket's "bucket" metric tag.
 */
public static final Map<Long, String> LATENCY_BUCKETS =
    Map.ofEntries(
        Map.entry(TimeUnit.MINUTES.toMillis(0), "0m"),
        Map.entry(TimeUnit.MINUTES.toMillis(5), "5m"),
        Map.entry(TimeUnit.MINUTES.toMillis(10), "10m"),
        Map.entry(TimeUnit.MINUTES.toMillis(30), "30m"),
        Map.entry(TimeUnit.MINUTES.toMillis(60), "60m"),
        Map.entry(TimeUnit.HOURS.toMillis(12), "12h"));
36+
1837
private static MirrorJmxReporter instance = null;
1938

2039
private static final String SOURCE_CONNECTOR_GROUP = MirusSourceConnector.class.getSimpleName();
2140

2241
private static final Set<String> TOPIC_TAGS = new HashSet<>(Collections.singletonList("topic"));
42+
private static final Set<String> TOPIC_BUCKET_TAGS = Sets.newHashSet("topic", "bucket");
2343

2444
private static final MetricNameTemplate REPLICATION_LATENCY =
2545
new MetricNameTemplate(
@@ -38,16 +58,25 @@ public class MirrorJmxReporter extends AbstractMirusJmxReporter {
3858
"replication-latency-ms-avg", SOURCE_CONNECTOR_GROUP,
3959
"Average time it takes records to replicate from source to target cluster.", TOPIC_TAGS);
4060

61+
protected static final MetricNameTemplate HISTOGRAM_LATENCY =
62+
new MetricNameTemplate(
63+
"replication-latency-histogram",
64+
SOURCE_CONNECTOR_GROUP,
65+
"Cumulative histogram counting records delivered per second with latency exceeding a set of fixed bucket thresholds.",
66+
TOPIC_BUCKET_TAGS);
67+
4168
// Map of topics to their metric objects
4269
private final Map<String, Sensor> topicSensors;
4370
private final Set<TopicPartition> topicPartitionSet;
71+
private final Map<String, TreeMap<Long, Sensor>> histogramLatencySensors;
4472

4573
private MirrorJmxReporter() {
46-
super(new Metrics());
74+
super(new Metrics(new MetricConfig(), new ArrayList<>(0), Time.SYSTEM, true));
4775
metrics.sensor("replication-latency");
4876

4977
topicSensors = new HashMap<>();
5078
topicPartitionSet = new HashSet<>();
79+
histogramLatencySensors = new HashMap<>();
5180

5281
logger.info("Initialized MirrorJMXReporter");
5382
}
@@ -73,6 +102,15 @@ public synchronized void addTopics(List<TopicPartition> topicPartitions) {
73102
.filter(topic -> !topicSensors.containsKey(topic))
74103
.collect(Collectors.toMap(topic -> topic, this::createTopicSensor)));
75104
topicPartitionSet.addAll(topicPartitions);
105+
106+
for (TopicPartition topicPartition : topicPartitions) {
107+
TreeMap<Long, Sensor> bucketSensors = new TreeMap<>();
108+
String topic = topicPartition.topic();
109+
LATENCY_BUCKETS.forEach(
110+
(edgeMillis, bucketName) ->
111+
bucketSensors.put(edgeMillis, createHistogramSensor(topic, bucketName)));
112+
histogramLatencySensors.put(topic, bucketSensors);
113+
}
76114
}
77115

78116
/**
@@ -104,6 +142,7 @@ public synchronized void removeTopics(List<TopicPartition> topicPartitions) {
104142
topic -> {
105143
metrics.removeSensor(replicationLatencySensorName(topic));
106144
topicSensors.remove(topic);
145+
histogramLatencySensors.remove(topic);
107146
});
108147
}
109148

@@ -112,6 +151,24 @@ public synchronized void recordMirrorLatency(String topic, long millis) {
112151
if (sensor != null) {
113152
sensor.record((double) millis);
114153
}
154+
155+
TreeMap<Long, Sensor> bucketSensors = histogramLatencySensors.get(topic);
156+
for (Map.Entry<Long, Sensor> sensorEntry : bucketSensors.entrySet()) {
157+
long edgeMillis = sensorEntry.getKey();
158+
Sensor bucketSensor = sensorEntry.getValue();
159+
if (millis >= edgeMillis) {
160+
if (bucketSensor.hasExpired()) {
161+
String bucket = LATENCY_BUCKETS.get(edgeMillis);
162+
// explicitly replace the expired sensor with a new one
163+
metrics.removeSensor(histogramLatencySensorName(topic, bucket));
164+
bucketSensor = createHistogramSensor(topic, bucket);
165+
}
166+
bucketSensor.record(1);
167+
} else {
168+
// bucket sensors are sorted by edgeMillis
169+
break;
170+
}
171+
}
115172
}
116173

117174
private Sensor createTopicSensor(String topic) {
@@ -127,7 +184,32 @@ private Sensor createTopicSensor(String topic) {
127184
return sensor;
128185
}
129186

187+
private Sensor createHistogramSensor(String topic, String bucket) {
188+
Map<String, String> tags = new LinkedHashMap<>();
189+
tags.put("topic", topic);
190+
tags.put("bucket", bucket);
191+
192+
// bucket sensor will be expired after 5 mins if inactive
193+
// this is to prevent inactive bucket sensors from reporting too many zero value metrics
194+
Sensor sensor =
195+
metrics.sensor(
196+
histogramLatencySensorName(topic, bucket),
197+
null,
198+
TimeUnit.MINUTES.toSeconds(5),
199+
Sensor.RecordingLevel.INFO,
200+
null);
201+
sensor.add(
202+
metrics.metricInstance(HISTOGRAM_LATENCY, tags),
203+
new Rate(TimeUnit.SECONDS, new WindowedSum()));
204+
205+
return sensor;
206+
}
207+
130208
private String replicationLatencySensorName(String topic) {
131209
return topic + "-" + "replication-latency";
132210
}
211+
212+
private String histogramLatencySensorName(String topic, String bucket) {
213+
return topic + "-" + bucket + "-" + "histogram-latency";
214+
}
133215
}
Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
package com.salesforce.mirus.metrics;
2+
3+
import java.util.LinkedHashMap;
4+
import java.util.List;
5+
import java.util.Map;
6+
import org.apache.kafka.common.TopicPartition;
7+
import org.apache.kafka.common.metrics.Metrics;
8+
import org.junit.Assert;
9+
import org.junit.Before;
10+
import org.junit.Test;
11+
12+
public class MirrorJmxReporterTest {
13+
14+
private MirrorJmxReporter mirrorJmxReporter;
15+
private Metrics metrics;
16+
private final String TEST_TOPIC = "TestTopic";
17+
18+
@Before
19+
public void setUp() throws Exception {
20+
mirrorJmxReporter = MirrorJmxReporter.getInstance();
21+
metrics = mirrorJmxReporter.metrics;
22+
}
23+
24+
@Test
25+
public void updateLatencyMetrics() {
26+
TopicPartition topicPartition = new TopicPartition(TEST_TOPIC, 1);
27+
mirrorJmxReporter.addTopics(List.of(topicPartition));
28+
29+
mirrorJmxReporter.recordMirrorLatency(TEST_TOPIC, 500);
30+
31+
Map<String, String> tags = new LinkedHashMap<>();
32+
tags.put("topic", TEST_TOPIC);
33+
tags.put("bucket", "0m");
34+
Object value =
35+
metrics
36+
.metrics()
37+
.get(
38+
metrics.metricName(
39+
MirrorJmxReporter.HISTOGRAM_LATENCY.name(),
40+
MirrorJmxReporter.HISTOGRAM_LATENCY.group(),
41+
MirrorJmxReporter.HISTOGRAM_LATENCY.description(),
42+
tags))
43+
.metricValue();
44+
Assert.assertTrue((double) value > 0);
45+
46+
tags.put("bucket", "12h");
47+
value =
48+
metrics
49+
.metrics()
50+
.get(
51+
metrics.metricName(
52+
MirrorJmxReporter.HISTOGRAM_LATENCY.name(),
53+
MirrorJmxReporter.HISTOGRAM_LATENCY.group(),
54+
MirrorJmxReporter.HISTOGRAM_LATENCY.description(),
55+
tags))
56+
.metricValue();
57+
Assert.assertTrue((double) value == 0);
58+
}
59+
}

0 commit comments

Comments
 (0)