diff --git a/.chloggen/fix-zookeeper-avg-latency-float.yaml b/.chloggen/fix-zookeeper-avg-latency-float.yaml new file mode 100644 index 0000000000000..5091d0e87296d --- /dev/null +++ b/.chloggen/fix-zookeeper-avg-latency-float.yaml @@ -0,0 +1,6 @@ +change_type: bug_fix +component: scraper/zookeeper +note: Fix zk_avg_latency metric being silently dropped on Zookeeper 3.7+ where the value is reported as a float. +issues: [47320] +subtext: "" +change_logs: [user] diff --git a/scraper/zookeeperscraper/scraper.go b/scraper/zookeeperscraper/scraper.go index 586db06d67ae0..e9934c383b4c0 100644 --- a/scraper/zookeeperscraper/scraper.go +++ b/scraper/zookeeperscraper/scraper.go @@ -148,11 +148,16 @@ func (z *zookeeperMetricsScraper) processMntr(response []string) { } int64Val, err := strconv.ParseInt(metricValue, 10, 64) if err != nil { - z.logger.Debug( - "non-integer value from "+mntrCommand, - zap.String("value", metricValue), - ) - continue + // zk_avg_latency changed to float in ZK 3.7+; truncate to int64. + floatVal, floatErr := strconv.ParseFloat(metricValue, 64) + if floatErr != nil { + z.logger.Debug( + "non-parseable value from "+mntrCommand, + zap.String("value", metricValue), + ) + continue + } + int64Val = int64(floatVal) } recordDataPoints(now, int64Val) } diff --git a/scraper/zookeeperscraper/scraper_test.go b/scraper/zookeeperscraper/scraper_test.go index 6408ae0d1c556..c502663603b5a 100644 --- a/scraper/zookeeperscraper/scraper_test.go +++ b/scraper/zookeeperscraper/scraper_test.go @@ -85,6 +85,25 @@ func TestZookeeperMetricsScraperScrape(t *testing.T) { }, expectedNumResourceMetrics: 1, }, + { + name: "Test correctness with v3.7.2", + mockedZKCmdToOutputFilename: map[string]string{ + "mntr": "mntr-3.7.2", + "ruok": "ruok-valid", + }, + expectedMetricsFilename: "correctness-v3.7.2", + expectedResourceAttributes: map[string]string{ + "server.state": "standalone", + "zk.version": "3.7.2-a055d78707164783287056086786315873919992", + }, + expectedLogs: []logMsg{ + { + msg: "metric computation failed", + level: zapcore.DebugLevel, + }, + }, + expectedNumResourceMetrics: 1, + }, { name: "Arbitrary connection error", mockZKConnectionErr: true, @@ -122,7 +141,7 @@ func TestZookeeperMetricsScraperScrape(t *testing.T) { }, expectedLogs: []logMsg{ { - msg: "non-integer value from mntr", + msg: "non-parseable value from mntr", level: zapcore.DebugLevel, }, { diff --git a/scraper/zookeeperscraper/testdata/mntr-3.7.2 b/scraper/zookeeperscraper/testdata/mntr-3.7.2 new file mode 100644 index 0000000000000..534a480e830c6 --- /dev/null +++ b/scraper/zookeeperscraper/testdata/mntr-3.7.2 @@ -0,0 +1,16 @@ +zk_version 3.7.2-a055d78707164783287056086786315873919992, built on 09/08/2023 09:00 GMT +zk_avg_latency 0.0989 +zk_max_latency 47 +zk_min_latency 0 +zk_packets_received 156 +zk_packets_sent 155 +zk_num_alive_connections 3 +zk_outstanding_requests 0 +zk_server_state standalone +zk_znode_count 5 +zk_watch_count 1 +zk_ephemerals_count 0 +zk_approximate_data_size 44 +zk_open_file_descriptor_count 68 +zk_max_file_descriptor_count 1048576 +zk_fsync_threshold_exceed_count 0 diff --git a/scraper/zookeeperscraper/testdata/scraper/correctness-v3.7.2.yaml b/scraper/zookeeperscraper/testdata/scraper/correctness-v3.7.2.yaml new file mode 100644 index 0000000000000..c15fe6b924a0b --- /dev/null +++ b/scraper/zookeeperscraper/testdata/scraper/correctness-v3.7.2.yaml @@ -0,0 +1,148 @@ +resourceMetrics: + - resource: + attributes: + - key: server.state + value: + stringValue: standalone + - key: zk.version + value: + stringValue: 3.7.2-a055d78707164783287056086786315873919992 + scopeMetrics: + - metrics: + - description: Number of active clients connected to a ZooKeeper server. + name: zookeeper.connection.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "3" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{connections}' + - description: Number of ephemeral nodes that a ZooKeeper server has in its data tree. + name: zookeeper.data_tree.ephemeral_node.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{nodes}' + - description: Size of data in bytes that a ZooKeeper server has in its data tree. + name: zookeeper.data_tree.size + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "44" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: By + - description: Maximum number of file descriptors that a ZooKeeper server can open. + gauge: + dataPoints: + - asInt: "1048576" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: zookeeper.file_descriptor.limit + unit: '{file_descriptors}' + - description: Number of file descriptors that a ZooKeeper server has open. + name: zookeeper.file_descriptor.open + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "68" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{file_descriptors}' + - description: Number of times fsync duration has exceeded warning threshold. + name: zookeeper.fsync.exceeded_threshold.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + isMonotonic: true + unit: '{events}' + - description: Average time in milliseconds for requests to be processed. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: zookeeper.latency.avg + unit: ms + - description: Maximum time in milliseconds for requests to be processed. + gauge: + dataPoints: + - asInt: "47" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: zookeeper.latency.max + unit: ms + - description: Minimum time in milliseconds for requests to be processed. + gauge: + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: zookeeper.latency.min + unit: ms + - description: The number of ZooKeeper packets received or sent by a server. + name: zookeeper.packet.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "156" + attributes: + - key: direction + value: + stringValue: received + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + - asInt: "155" + attributes: + - key: direction + value: + stringValue: sent + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + isMonotonic: true + unit: '{packets}' + - description: Number of currently executing requests. + name: zookeeper.request.active + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "0" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{requests}' + - description: Response from zookeeper ruok command + gauge: + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + name: zookeeper.ruok + unit: "1" + - description: Number of watches placed on Z-Nodes on a ZooKeeper server. + name: zookeeper.watch.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "1" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{watches}' + - description: Number of z-nodes that a ZooKeeper server has in its data tree. + name: zookeeper.znode.count + sum: + aggregationTemporality: 2 + dataPoints: + - asInt: "5" + startTimeUnixNano: "1000000" + timeUnixNano: "2000000" + unit: '{znodes}' + scope: + name: github.com/open-telemetry/opentelemetry-collector-contrib/scraper/zookeeperscraper + version: latest