Skip to content
Draft

test #24290

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from prometheus_client.metrics_core import Metric
from prometheus_client.parser import _parse_sample, _replace_help_escaping

import datadog_checks.base.checks.openmetrics.parser_optimizations # noqa: F401


def text_fd_to_metric_families(fd):
raw_lines, input_lines = tee(fd, 2)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# (C) Datadog, Inc. 2025-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
"""Optimized replacements for prometheus_client parser hot-path functions.

The prometheus_client v0.22.0+ parser introduced character-by-character scanning
via _next_unquoted_char() for UTF-8 support, causing a ~3-5x performance regression
(see https://github.com/prometheus/client_python/issues/1114).

This module replaces _next_unquoted_char with a version that uses str.find() to
jump directly to candidate characters instead of iterating character-by-character,
restoring near-original performance. Quoted regions are still honored: the scan
jumps over each double-quoted span (treating backslash-escaped quotes as part of
the value), so structural characters inside quoted label values are never returned.
"""

import string

import prometheus_client.parser as _prom_parser


def _is_char_escaped(text, pos):
"""Return True if the character at pos is preceded by an odd number of backslashes."""
num_bslashes = 0
while pos > num_bslashes and text[pos - 1 - num_bslashes] == '\\':
num_bslashes += 1
return num_bslashes % 2 == 1


def _next_unquoted_char(text, chs, startidx=0):
"""Find the next unquoted occurrence of any character in chs.

Uses str.find() to jump to candidate characters, skipping over quoted regions.
"""
if chs is None:
chs = string.whitespace

i = startidx
n = len(text)

while i < n:
best = -1
for ch in chs:
p = text.find(ch, i)
if p != -1 and (best == -1 or p < best):
best = p

# Find the next unescaped opening quote
q = text.find('"', i)
while q != -1 and _is_char_escaped(text, q):
q = text.find('"', q + 1)

# If no quote comes before the best candidate, return it directly
if q == -1 or (best != -1 and best < q):
return best

# A quoted region starts before the candidate; skip over it
close = text.find('"', q + 1)
while close != -1 and _is_char_escaped(text, close):
close = text.find('"', close + 1)

if close == -1:
return -1

i = close + 1

return -1


def apply():
    """Install the optimized ``_next_unquoted_char`` into prometheus_client.

    The replacement is assigned onto ``prometheus_client.parser`` and, when it
    can be imported, onto ``prometheus_client.openmetrics.parser`` as well.
    A ``_dd_optimized`` marker set on ``prometheus_client.parser`` makes
    repeated calls a no-op.
    """
    already_patched = getattr(_prom_parser, '_dd_optimized', False)
    if not already_patched:
        _prom_parser._next_unquoted_char = _next_unquoted_char
        _prom_parser._dd_optimized = True

        # Best effort: the OpenMetrics parser is patched only when it is
        # importable; failures here are deliberately ignored.
        try:
            import prometheus_client.openmetrics.parser as _om_parser

            _om_parser._next_unquoted_char = _next_unquoted_char
        except (ImportError, AttributeError):
            pass


# Patch at import time so that importing this module is enough to install the
# optimized function; apply() itself guards against being run twice.
apply()
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from prometheus_client.parser import text_fd_to_metric_families as parse_prometheus
from requests.exceptions import ConnectionError

import datadog_checks.base.checks.openmetrics.parser_optimizations # noqa: F401
from datadog_checks.base.agent import datadog_agent
from datadog_checks.base.checks.openmetrics.v2.first_scrape_handler import first_scrape_handler
from datadog_checks.base.checks.openmetrics.v2.labels import LabelAggregator, get_label_normalizer
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
# (C) Datadog, Inc. 2025-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
"""Tests for the optimized _next_unquoted_char replacement.

Checks the optimized version against hard-coded expected indices for
representative inputs, and parses complete metric payloads through the
patched parser.
"""

from datadog_checks.base.checks.openmetrics.parser_optimizations import (
_next_unquoted_char,
)


class TestNextUnquotedChar:
    """Unit checks for the str.find()-based _next_unquoted_char."""

    def test_find_single_char(self):
        found = _next_unquoted_char('foo{bar="baz"} 1', '{')
        assert found == 3

    def test_find_closing_brace(self):
        found = _next_unquoted_char('bar="baz"} 1', '}')
        assert found == 9

    def test_find_equals(self):
        found = _next_unquoted_char('label="value"', '=')
        assert found == 5

    def test_find_comma(self):
        found = _next_unquoted_char('a="1",b="2"', ',')
        assert found == 5

    def test_find_space(self):
        found = _next_unquoted_char('metric{l="v"} 42', ' ')
        assert found == 13

    def test_find_multiple_targets(self):
        found = _next_unquoted_char('label=value,next', '=,}')
        assert found == 5

    def test_find_multiple_targets_comma_first(self):
        found = _next_unquoted_char('value,next=foo', '=,}')
        assert found == 5

    def test_find_multiple_targets_brace(self):
        found = _next_unquoted_char('value}', '=,}')
        assert found == 5

    def test_not_found(self):
        found = _next_unquoted_char('no_special_chars', '{')
        assert found == -1

    def test_empty_string(self):
        found = _next_unquoted_char('', '{')
        assert found == -1

    def test_startidx(self):
        found = _next_unquoted_char('a{b{c', '{', 2)
        assert found == 3

    def test_startidx_at_target(self):
        found = _next_unquoted_char('a{b', '{', 1)
        assert found == 1

    def test_startidx_past_end(self):
        found = _next_unquoted_char('abc', '{', 10)
        assert found == -1

    def test_whitespace_default(self):
        # chs=None selects the whitespace character set
        found = _next_unquoted_char('foo bar', None)
        assert found == 3

    def test_whitespace_tab(self):
        found = _next_unquoted_char('foo\tbar', None)
        assert found == 3

    def test_first_char_is_target(self):
        found = _next_unquoted_char('{foo}', '{')
        assert found == 0

    def test_last_char_is_target(self):
        found = _next_unquoted_char('foo}', '}')
        assert found == 3

    def test_multiple_occurrences_returns_first(self):
        found = _next_unquoted_char('a{b{c', '{')
        assert found == 1

    def test_skip_target_char_inside_quotes(self):
        # The comma inside the quoted value is ignored; the one after it wins
        found = _next_unquoted_char('a="apn,gw",b', ',')
        assert found == 10

    def test_skip_brace_inside_quotes(self):
        found = _next_unquoted_char('label="val}ue"}', '}')
        assert found == 14

    def test_skip_equals_inside_quotes(self):
        found = _next_unquoted_char('label="a=b"} 1', '}')
        assert found == 11

    def test_escaped_quote_not_treated_as_delimiter(self):
        # A backslash-escaped quote stays inside the quoted region
        found = _next_unquoted_char(r'label="val\"still,inside",next', ',')
        assert found == 25


class TestNextUnquotedCharWithRealMetrics:
    """Checks against lines shaped like real Prometheus exposition output."""

    def test_simple_gauge(self):
        sample = 'envoy_server_live 1'
        brace_at = _next_unquoted_char(sample, '{')
        assert brace_at == -1
        space_at = _next_unquoted_char(sample, ' ')
        assert space_at == 17

    def test_labeled_metric(self):
        sample = 'envoy_cluster_upstream_cx_active{envoy_cluster_name="service1"} 0'
        open_at = _next_unquoted_char(sample, '{')
        assert open_at == 32
        close_at = _next_unquoted_char(sample, '}', 33)
        assert close_at == 62

    def test_multi_label_metric(self):
        sample = 'http_requests_total{method="GET",code="200"} 1027'
        open_at = _next_unquoted_char(sample, '{')
        assert open_at == 19
        equals_at = _next_unquoted_char(sample, '=', 20)
        assert equals_at == 26

        # Same searches on the label section alone
        label_section = 'method="GET",code="200"'
        first_term = _next_unquoted_char(label_section, '=,}')
        assert first_term == 6
        separator_at = _next_unquoted_char(label_section, ',}', 12)
        assert separator_at == 12

    def test_histogram_bucket(self):
        sample = 'http_request_duration_seconds_bucket{le="0.5"} 24054'
        open_at = _next_unquoted_char(sample, '{')
        assert open_at == 36
        close_at = _next_unquoted_char(sample, '}', 37)
        assert close_at == 45

    def test_help_line_split(self):
        sample = '# HELP http_requests_total The total number of HTTP requests.'
        first_space = _next_unquoted_char(sample, None)
        assert first_space == 1

    def test_type_line_split(self):
        sample = '# TYPE http_requests_total counter'
        first_space = _next_unquoted_char(sample, None)
        assert first_space == 1


class TestParseFullMetricText:
    """End-to-end checks that feed whole payloads through the patched parser."""

    @staticmethod
    def _families(payload):
        """Parse ``payload`` and return the metric families as a list."""
        from prometheus_client.parser import text_string_to_metric_families

        return list(text_string_to_metric_families(payload))

    def test_parse_simple_metrics(self):
        payload = '# HELP test_gauge A test.\n# TYPE test_gauge gauge\ntest_gauge 42\n'
        parsed = self._families(payload)
        assert len(parsed) == 1
        gauge = parsed[0]
        assert gauge.name == 'test_gauge'
        assert gauge.samples[0].value == 42

    def test_parse_labeled_metrics(self):
        payload = (
            '# HELP http_requests_total Total requests.\n'
            '# TYPE http_requests_total counter\n'
            'http_requests_total{method="GET",code="200"} 1027\n'
            'http_requests_total{method="POST",code="200"} 3\n'
        )
        parsed = self._families(payload)
        assert len(parsed) == 1
        counter = parsed[0]
        assert len(counter.samples) == 2
        assert counter.samples[0].labels == {'method': 'GET', 'code': '200'}
        assert counter.samples[0].value == 1027
        assert counter.samples[1].labels == {'method': 'POST', 'code': '200'}

    def test_parse_histogram(self):
        payload = (
            '# HELP rpc_duration_seconds RPC duration.\n'
            '# TYPE rpc_duration_seconds histogram\n'
            'rpc_duration_seconds_bucket{le="0.5"} 2000\n'
            'rpc_duration_seconds_bucket{le="1.0"} 2500\n'
            'rpc_duration_seconds_bucket{le="+Inf"} 3000\n'
            'rpc_duration_seconds_sum 5000\n'
            'rpc_duration_seconds_count 3000\n'
        )
        parsed = self._families(payload)
        assert len(parsed) == 1
        histogram = parsed[0]
        assert histogram.type == 'histogram'
        assert len(histogram.samples) == 5

    def test_parse_escaped_label_value(self):
        payload = '# HELP test_metric A test.\n# TYPE test_metric gauge\ntest_metric{label="value with \\"quotes\\""} 1\n'
        parsed = self._families(payload)
        assert len(parsed) == 1
        assert parsed[0].samples[0].labels == {'label': 'value with "quotes"'}

    def test_parse_multiple_families(self):
        payload = (
            '# HELP gauge_one First.\n'
            '# TYPE gauge_one gauge\n'
            'gauge_one 1\n'
            '# HELP gauge_two Second.\n'
            '# TYPE gauge_two gauge\n'
            'gauge_two{env="prod"} 2\n'
        )
        parsed = self._families(payload)
        assert len(parsed) == 2
        assert parsed[0].name == 'gauge_one'
        assert parsed[1].name == 'gauge_two'

    def test_parse_empty_label_value(self):
        payload = '# HELP test_metric A test.\n# TYPE test_metric gauge\ntest_metric{label=""} 1\n'
        parsed = self._families(payload)
        assert parsed[0].samples[0].labels == {'label': ''}

    def test_parse_newline_in_label_value(self):
        payload = '# HELP test_metric A test.\n# TYPE test_metric gauge\ntest_metric{label="line1\\nline2"} 1\n'
        parsed = self._families(payload)
        assert parsed[0].samples[0].labels == {'label': 'line1\nline2'}

    def test_parse_comma_in_label_value(self):
        payload = (
            '# HELP apn_active_connections Active connections.\n'
            '# TYPE apn_active_connections gauge\n'
            'apn_active_connections{func="apn,gw",proto="tcp"} 8\n'
        )
        parsed = self._families(payload)
        assert len(parsed) == 1
        sample = parsed[0].samples[0]
        assert sample.labels == {'func': 'apn,gw', 'proto': 'tcp'}
        assert sample.value == 8
Loading