Skip to content

Commit 33e6828

Browse files
authored
Support UTF-8 in metric creation, parsing, and exposition (#1070)
part of #1013
Signed-off-by: Owen Williams <[email protected]>
1 parent c89624f commit 33e6828

12 files changed

+675
-381
lines changed

prometheus_client/exposition.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .openmetrics import exposition as openmetrics
2121
from .registry import CollectorRegistry, REGISTRY
2222
from .utils import floatToGoString
23+
from .validation import _is_valid_legacy_metric_name
2324

2425
__all__ = (
2526
'CONTENT_TYPE_LATEST',
@@ -247,19 +248,26 @@ class TmpServer(ThreadingWSGIServer):
247248
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
248249
"""Returns the metrics from the registry in latest text format as a string."""
249250

250-
def sample_line(line):
251-
if line.labels:
252-
labelstr = '{{{0}}}'.format(','.join(
251+
def sample_line(samples):
252+
if samples.labels:
253+
labelstr = '{0}'.format(','.join(
253254
['{}="{}"'.format(
254-
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
255-
for k, v in sorted(line.labels.items())]))
255+
openmetrics.escape_label_name(k), openmetrics._escape(v))
256+
for k, v in sorted(samples.labels.items())]))
256257
else:
257258
labelstr = ''
258259
timestamp = ''
259-
if line.timestamp is not None:
260+
if samples.timestamp is not None:
260261
# Convert to milliseconds.
261-
timestamp = f' {int(float(line.timestamp) * 1000):d}'
262-
return f'{line.name}{labelstr} {floatToGoString(line.value)}{timestamp}\n'
262+
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
263+
if _is_valid_legacy_metric_name(samples.name):
264+
if labelstr:
265+
labelstr = '{{{0}}}'.format(labelstr)
266+
return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
267+
maybe_comma = ''
268+
if labelstr:
269+
maybe_comma = ','
270+
return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
263271

264272
output = []
265273
for metric in registry.collect():
@@ -282,8 +290,8 @@ def sample_line(line):
282290
mtype = 'untyped'
283291

284292
output.append('# HELP {} {}\n'.format(
285-
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
286-
output.append(f'# TYPE {mname} {mtype}\n')
293+
openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
294+
output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n')
287295

288296
om_samples: Dict[str, List[str]] = {}
289297
for s in metric.samples:
@@ -299,9 +307,9 @@ def sample_line(line):
299307
raise
300308

301309
for suffix, lines in sorted(om_samples.items()):
302-
output.append('# HELP {}{} {}\n'.format(metric.name, suffix,
303-
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
304-
output.append(f'# TYPE {metric.name}{suffix} gauge\n')
310+
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix),
311+
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
312+
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n')
305313
output.extend(lines)
306314
return ''.join(output).encode('utf-8')
307315

prometheus_client/metrics.py

Lines changed: 7 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,21 @@
1010

1111
from . import values # retain this import style for testability
1212
from .context_managers import ExceptionCounter, InprogressTracker, Timer
13-
from .metrics_core import (
14-
Metric, METRIC_LABEL_NAME_RE, METRIC_NAME_RE,
15-
RESERVED_METRIC_LABEL_NAME_RE,
16-
)
13+
from .metrics_core import Metric
1714
from .registry import Collector, CollectorRegistry, REGISTRY
1815
from .samples import Exemplar, Sample
1916
from .utils import floatToGoString, INF
17+
from .validation import (
18+
_validate_exemplar, _validate_labelnames, _validate_metric_name,
19+
)
2020

2121
T = TypeVar('T', bound='MetricWrapperBase')
2222
F = TypeVar("F", bound=Callable[..., Any])
2323

2424

2525
def _build_full_name(metric_type, name, namespace, subsystem, unit):
26+
if not name:
27+
raise ValueError('Metric name should not be empty')
2628
full_name = ''
2729
if namespace:
2830
full_name += namespace + '_'
@@ -38,31 +40,6 @@ def _build_full_name(metric_type, name, namespace, subsystem, unit):
3840
return full_name
3941

4042

41-
def _validate_labelname(l):
42-
if not METRIC_LABEL_NAME_RE.match(l):
43-
raise ValueError('Invalid label metric name: ' + l)
44-
if RESERVED_METRIC_LABEL_NAME_RE.match(l):
45-
raise ValueError('Reserved label metric name: ' + l)
46-
47-
48-
def _validate_labelnames(cls, labelnames):
49-
labelnames = tuple(labelnames)
50-
for l in labelnames:
51-
_validate_labelname(l)
52-
if l in cls._reserved_labelnames:
53-
raise ValueError('Reserved label metric name: ' + l)
54-
return labelnames
55-
56-
57-
def _validate_exemplar(exemplar):
58-
runes = 0
59-
for k, v in exemplar.items():
60-
_validate_labelname(k)
61-
runes += len(k)
62-
runes += len(v)
63-
if runes > 128:
64-
raise ValueError('Exemplar labels have %d UTF-8 characters, exceeding the limit of 128')
65-
6643

6744
def _get_use_created() -> bool:
6845
return os.environ.get("PROMETHEUS_DISABLE_CREATED_SERIES", 'False').lower() not in ('true', '1', 't')
@@ -139,8 +116,7 @@ def __init__(self: T,
139116
self._documentation = documentation
140117
self._unit = unit
141118

142-
if not METRIC_NAME_RE.match(self._name):
143-
raise ValueError('Invalid metric name: ' + self._name)
119+
_validate_metric_name(self._name)
144120

145121
if self._is_parent():
146122
# Prepare the fields needed for child metrics.

prometheus_client/metrics_core.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
1-
import re
21
from typing import Dict, List, Optional, Sequence, Tuple, Union
32

43
from .samples import Exemplar, NativeHistogram, Sample, Timestamp
4+
from .validation import _validate_metric_name
55

66
METRIC_TYPES = (
77
'counter', 'gauge', 'summary', 'histogram',
88
'gaugehistogram', 'unknown', 'info', 'stateset',
99
)
10-
METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$')
11-
METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
12-
RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$')
1310

1411

1512
class Metric:
@@ -24,8 +21,7 @@ class Metric:
2421
def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
2522
if unit and not name.endswith("_" + unit):
2623
name += "_" + unit
27-
if not METRIC_NAME_RE.match(name):
28-
raise ValueError('Invalid metric name: ' + name)
24+
_validate_metric_name(name)
2925
self.name: str = name
3026
self.documentation: str = documentation
3127
self.unit: str = unit

prometheus_client/openmetrics/exposition.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33

44
from ..utils import floatToGoString
5+
from ..validation import (
6+
_is_valid_legacy_labelname, _is_valid_legacy_metric_name,
7+
)
58

69
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
710
"""Content type of the latest OpenMetrics text format"""
@@ -24,18 +27,27 @@ def generate_latest(registry):
2427
try:
2528
mname = metric.name
2629
output.append('# HELP {} {}\n'.format(
27-
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')))
28-
output.append(f'# TYPE {mname} {metric.type}\n')
30+
escape_metric_name(mname), _escape(metric.documentation)))
31+
output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n')
2932
if metric.unit:
30-
output.append(f'# UNIT {mname} {metric.unit}\n')
33+
output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n')
3134
for s in metric.samples:
32-
if s.labels:
33-
labelstr = '{{{0}}}'.format(','.join(
34-
['{}="{}"'.format(
35-
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
36-
for k, v in sorted(s.labels.items())]))
35+
if not _is_valid_legacy_metric_name(s.name):
36+
labelstr = escape_metric_name(s.name)
37+
if s.labels:
38+
labelstr += ', '
3739
else:
3840
labelstr = ''
41+
42+
if s.labels:
43+
items = sorted(s.labels.items())
44+
labelstr += ','.join(
45+
['{}="{}"'.format(
46+
escape_label_name(k), _escape(v))
47+
for k, v in items])
48+
if labelstr:
49+
labelstr = "{" + labelstr + "}"
50+
3951
if s.exemplar:
4052
if not _is_valid_exemplar_metric(metric, s):
4153
raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter")
@@ -59,16 +71,47 @@ def generate_latest(registry):
5971
timestamp = ''
6072
if s.timestamp is not None:
6173
timestamp = f' {s.timestamp}'
62-
output.append('{}{} {}{}{}\n'.format(
63-
s.name,
64-
labelstr,
65-
floatToGoString(s.value),
66-
timestamp,
67-
exemplarstr,
68-
))
74+
if _is_valid_legacy_metric_name(s.name):
75+
output.append('{}{} {}{}{}\n'.format(
76+
s.name,
77+
labelstr,
78+
floatToGoString(s.value),
79+
timestamp,
80+
exemplarstr,
81+
))
82+
else:
83+
output.append('{} {}{}{}\n'.format(
84+
labelstr,
85+
floatToGoString(s.value),
86+
timestamp,
87+
exemplarstr,
88+
))
6989
except Exception as exception:
7090
exception.args = (exception.args or ('',)) + (metric,)
7191
raise
7292

7393
output.append('# EOF\n')
7494
return ''.join(output).encode('utf-8')
95+
96+
97+
def escape_metric_name(s: str) -> str:
98+
"""Escapes the metric name and puts it in quotes iff the name does not
99+
conform to the legacy Prometheus character set.
100+
"""
101+
if _is_valid_legacy_metric_name(s):
102+
return s
103+
return '"{}"'.format(_escape(s))
104+
105+
106+
def escape_label_name(s: str) -> str:
107+
"""Escapes the label name and puts it in quotes iff the name does not
108+
conform to the legacy Prometheus character set.
109+
"""
110+
if _is_valid_legacy_labelname(s):
111+
return s
112+
return '"{}"'.format(_escape(s))
113+
114+
115+
def _escape(s: str) -> str:
116+
"""Performs backslash escaping on backslash, newline, and double-quote characters."""
117+
return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')

0 commit comments

Comments
 (0)