Skip to content

Commit 17d3c6f

Browse files
committed
Make regex dependency optional
`regex` was used only in four trivial cases that could be implemented more simply, either naively or using `memchr`, without losing performance. As such, the dependency needlessly increases build time, binary size, and attack surface. This change makes `regex` optional and defaults to the naive/`memchr` implementations. This *improves* performance a bit. The dependency could've been removed entirely, but it was kept in case a regression is discovered on another platform and to make comparing performance easier. It can be removed in the future if the new code proves to be reliable.
1 parent e1b197b commit 17d3c6f

File tree

3 files changed

+83
-23
lines changed

3 files changed

+83
-23
lines changed

Cargo.toml

+7-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@ lazy_static = "^1.4"
3131
libc = { version = "^0.2", optional = true }
3232
parking_lot = "^0.11"
3333
protobuf = { version = "^2.0", optional = true }
34-
regex = "^1.3"
34+
# DO NOT RELY ON THIS FEATURE TO STAY AVAILABLE!
35+
# It doesn't change the API.
36+
# Intended for testing/debugging only.
37+
# It can affect the performance.
38+
# Report any interesting findings, especially if the performance IMPROVES with `regex` turned ON.
39+
regex = { version = "^1.3", optional = true }
40+
memchr = "^2.3"
3541
reqwest = { version = "^0.11", features = ["blocking"], optional = true }
3642
thiserror = "^1.0"
3743

src/desc.rs

+52-13
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,71 @@ use std::collections::{BTreeSet, HashMap};
55
use std::hash::Hasher;
66

77
use fnv::FnvHasher;
8-
use regex::Regex;
98

109
use crate::errors::{Error, Result};
1110
use crate::metrics::SEPARATOR_BYTE;
1211
use crate::proto::LabelPair;
1312

14-
// Details of required format are at
15-
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
16-
fn is_valid_metric_name(name: &str) -> bool {
17-
lazy_static! {
18-
static ref VALIDATOR: Regex =
19-
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
13+
#[cfg(not(feature = "regex"))]
14+
mod validation {
15+
fn matches_charset_without_colon(c: char) -> bool {
16+
c.is_ascii_alphabetic() || c == '_'
2017
}
2118

22-
VALIDATOR.is_match(name)
19+
fn matches_charset_with_colon(c: char) -> bool {
20+
matches_charset_without_colon(c) || c == ':'
21+
}
22+
23+
// Accepts `input` only if all of the following hold:
24+
// - it contains at least one char and `charset_validator` returns true for the zeroth char;
25+
// - every remaining char either passes `charset_validator` or is an ASCII digit
26+
// (`0`-`9`).
27+
// Equivalent to regex ^[?][?0-9]*$ where ? denotes char set as validated by charset_validator
28+
fn is_valid_ident<F: FnMut(char) -> bool>(input: &str, mut charset_validator: F) -> bool {
29+
let mut chars = input.chars();
30+
let zeroth = chars.next();
31+
zeroth
32+
.and_then(|zeroth| if charset_validator(zeroth) { Some(chars.all(|c| charset_validator(c) || c.is_digit(10))) } else { None })
33+
.unwrap_or(false)
34+
}
35+
36+
// Details of required format are at
37+
// https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
38+
pub(super) fn is_valid_metric_name(name: &str) -> bool {
39+
is_valid_ident(name, matches_charset_with_colon)
40+
}
41+
42+
pub(super) fn is_valid_label_name(name: &str) -> bool {
43+
is_valid_ident(name, matches_charset_without_colon)
44+
}
2345
}
2446

25-
fn is_valid_label_name(name: &str) -> bool {
26-
lazy_static! {
27-
static ref VALIDATOR: Regex =
28-
Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
47+
#[cfg(feature = "regex")]
48+
mod validation {
49+
use regex::Regex;
50+
51+
pub(super) fn is_valid_metric_name(name: &str) -> bool {
52+
lazy_static! {
53+
static ref VALIDATOR: Regex =
54+
Regex::new("^[a-zA-Z_:][a-zA-Z0-9_:]*$").expect("Regex to be valid.");
55+
}
56+
57+
VALIDATOR.is_match(name)
2958
}
3059

31-
VALIDATOR.is_match(name)
60+
pub(super) fn is_valid_label_name(name: &str) -> bool {
61+
lazy_static! {
62+
static ref VALIDATOR:
63+
Regex = Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").expect("Regex to be valid.");
64+
}
65+
66+
VALIDATOR.is_match(name)
67+
68+
}
3269
}
3370

71+
use validation::*;
72+
3473
/// The descriptor used by every Prometheus [`Metric`](crate::core::Metric). It is essentially
3574
/// the immutable meta-data of a metric. The normal metric implementations
3675
/// included in this package manage their [`Desc`] under the hood.

src/encoder/text.rs

+24-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// Copyright 2019 TiKV Project Authors. Licensed under Apache-2.0.
22

3-
use regex::{Match, Regex};
43
use std::borrow::Cow;
54
use std::io::Write;
65

@@ -216,26 +215,42 @@ fn label_pairs_to_text(
216215
Ok(())
217216
}
218217

219-
/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
220-
/// `include_double_quote` is true.
221-
///
222-
/// Implementation adapted from
223-
/// https://lise-henry.github.io/articles/optimising_strings.html
224-
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
218+
#[cfg(feature = "regex")]
219+
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
220+
use regex::{Match, Regex};
221+
225222
// Regex compilation is expensive. Use `lazy_static` to compile the regexes
226223
// once per process lifetime and not once per function invocation.
227224
lazy_static! {
228225
static ref ESCAPER: Regex = Regex::new("(\\\\|\n)").expect("Regex to be valid.");
229226
static ref QUOTED_ESCAPER: Regex = Regex::new("(\\\\|\n|\")").expect("Regex to be valid.");
230227
}
231228

232-
let first_occurence = if include_double_quote {
229+
if include_double_quote {
233230
QUOTED_ESCAPER.find(v)
234231
} else {
235232
ESCAPER.find(v)
236233
}
237234
.as_ref()
238-
.map(Match::start);
235+
.map(Match::start)
236+
}
237+
238+
#[cfg(not(feature = "regex"))]
239+
fn find_first_occurence(v: &str, include_double_quote: bool) -> Option<usize> {
240+
if include_double_quote {
241+
memchr::memchr3(b'\\', b'\n', b'\"', v.as_bytes())
242+
} else {
243+
memchr::memchr2(b'\\', b'\n', v.as_bytes())
244+
}
245+
}
246+
247+
/// `escape_string` replaces `\` by `\\`, new line character by `\n`, and `"` by `\"` if
248+
/// `include_double_quote` is true.
249+
///
250+
/// Implementation adapted from
251+
/// https://lise-henry.github.io/articles/optimising_strings.html
252+
fn escape_string(v: &str, include_double_quote: bool) -> Cow<'_, str> {
253+
let first_occurence = find_first_occurence(v, include_double_quote);
239254

240255
if let Some(first) = first_occurence {
241256
let mut escaped = String::with_capacity(v.len() * 2);

0 commit comments

Comments
 (0)