Skip to content

Commit 3bc46fc

Browse files
committed
feat(tag_cardinality_limit): add per-tag cache_size_per_key override in probabilistic mode
1 parent 5d41252 commit 3bc46fc

5 files changed

Lines changed: 192 additions & 14 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The `tag_cardinality_limit` transform now lets you set `cache_size_per_key` on individual per-tag entries whose `mode` is `limit_override`. This allows tuning the bloom filter size for specific high-cardinality tags without affecting the rest of the config. The override only takes effect when the tracking algorithm is `probabilistic`; it is ignored in `exact` mode. The field is optional and inherits from the enclosing per-metric or global config when unset.
2+
3+
authors: ArunPiduguDD

src/transforms/tag_cardinality_limit/config.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ impl OverrideMode {
212212
/// environment:
213213
/// mode: limit_override # track with a per-tag cap
214214
/// value_limit: 3
215+
/// high_cardinality_tag:
216+
/// mode: limit_override
217+
/// value_limit: 1000
218+
/// cache_size_per_key: 102400 # larger bloom filter for this tag
215219
/// trace_id:
216220
/// mode: excluded # opt out of tracking entirely
217221
/// ```
@@ -225,19 +229,24 @@ pub struct PerTagConfig {
225229

226230
/// Mode applied to a specific tag key within a per-metric override.
227231
///
228-
/// The tracking algorithm (`exact`/`probabilistic`), `cache_size_per_key`,
229-
/// `limit_exceeded_action`, and `internal_metrics` are always inherited from the
230-
/// enclosing per-metric configuration.
232+
/// The tracking algorithm (`exact`/`probabilistic`), `limit_exceeded_action`, and
233+
/// `internal_metrics` are inherited from the enclosing per-metric (or global) configuration.
234+
/// `cache_size_per_key` may optionally be overridden per tag when probabilistic mode is in use.
231235
#[configurable_component]
232236
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
233237
#[serde(tag = "mode", rename_all = "snake_case", deny_unknown_fields)]
234238
#[configurable(metadata(docs::enum_tag_description = "Controls how this tag key is handled."))]
235239
pub enum PerTagMode {
236-
/// Track this tag with a per-tag value limit. The enclosing per-metric tracking
237-
/// algorithm and all other settings still apply.
240+
/// Track this tag with a per-tag value limit. Settings not overridden here are inherited
241+
/// from the enclosing config.
238242
LimitOverride {
239243
/// Maximum number of distinct values to accept for this tag key.
240244
value_limit: usize,
245+
246+
/// Override the bloom filter cache size for this specific tag key.
247+
/// Only effective in `probabilistic` mode. Inherits from the enclosing config when unset.
248+
#[serde(default)]
249+
cache_size_per_key: Option<usize>,
241250
},
242251
/// Opt this tag out of cardinality tracking entirely. All values pass through
243252
/// without being recorded or checked against any `value_limit`.

src/transforms/tag_cardinality_limit/mod.rs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@ use crate::event::metric::TagValueSet;
2626

2727
type MetricId = (Option<String>, String);
2828

29+
/// Applies a per-tag `cache_size_per_key` override to a `Mode`. No-op in exact mode or when
30+
/// `override_size` is `None`.
31+
const fn apply_cache_size_override(mode: Mode, override_size: Option<usize>) -> Mode {
32+
match (mode, override_size) {
33+
(Mode::Probabilistic(_), Some(size)) => Mode::Probabilistic(BloomFilterConfig {
34+
cache_size_per_key: size,
35+
}),
36+
_ => mode,
37+
}
38+
}
39+
2940
/// Outcome of applying tag cardinality tracking to a tag value.
3041
#[derive(Debug, Eq, PartialEq)]
3142
enum AcceptResult {
@@ -126,13 +137,14 @@ impl TagCardinalityLimit {
126137
if let Some(per_tag) = per_metric.per_tag_limits.get(tag_key) {
127138
match per_tag.mode {
128139
PerTagMode::Excluded => return TagSettings::Excluded,
129-
PerTagMode::LimitOverride { value_limit } => {
130-
// Tracking algorithm and all other settings are always inherited
131-
// from the per-metric config.
140+
PerTagMode::LimitOverride {
141+
value_limit,
142+
cache_size_per_key,
143+
} => {
132144
return TagSettings::Tracked(Inner {
133145
value_limit,
134146
limit_exceeded_action,
135-
mode: metric_mode,
147+
mode: apply_cache_size_override(metric_mode, cache_size_per_key),
136148
internal_metrics,
137149
});
138150
}
@@ -152,8 +164,12 @@ impl TagCardinalityLimit {
152164
let global = self.config.global;
153165
match self.config.per_tag_limits.get(tag_key).map(|c| c.mode) {
154166
Some(PerTagMode::Excluded) => TagSettings::Excluded,
155-
Some(PerTagMode::LimitOverride { value_limit }) => TagSettings::Tracked(Inner {
167+
Some(PerTagMode::LimitOverride {
168+
value_limit,
169+
cache_size_per_key,
170+
}) => TagSettings::Tracked(Inner {
156171
value_limit,
172+
mode: apply_cache_size_override(global.mode, cache_size_per_key),
157173
..global
158174
}),
159175
None => TagSettings::Tracked(global),

src/transforms/tag_cardinality_limit/tests.rs

Lines changed: 142 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,10 @@ fn max_tracked_keys_caps_across_per_metric_buckets() {
849849

850850
fn make_per_tag(value_limit: usize) -> PerTagConfig {
851851
PerTagConfig {
852-
mode: PerTagMode::LimitOverride { value_limit },
852+
mode: PerTagMode::LimitOverride {
853+
value_limit,
854+
cache_size_per_key: None,
855+
},
853856
}
854857
}
855858

@@ -1127,7 +1130,10 @@ fn tag_excluded_unbounded_sibling_limited() {
11271130
#[test]
11281131
fn per_tag_limit_override_caps_at_explicit_value() {
11291132
let per_tag = PerTagConfig {
1130-
mode: PerTagMode::LimitOverride { value_limit: 2 },
1133+
mode: PerTagMode::LimitOverride {
1134+
value_limit: 2,
1135+
cache_size_per_key: None,
1136+
},
11311137
};
11321138
let config = make_transform_hashset_with_per_metric_limits(
11331139
500,
@@ -1210,7 +1216,13 @@ per_metric_limits:
12101216
let per_metric = parsed.per_metric_limits.get("metric_a").unwrap();
12111217

12121218
let capped = per_metric.per_tag_limits.get("capped_tag").unwrap();
1213-
assert_eq!(capped.mode, PerTagMode::LimitOverride { value_limit: 10 });
1219+
assert_eq!(
1220+
capped.mode,
1221+
PerTagMode::LimitOverride {
1222+
value_limit: 10,
1223+
cache_size_per_key: None,
1224+
}
1225+
);
12141226

12151227
let excluded = per_metric.per_tag_limits.get("excluded_tag").unwrap();
12161228
assert_eq!(excluded.mode, PerTagMode::Excluded);
@@ -1456,8 +1468,134 @@ per_tag_limits:
14561468
let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize");
14571469

14581470
let capped = parsed.per_tag_limits.get("capped_tag").unwrap();
1459-
assert_eq!(capped.mode, PerTagMode::LimitOverride { value_limit: 10 });
1471+
assert_eq!(
1472+
capped.mode,
1473+
PerTagMode::LimitOverride {
1474+
value_limit: 10,
1475+
cache_size_per_key: None,
1476+
}
1477+
);
14601478

14611479
let excluded = parsed.per_tag_limits.get("excluded_tag").unwrap();
14621480
assert_eq!(excluded.mode, PerTagMode::Excluded);
14631481
}
1482+
1483+
// ============================================================================
1484+
// cache_size_per_key override tests
1485+
// ============================================================================
1486+
1487+
/// `apply_cache_size_override` replaces the bloom filter cache size when the mode is
1488+
/// probabilistic and an override is given; it returns the mode unchanged in all other cases.
1489+
#[test]
1490+
fn apply_cache_size_override_probabilistic_with_some() {
1491+
let base = Mode::Probabilistic(BloomFilterConfig {
1492+
cache_size_per_key: default_cache_size(),
1493+
});
1494+
let result = apply_cache_size_override(base, Some(1024));
1495+
assert_eq!(
1496+
result,
1497+
Mode::Probabilistic(BloomFilterConfig {
1498+
cache_size_per_key: 1024,
1499+
})
1500+
);
1501+
}
1502+
1503+
#[test]
1504+
fn apply_cache_size_override_exact_with_some_is_noop() {
1505+
let result = apply_cache_size_override(Mode::Exact, Some(1024));
1506+
assert_eq!(result, Mode::Exact);
1507+
}
1508+
1509+
#[test]
1510+
fn apply_cache_size_override_probabilistic_with_none_inherits() {
1511+
let base = Mode::Probabilistic(BloomFilterConfig {
1512+
cache_size_per_key: default_cache_size(),
1513+
});
1514+
let result = apply_cache_size_override(base, None);
1515+
assert_eq!(
1516+
result,
1517+
Mode::Probabilistic(BloomFilterConfig {
1518+
cache_size_per_key: default_cache_size(),
1519+
})
1520+
);
1521+
}
1522+
1523+
/// A per-tag `limit_override` under `per_metric_limits` with `cache_size_per_key` set deserializes correctly.
1524+
#[test]
1525+
fn per_tag_cache_size_per_key_deserializes() {
1526+
let yaml = r#"
1527+
value_limit: 5
1528+
mode: probabilistic
1529+
cache_size_per_key: 5120
1530+
per_metric_limits:
1531+
metric_a:
1532+
mode: probabilistic
1533+
cache_size_per_key: 5120
1534+
per_tag_limits:
1535+
big_tag:
1536+
mode: limit_override
1537+
value_limit: 100
1538+
cache_size_per_key: 32768
1539+
default_tag:
1540+
mode: limit_override
1541+
value_limit: 10
1542+
"#;
1543+
let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize");
1544+
let per_metric = parsed.per_metric_limits.get("metric_a").unwrap();
1545+
1546+
let big_tag = per_metric.per_tag_limits.get("big_tag").unwrap();
1547+
assert_eq!(
1548+
big_tag.mode,
1549+
PerTagMode::LimitOverride {
1550+
value_limit: 100,
1551+
cache_size_per_key: Some(32768),
1552+
}
1553+
);
1554+
1555+
// Omitting the field defaults to None (inherits from enclosing config).
1556+
let default_tag = per_metric.per_tag_limits.get("default_tag").unwrap();
1557+
assert_eq!(
1558+
default_tag.mode,
1559+
PerTagMode::LimitOverride {
1560+
value_limit: 10,
1561+
cache_size_per_key: None,
1562+
}
1563+
);
1564+
}
1565+
1566+
/// A global `per_tag_limits` `limit_override` with `cache_size_per_key` set deserializes correctly.
1567+
#[test]
1568+
fn global_per_tag_cache_size_per_key_deserializes() {
1569+
let yaml = r#"
1570+
value_limit: 5
1571+
mode: probabilistic
1572+
cache_size_per_key: 5120
1573+
per_tag_limits:
1574+
big_tag:
1575+
mode: limit_override
1576+
value_limit: 100
1577+
cache_size_per_key: 32768
1578+
default_tag:
1579+
mode: limit_override
1580+
value_limit: 10
1581+
"#;
1582+
let parsed: Config = serde_yaml::from_str(yaml).expect("yaml should deserialize");
1583+
1584+
let big_tag = parsed.per_tag_limits.get("big_tag").unwrap();
1585+
assert_eq!(
1586+
big_tag.mode,
1587+
PerTagMode::LimitOverride {
1588+
value_limit: 100,
1589+
cache_size_per_key: Some(32768),
1590+
}
1591+
);
1592+
1593+
let default_tag = parsed.per_tag_limits.get("default_tag").unwrap();
1594+
assert_eq!(
1595+
default_tag.mode,
1596+
PerTagMode::LimitOverride {
1597+
value_limit: 10,
1598+
cache_size_per_key: None,
1599+
}
1600+
);
1601+
}

website/cue/reference/components/transforms/generated/tag_cardinality_limit.cue

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ generated: components: transforms: tag_cardinality_limit: configuration: {
154154
description: "An individual tag configuration."
155155
required: true
156156
type: object: options: {
157+
cache_size_per_key: {
158+
description: "Override the bloom filter cache size for this specific tag key. Only effective in `probabilistic` mode. Inherits from the enclosing config when unset."
159+
relevant_when: "mode = \"limit_override\""
160+
required: false
161+
type: uint: {}
162+
}
157163
mode: {
158164
description: "Controls how this tag key is handled."
159165
required: true
@@ -199,6 +205,12 @@ generated: components: transforms: tag_cardinality_limit: configuration: {
199205
description: "An individual tag configuration."
200206
required: true
201207
type: object: options: {
208+
cache_size_per_key: {
209+
description: "Override the bloom filter cache size for this specific tag key. Only effective in `probabilistic` mode. Inherits from the enclosing config when unset."
210+
relevant_when: "mode = \"limit_override\""
211+
required: false
212+
type: uint: {}
213+
}
202214
mode: {
203215
description: "Controls how this tag key is handled."
204216
required: true

0 commit comments

Comments
 (0)