Skip to content

Commit 45b6d2b

Browse files
ArunPiduguDD and claude authored
enhancement(tag_cardinality_limit transform): Add more fine grained controls tag cardinality (#25360)
* Implement per tag control in tag cardinality processor

* refactor(tag_cardinality_limit): simplify per-tag config to LimitOverride/Excluded enum

  Replace the PerTagInner struct (with ambiguous optional fields) with a PerTagConfig struct wrapping a PerTagMode tagged enum:

  - mode: limit_override + value_limit: N — track with explicit cap
  - mode: excluded — opt out of tracking entirely

  Removes PerTagInner, PerTagModeKind, and the old `excluded: bool` field. The new shape is unambiguous: every per-tag entry must be one or the other. YAML format is consistent with per-metric `mode: excluded`.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Make tag cardinality limit new public

* Update comments

* remove schema value limit min value enforcement and handle limit equals zero case

* add additional unit test covering zero limit case

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e177273 commit 45b6d2b

6 files changed

Lines changed: 747 additions & 122 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
The `tag_cardinality_limit` transform gained two new configuration capabilities:
2+
3+
- **Per-tag overrides** (`per_tag_limits`): configure cardinality limits per tag key within a metric, or exclude individual tags from tracking.
4+
- **Metric exclusion**: opt entire metrics out of cardinality tracking via `mode: excluded` in `per_metric_limits`.
5+
6+
authors: ArunPiduguDD

src/transforms/tag_cardinality_limit/config.rs

Lines changed: 158 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,7 @@ use crate::{
1111
transforms::{Transform, tag_cardinality_limit::TagCardinalityLimit},
1212
};
1313

14-
/// Configuration of internal metrics for the TagCardinalityLimit transform.
15-
#[configurable_component]
16-
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
17-
#[serde(deny_unknown_fields)]
18-
pub struct InternalMetricsConfig {
19-
/// Whether to include extended tags (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric.
20-
///
21-
/// This helps identify which metrics and tag keys are hitting cardinality limits, but can significantly
22-
/// increase metric cardinality. Defaults to `false` because these tags have potentially unbounded cardinality.
23-
#[serde(default = "default_include_extended_tags")]
24-
#[configurable(metadata(docs::human_name = "Include Extended Tags"))]
25-
pub include_extended_tags: bool,
26-
}
14+
// Top-level configuration
2715

2816
/// Configuration for the `tag_cardinality_limit` transform.
2917
#[configurable_component(transform(
@@ -79,7 +67,7 @@ pub enum TrackingScope {
7967
PerMetric,
8068
}
8169

82-
/// Configuration for the `tag_cardinality_limit` transform for a specific group of metrics.
70+
/// Configuration block used at the global level.
8371
#[configurable_component]
8472
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
8573
pub struct Inner {
@@ -99,7 +87,7 @@ pub struct Inner {
9987
pub internal_metrics: InternalMetricsConfig,
10088
}
10189

102-
/// Controls the approach taken for tracking tag cardinality.
90+
/// Controls the approach taken for tracking tag cardinality at the global level.
10391
#[configurable_component]
10492
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
10593
#[serde(tag = "mode", rename_all = "snake_case", deny_unknown_fields)]
@@ -122,19 +110,131 @@ pub enum Mode {
122110
Probabilistic(BloomFilterConfig),
123111
}
124112

125-
/// Bloom filter configuration in probabilistic mode.
113+
/// Per-metric name tag cardinality limit configuration.
126114
#[configurable_component]
127-
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
128-
pub struct BloomFilterConfig {
129-
/// The size of the cache for detecting duplicate tags, in bytes.
115+
#[derive(Clone, Debug, Eq, PartialEq)]
116+
pub struct PerMetricConfig {
117+
/// Namespace of the metric this configuration refers to.
118+
#[serde(default)]
119+
pub namespace: Option<String>,
120+
121+
/// Per-tag-key overrides scoped to this metric. Each entry sets a `mode`:
122+
/// - `mode: limit_override` + `value_limit: N` — track with a per-tag cap.
123+
/// - `mode: excluded` — opt this tag out of tracking entirely.
130124
///
131-
/// The larger the cache size, the less likely it is to have a false positive, or a case where
132-
/// we allow a new value for tag even after we have reached the configured limits.
133-
#[serde(default = "default_cache_size")]
134-
#[configurable(metadata(docs::human_name = "Cache Size per Key"))]
135-
pub cache_size_per_key: usize,
125+
/// All other settings (tracking algorithm, `limit_exceeded_action`, etc.)
126+
/// are inherited from the enclosing per-metric configuration.
127+
/// Tags not listed here use the per-metric configuration.
128+
#[configurable(
129+
derived,
130+
metadata(docs::additional_props_description = "An individual tag configuration.")
131+
)]
132+
#[serde(default)]
133+
pub per_tag_limits: HashMap<String, PerTagConfig>,
134+
135+
#[serde(flatten)]
136+
pub config: OverrideInner,
136137
}
137138

139+
/// Configuration block used at per-metric level. Same shape as the global configuration but
140+
/// with `OverrideMode`, which adds `excluded` for opting that metric out of cardinality
141+
/// control entirely.
142+
#[configurable_component]
143+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
144+
pub struct OverrideInner {
145+
/// How many distinct values to accept for any given key. Ignored when `mode: excluded`.
146+
#[serde(default = "default_value_limit")]
147+
pub value_limit: usize,
148+
149+
#[configurable(derived)]
150+
#[serde(default = "default_limit_exceeded_action")]
151+
pub limit_exceeded_action: LimitExceededAction,
152+
153+
#[serde(flatten)]
154+
pub mode: OverrideMode,
155+
156+
#[configurable(derived)]
157+
#[serde(default)]
158+
pub internal_metrics: InternalMetricsConfig,
159+
}
160+
161+
/// Controls the approach taken for tracking tag cardinality at the per-metric level.
162+
/// Adds `excluded` to the global `Mode` variants to allow opting a metric out entirely.
163+
#[configurable_component]
164+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
165+
#[serde(tag = "mode", rename_all = "snake_case", deny_unknown_fields)]
166+
#[configurable(metadata(
167+
docs::enum_tag_description = "Controls the approach taken for tracking tag cardinality."
168+
))]
169+
pub enum OverrideMode {
170+
/// Tracks cardinality exactly. See `Mode::Exact` for details.
171+
Exact,
172+
173+
/// Tracks cardinality probabilistically. See `Mode::Probabilistic` for details.
174+
Probabilistic(BloomFilterConfig),
175+
176+
/// Skip cardinality tracking for this metric. All tag values pass through and nothing is
177+
/// limited. Other fields in this per-metric configuration are ignored when this is selected.
178+
Excluded,
179+
}
180+
181+
impl OverrideMode {
182+
/// Returns the equivalent global `Mode` if this scope is tracked, or `None` if excluded.
183+
pub const fn as_mode(&self) -> Option<Mode> {
184+
match self {
185+
OverrideMode::Exact => Some(Mode::Exact),
186+
OverrideMode::Probabilistic(b) => Some(Mode::Probabilistic(*b)),
187+
OverrideMode::Excluded => None,
188+
}
189+
}
190+
}
191+
192+
/// Per-tag cardinality configuration.
193+
///
194+
/// Specify `mode` to control how this tag is handled:
195+
///
196+
/// Example:
197+
/// ```yaml
198+
/// per_tag_limits:
199+
/// environment:
200+
/// mode: limit_override # track with a per-tag cap
201+
/// value_limit: 3
202+
/// trace_id:
203+
/// mode: excluded # opt out of tracking entirely
204+
/// ```
205+
#[configurable_component]
206+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
207+
pub struct PerTagConfig {
208+
#[configurable(derived)]
209+
#[serde(flatten)]
210+
pub mode: PerTagMode,
211+
}
212+
213+
/// Mode applied to a specific tag key within a per-metric override.
214+
///
215+
/// The tracking algorithm (`exact`/`probabilistic`), `cache_size_per_key`,
216+
/// `limit_exceeded_action`, and `internal_metrics` are always inherited from the
217+
/// enclosing per-metric configuration.
218+
#[configurable_component]
219+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
220+
#[serde(tag = "mode", rename_all = "snake_case", deny_unknown_fields)]
221+
#[configurable(metadata(docs::enum_tag_description = "Controls how this tag key is handled."))]
222+
pub enum PerTagMode {
223+
/// Track this tag with a per-tag value limit. The enclosing per-metric tracking
224+
/// algorithm and all other settings still apply.
225+
LimitOverride {
226+
/// Maximum number of distinct values to accept for this tag key.
227+
value_limit: usize,
228+
},
229+
/// Opt this tag out of cardinality tracking entirely. All values pass through
230+
/// without being recorded or checked against any `value_limit`.
231+
Excluded,
232+
}
233+
234+
// =============================================================================
235+
// Shared building blocks
236+
// =============================================================================
237+
138238
/// Possible actions to take when an event arrives that would exceed the cardinality limit for one
139239
/// or more of its tags.
140240
#[configurable_component]
@@ -148,26 +248,45 @@ pub enum LimitExceededAction {
148248
DropEvent,
149249
}
150250

151-
/// Tag cardinality limit configuration per metric name.
251+
/// Bloom filter configuration in probabilistic mode.
152252
#[configurable_component]
153-
#[derive(Clone, Debug, Eq, PartialEq)]
154-
pub struct PerMetricConfig {
155-
/// Namespace of the metric this configuration refers to.
156-
#[serde(default)]
157-
pub namespace: Option<String>,
158-
159-
#[serde(flatten)]
160-
pub config: Inner,
253+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
254+
pub struct BloomFilterConfig {
255+
/// The size of the cache for detecting duplicate tags, in bytes.
256+
///
257+
/// The larger the cache size, the less likely it is to have a false positive, or a case where
258+
/// we allow a new value for tag even after we have reached the configured limits.
259+
#[serde(default = "default_cache_size")]
260+
#[configurable(metadata(docs::human_name = "Cache Size per Key"))]
261+
pub cache_size_per_key: usize,
161262
}
162263

163-
const fn default_limit_exceeded_action() -> LimitExceededAction {
164-
LimitExceededAction::DropTag
264+
/// Configuration of internal metrics for the TagCardinalityLimit transform.
265+
#[configurable_component]
266+
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
267+
#[serde(deny_unknown_fields)]
268+
pub struct InternalMetricsConfig {
269+
/// Whether to include extended tags (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric.
270+
///
271+
/// This helps identify which metrics and tag keys are hitting cardinality limits, but can significantly
272+
/// increase metric cardinality. Defaults to `false` because these tags have potentially unbounded cardinality.
273+
#[serde(default = "default_include_extended_tags")]
274+
#[configurable(metadata(docs::human_name = "Include Extended Tags"))]
275+
pub include_extended_tags: bool,
165276
}
166277

278+
// =============================================================================
279+
// Defaults
280+
// =============================================================================
281+
167282
const fn default_value_limit() -> usize {
168283
500
169284
}
170285

286+
const fn default_limit_exceeded_action() -> LimitExceededAction {
287+
LimitExceededAction::DropTag
288+
}
289+
171290
const fn default_include_extended_tags() -> bool {
172291
false
173292
}
@@ -176,6 +295,10 @@ pub(crate) const fn default_cache_size() -> usize {
176295
5 * 1024 // 5KB
177296
}
178297

298+
// =============================================================================
299+
// Transform plumbing
300+
// =============================================================================
301+
179302
impl GenerateConfig for Config {
180303
fn generate_config() -> toml::Value {
181304
toml::Value::try_from(Self {

0 commit comments

Comments
 (0)