@@ -11,19 +11,7 @@ use crate::{
1111 transforms:: { Transform , tag_cardinality_limit:: TagCardinalityLimit } ,
1212} ;
1313
14- /// Configuration of internal metrics for the TagCardinalityLimit transform.
15- #[ configurable_component]
16- #[ derive( Clone , Copy , Debug , Default , Eq , PartialEq ) ]
17- #[ serde( deny_unknown_fields) ]
18- pub struct InternalMetricsConfig {
19- /// Whether to include extended tags (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric.
20- ///
21- /// This helps identify which metrics and tag keys are hitting cardinality limits, but can significantly
22- /// increase metric cardinality. Defaults to `false` because these tags have potentially unbounded cardinality.
23- #[ serde( default = "default_include_extended_tags" ) ]
24- #[ configurable( metadata( docs:: human_name = "Include Extended Tags" ) ) ]
25- pub include_extended_tags : bool ,
26- }
14+ // Top-level configuration
2715
2816/// Configuration for the `tag_cardinality_limit` transform.
2917#[ configurable_component( transform(
@@ -79,7 +67,7 @@ pub enum TrackingScope {
7967 PerMetric ,
8068}
8169
82- /// Configuration for the `tag_cardinality_limit` transform for a specific group of metrics.
70+ /// Configuration block used at the global level.
8371#[ configurable_component]
8472#[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
8573pub struct Inner {
@@ -99,7 +87,7 @@ pub struct Inner {
9987 pub internal_metrics : InternalMetricsConfig ,
10088}
10189
102- /// Controls the approach taken for tracking tag cardinality.
90+ /// Controls the approach taken for tracking tag cardinality at the global level.
10391#[ configurable_component]
10492#[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
10593#[ serde( tag = "mode" , rename_all = "snake_case" , deny_unknown_fields) ]
@@ -122,19 +110,131 @@ pub enum Mode {
122110 Probabilistic ( BloomFilterConfig ) ,
123111}
124112
125- /// Bloom filter configuration in probabilistic mode.
113+ /// Per-metric name tag cardinality limit configuration.
126114#[ configurable_component]
127- #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
128- pub struct BloomFilterConfig {
129- /// The size of the cache for detecting duplicate tags, in bytes.
115+ #[ derive( Clone , Debug , Eq , PartialEq ) ]
116+ pub struct PerMetricConfig {
117+ /// Namespace of the metric this configuration refers to.
118+ #[ serde( default ) ]
119+ pub namespace : Option < String > ,
120+
121+ /// Per-tag-key overrides scoped to this metric. Each entry sets a `mode`:
122+ /// - `mode: limit_override` + `value_limit: N` — track with a per-tag cap.
123+ /// - `mode: excluded` — opt this tag out of tracking entirely.
130124 ///
131- /// The larger the cache size, the less likely it is to have a false positive, or a case where
132- /// we allow a new value for tag even after we have reached the configured limits.
133- #[ serde( default = "default_cache_size" ) ]
134- #[ configurable( metadata( docs:: human_name = "Cache Size per Key" ) ) ]
135- pub cache_size_per_key : usize ,
125+ /// All other settings (tracking algorithm, `limit_exceeded_action`, etc.)
126+ /// are inherited from the enclosing per-metric configuration.
127+ /// Tags not listed here use the per-metric configuration.
128+ #[ configurable(
129+ derived,
130+ metadata( docs:: additional_props_description = "An individual tag configuration." )
131+ ) ]
132+ #[ serde( default ) ]
133+ pub per_tag_limits : HashMap < String , PerTagConfig > ,
134+
135+ #[ serde( flatten) ]
136+ pub config : OverrideInner ,
136137}
137138
139+ /// Configuration block used at per-metric level. Same shape as the global configuration but
140+ /// with `OverrideMode`, which adds `excluded` for opting that metric out of cardinality
141+ /// control entirely.
142+ #[ configurable_component]
143+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
144+ pub struct OverrideInner {
145+ /// How many distinct values to accept for any given key. Ignored when `mode: excluded`.
146+ #[ serde( default = "default_value_limit" ) ]
147+ pub value_limit : usize ,
148+
149+ #[ configurable( derived) ]
150+ #[ serde( default = "default_limit_exceeded_action" ) ]
151+ pub limit_exceeded_action : LimitExceededAction ,
152+
153+ #[ serde( flatten) ]
154+ pub mode : OverrideMode ,
155+
156+ #[ configurable( derived) ]
157+ #[ serde( default ) ]
158+ pub internal_metrics : InternalMetricsConfig ,
159+ }
160+
161+ /// Controls the approach taken for tracking tag cardinality at the per-metric level.
162+ /// Adds `excluded` to the global `Mode` variants to allow opting a metric out entirely.
163+ #[ configurable_component]
164+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
165+ #[ serde( tag = "mode" , rename_all = "snake_case" , deny_unknown_fields) ]
166+ #[ configurable( metadata(
167+ docs:: enum_tag_description = "Controls the approach taken for tracking tag cardinality."
168+ ) ) ]
169+ pub enum OverrideMode {
170+ /// Tracks cardinality exactly. See `Mode::Exact` for details.
171+ Exact ,
172+
173+ /// Tracks cardinality probabilistically. See `Mode::Probabilistic` for details.
174+ Probabilistic ( BloomFilterConfig ) ,
175+
176+ /// Skips cardinality tracking for this metric. All tag values pass through and nothing is
177+ /// limited. Other fields in this per-metric configuration are ignored when this is selected.
178+ Excluded ,
179+ }
180+
181+ impl OverrideMode {
182+ /// Returns the equivalent global `Mode` if this scope is tracked, or `None` if excluded.
183+ pub const fn as_mode ( & self ) -> Option < Mode > {
184+ match self {
185+ OverrideMode :: Exact => Some ( Mode :: Exact ) ,
186+ OverrideMode :: Probabilistic ( b) => Some ( Mode :: Probabilistic ( * b) ) ,
187+ OverrideMode :: Excluded => None ,
188+ }
189+ }
190+ }
191+
192+ /// Per-tag cardinality configuration.
193+ ///
194+ /// Specify `mode` (`limit_override` or `excluded`) to control how this tag is handled.
195+ ///
196+ /// Example:
197+ /// ```yaml
198+ /// per_tag_limits:
199+ /// environment:
200+ /// mode: limit_override # track with a per-tag cap
201+ /// value_limit: 3
202+ /// trace_id:
203+ /// mode: excluded # opt out of tracking entirely
204+ /// ```
205+ #[ configurable_component]
206+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
207+ pub struct PerTagConfig {
208+ #[ configurable( derived) ]
209+ #[ serde( flatten) ]
210+ pub mode : PerTagMode ,
211+ }
212+
213+ /// Mode applied to a specific tag key within a per-metric override.
214+ ///
215+ /// The tracking algorithm (`exact`/`probabilistic`), `cache_size_per_key`,
216+ /// `limit_exceeded_action`, and `internal_metrics` are always inherited from the
217+ /// enclosing per-metric configuration.
218+ #[ configurable_component]
219+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
220+ #[ serde( tag = "mode" , rename_all = "snake_case" , deny_unknown_fields) ]
221+ #[ configurable( metadata( docs:: enum_tag_description = "Controls how this tag key is handled." ) ) ]
222+ pub enum PerTagMode {
223+ /// Track this tag with a per-tag value limit. The enclosing per-metric tracking
224+ /// algorithm and all other settings still apply.
225+ LimitOverride {
226+ /// Maximum number of distinct values to accept for this tag key.
227+ value_limit : usize ,
228+ } ,
229+ /// Opt this tag out of cardinality tracking entirely. All values pass through
230+ /// without being recorded or checked against any `value_limit`.
231+ Excluded ,
232+ }
233+
234+ // =============================================================================
235+ // Shared building blocks
236+ // =============================================================================
237+
138238/// Possible actions to take when an event arrives that would exceed the cardinality limit for one
139239/// or more of its tags.
140240#[ configurable_component]
@@ -148,26 +248,45 @@ pub enum LimitExceededAction {
148248 DropEvent ,
149249}
150250
151- /// Tag cardinality limit configuration per metric name.
251+ /// Bloom filter configuration in probabilistic mode.
152252#[ configurable_component]
153- #[ derive( Clone , Debug , Eq , PartialEq ) ]
154- pub struct PerMetricConfig {
155- /// Namespace of the metric this configuration refers to.
156- #[ serde( default ) ]
157- pub namespace : Option < String > ,
158-
159- #[ serde( flatten) ]
160- pub config : Inner ,
253+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
254+ pub struct BloomFilterConfig {
255+ /// The size of the cache for detecting duplicate tags, in bytes.
256+ ///
257+ /// The larger the cache size, the less likely it is to have a false positive, or a case where
258+ /// we allow a new value for a tag even after we have reached the configured limits.
259+ #[ serde( default = "default_cache_size" ) ]
260+ #[ configurable( metadata( docs:: human_name = "Cache Size per Key" ) ) ]
261+ pub cache_size_per_key : usize ,
161262}
162263
163- const fn default_limit_exceeded_action ( ) -> LimitExceededAction {
164- LimitExceededAction :: DropTag
264+ /// Configuration of internal metrics for the TagCardinalityLimit transform.
265+ #[ configurable_component]
266+ #[ derive( Clone , Copy , Debug , Default , Eq , PartialEq ) ]
267+ #[ serde( deny_unknown_fields) ]
268+ pub struct InternalMetricsConfig {
269+ /// Whether to include extended tags (metric_name, tag_key) in the `tag_value_limit_exceeded_total` metric.
270+ ///
271+ /// This helps identify which metrics and tag keys are hitting cardinality limits, but can significantly
272+ /// increase metric cardinality. Defaults to `false` because these tags have potentially unbounded cardinality.
273+ #[ serde( default = "default_include_extended_tags" ) ]
274+ #[ configurable( metadata( docs:: human_name = "Include Extended Tags" ) ) ]
275+ pub include_extended_tags : bool ,
165276}
166277
278+ // =============================================================================
279+ // Defaults
280+ // =============================================================================
281+
167282const fn default_value_limit ( ) -> usize {
168283 500
169284}
170285
286+ const fn default_limit_exceeded_action ( ) -> LimitExceededAction {
287+ LimitExceededAction :: DropTag
288+ }
289+
171290const fn default_include_extended_tags ( ) -> bool {
172291 false
173292}
@@ -176,6 +295,10 @@ pub(crate) const fn default_cache_size() -> usize {
176295 5 * 1024 // 5KB
177296}
178297
298+ // =============================================================================
299+ // Transform plumbing
300+ // =============================================================================
301+
179302impl GenerateConfig for Config {
180303 fn generate_config ( ) -> toml:: Value {
181304 toml:: Value :: try_from ( Self {
0 commit comments