Skip to content

Commit e8114e4

Browse files
fix(metrics): Data Normalization (#3843)
Update metrics data normalization to spec. Fixes GH-3829
1 parent c0f08e7 commit e8114e4

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
- Add timing API for Metrics (#3812)
88
- Add [rate limiting](https://develop.sentry.dev/sdk/rate-limiting/) for Metrics (#3838)
9+
- Data normalization for Metrics (#3843)
910

1011
## 8.23.0
1112

Sources/Swift/Metrics/EncodeMetrics.swift

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
1010
let buckets = bucket.value
1111
for metric in buckets {
1212

13-
statsdString.append(sanitize(key: metric.key))
13+
statsdString.append(sanitize(metricKey: metric.key))
1414
statsdString.append("@")
1515

16-
statsdString.append(metric.unit.unit)
16+
statsdString.append(sanitize(metricUnit: metric.unit.unit))
1717

1818
for serializedValue in metric.serialize() {
1919
statsdString.append(":\(serializedValue)")
@@ -24,7 +24,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
2424

2525
var firstTag = true
2626
for (tagKey, tagValue) in metric.tags {
27-
let sanitizedTagKey = sanitize(key: tagKey)
27+
let sanitizedTagKey = sanitize(tagKey: tagKey)
2828

2929
if firstTag {
3030
statsdString.append("|#")
@@ -34,7 +34,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
3434
}
3535

3636
statsdString.append("\(sanitizedTagKey):")
37-
statsdString.append(sanitize(value: tagValue))
37+
statsdString.append(replaceTagValueCharacters(tagValue: tagValue))
3838
}
3939

4040
statsdString.append("|T")
@@ -46,10 +46,27 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
4646
return statsdString.data(using: .utf8) ?? Data()
4747
}
4848

49-
private func sanitize(key: String) -> String {
50-
return key.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "_", options: .regularExpression)
49+
/// Returns `metricUnit` with every character other than ASCII letters, digits and `_` removed.
///
/// - Parameter metricUnit: The raw unit string of a metric.
/// - Returns: The unit with each disallowed character deleted; nothing is substituted in its place.
private func sanitize(metricUnit: String) -> String {
50+
// \w is deliberately avoided: in Swift it also matches non-ASCII letters such as ä,
// so the allowed ASCII ranges are spelled out explicitly.
51+
return metricUnit.replacingOccurrences(of: "[^a-zA-Z0-9_]", with: "", options: .regularExpression)
5152
}
5253

53-
private func sanitize(value: String) -> String {
54-
return value.replacingOccurrences(of: "[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+", with: "", options: .regularExpression)
54+
/// Returns `metricKey` with each run of disallowed characters collapsed into a single `_`.
///
/// Allowed characters are ASCII letters, digits, `_`, `.` and `-`. Because the pattern ends in `+`,
/// several consecutive disallowed characters produce one underscore, not one per character.
///
/// - Parameter metricKey: The raw key (name) of a metric.
/// - Returns: The key with disallowed runs replaced by `_`.
private func sanitize(metricKey: String) -> String {
55+
// \w is deliberately avoided: in Swift it also matches non-ASCII letters such as ä,
// so the allowed ASCII ranges are spelled out explicitly.
56+
return metricKey.replacingOccurrences(of: "[^a-zA-Z0-9_.-]+", with: "_", options: .regularExpression)
57+
}
58+
59+
/// Returns `tagKey` with every disallowed character removed.
///
/// Allowed characters are ASCII letters, digits, `_`, `/`, `.` and `-`. Unlike the metric-key
/// variant, disallowed characters are dropped (replaced with the empty string), not turned into `_`.
///
/// - Parameter tagKey: The raw key of a metric tag.
/// - Returns: The key containing only allowed characters.
private func sanitize(tagKey: String) -> String {
60+
// \w is deliberately avoided: in Swift it also matches non-ASCII letters such as ä,
// so the allowed ASCII ranges are spelled out explicitly.
61+
return tagKey.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "", options: .regularExpression)
62+
}
63+
64+
/// Escapes the characters in a tag value that would otherwise clash with the statsd line format.
///
/// Replaces, in this order: backslash, newline, carriage return, tab, `|` and `,`. All other
/// characters are passed through unchanged.
///
/// NOTE(review): the replacement strings are raw literals (`#"…"#`), in which a backslash is not an
/// escape character. `#"\\n"#` is therefore the three characters backslash, backslash, `n`, and
/// `#"\\\\"#` is four backslashes. If the wire format expects a single backslash before `n`, `r`,
/// `t`, `u{7c}` and `u{2c}` (and two for a literal backslash), these literals emit one level of
/// escaping too many. Confirm against the metrics normalization spec before changing anything.
///
/// - Parameter tagValue: The raw value of a metric tag.
/// - Returns: The tag value with the characters above replaced by their escape sequences.
private func replaceTagValueCharacters(tagValue: String) -> String {
65+
// Backslashes are handled first so that the backslashes introduced by the
// replacements below are not escaped a second time.
var result = tagValue.replacingOccurrences(of: "\\", with: #"\\\\"#)
66+
result = result.replacingOccurrences(of: "\n", with: #"\\n"#)
67+
result = result.replacingOccurrences(of: "\r", with: #"\\r"#)
68+
result = result.replacingOccurrences(of: "\t", with: #"\\t"#)
69+
// `|` separates the fields of an encoded metric line, so it is replaced by an escape sequence.
result = result.replacingOccurrences(of: "|", with: #"\\u{7c}"#)
70+
// `,` is likewise replaced by an escape sequence.
return result.replacingOccurrences(of: ",", with: #"\\u{2c}"#)
71+
5572
}

Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,31 @@ final class EncodeMetricTests: XCTestCase {
8383

8484
let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])
8585

86-
expect(data.decodeStatsd()) == "abyzABYZ09_/.-_a_a@second:10.1|c|T10234\n"
86+
expect(data.decodeStatsd()) == "abyzABYZ09__.-_a_a@second:10.1|c|T10234\n"
8787
}
8888

8989
/// Encodes a counter metric whose tag key contains disallowed characters and checks the statsd output.
///
/// This is a diff rendering: the line after each `-` marker is the previous fixture/expectation and
/// the line after each `+` marker is its replacement. In the new version the characters `äöü$%&`
/// are dropped from the tag key without a substitute.
func testEncodeCounterMetricWithTagKeyToSanitize() {
90-
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abyzABYZ09_/.-!@a#$Äa": "value"])
90+
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abcABC123_-./äöü$%&abcABC123": "value"])
9191

9292
let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])
9393

94-
expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abyzABYZ09_/.-_a_a:value|T10234\n"
94+
expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abcABC123_-./abcABC123:value|T10234\n"
9595
}
9696

9797
/// Encodes a counter metric whose tag value contains newline, carriage return, tab, `|`, `,` and a
/// backslash, and checks how the value appears in the statsd output.
///
/// This is a diff rendering: the line after each `-` marker is the previous fixture/expectation and
/// the line after each `+` marker is its replacement.
func testEncodeCounterMetricWithTagValueToSanitize() {
98-
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": #"azAZ1 _:/@.{}[]$\%^&a*"#])
98+
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": "abc\n\r\t|,\\123"])
9999

100100
let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])
101101

102-
expect(data.decodeStatsd()) == "app.start@second:10.1|c|#key:azAZ1 _:/@.{}[]$a|T10234\n"
102+
// The new expectation is a substring match, not a comparison of the whole line. The expected
// text is a raw literal, so every `\\` in it stands for two literal backslashes in the output.
expect(data.decodeStatsd()).to(contain(#"abc\\n\\r\\t\\u{7c}\\u{2c}\\\\123"#))
103+
}
104+
105+
/// Encodes a counter metric with a custom unit containing `/`, `.` and `ä`, and checks that those
/// characters are removed from the unit in the statsd output while letters, digits and `_` remain.
func testEncodeCounterMetricWithUnitToSanitize() {
106+
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnit(unit: "abyzABYZ09_/.ä"), tags: [:])
107+
108+
let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])
109+
110+
expect(data.decodeStatsd()) == "app.start@abyzABYZ09_:10.1|c|T10234\n"
103111
}
104112
}
105113

0 commit comments

Comments
 (0)