Skip to content

Commit 4c42fdc

Browse files
authored
fix(otel-collector): improve log level extraction with word boundaries in regex (#1747)
For a log line like `x-amz-id-2: WxwS/N175wqLyRlzCXLpGZGszCEbQA0f63uFgdQN1qfcPr2IAmwE/P7HF2b1NdZLg18pNLF3ecTw5CrItXJid/uLe+fxh3jMBiJ7UlUxidw=`, the level will be inferred as fatal because the value contains `CrIt`, which is incorrect. To fix this, we need to add a word boundary (`\b`) at the start of the keyword pattern. Ref: HDX-3439. Note: Claude made a mistake during review; the following statement is incorrect: `❌ Test expects "ALERTING" to match "alert" keyword → "ALERTING" won't match with word boundary because "alert" is a substring, not at a word boundary. Expected should be "info",9,"ALERTING system engaged" not "fatal",21.` The word boundary is only required before the keyword, and "ALERTING" starts at a word boundary, so it still matches "alert" and is correctly inferred as "fatal",21.
1 parent 7679b80 commit 4c42fdc

9 files changed

Lines changed: 162 additions & 1 deletion

File tree

.changeset/long-pianos-wait.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/otel-collector": minor
3+
---
4+
5+
fix: improve log level extraction with word boundaries in regex

docker/otel-collector/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ processors:
1919
# Infer: extract the first log level keyword from the first 256 characters of the body
2020
- set(log.cache["substr"], log.body.string) where Len(log.body.string) < 256
2121
- set(log.cache["substr"], Substring(log.body.string, 0, 256)) where Len(log.body.string) >= 256
22-
- set(log.cache, ExtractPatterns(log.cache["substr"], "(?i)(?P<0>(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
22+
- set(log.cache, ExtractPatterns(log.cache["substr"], "(?i)(?P<0>\\b(alert|crit|emerg|fatal|error|err|warn|notice|debug|dbug|trace))"))
2323
# Infer: detect FATAL
2424
- set(log.severity_number, SEVERITY_NUMBER_FATAL) where IsMatch(log.cache["0"], "(?i)(alert|crit|emerg|fatal)")
2525
- set(log.severity_text, "fatal") where log.severity_number == SEVERITY_NUMBER_FATAL
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-- Returns severity text, severity number and body for every log row emitted by the
-- severity-inference / infer-superstring fixture, as CSV, in emission order.
-- The fixture rows differ only by nanoseconds, so TimestampTime may tie if it is
-- coarser than that; Timestamp is added as a tie-breaker to keep the row order stable.
-- NOTE(review): assumes otel_logs has the full-precision Timestamp column, as in the
-- OpenTelemetry ClickHouse exporter schema — confirm against the table DDL.
SELECT
    SeverityText,
    SeverityNumber,
    Body
FROM otel_logs
WHERE ResourceAttributes['suite-id'] = 'severity-inference'
  AND ResourceAttributes['test-id'] = 'infer-superstring'
ORDER BY
    TimestampTime,
    Timestamp
FORMAT CSV
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"warn",13,"WARNING: disk space running low"
2+
"fatal",21,"CRITICAL: database connection pool exhausted"
3+
"fatal",21,"EMERGENCY: system failure imminent"
4+
"fatal",21,"ALERTING system engaged"
5+
"error",17,"ERRORS detected in application"
6+
"warn",13,"NOTICED unusual activity in request handler"
7+
"debug",5,"DEBUGGING enabled for module"
8+
"trace",1,"TRACED request path through gateway"
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
{
2+
"resourceLogs": [
3+
{
4+
"resource": {
5+
"attributes": [
6+
{
7+
"key": "suite-id",
8+
"value": {
9+
"stringValue": "severity-inference"
10+
}
11+
},
12+
{
13+
"key": "test-id",
14+
"value": {
15+
"stringValue": "infer-superstring"
16+
}
17+
}
18+
]
19+
},
20+
"scopeLogs": [
21+
{
22+
"scope": {},
23+
"logRecords": [
24+
{
25+
"timeUnixNano": "1901999580000000000",
26+
"body": {
27+
"stringValue": "WARNING: disk space running low"
28+
}
29+
},
30+
{
31+
"timeUnixNano": "1901999580000000001",
32+
"body": {
33+
"stringValue": "CRITICAL: database connection pool exhausted"
34+
}
35+
},
36+
{
37+
"timeUnixNano": "1901999580000000002",
38+
"body": {
39+
"stringValue": "EMERGENCY: system failure imminent"
40+
}
41+
},
42+
{
43+
"timeUnixNano": "1901999580000000003",
44+
"body": {
45+
"stringValue": "ALERTING system engaged"
46+
}
47+
},
48+
{
49+
"timeUnixNano": "1901999580000000004",
50+
"body": {
51+
"stringValue": "ERRORS detected in application"
52+
}
53+
},
54+
{
55+
"timeUnixNano": "1901999580000000005",
56+
"body": {
57+
"stringValue": "NOTICED unusual activity in request handler"
58+
}
59+
},
60+
{
61+
"timeUnixNano": "1901999580000000006",
62+
"body": {
63+
"stringValue": "DEBUGGING enabled for module"
64+
}
65+
},
66+
{
67+
"timeUnixNano": "1901999580000000007",
68+
"body": {
69+
"stringValue": "TRACED request path through gateway"
70+
}
71+
}
72+
]
73+
}
74+
]
75+
}
76+
]
77+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-- Returns severity text, severity number and body for every log row emitted by the
-- severity-inference / no-infer-substring fixture, as CSV, in emission order.
-- The fixture rows differ only by nanoseconds, so TimestampTime may tie if it is
-- coarser than that; Timestamp is added as a tie-breaker to keep the row order stable.
-- NOTE(review): assumes otel_logs has the full-precision Timestamp column, as in the
-- OpenTelemetry ClickHouse exporter schema — confirm against the table DDL.
SELECT
    SeverityText,
    SeverityNumber,
    Body
FROM otel_logs
WHERE ResourceAttributes['suite-id'] = 'severity-inference'
  AND ResourceAttributes['test-id'] = 'no-infer-substring'
ORDER BY
    TimestampTime,
    Timestamp
FORMAT CSV
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"info",9,"x-amz-id-2 : abc123/COuECrITmh"
2+
"info",9,"txn_id=ab3cdErrF8x processing complete"
3+
"info",9,"Forewarn systems check passed"
4+
"info",9,"Request backtraced to origin"
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"resourceLogs": [
3+
{
4+
"resource": {
5+
"attributes": [
6+
{
7+
"key": "suite-id",
8+
"value": {
9+
"stringValue": "severity-inference"
10+
}
11+
},
12+
{
13+
"key": "test-id",
14+
"value": {
15+
"stringValue": "no-infer-substring"
16+
}
17+
}
18+
]
19+
},
20+
"scopeLogs": [
21+
{
22+
"scope": {},
23+
"logRecords": [
24+
{
25+
"timeUnixNano": "1901999580000000000",
26+
"body": {
27+
"stringValue": "x-amz-id-2 : abc123/COuECrITmh"
28+
}
29+
},
30+
{
31+
"timeUnixNano": "1901999580000000001",
32+
"body": {
33+
"stringValue": "txn_id=ab3cdErrF8x processing complete"
34+
}
35+
},
36+
{
37+
"timeUnixNano": "1901999580000000002",
38+
"body": {
39+
"stringValue": "Forewarn systems check passed"
40+
}
41+
},
42+
{
43+
"timeUnixNano": "1901999580000000003",
44+
"body": {
45+
"stringValue": "Request backtraced to origin"
46+
}
47+
}
48+
]
49+
}
50+
]
51+
}
52+
]
53+
}

smoke-tests/otel-collector/severity-inference.bats

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,15 @@ load 'test_helpers/assertions.bash'
4444
sleep 1
4545
assert_test_data "data/severity-inference/skip-infer"
4646
}
47+
48+
# Negative case: bodies that contain a level keyword only in the middle of a word.
# Emits the no-infer-substring fixture to http://localhost:4318, waits one second,
# then runs the assertion for the same fixture directory.
# NOTE(review): emit_otel_data and assert_test_data are defined outside this view
# (presumably in test_helpers) — confirm their exact semantics there.
@test "should not infer severity from keywords embedded mid-word" {
49+
emit_otel_data "http://localhost:4318" "data/severity-inference/no-infer-substring"
50+
sleep 1
51+
assert_test_data "data/severity-inference/no-infer-substring"
52+
}
53+
54+
# Positive case: bodies that start with a longer word beginning with a level keyword
# (e.g. WARNING, CRITICAL).
# Emits the infer-superstring fixture to http://localhost:4318, waits one second,
# then runs the assertion for the same fixture directory.
# NOTE(review): emit_otel_data and assert_test_data are defined outside this view
# (presumably in test_helpers) — confirm their exact semantics there.
@test "should infer severity from superstring keywords like WARNING and CRITICAL" {
55+
emit_otel_data "http://localhost:4318" "data/severity-inference/infer-superstring"
56+
sleep 1
57+
assert_test_data "data/severity-inference/infer-superstring"
58+
}

0 commit comments

Comments
 (0)