Skip to content

Commit 2693d23

Browse files
Kavindu-Dodanaxw
and authored
[extension/awslogs_encoding] implement streaming contract for CloudWatch subscription filter (#46220)
#### Description Based on contract introduced at #46211, this PR implements streaming for CloudWatch subscription filter. CloudWatch subscription filter change is focused at commit titled `streaming for CW subscription filter` #### Testing Unit tests and dedicated streaming tests #### Documentation Updated documentation on streaming contract --------- Signed-off-by: Kavindu Dodanduwa <kavindu.dodanduwa@elastic.co> Co-authored-by: Andrew Wilkins <axwalk@gmail.com>
1 parent 18081b2 commit 2693d23

8 files changed

Lines changed: 332 additions & 41 deletions

File tree

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
7+
component: extension/awslogs_encoding
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Adopt encoding extension streaming contract for CloudWatch Logs subscription
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [46214]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

extension/encoding/awslogsencodingextension/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,10 @@ This allows streaming implementation to work independently of compression algori
181181

182182
The table below summarizes streaming support details for each log type, along with the offset tracking mechanism,
183183

184-
| Log Type | Sub Log Type/Source | Offset Tracking | Notes |
185-
|------------------|---------------------|-----------------|-------|
186-
| Network Firewall | Alert/Flow/TLS | Bytes processed | |
184+
| Log Type | Sub Log Type/Source | Offset Tracking | Notes |
185+
|---------------------|---------------------|-----------------------------|----------------------------------------------------------------------------------------------|
186+
| Network Firewall | Alert/Flow/TLS | Bytes processed | |
187+
| Subscription filter | - | Number of records processed | Supports processing multi-line inputs and offset tracks number of records that get processed |
187188

188189
## Produced Records per Format
189190

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
{
2+
"messageType": "DATA_MESSAGE",
3+
"owner": "123456789012",
4+
"logGroup": "/my/first/log/group",
5+
"logStream": "/my/first/log/stream",
6+
"subscriptionFilters": [
7+
"my-account-subscription-to-firehose"
8+
],
9+
"logEvents": [
10+
{
11+
"id": "39494971786919662926967737244165955921131981322179969024",
12+
"timestamp": 1771015786034,
13+
"message": "some text"
14+
}
15+
]
16+
}
17+
{
18+
"messageType": "DATA_MESSAGE",
19+
"owner": "123456789012",
20+
"logGroup": "/my/second/log/group",
21+
"logStream": "/my/second/log/stream",
22+
"subscriptionFilters": [
23+
"my-account-subscription-to-firehose"
24+
],
25+
"logEvents": [
26+
{
27+
"id": "39494969221575739759196034780948230027606320717924401152",
28+
"timestamp": 1771015671000,
29+
"message": "some other text"
30+
}
31+
]
32+
}
33+
{
34+
"messageType": "DATA_MESSAGE",
35+
"owner": "123456789012",
36+
"logGroup": "/my/third/log/group",
37+
"logStream": "/my/third/log/stream",
38+
"subscriptionFilters": [
39+
"my-account-subscription-to-firehose"
40+
],
41+
"logEvents": [
42+
{
43+
"id": "39494969221575739759196034780948230027606320717924401152",
44+
"timestamp": 1771015671000,
45+
"message": "another log message"
46+
}
47+
]
48+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
resourceLogs:
2+
- resource:
3+
attributes:
4+
- key: cloud.provider
5+
value:
6+
stringValue: aws
7+
- key: cloud.account.id
8+
value:
9+
stringValue: "123456789012"
10+
- key: aws.log.group.names
11+
value:
12+
arrayValue:
13+
values:
14+
- stringValue: /my/first/log/group
15+
- key: aws.log.stream.names
16+
value:
17+
arrayValue:
18+
values:
19+
- stringValue: /my/first/log/stream
20+
scopeLogs:
21+
- logRecords:
22+
- body:
23+
stringValue: some text
24+
timeUnixNano: "1771015786034000000"
25+
scope:
26+
attributes:
27+
- key: encoding.format
28+
value:
29+
stringValue: aws.cloudwatch
30+
name: github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
resourceLogs:
2+
- resource:
3+
attributes:
4+
- key: cloud.provider
5+
value:
6+
stringValue: aws
7+
- key: cloud.account.id
8+
value:
9+
stringValue: "123456789012"
10+
- key: aws.log.group.names
11+
value:
12+
arrayValue:
13+
values:
14+
- stringValue: /my/second/log/group
15+
- key: aws.log.stream.names
16+
value:
17+
arrayValue:
18+
values:
19+
- stringValue: /my/second/log/stream
20+
scopeLogs:
21+
- logRecords:
22+
- body:
23+
stringValue: some other text
24+
timeUnixNano: "1771015671000000000"
25+
scope:
26+
attributes:
27+
- key: encoding.format
28+
value:
29+
stringValue: aws.cloudwatch
30+
name: github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
resourceLogs:
2+
- resource:
3+
attributes:
4+
- key: cloud.provider
5+
value:
6+
stringValue: aws
7+
- key: cloud.account.id
8+
value:
9+
stringValue: "123456789012"
10+
- key: aws.log.group.names
11+
value:
12+
arrayValue:
13+
values:
14+
- stringValue: /my/third/log/group
15+
- key: aws.log.stream.names
16+
value:
17+
arrayValue:
18+
values:
19+
- stringValue: /my/third/log/stream
20+
scopeLogs:
21+
- logRecords:
22+
- body:
23+
stringValue: another log message
24+
timeUnixNano: "1771015671000000000"
25+
scope:
26+
attributes:
27+
- key: encoding.format
28+
value:
29+
stringValue: aws.cloudwatch
30+
name: github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension

extension/encoding/awslogsencodingextension/internal/unmarshaler/subscription-filter/unmarshaler.go

Lines changed: 105 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,42 +15,29 @@ import (
1515
"go.opentelemetry.io/collector/pdata/plog"
1616
conventions "go.opentelemetry.io/otel/semconv/v1.38.0"
1717

18+
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding"
1819
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension/internal/constants"
1920
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension/internal/metadata"
2021
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/awslogsencodingextension/internal/unmarshaler"
22+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/xstreamencoding"
2123
)
2224

25+
// ctrlMessageType identifies CloudWatch control messages, which carry no
// log data and are skipped by the decoder loop.
const ctrlMessageType = "CONTROL_MESSAGE"
26+
2327
// Validation errors returned for DATA_MESSAGE payloads missing required fields.
var (
	errEmptyOwner     = errors.New("cloudwatch log with message type 'DATA_MESSAGE' has empty owner field")
	errEmptyLogGroup  = errors.New("cloudwatch log with message type 'DATA_MESSAGE' has empty log group field")
	errEmptyLogStream = errors.New("cloudwatch log with message type 'DATA_MESSAGE' has empty log stream field")
)
2832

29-
func validateLog(log cloudwatchLogsData) error {
30-
switch log.MessageType {
31-
case "DATA_MESSAGE":
32-
if log.Owner == "" {
33-
return errEmptyOwner
34-
}
35-
if log.LogGroup == "" {
36-
return errEmptyLogGroup
37-
}
38-
if log.LogStream == "" {
39-
return errEmptyLogStream
40-
}
41-
case "CONTROL_MESSAGE":
42-
default:
43-
return fmt.Errorf("cloudwatch log has invalid message type %q", log.MessageType)
44-
}
45-
return nil
46-
}
33+
// Compile-time assertion that SubscriptionFilterUnmarshaler satisfies the
// streaming unmarshaler contract.
var _ unmarshaler.StreamingLogsUnmarshaler = (*SubscriptionFilterUnmarshaler)(nil)
4734

48-
type subscriptionFilterUnmarshaler struct {
35+
// SubscriptionFilterUnmarshaler converts CloudWatch Logs subscription filter
// payloads into pdata logs, either in one shot (UnmarshalAWSLogs) or
// incrementally (NewLogsDecoder).
type SubscriptionFilterUnmarshaler struct {
	// buildInfo carries the collector's build information.
	buildInfo component.BuildInfo
}
5138

52-
func NewSubscriptionFilterUnmarshaler(buildInfo component.BuildInfo) unmarshaler.AWSUnmarshaler {
53-
return &subscriptionFilterUnmarshaler{
39+
func NewSubscriptionFilterUnmarshaler(buildInfo component.BuildInfo) *SubscriptionFilterUnmarshaler {
40+
return &SubscriptionFilterUnmarshaler{
5441
buildInfo: buildInfo,
5542
}
5643
}
@@ -61,36 +48,97 @@ func NewSubscriptionFilterUnmarshaler(buildInfo component.BuildInfo) unmarshaler
6148
// logs are further grouped by their extracted account ID and region.
6249
// Logs are assumed to be gzip-compressed as specified at
6350
// https://docs.aws.amazon.com/firehose/latest/dev/writing-with-cloudwatch-logs.html.
64-
func (f *subscriptionFilterUnmarshaler) UnmarshalAWSLogs(reader io.Reader) (plog.Logs, error) {
65-
logs := plog.NewLogs()
66-
resourceLogsByKey := make(map[resourceGroupKey]plog.LogRecordSlice)
51+
func (f *SubscriptionFilterUnmarshaler) UnmarshalAWSLogs(reader io.Reader) (plog.Logs, error) {
52+
// Decode as a stream but flush all at once using flush options
53+
streamUnmarshaler, err := f.NewLogsDecoder(reader, encoding.WithFlushItems(0), encoding.WithFlushBytes(0))
54+
if err != nil {
55+
return plog.Logs{}, err
56+
}
57+
logs, err := streamUnmarshaler.DecodeLogs()
58+
if err != nil {
59+
// we must check for EOF with direct comparison and avoid wrapped EOF that can come from stream itself
60+
//nolint:errorlint
61+
if err == io.EOF {
62+
// EOF indicates no logs were found, return any logs that's available
63+
return logs, nil
64+
}
65+
66+
return plog.Logs{}, err
67+
}
6768

69+
return logs, nil
70+
}
71+
72+
// NewLogsDecoder returns a LogsDecoder that processes CloudWatch Logs subscription filter events.
// Supported sub formats:
//   - DATA_MESSAGE: Returns logs grouped by owner, log group, and stream; offset is the number of records processed
//   - CONTROL_MESSAGE: Returns empty log; offset is the number of records processed
func (f *SubscriptionFilterUnmarshaler) NewLogsDecoder(reader io.Reader, options ...encoding.DecoderOption) (encoding.LogsDecoder, error) {
	batchHelper := xstreamencoding.NewBatchHelper(options...)
	decoder := gojson.NewDecoder(reader)

	// offset counts JSON records consumed from the stream so far; the closure
	// handed to the adapter below reports it as the resume position.
	var offset int64

	// Honor a caller-supplied starting offset by discarding that many records
	// up front. RawMessage avoids fully unmarshaling records we only skip.
	if batchHelper.Options().Offset > 0 {
		for offset < batchHelper.Options().Offset {
			if !decoder.More() {
				return nil, fmt.Errorf("EOF reached before offset %d records were discarded", batchHelper.Options().Offset)
			}

			var raw gojson.RawMessage
			if err := decoder.Decode(&raw); err != nil {
				return nil, err
			}
			offset++
		}
	}

	return xstreamencoding.NewLogsDecoderAdapter(
		func() (plog.Logs, error) {
			logs := plog.NewLogs()
			resourceLogsByKey := make(map[resourceGroupKey]plog.LogRecordSlice)

			for decoder.More() {
				var cwLog cloudwatchLogsData
				if err := decoder.Decode(&cwLog); err != nil {
					return plog.Logs{}, fmt.Errorf("failed to decode decompressed reader: %w", err)
				}

				// Every decoded record advances the offset and item count,
				// including control messages that yield no log records.
				offset++
				batchHelper.IncrementItems(1)

				if cwLog.MessageType == ctrlMessageType {
					continue
				}

				if err := validateLog(cwLog); err != nil {
					return plog.Logs{}, fmt.Errorf("invalid cloudwatch log: %w", err)
				}

				f.appendLogs(logs, resourceLogsByKey, cwLog)

				// Emit the accumulated batch once the configured flush
				// threshold is reached.
				if batchHelper.ShouldFlush() {
					batchHelper.Reset()
					return logs, nil
				}
			}

			// Input exhausted without producing any records: return io.EOF so
			// the caller knows the stream is done and this batch is empty.
			if logs.ResourceLogs().Len() == 0 {
				return plog.NewLogs(), io.EOF
			}

			return logs, nil
		}, func() int64 {
			return offset
		},
	), nil
}
87135
}
88136

89137
// appendLogs appends log records from cwLog into the given plog.Logs, reusing
90138
// existing ResourceLogs entries tracked by resourceLogsByKey when possible.
91139
// Events are grouped by their extracted fields (account ID + region) and
92140
// by log group/stream combination.
93-
func (f *subscriptionFilterUnmarshaler) appendLogs(logs plog.Logs, resourceLogsByKey map[resourceGroupKey]plog.LogRecordSlice, cwLog cloudwatchLogsData) {
141+
func (f *SubscriptionFilterUnmarshaler) appendLogs(logs plog.Logs, resourceLogsByKey map[resourceGroupKey]plog.LogRecordSlice, cwLog cloudwatchLogsData) {
94142
for _, event := range cwLog.LogEvents {
95143
key := extractResourceKey(event, cwLog.Owner, cwLog.LogGroup, cwLog.LogStream)
96144

@@ -141,3 +189,22 @@ func extractResourceKey(event cloudwatchLogsLogEvent, owner, logGroup, logStream
141189
}
142190
return key
143191
}
192+
193+
func validateLog(log cloudwatchLogsData) error {
194+
switch log.MessageType {
195+
case "DATA_MESSAGE":
196+
if log.Owner == "" {
197+
return errEmptyOwner
198+
}
199+
if log.LogGroup == "" {
200+
return errEmptyLogGroup
201+
}
202+
if log.LogStream == "" {
203+
return errEmptyLogStream
204+
}
205+
case ctrlMessageType:
206+
default:
207+
return fmt.Errorf("cloudwatch log has invalid message type %q", log.MessageType)
208+
}
209+
return nil
210+
}

0 commit comments

Comments
 (0)