Skip to content

Commit 8469962

Browse files
Ensure monitoring the Otel collector never runs in a beat process (#11204) (#11284)
* Ensure monitoring the Otel collector never runs in a beat process * Add changelog entry * Move log lines to constants (cherry picked from commit a9f4420) Co-authored-by: Mikołaj Świątek <[email protected]>
1 parent 211edfa commit 8469962

File tree

3 files changed

+56
-11
lines changed

3 files changed

+56
-11
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; an 80ish-character description of the change.
14+
summary: Ensure the monitoring input for the Otel collector can only run inside the collector.
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: elastic-agent
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
#pr: https://github.com/owner/repo/1234
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
32+
#issue: https://github.com/owner/repo/1234

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1812,6 +1812,12 @@ func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtime
18121812
case component.OtelRuntimeManager:
18131813
otelComponents = append(otelComponents, comp)
18141814
case component.ProcessRuntimeManager:
1815+
// Hack to fix https://github.com/elastic/elastic-agent/issues/11169
1816+
// TODO: Remove this after https://github.com/elastic/elastic-agent/issues/10220 is resolved
1817+
if comp.ID == "prometheus/metrics-monitoring" {
1818+
c.logger.Warnf("The Otel prometheus metrics monitoring input can't run in a beats process, skipping")
1819+
continue
1820+
}
18151821
runtimeComponents = append(runtimeComponents, comp)
18161822
default:
18171823
// this should be impossible if we parse the configuration correctly

testing/integration/ess/beat_receivers_test.go

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,12 @@ agent.monitoring.enabled: false
817817
}
818818
}
819819

820+
// Log message prefixes that TestBeatsReceiverProcessRuntimeFallback looks for in the agent logs.
821+
const (
822+
otelRuntimeUnsupportedLogLineStart = "otel runtime is not supported"
823+
prometheusInputSkippedLogLine = "The Otel prometheus metrics monitoring input can't run in a beats process, skipping"
824+
)
825+
820826
// TestBeatsReceiverProcessRuntimeFallback verifies that we fall back to the process runtime if the otel runtime
821827
// does not support the requested configuration.
822828
func TestBeatsReceiverProcessRuntimeFallback(t *testing.T) {
@@ -848,7 +854,6 @@ outputs:
848854
hosts: [http://localhost:9200]
849855
api_key: placeholder
850856
indices: [] # not supported by the elasticsearch exporter
851-
agent.monitoring.enabled: false
852857
`
853858

854859
// this is the context for the whole test, with a global timeout defined
@@ -872,13 +877,14 @@ agent.monitoring.enabled: false
872877
status, statusErr := fixture.ExecStatus(ctx)
873878
assert.NoError(collect, statusErr)
874879
// we should be running beats processes even though the otel runtime was requested
875-
assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 1)
880+
assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 4)
876881
}, 1*time.Minute, 1*time.Second)
877882
logsBytes, err := fixture.Exec(ctx, []string{"logs", "-n", "1000", "--exclude-events"})
878883
require.NoError(t, err)
879884

880885
// verify we've logged a warning about using the process runtime
881-
var unsupportedLogRecord map[string]any
886+
var unsupportedLogRecords []map[string]any
887+
var prometheusUnsupportedLogRecord map[string]any
882888
for _, line := range strings.Split(string(logsBytes), "\n") {
883889
line = strings.TrimSpace(line)
884890
if line == "" {
@@ -889,9 +895,13 @@ agent.monitoring.enabled: false
889895
continue
890896
}
891897

892-
if message, ok := logRecord["message"].(string); ok && strings.HasPrefix(message, "otel runtime is not supported") {
893-
unsupportedLogRecord = logRecord
894-
break
898+
if message, ok := logRecord["message"].(string); ok {
899+
if strings.HasPrefix(message, otelRuntimeUnsupportedLogLineStart) {
900+
unsupportedLogRecords = append(unsupportedLogRecords, logRecord)
901+
}
902+
if strings.HasPrefix(message, prometheusInputSkippedLogLine) {
903+
prometheusUnsupportedLogRecord = logRecord
904+
}
895905
}
896906
}
897907

@@ -902,11 +912,8 @@ agent.monitoring.enabled: false
902912
}
903913
})
904914

905-
require.NotNil(t, unsupportedLogRecord, "unsupported log message should be present")
906-
message, ok := unsupportedLogRecord["message"].(string)
907-
require.True(t, ok, "log message field should be a string")
908-
expectedMessage := "otel runtime is not supported for component system/metrics-default, switching to process runtime, reason: unsupported configuration for system/metrics-default: error translating config for output: default, unit: system/metrics-default, error: indices is currently not supported: unsupported operation"
909-
assert.Equal(t, expectedMessage, message)
915+
assert.Len(t, unsupportedLogRecords, 5, "one log line for each component we try to run")
916+
assert.NotEmpty(t, prometheusUnsupportedLogRecord, "should get a log line about Otel prometheus metrics input being skipped")
910917
}
911918

912919
// TestComponentWorkDir verifies that the component working directory is not deleted when moving the component from

0 commit comments

Comments
 (0)