Skip to content

Commit 9450f82

Browse files
Ensure monitoring the Otel collector never runs in a beat process (#11204) (#11283)
* Ensure monitoring the Otel collector never runs in a beat process * Add changelog entry * Move log lines to constants (cherry picked from commit a9f4420) Co-authored-by: Mikołaj Świątek <[email protected]>
1 parent 3fddbc0 commit 9450f82

File tree

3 files changed

+57
-11
lines changed

3 files changed

+57
-11
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; an 80ish-character-long description of the change.
14+
summary: Ensure the monitoring input for the Otel collector can only run inside the collector.
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: elastic-agent
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
#pr: https://github.com/owner/repo/1234
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
32+
#issue: https://github.com/owner/repo/1234

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1661,6 +1661,12 @@ func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtime
16611661
case component.OtelRuntimeManager:
16621662
otelComponents = append(otelComponents, comp)
16631663
case component.ProcessRuntimeManager:
1664+
// Hack to fix https://github.com/elastic/elastic-agent/issues/11169
1665+
// TODO: Remove this after https://github.com/elastic/elastic-agent/issues/10220 is resolved
1666+
if comp.ID == "prometheus/metrics-monitoring" {
1667+
c.logger.Warnf("The Otel prometheus metrics monitoring input can't run in a beats process, skipping")
1668+
continue
1669+
}
16641670
runtimeComponents = append(runtimeComponents, comp)
16651671
default:
16661672
// this should be impossible if we parse the configuration correctly

testing/integration/ess/beat_receivers_test.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,12 @@ agent.monitoring.enabled: false
819819
}
820820
}
821821

822+
// Log lines TestBeatsReceiverProcessRuntimeFallback checks for
823+
const (
824+
otelRuntimeUnsupportedLogLineStart = "otel runtime is not supported"
825+
prometheusInputSkippedLogLine = "The Otel prometheus metrics monitoring input can't run in a beats process, skipping"
826+
)
827+
822828
// TestBeatsReceiverProcessRuntimeFallback verifies that we fall back to the process runtime if the otel runtime
823829
// does not support the requested configuration.
824830
func TestBeatsReceiverProcessRuntimeFallback(t *testing.T) {
@@ -850,7 +856,7 @@ outputs:
850856
hosts: [http://localhost:9200]
851857
api_key: placeholder
852858
indices: [] # not supported by the elasticsearch exporter
853-
agent.monitoring.enabled: false
859+
agent.monitoring._runtime_experimental: otel
854860
`
855861

856862
// this is the context for the whole test, with a global timeout defined
@@ -874,13 +880,14 @@ agent.monitoring.enabled: false
874880
status, statusErr := fixture.ExecStatus(ctx)
875881
assert.NoError(collect, statusErr)
876882
// we should be running beats processes even though the otel runtime was requested
877-
assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 1)
883+
assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 4)
878884
}, 1*time.Minute, 1*time.Second)
879885
logsBytes, err := fixture.Exec(ctx, []string{"logs", "-n", "1000", "--exclude-events"})
880886
require.NoError(t, err)
881887

882888
// verify we've logged a warning about using the process runtime
883-
var unsupportedLogRecord map[string]any
889+
var unsupportedLogRecords []map[string]any
890+
var prometheusUnsupportedLogRecord map[string]any
884891
for _, line := range strings.Split(string(logsBytes), "\n") {
885892
line = strings.TrimSpace(line)
886893
if line == "" {
@@ -891,9 +898,13 @@ agent.monitoring.enabled: false
891898
continue
892899
}
893900

894-
if message, ok := logRecord["message"].(string); ok && strings.HasPrefix(message, "otel runtime is not supported") {
895-
unsupportedLogRecord = logRecord
896-
break
901+
if message, ok := logRecord["message"].(string); ok {
902+
if strings.HasPrefix(message, otelRuntimeUnsupportedLogLineStart) {
903+
unsupportedLogRecords = append(unsupportedLogRecords, logRecord)
904+
}
905+
if strings.HasPrefix(message, prometheusInputSkippedLogLine) {
906+
prometheusUnsupportedLogRecord = logRecord
907+
}
897908
}
898909
}
899910

@@ -904,11 +915,8 @@ agent.monitoring.enabled: false
904915
}
905916
})
906917

907-
require.NotNil(t, unsupportedLogRecord, "unsupported log message should be present")
908-
message, ok := unsupportedLogRecord["message"].(string)
909-
require.True(t, ok, "log message field should be a string")
910-
expectedMessage := "otel runtime is not supported for component system/metrics-default, switching to process runtime, reason: unsupported configuration for system/metrics-default: error translating config for output: default, unit: system/metrics-default, error: indices is currently not supported: unsupported operation"
911-
assert.Equal(t, expectedMessage, message)
918+
assert.Len(t, unsupportedLogRecords, 5, "one log line for each component we try to run")
919+
assert.NotEmpty(t, prometheusUnsupportedLogRecord, "should get a log line about Otel prometheus metrics input being skipped")
912920
}
913921

914922
// TestComponentWorkDir verifies that the component working directory is not deleted when moving the component from

0 commit comments

Comments
 (0)