Skip to content

Commit 2c4c615

Browse files
authored
Ensure the self-monitoring configuration knows the actual component runtime (#11300)
* Move ComponentsModifier to the component package * Move Otel runtime determination to component modifier * Check supported outputs in monitoring config generation * Add changelog entry * Log warning about switching to process runtime for monitoring * Fix monitoring config types * Fix TestBeatsReceiverProcessRuntimeFallback * Add logstash output to test cases
1 parent fd8d849 commit 2c4c615

File tree

19 files changed

+602
-253
lines changed

19 files changed

+602
-253
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: bug-fix
12+
13+
# Change summary; an 80ish-character description of the change.
14+
summary: Ensure the self-monitoring configuration accounts for the runtime that components actually run in.
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodates a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
#description:
20+
21+
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
22+
component: elastic-agent
23+
24+
# PR URL; optional; the PR number that added the changeset.
25+
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
26+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
27+
# Please provide it if you are adding a fragment for a different PR.
28+
#pr: https://github.com/owner/repo/1234
29+
30+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
31+
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
32+
#issue: https://github.com/owner/repo/1234

internal/pkg/agent/application/application.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ func New(
149149
cfg.Settings.DownloadConfig.OS(),
150150
cfg.Settings.MonitoringConfig,
151151
agentInfo,
152+
log,
152153
)
153154

154155
runtime, err := runtime.NewManager(
@@ -171,7 +172,7 @@ func New(
171172

172173
var configMgr coordinator.ConfigManager
173174
var managed *managedConfigManager
174-
var compModifiers = []coordinator.ComponentsModifier{InjectAPMConfig}
175+
var compModifiers = []component.ComponentsModifier{InjectAPMConfig}
175176
var composableManaged bool
176177
var isManaged bool
177178
var actionAcker acker.Acker

internal/pkg/agent/application/coordinator/coordinator.go

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,6 @@ type VarsManager interface {
224224
Watch() <-chan []*transpiler.Vars
225225
}
226226

227-
// ComponentsModifier is a function that takes the computed components model and modifies it before
228-
// passing it into the components runtime manager.
229-
type ComponentsModifier func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error)
230-
231227
// managerShutdownTimeout is how long the coordinator will wait during shutdown
232228
// to receive termination states from its managers.
233229
// Note: The current timeout (5s) is shorter than the default stop timeout for
@@ -267,7 +263,7 @@ type Coordinator struct {
267263
otelCfg *confmap.Conf
268264

269265
caps capabilities.Capabilities
270-
modifiers []ComponentsModifier
266+
modifiers []component.ComponentsModifier
271267

272268
// The current state of the Coordinator. This value and its subfields are
273269
// safe to read directly from within the main Coordinator goroutine.
@@ -439,7 +435,7 @@ func New(
439435
otelMgr OTelManager,
440436
fleetAcker acker.Acker,
441437
initialUpgradeDetails *details.Details,
442-
modifiers ...ComponentsModifier,
438+
modifiers ...component.ComponentsModifier,
443439
) *Coordinator {
444440
var fleetState cproto.State
445441
var fleetMessage string
@@ -1812,17 +1808,10 @@ func (c *Coordinator) updateManagersWithConfig(model *component.Model) {
18121808
func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtimeModel *component.Model, otelModel *component.Model) {
18131809
var otelComponents, runtimeComponents []component.Component
18141810
for _, comp := range model.Components {
1815-
c.maybeOverrideRuntimeForComponent(&comp)
18161811
switch comp.RuntimeManager {
18171812
case component.OtelRuntimeManager:
18181813
otelComponents = append(otelComponents, comp)
18191814
case component.ProcessRuntimeManager:
1820-
// Hack to fix https://github.com/elastic/elastic-agent/issues/11169
1821-
// TODO: Remove this after https://github.com/elastic/elastic-agent/issues/10220 is resolved
1822-
if comp.ID == "prometheus/metrics-monitoring" {
1823-
c.logger.Warnf("The Otel prometheus metrics monitoring input can't run in a beats process, skipping")
1824-
continue
1825-
}
18261815
runtimeComponents = append(runtimeComponents, comp)
18271816
default:
18281817
// this should be impossible if we parse the configuration correctly
@@ -1844,7 +1833,7 @@ func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtime
18441833
// Normally, we use the runtime set in the component itself via the configuration, but
18451834
// we may also fall back to the process runtime if the otel runtime is unsupported for
18461835
// some reason. One example is the output using unsupported config options.
1847-
func (c *Coordinator) maybeOverrideRuntimeForComponent(comp *component.Component) {
1836+
func maybeOverrideRuntimeForComponent(logger *logger.Logger, comp *component.Component) {
18481837
if comp.RuntimeManager == component.ProcessRuntimeManager {
18491838
// do nothing, the process runtime can handle any component
18501839
return
@@ -1853,7 +1842,7 @@ func (c *Coordinator) maybeOverrideRuntimeForComponent(comp *component.Component
18531842
// check if the component is actually supported
18541843
err := translate.VerifyComponentIsOtelSupported(comp)
18551844
if err != nil {
1856-
c.logger.Warnf("otel runtime is not supported for component %s, switching to process runtime, reason: %v", comp.ID, err)
1845+
logger.Warnf("otel runtime is not supported for component %s, switching to process runtime, reason: %v", comp.ID, err)
18571846
comp.RuntimeManager = component.ProcessRuntimeManager
18581847
}
18591848
}
@@ -1943,8 +1932,15 @@ func (c *Coordinator) generateComponentModel() (err error) {
19431932
existingCompState[comp.Component.ID] = comp.State.Pid
19441933
}
19451934

1935+
otelRuntimeModifier := func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
1936+
for i := range comps {
1937+
maybeOverrideRuntimeForComponent(c.logger, &comps[i])
1938+
}
1939+
return comps, nil
1940+
}
19461941
comps, err := c.specs.ToComponents(
19471942
cfg,
1943+
append(c.modifiers, otelRuntimeModifier),
19481944
configInjector,
19491945
c.state.LogLevel,
19501946
c.agentInfo,
@@ -1957,13 +1953,6 @@ func (c *Coordinator) generateComponentModel() (err error) {
19571953
// Filter any disallowed inputs/outputs from the components
19581954
comps = c.filterByCapabilities(comps)
19591955

1960-
for _, modifier := range c.modifiers {
1961-
comps, err = modifier(comps, cfg)
1962-
if err != nil {
1963-
return fmt.Errorf("failed to modify components: %w", err)
1964-
}
1965-
}
1966-
19671956
// If we made it this far, update our internal derived values and
19681957
// return with no error
19691958
c.derivedConfig = cfg

internal/pkg/agent/application/endpoint_component_modifier.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313

1414
"github.com/elastic/elastic-agent-client/v7/pkg/client"
1515
"github.com/elastic/elastic-agent-libs/transport/tlscommon"
16-
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
1716
"github.com/elastic/elastic-agent/pkg/component"
1817
"github.com/elastic/elastic-agent/pkg/core/logger"
1918
)
@@ -50,7 +49,7 @@ func (tlsCache) MakeKey(keyPassPath, certPath, keyPath string) string {
5049
// "revision": 1,
5150
// "type": "endpoint"
5251
// }
53-
func EndpointSignedComponentModifier() coordinator.ComponentsModifier {
52+
func EndpointSignedComponentModifier() component.ComponentsModifier {
5453
return func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
5554
const signedKey = "signed"
5655

@@ -83,7 +82,7 @@ func EndpointSignedComponentModifier() coordinator.ComponentsModifier {
8382
// 'key_passphrase_path'.
8483
// It does so, ONLY for the client TLS configuration for mTLS used with
8584
// fleet-server.
86-
func EndpointTLSComponentModifier(log *logger.Logger) coordinator.ComponentsModifier {
85+
func EndpointTLSComponentModifier(log *logger.Logger) component.ComponentsModifier {
8786
return newEndpointTLSComponentModifier(log, &tlsCache{mu: &sync.Mutex{}})
8887
}
8988

internal/pkg/agent/application/endpoint_component_modifier_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"github.com/elastic/elastic-agent-client/v7/pkg/client"
2121
"github.com/elastic/elastic-agent-client/v7/pkg/proto"
2222
"github.com/elastic/elastic-agent-libs/testing/certutil"
23-
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
2423
"github.com/elastic/elastic-agent/internal/pkg/testutils/fipsutils"
2524
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"
2625

@@ -44,7 +43,7 @@ func TestEndpointComponentModifier(t *testing.T) {
4443

4544
tests := map[string][]struct {
4645
name string
47-
compModifier coordinator.ComponentsModifier
46+
compModifier component.ComponentsModifier
4847
comps []component.Component
4948
cfg map[string]interface{}
5049
wantComps []component.Component

internal/pkg/agent/application/fleet_server_bootstrap.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ var injectFleetServerInput = config.MustNewConfigFrom(map[string]interface{}{
5050

5151
// FleetServerComponentModifier modifies the comps to inject extra information from the policy into
5252
// the Fleet Server component and units needed to run Fleet Server correctly.
53-
func FleetServerComponentModifier(serverCfg *configuration.FleetServerConfig) coordinator.ComponentsModifier {
53+
func FleetServerComponentModifier(serverCfg *configuration.FleetServerConfig) component.ComponentsModifier {
5454
return func(comps []component.Component, _ map[string]interface{}) ([]component.Component, error) {
5555
for i, comp := range comps {
5656
if comp.InputSpec != nil && comp.InputSpec.InputType == fleetServer && comp.Err == nil {
@@ -118,7 +118,7 @@ func addBootstrapCfg(dst map[string]interface{}, es *configuration.Elasticsearch
118118

119119
// InjectFleetConfigComponentModifier The modifier that injects the fleet configuration for the components
120120
// that need to be able to connect to fleet server.
121-
func InjectFleetConfigComponentModifier(fleetCfg *configuration.FleetAgentConfig, agentInfo info.Agent) coordinator.ComponentsModifier {
121+
func InjectFleetConfigComponentModifier(fleetCfg *configuration.FleetAgentConfig, agentInfo info.Agent) component.ComponentsModifier {
122122
return func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
123123
hostsStr := fleetCfg.Client.GetHosts()
124124
fleetHosts := make([]interface{}, 0, len(hostsStr))

internal/pkg/agent/application/inject_proxy_component_modifier.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"golang.org/x/net/http/httpproxy"
1111

1212
"github.com/elastic/elastic-agent-client/v7/pkg/client"
13-
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
1413
"github.com/elastic/elastic-agent/pkg/component"
1514
)
1615

@@ -20,7 +19,7 @@ import (
2019
// The URL used is the HTTPS_PROXY env var. If that's not set the HTTP_PROXY env var is used.
2120
// If there are no env vars set, or the unit's config has `proxy_disable: true`, nothing is injected
2221
// If the output config has `proxy_url: ""`, it will not be overwritten.
23-
func InjectProxyEndpointModifier() coordinator.ComponentsModifier {
22+
func InjectProxyEndpointModifier() component.ComponentsModifier {
2423
return func(comps []component.Component, _ map[string]interface{}) ([]component.Component, error) {
2524
for i, comp := range comps {
2625
if comp.InputSpec != nil && comp.InputSpec.InputType == endpoint {

internal/pkg/agent/application/monitoring/component/testdata/monitoring_config_full_otel.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -915,4 +915,7 @@ inputs:
915915
type: system/metrics
916916
use_output: monitoring
917917
outputs:
918-
monitoring: {}
918+
monitoring:
919+
hosts:
920+
- localhost:9200
921+
type: elasticsearch

internal/pkg/agent/application/monitoring/component/testdata/monitoring_config_full_process.yaml

Lines changed: 18 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ agent:
44
enabled: false
55
metrics: true
66
inputs:
7-
- id: filestream-monitoring-agent
7+
- _runtime_experimental: process
8+
id: filestream-monitoring-agent
89
name: filestream-monitoring-agent
910
streams:
1011
- close:
@@ -110,7 +111,8 @@ inputs:
110111
type: filestream
111112
type: filestream
112113
use_output: monitoring
113-
- data_stream:
114+
- _runtime_experimental: process
115+
data_stream:
114116
namespace: default
115117
id: metrics-monitoring-beats
116118
name: metrics-monitoring-beats
@@ -357,9 +359,18 @@ inputs:
357359
binary: metricbeat
358360
id: prometheus/metrics-monitoring
359361
target: component
362+
- drop_fields:
363+
fields:
364+
- beat.stats.cgroup
365+
- beat.stats.cpu
366+
- beat.stats.handles
367+
- beat.stats.memstats
368+
- beat.stats.runtime
369+
ignore_missing: true
360370
type: beat/metrics
361371
use_output: monitoring
362-
- data_stream:
372+
- _runtime_experimental: process
373+
data_stream:
363374
namespace: default
364375
id: metrics-monitoring-agent
365376
name: metrics-monitoring-agent
@@ -875,61 +886,6 @@ inputs:
875886
binary: metricbeat
876887
id: http/metrics-monitoring
877888
target: component
878-
- data_stream:
879-
dataset: elastic_agent.elastic_agent
880-
namespace: default
881-
type: metrics
882-
failure_threshold: 5
883-
hosts:
884-
- placeholder
885-
id: metrics-monitoring-metricbeat-1
886-
index: metrics-elastic_agent.elastic_agent-default
887-
metricsets:
888-
- json
889-
namespace: agent
890-
path: /stats
891-
period: 1m0s
892-
processors:
893-
- add_fields:
894-
fields:
895-
dataset: elastic_agent.elastic_agent
896-
target: event
897-
- add_fields:
898-
fields:
899-
id: ""
900-
process: metricbeat
901-
snapshot: false
902-
version: placeholder
903-
target: elastic_agent
904-
- add_fields:
905-
fields:
906-
id: ""
907-
target: agent
908-
- copy_fields:
909-
fail_on_error: false
910-
fields:
911-
- from: http.agent.beat.cpu
912-
to: system.process.cpu
913-
- from: http.agent.beat.memstats.memory_sys
914-
to: system.process.memory.size
915-
- from: http.agent.beat.handles
916-
to: system.process.fd
917-
- from: http.agent.beat.cgroup
918-
to: system.process.cgroup
919-
- from: http.agent.apm-server
920-
to: apm-server
921-
- from: http.filebeat_input
922-
to: filebeat_input
923-
ignore_missing: true
924-
- drop_fields:
925-
fields:
926-
- http
927-
ignore_missing: true
928-
- add_fields:
929-
fields:
930-
binary: metricbeat
931-
id: prometheus/metrics-monitoring
932-
target: component
933889
type: http/metrics
934890
use_output: monitoring
935891
- _runtime_experimental: otel
@@ -1099,4 +1055,7 @@ inputs:
10991055
type: system/metrics
11001056
use_output: monitoring
11011057
outputs:
1102-
monitoring: {}
1058+
monitoring:
1059+
hosts:
1060+
- localhost:9200
1061+
type: elasticsearch

0 commit comments

Comments
 (0)