Skip to content

Commit 70d62f8

Browse files
Nr 438909 multi binary health (#1615)
* feat: Add new Exec Health repository * feat: Add id field to executables * feat: Save supervisor healthiness into its repository * feat: Add exec health checker * feat: add ID to EBPF agent type executables * feat: set exec health as the default * feat: fix test failing because now there is always a health_checker spawned (default exec) * feat: Modify exec checker to work with a buffered channel instead of repository * feat: Modify healthiness variable to is_healthy for a more meaningful name. * feat: Executable id can't be duplicated + Health is calculated adding Exec health and File/http health if present. * feat: Modify the publishing of health events to not transform the error * feat: Format code * feat: Correct leftover from rebase * feat: Add default to executables * feat: Make exec id non templateable + add the non empty health status. * feat: Fix test_file_health_without_supervisor.
1 parent 96f7814 commit 70d62f8

26 files changed

+772
-166
lines changed

agent-control/agent-type-registry/newrelic/com.newrelic.ebpf-0.1.0.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ deployment:
247247
on_host:
248248
enable_file_logging: ${nr-var:enable_file_logging}
249249
executables:
250-
- path: /usr/bin/nr-ebpf-agent-client
250+
- id: nr-ebpf-agent-client
251+
path: /usr/bin/nr-ebpf-agent-client
251252
env:
252253
NEW_RELIC_LICENSE_KEY: "${nr-env:NEW_RELIC_LICENSE_KEY}"
253254
DEPLOYMENT_NAME: "${nr-var:config_agent.DEPLOYMENT_NAME}"
@@ -292,7 +293,8 @@ deployment:
292293
backoff_strategy:
293294
type: fixed
294295
backoff_delay: ${nr-var:backoff_delay}
295-
- path: /usr/bin/nr-ebpf-agent
296+
- id: nr-ebpf-agent
297+
path: /usr/bin/nr-ebpf-agent
296298
env:
297299
NEW_RELIC_LICENSE_KEY: "${nr-env:NEW_RELIC_LICENSE_KEY}"
298300
DEPLOYMENT_NAME: "${nr-var:config_agent.DEPLOYMENT_NAME}"

agent-control/agent-type-registry/newrelic/com.newrelic.infrastructure-0.1.0.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ deployment:
117117
args: --version
118118
regex: \d+\.\d+\.\d+
119119
executables:
120-
- path: /usr/bin/newrelic-infra
120+
- id: newrelic-infra
121+
path: /usr/bin/newrelic-infra
121122
args: >-
122123
--config=${nr-var:config_agent}
123124
env:

agent-control/agent-type-registry/newrelic/io.opentelemetry.collector-0.1.0.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ deployment:
8383
regex: \d+\.\d+\.\d+
8484
executables:
8585
- # Important to note the binary name is nrdot-collector-host matching the new nrdot binary
86+
id: nrdot-collector-host
8687
path: /usr/bin/nrdot-collector-host
8788
args: >-
8889
--config=${nr-var:config}

agent-control/src/agent_type/README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ deployment:
142142
args: --version
143143
regex: \d+\.\d+\.\d+
144144
executables:
145-
- path: /usr/bin/newrelic-infra
145+
- id: newrelic-infra
146+
path: /usr/bin/newrelic-infra
146147
args: "--config=${nr-var:config_agent}"
147148
env: "NRIA_PLUGIN_DIR=${nr-var:config_integrations} NRIA_STATUS_SERVER_ENABLED=true"
148149
restart_policy:
@@ -157,7 +158,8 @@ In this section:
157158
* `health`: The measures used to check the health status of the agent.
158159
* `version`: The command used to check the version of the binary.
159160
* `executables`: This outlines the list of binaries the agent supervisor runs. Developers can define:
160-
- * `path`: The location of the binary required.
161+
- * `id`: A unique identifier for the executable, used by the health checker.
162+
* `path`: The location of the binary required.
161163
* `args`: The command-line arguments needed by the binary.
162164
* `env`: Specifies the required environment variables.
163165
* `restart_policy`: The guidelines for if or when the process should be restarted.
@@ -402,7 +404,8 @@ variables:
402404
deployment:
403405
on_host:
404406
executables:
405-
- path: /usr/bin/telegraf
407+
- id: telegraf
408+
path: /usr/bin/telegraf
406409
args: "--config ${nr-var:config_file}"
407410
env: ""
408411
restart_policy:

agent-control/src/agent_type/definition.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,8 @@ deployment:
245245
path: /healthz
246246
port: 8080
247247
executables:
248-
- path: ${nr-var:bin}/otelcol
248+
- id: otelcol
249+
path: ${nr-var:bin}/otelcol
249250
args: "-c ${nr-var:deployment.k8s.image}"
250251
restart_policy:
251252
backoff_strategy:
@@ -265,7 +266,8 @@ spec:
265266
deployment:
266267
on_host:
267268
executables:
268-
- path: ${nr-var:bin}/otelcol
269+
- id: otelcol
270+
path: ${nr-var:bin}/otelcol
269271
args: "-c ${nr-var:deployment.k8s.image}"
270272
"#;
271273

@@ -379,7 +381,8 @@ deployment:
379381
path: /v1/status
380382
port: "${nr-var:status_server_port}"
381383
executables:
382-
- path: /usr/bin/newrelic-infra
384+
- id: newrelic-infra
385+
path: /usr/bin/newrelic-infra
383386
args: "--config ${nr-var:config} --config2 ${nr-var:config2}"
384387
"#;
385388

@@ -509,7 +512,8 @@ variables:
509512
deployment:
510513
on_host:
511514
executables:
512-
- path: /bin/echo
515+
- id: echo
516+
path: /bin/echo
513517
args: "${nr-var:restart_policy.type}"
514518
"#;
515519

agent-control/src/agent_type/render/persister/config_persister_file.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,8 @@ variables:
729729
deployment:
730730
on_host:
731731
executables:
732-
- path: /usr/bin/newrelic-infra
732+
- id: newrelic-infra
733+
path: /usr/bin/newrelic-infra
733734
args: "--config=${config_file}"
734735
restart_policy:
735736
backoff_strategy:
@@ -774,7 +775,8 @@ variables:
774775
deployment:
775776
on_host:
776777
executables:
777-
- path: /usr/bin/newrelic-infra
778+
- id: newrelic-infra
779+
path: /usr/bin/newrelic-infra
778780
args: "--config=${config_file1} --config=${config_file2} --config=${config_file3}"
779781
restart_policy:
780782
backoff_strategy:
@@ -808,7 +810,8 @@ variables:
808810
deployment:
809811
on_host:
810812
executables:
811-
- path: /usr/bin/newrelic-infra
813+
- id: newrelic-infra
814+
path: /usr/bin/newrelic-infra
812815
args: "--config=${integrations}"
813816
restart_policy:
814817
backoff_strategy:
@@ -855,7 +858,8 @@ variables:
855858
deployment:
856859
on_host:
857860
executables:
858-
- path: /usr/bin/newrelic-infra
861+
- id: newrelic-infra
862+
path: /usr/bin/newrelic-infra
859863
args: "--config=${integrations} --logging=${logging}"
860864
restart_policy:
861865
backoff_strategy:

agent-control/src/agent_type/render/renderer.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,8 @@ variables: {}
804804
deployment:
805805
on_host:
806806
executables:
807-
- path: /opt/first
807+
- id: first
808+
path: /opt/first
808809
args: "${nr-ac:sa-fake-var}"
809810
"#,
810811
&Environment::OnHost,
@@ -864,9 +865,11 @@ variables:
864865
deployment:
865866
on_host:
866867
executables:
867-
- path: /opt/first
868+
- id: first
869+
path: /opt/first
868870
args: "--config_path=${nr-var:config_path} --foo=${nr-var:config_argument}"
869-
- path: /opt/second
871+
- id: second
872+
path: /opt/second
870873
args: "--config_path=${nr-var:config_path} --foo=${nr-var:config_argument}"
871874
"#;
872875

@@ -897,7 +900,8 @@ variables:
897900
deployment:
898901
on_host:
899902
executables:
900-
- path: /usr/bin/newrelic-infra
903+
- id: newrelic-infra
904+
path: /usr/bin/newrelic-infra
901905
args: "--config1 ${nr-var:config1} --config2 ${nr-var:config2}"
902906
"#;
903907

@@ -941,7 +945,8 @@ variables:
941945
deployment:
942946
on_host:
943947
executables:
944-
- path: /bin/otelcol
948+
- id: otelcol
949+
path: /bin/otelcol
945950
args: "-c some-arg"
946951
restart_policy:
947952
backoff_strategy:

agent-control/src/agent_type/runtime_config/health_config.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const DEFAULT_HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(15);
1616
///
1717
/// This structure includes parameters to define intervals between health checks,
1818
/// timeouts for checks, and the specific health check method—either HTTP or execute command.
19-
#[derive(Debug, Deserialize, Clone, PartialEq)]
19+
#[derive(Debug, Default, Deserialize, Clone, PartialEq)]
2020
pub struct OnHostHealthConfig {
2121
/// The duration to wait between health checks.
2222
#[serde(default)]
@@ -31,8 +31,8 @@ pub struct OnHostHealthConfig {
3131
pub(crate) timeout: HealthCheckTimeout,
3232

3333
/// Details on the type of health check. Defined by the `HealthCheck` enumeration.
34-
#[serde(flatten)]
35-
pub(crate) check: OnHostHealthCheck,
34+
#[serde(default, flatten)]
35+
pub(crate) check: Option<OnHostHealthCheck>,
3636
}
3737

3838
#[derive(Debug, Deserialize, Clone, Copy, PartialEq, WrapperWithDefault)]
@@ -171,7 +171,10 @@ impl Templateable for HttpPath {
171171
impl Templateable for OnHostHealthConfig {
172172
fn template_with(self, variables: &Variables) -> Result<Self, AgentTypeError> {
173173
Ok(Self {
174-
check: self.check.template_with(variables)?,
174+
check: self
175+
.check
176+
.map(|check| check.template_with(variables))
177+
.transpose()?,
175178
..self
176179
})
177180
}

0 commit comments

Comments
 (0)