Skip to content

Commit e5cdc64

Browse files
authored
feat: optional health instrumentation fields (#995)
* feat: optional health instrumentation fields * docs: align with logic * test: add more cases * docs: add ref to additional documentation
1 parent 950a692 commit e5cdc64

File tree

1 file changed

+232
-43
lines changed

1 file changed

+232
-43
lines changed

agent-control/src/sub_agent/health/k8s/instrumentation.rs

Lines changed: 232 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,17 @@ use std::sync::Arc;
2626
#[derive(Debug, Default, Deserialize, PartialEq)]
2727
#[serde(rename_all = "camelCase")]
2828
struct InstrumentationStatus {
29+
#[serde(default)]
2930
pods_matching: i64,
31+
#[serde(default)]
3032
pods_healthy: i64,
33+
#[serde(default)]
3134
pods_injected: i64,
35+
#[serde(default)]
3236
pods_not_ready: i64,
37+
#[serde(default)]
3338
pods_outdated: i64,
39+
#[serde(default)]
3440
pods_unhealthy: i64,
3541
#[serde(default)]
3642
unhealthy_pods_errors: Vec<UnhealthyPodError>,
@@ -55,26 +61,36 @@ impl Display for InstrumentationStatus {
5561

5662
impl InstrumentationStatus {
5763
/// Evaluates the health of an Instrumentation and returns a status formatted as follows:
58-
/// "podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:0, podsUnhealthy:0"
59-
/// It returns a Healthy or Unhealthy type depending on the conditions:
60-
/// not_ready > 0 --> Unhealthy
61-
/// Matching != Injected --> Unhealthy
62-
/// Unhealthy > 0 ---> Unhealthy with lastErrors
63-
/// We can't rely on the number of healthy pods lower than matching because there can be uninstrumented
64-
/// or outdated pods so the matching will be higher, so we just consider healthy
64+
/// `"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:0, podsUnhealthy:0"`
65+
/// It returns a Healthy value if all of the following hold:
66+
/// `not_ready` <= 0
67+
/// `healthy` > 0
68+
/// `unhealthy` <= 0
69+
/// `matching` > 0
70+
/// `matching` == `injected`
71+
/// We can't require the number of healthy pods to equal the number of matching pods because there can be
71+
/// uninstrumented or outdated pods, so the matching count will be higher. Any case that does not
6573
/// satisfy all of the previous conditions is considered unhealthy.
6674
pub(crate) fn get_health(&self) -> Health {
67-
if self.pods_matching <= 0 || self.is_healthy() {
75+
if self.is_healthy() {
6876
Health::Healthy(Healthy::new(self.to_string()))
6977
} else {
7078
Health::Unhealthy(Unhealthy::new(self.to_string(), self.last_error()))
7179
}
7280
}
7381

82+
// If this changes please align the docs here: <https://newrelic.atlassian.net/wiki/spaces/INST/pages/3945988387/K8s+Retrieving+health+from+Instrumentation+CR+s+status#Agent-Control-logic>
7483
fn is_healthy(&self) -> bool {
84+
// All pods must be ready
7585
self.pods_not_ready <= 0
76-
&& self.pods_injected == self.pods_matching
77-
&& self.pods_unhealthy <= 0
86+
// No unhealthy pods
87+
&& self.pods_unhealthy <= 0
88+
// At least one pod healthy
89+
&& self.pods_healthy > 0
90+
// There should be matching pods, else the instrumentation is not doing anything
91+
&& self.pods_matching > 0
92+
// The pods that match should have been injected
93+
&& self.pods_injected == self.pods_matching
7894
}
7995

8096
fn last_error(&self) -> String {
@@ -173,27 +189,35 @@ mod tests {
173189
use super::*;
174190

175191
#[test]
176-
fn get_healthiness_basic() {
192+
fn default_instrumentation_value_evals_to_unhealthy() {
177193
let status = InstrumentationStatus::default();
178194

179-
assert!(matches!(status.get_health(), Health::Healthy(_)));
195+
assert!(matches!(status.get_health(), Health::Unhealthy(_)));
180196
}
181197

182198
#[test]
183199
fn json_failing_serde() {
184200
let status_jsons = [
185-
serde_json::json!({}),
186-
serde_json::json!([]),
187-
serde_json::json!(null),
188201
serde_json::json!(1),
189202
serde_json::json!(true),
203+
serde_json::json!("podsMatching"),
204+
serde_json::json!(["podsMatching"]),
205+
serde_json::json!([{"podsMatching": 1}]),
206+
serde_json::json!(null),
190207
];
191208

192-
for status_json in status_jsons.iter() {
193-
let status: Result<InstrumentationStatus, _> =
194-
serde_json::from_value(status_json.clone());
195-
assert!(status.is_err());
196-
}
209+
status_jsons.into_iter().for_each(|status_json| {
210+
assert!(serde_json::from_value::<InstrumentationStatus>(status_json).is_err())
211+
});
212+
}
213+
214+
#[test]
215+
fn json_empty_collections_can_be_deserialized() {
216+
let status_jsons = [serde_json::json!([]), serde_json::json!({})];
217+
218+
status_jsons.into_iter().for_each(|status_json| {
219+
assert!(serde_json::from_value::<InstrumentationStatus>(status_json).is_ok())
220+
});
197221
}
198222

199223
#[test]
@@ -264,12 +288,53 @@ mod tests {
264288
],
265289
},
266290
},
291+
TestData {
292+
case: "missing fields",
293+
json: serde_json::json!({
294+
"podsMatching": 1,
295+
"podsHealthy": 1,
296+
"podsInjected": 1,
297+
"podsUnhealthy": 1,
298+
"unhealthyPodsErrors": [
299+
{
300+
"pod": "pod1",
301+
"lastError": "error1"
302+
},
303+
{
304+
"pod": "pod2",
305+
"lastError": "error2"
306+
}
307+
]
308+
}),
309+
expected: InstrumentationStatus {
310+
pods_matching: 1,
311+
pods_healthy: 1,
312+
pods_injected: 1,
313+
pods_not_ready: 0,
314+
pods_outdated: 0,
315+
pods_unhealthy: 1,
316+
unhealthy_pods_errors: vec![
317+
UnhealthyPodError {
318+
pod: "pod1".to_string(),
319+
last_error: "error1".to_string(),
320+
},
321+
UnhealthyPodError {
322+
pod: "pod2".to_string(),
323+
last_error: "error2".to_string(),
324+
},
325+
],
326+
},
327+
},
267328
];
268329

269-
for data in data_table.iter() {
270-
let status: InstrumentationStatus = serde_json::from_value(data.json.clone()).unwrap();
271-
assert_eq!(status, data.expected, "failed case '{}'", data.case);
272-
}
330+
data_table.into_iter().for_each(|data| {
331+
assert_eq!(
332+
serde_json::from_value::<InstrumentationStatus>(data.json.clone()).unwrap(),
333+
data.expected,
334+
"failed case '{}'",
335+
data.case
336+
);
337+
});
273338
}
274339

275340
#[test]
@@ -283,9 +348,9 @@ mod tests {
283348
TestData {
284349
case: "default case",
285350
status: InstrumentationStatus::default(),
286-
expected: Health::Healthy(Healthy::new(
351+
expected: Health::Unhealthy(Unhealthy::new(
287352
"podsMatching:0, podsHealthy:0, podsInjected:0, podsNotReady:0, podsOutdated:0, podsUnhealthy:0"
288-
.to_string(),
353+
.to_string(), String::default()
289354
)),
290355
},
291356
TestData {
@@ -294,27 +359,19 @@ mod tests {
294359
pods_matching: 1,
295360
pods_healthy: 1,
296361
pods_injected: 1,
297-
pods_not_ready: 1,
298-
pods_outdated: 0,
299-
pods_unhealthy: 0,
300-
unhealthy_pods_errors: vec![],
362+
..Default::default()
301363
},
302-
expected: Health::Unhealthy(Unhealthy::new(
303-
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:1, podsOutdated:0, podsUnhealthy:0"
304-
.to_string(),
305-
"".to_string(),
364+
expected: Health::Healthy(Healthy::new(
365+
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:0, podsUnhealthy:0"
366+
.to_string()
306367
)),
307368
},
308369
TestData {
309370
case: "unhealthy case",
310371
status: InstrumentationStatus {
311372
pods_matching: 1,
312373
pods_healthy: 1,
313-
pods_injected: 0,
314-
pods_not_ready: 0,
315-
pods_outdated: 0,
316-
pods_unhealthy: 0,
317-
unhealthy_pods_errors: vec![],
374+
..Default::default()
318375
},
319376
expected: Health::Unhealthy(Unhealthy::new(
320377
"podsMatching:1, podsHealthy:1, podsInjected:0, podsNotReady:0, podsOutdated:0, podsUnhealthy:0"
@@ -328,28 +385,160 @@ mod tests {
328385
pods_matching: 1,
329386
pods_healthy: 1,
330387
pods_injected: 1,
331-
pods_not_ready: 0,
332-
pods_outdated: 0,
333388
pods_unhealthy: 1,
334389
unhealthy_pods_errors: vec![UnhealthyPodError {
335390
pod: "pod1".to_string(),
336391
last_error: "error1".to_string(),
337392
}],
393+
..Default::default()
338394
},
339395
expected: Health::Unhealthy(Unhealthy::new(
340396
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:0, podsUnhealthy:1"
341397
.to_string(),
342398
"pod pod1:error1".to_string(),
343399
)),},
400+
TestData {
401+
case: "unhealthy case with multiple errors",
402+
status: InstrumentationStatus {
403+
pods_matching: 1,
404+
pods_healthy: 1,
405+
pods_injected: 1,
406+
pods_unhealthy: 2,
407+
unhealthy_pods_errors: vec![
408+
UnhealthyPodError {
409+
pod: "pod1".to_string(),
410+
last_error: "error1".to_string(),
411+
},
412+
UnhealthyPodError {
413+
pod: "pod2".to_string(),
414+
last_error: "error2".to_string(),
415+
},
416+
],
417+
..Default::default()
418+
},
419+
expected: Health::Unhealthy(Unhealthy::new(
420+
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:0, podsUnhealthy:2"
421+
.to_string(),
422+
"pod pod1:error1, pod pod2:error2".to_string(),
423+
)),
424+
},
425+
TestData {
426+
case: "0 pods matching",
427+
status: InstrumentationStatus {
428+
pods_matching: 0,
429+
pods_healthy: 1,
430+
pods_injected: 1,
431+
pods_not_ready: 1,
432+
pods_outdated: 1,
433+
pods_unhealthy: 1,
434+
..Default::default()
435+
},
436+
expected: Health::Unhealthy(Unhealthy::new(
437+
"podsMatching:0, podsHealthy:1, podsInjected:1, podsNotReady:1, podsOutdated:1, podsUnhealthy:1"
438+
.to_string(),
439+
"".to_string(),
440+
)),
441+
442+
},
443+
TestData {
444+
case: "0 healthy pods",
445+
status: InstrumentationStatus {
446+
pods_matching: 1,
447+
pods_healthy: 0,
448+
pods_injected: 1,
449+
pods_not_ready: 1,
450+
pods_outdated: 1,
451+
pods_unhealthy: 1,
452+
..Default::default()
453+
},
454+
expected: Health::Unhealthy(Unhealthy::new(
455+
"podsMatching:1, podsHealthy:0, podsInjected:1, podsNotReady:1, podsOutdated:1, podsUnhealthy:1"
456+
.to_string(),
457+
"".to_string(),
458+
)),
459+
},
460+
461+
TestData {
462+
case: "0 injected pods",
463+
status: InstrumentationStatus {
464+
pods_matching: 1,
465+
pods_healthy: 1,
466+
pods_injected: 0,
467+
pods_not_ready: 1,
468+
pods_outdated: 1,
469+
pods_unhealthy: 1,
470+
..Default::default()
471+
},
472+
expected: Health::Unhealthy(Unhealthy::new(
473+
"podsMatching:1, podsHealthy:1, podsInjected:0, podsNotReady:1, podsOutdated:1, podsUnhealthy:1"
474+
.to_string(),
475+
"".to_string(),
476+
)),
477+
},
478+
479+
TestData {
480+
case: "0 not ready pods but unhealthy",
481+
status: InstrumentationStatus {
482+
pods_matching: 1,
483+
pods_healthy: 1,
484+
pods_injected: 1,
485+
pods_not_ready: 0,
486+
pods_outdated: 1,
487+
pods_unhealthy: 1,
488+
..Default::default()
489+
},
490+
expected: Health::Unhealthy(Unhealthy::new(
491+
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:0, podsOutdated:1, podsUnhealthy:1"
492+
.to_string(),
493+
"".to_string(),
494+
)),
495+
},
496+
497+
TestData {
498+
case: "matching != injected",
499+
status: InstrumentationStatus {
500+
pods_matching: 1,
501+
pods_healthy: 1,
502+
pods_injected: 2,
503+
pods_not_ready: 1,
504+
pods_outdated: 1,
505+
pods_unhealthy: 1,
506+
..Default::default()
507+
},
508+
expected: Health::Unhealthy(Unhealthy::new(
509+
"podsMatching:1, podsHealthy:1, podsInjected:2, podsNotReady:1, podsOutdated:1, podsUnhealthy:1"
510+
.to_string(),
511+
"".to_string(),
512+
)),
513+
},
514+
TestData {
515+
case: "not ready pods",
516+
status: InstrumentationStatus {
517+
pods_matching: 1,
518+
pods_healthy: 1,
519+
pods_injected: 1,
520+
pods_not_ready: 1,
521+
pods_outdated: 1,
522+
pods_unhealthy: 1,
523+
..Default::default()
524+
},
525+
expected: Health::Unhealthy(Unhealthy::new(
526+
"podsMatching:1, podsHealthy:1, podsInjected:1, podsNotReady:1, podsOutdated:1, podsUnhealthy:1"
527+
.to_string(),
528+
"".to_string(),
529+
)),
530+
},
531+
532+
344533
];
345534

346-
for data in data_table.iter() {
535+
data_table.into_iter().for_each(|data| {
347536
assert_eq!(
348537
data.status.get_health(),
349538
data.expected,
350539
"failed case '{}'",
351540
data.case
352541
);
353-
}
542+
});
354543
}
355544
}

0 commit comments

Comments
 (0)