Skip to content

Commit 05dfb2e

Browse files
authored
Update restart events to put resource in regarding (strimzi#11352)
Signed-off-by: Katherine Stanley <11195226+katheris@users.noreply.github.com>
1 parent 3d47004 commit 05dfb2e

8 files changed

Lines changed: 95 additions & 38 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
* The `ContinueReconciliationOnManualRollingUpdateFailure` feature gate moves to GA stage and is permanently enabled without the possibility to disable it.
2424
* Update OAuth library to 0.16.2.
2525
* Update HTTP bridge to 0.32.0.
26+
* Kubernetes events emitted during a Pod restart now have the Kafka resource as the `regardingObject` and the Pod in the `related` field.
2627

2728
### Major changes, deprecations and removals
2829

@@ -45,6 +46,8 @@
4546
Please use the template section to configure additional volumes instead.
4647
* Kafka 4.0 and newer is using Log4j2 for logging instead of Reload4j/Log4j1.
4748
If you have any custom logging configuration, you might need to update it during the upgrade to Kafka 4.0.
49+
* Kubernetes events for Pod restarts no longer have the Pod as the `regardingObject`.
50+
If you are using `regardingObject` as a `field-selector` for listing events, you must update the selector to specify the Kafka resource instead.
4851

4952
## 0.45.0
5053

cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/resource/KafkaRoller.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ private static <T, E extends Exception> T await(Future<T> future, long timeout,
903903
protected Future<Void> restart(Pod pod, RestartContext restartContext) {
904904
return podOperations.restart(reconciliation, pod, operationTimeoutMs)
905905
.onComplete(i -> vertx.executeBlocking(() -> {
906-
eventsPublisher.publishRestartEvents(pod, restartContext.restartReasons);
906+
eventsPublisher.publishRestartEvents(reconciliation, pod, restartContext.restartReasons);
907907
return null;
908908
}));
909909
}

cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisher.java

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import io.fabric8.kubernetes.client.KubernetesClient;
1313
import io.strimzi.operator.cluster.model.RestartReason;
1414
import io.strimzi.operator.cluster.model.RestartReasons;
15+
import io.strimzi.operator.common.Reconciliation;
1516
import org.apache.logging.log4j.LogManager;
1617
import org.apache.logging.log4j.Logger;
1718

@@ -57,23 +58,25 @@ protected KubernetesRestartEventPublisher(KubernetesClient client, String operat
5758
}
5859

5960
/**
60-
* Publishes an Kubernetes Event about Pod restart
61+
* Publishes a Kubernetes Event about Pod restart
6162
*
62-
* @param pod Pod which is restarted
63-
* @param reasons Reasons for the restart
63+
* @param reconciliation Reconciliation marker
64+
* @param pod Pod which is restarted
65+
* @param reasons Reasons for the restart
6466
*/
65-
public void publishRestartEvents(Pod pod, RestartReasons reasons) {
67+
public void publishRestartEvents(Reconciliation reconciliation, Pod pod, RestartReasons reasons) {
6668
MicroTime k8sEventTime = new MicroTime(K8S_MICROTIME.format(ZonedDateTime.now(clock)));
69+
ObjectReference resourceReference = createResourceReference(reconciliation);
6770
ObjectReference podReference = createPodReference(pod);
6871

6972
try {
7073
for (RestartReason reason : reasons) {
71-
String note = maybeTruncated(reasons.getNoteFor(reason));
74+
String note = maybeTruncated("Rolling Pod " + pod.getMetadata().getName() + " due to " + reasons.getNoteFor(reason));
7275
String type = "Normal";
7376
String k8sFormattedReason = reason.pascalCased();
74-
LOG.debug("Publishing K8s event, time {}, type, {}, reason, {}, note, {}, pod, {}",
75-
k8sEventTime, type, k8sFormattedReason, note, podReference);
76-
publishEvent(k8sEventTime, podReference, k8sFormattedReason, type, note);
77+
LOG.debug("Publishing K8s event, time={}, type={}, reason={}, note={}, resource={}, pod={}",
78+
k8sEventTime, type, k8sFormattedReason, note, resourceReference, podReference);
79+
publishEvent(k8sEventTime, resourceReference, podReference, k8sFormattedReason, type, note);
7780
}
7881
} catch (Exception e) {
7982
LOG.error("Exception on K8s event publication", e);
@@ -83,13 +86,14 @@ public void publishRestartEvents(Pod pod, RestartReasons reasons) {
8386
/**
8487
* Publish a Kubernetes Event referring to certain KafkaRoller pod action
8588
*
86-
* @param eventTime - Microtime to use for event
87-
* @param podReference - ObjectReference pointing to rolled pod
88-
* @param reason - reason the pod is being rolled
89-
* @param type - the type of K8s event "Normal", or "Warning"
90-
* @param note - the note to attach to the event
89+
* @param eventTime - Microtime to use for event
90+
* @param resourceReference - ObjectReference pointing to the owning resource
91+
* @param podReference - ObjectReference pointing to rolled pod
92+
* @param reason - reason the pod is being rolled
93+
* @param type - the type of K8s event "Normal", or "Warning"
94+
* @param note - the note to attach to the event
9195
*/
92-
protected void publishEvent(MicroTime eventTime, ObjectReference podReference, String reason, String type, String note) {
96+
protected void publishEvent(MicroTime eventTime, ObjectReference resourceReference, ObjectReference podReference, String reason, String type, String note) {
9397
EventBuilder builder = new EventBuilder();
9498

9599
builder.withNewMetadata()
@@ -98,7 +102,8 @@ protected void publishEvent(MicroTime eventTime, ObjectReference podReference, S
98102
.withAction(ACTION)
99103
.withReportingController(CONTROLLER)
100104
.withReportingInstance(operatorName)
101-
.withRegarding(podReference)
105+
.withRegarding(resourceReference)
106+
.withRelated(podReference)
102107
.withReason(reason)
103108
.withType(type)
104109
.withEventTime(eventTime)
@@ -114,6 +119,14 @@ ObjectReference createPodReference(Pod pod) {
114119
.build();
115120
}
116121

122+
ObjectReference createResourceReference(Reconciliation reconciliation) {
123+
return new ObjectReferenceBuilder()
124+
.withKind(reconciliation.kind())
125+
.withNamespace(reconciliation.namespace())
126+
.withName(reconciliation.name())
127+
.build();
128+
}
129+
117130

118131
/**
119132
* While the core event API doesn't set a limit on note sizes, events.k8s.io/v1beta1 and v1 do, which is 1kB.

cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherIT.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
import io.fabric8.kubernetes.api.model.events.v1.Event;
1515
import io.fabric8.kubernetes.client.KubernetesClient;
1616
import io.fabric8.kubernetes.client.KubernetesClientBuilder;
17+
import io.strimzi.api.kafka.model.kafka.Kafka;
1718
import io.strimzi.operator.cluster.model.RestartReason;
1819
import io.strimzi.operator.cluster.model.RestartReasons;
20+
import io.strimzi.operator.common.Reconciliation;
1921
import io.strimzi.test.TestUtils;
2022
import org.junit.jupiter.api.AfterAll;
2123
import org.junit.jupiter.api.AfterEach;
@@ -68,7 +70,8 @@ void teardown() {
6870
@Test
6971
void eventPublicationSucceeds() {
7072
KubernetesRestartEventPublisher publisher = new KubernetesRestartEventPublisher(client, "op") { };
71-
publisher.publishRestartEvents(pod, RestartReasons.of(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED).add(RestartReason.FILE_SYSTEM_RESIZE_NEEDED));
73+
Reconciliation reconciliation = new Reconciliation("test", Kafka.RESOURCE_KIND, TEST_NAMESPACE, "test");
74+
publisher.publishRestartEvents(reconciliation, pod, RestartReasons.of(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED).add(RestartReason.FILE_SYSTEM_RESIZE_NEEDED));
7275

7376
ListOptions strimziEventsOnly = new ListOptionsBuilder()
7477
.withFieldSelector("reportingController=" + KubernetesRestartEventPublisher.CONTROLLER)
@@ -79,7 +82,16 @@ void eventPublicationSucceeds() {
7982

8083
Event exemplar = items.get(0);
8184
assertThat(exemplar.getAction(), is(KubernetesRestartEventPublisher.ACTION));
82-
assertThat(exemplar.getRegarding(), is(referenceFromPod(pod)));
85+
assertThat(exemplar.getRegarding(), is(referenceFromReconciliation(reconciliation)));
86+
assertThat(exemplar.getRelated(), is(referenceFromPod(pod)));
87+
}
88+
89+
static ObjectReference referenceFromReconciliation(Reconciliation reconciliation) {
90+
return new ObjectReferenceBuilder()
91+
.withKind(reconciliation.kind())
92+
.withNamespace(reconciliation.namespace())
93+
.withName(reconciliation.name())
94+
.build();
8395
}
8496

8597
static ObjectReference referenceFromPod(Pod pod) {

cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherTest.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
import io.fabric8.kubernetes.client.dsl.NonNamespaceOperation;
1919
import io.fabric8.kubernetes.client.dsl.Resource;
2020
import io.fabric8.kubernetes.client.dsl.V1EventingAPIGroupDSL;
21+
import io.strimzi.api.kafka.model.kafka.Kafka;
2122
import io.strimzi.operator.cluster.model.RestartReason;
2223
import io.strimzi.operator.cluster.model.RestartReasons;
24+
import io.strimzi.operator.common.Reconciliation;
2325
import org.junit.jupiter.api.Assertions;
2426
import org.junit.jupiter.api.BeforeEach;
2527
import org.junit.jupiter.api.Test;
@@ -42,6 +44,7 @@
4244

4345
class KubernetesRestartEventPublisherTest {
4446
private final static String NAMESPACE = "test-ns";
47+
private final static String CLUSTER_NAME = "example-cluster";
4548
private final static String POD_NAME = "example-pod";
4649

4750
private KubernetesRestartEventPublisher publisher;
@@ -51,7 +54,7 @@ void setup() {
5154
KubernetesClient client = mock(KubernetesClient.class);
5255
publisher = new KubernetesRestartEventPublisher(client, "op") {
5356
@Override
54-
protected void publishEvent(MicroTime eventTime, ObjectReference podReference, String reason, String type, String note) {
57+
protected void publishEvent(MicroTime eventTime, ObjectReference resourceReference, ObjectReference podReference, String reason, String type, String note) {
5558
}
5659
};
5760
}
@@ -72,6 +75,22 @@ void testObjectReferenceFromPod() {
7275
assertThat(podRef.getKind(), is("Pod"));
7376
}
7477

78+
@Test
79+
void testObjectReferenceFromReconciliation() {
80+
Reconciliation reconciliation = new Reconciliation(
81+
"test-trigger",
82+
Kafka.RESOURCE_KIND,
83+
NAMESPACE,
84+
CLUSTER_NAME
85+
);
86+
87+
ObjectReference clusterRef = publisher.createResourceReference(reconciliation);
88+
89+
assertThat(clusterRef.getName(), is(CLUSTER_NAME));
90+
assertThat(clusterRef.getNamespace(), is(NAMESPACE));
91+
assertThat(clusterRef.getKind(), is(Kafka.RESOURCE_KIND));
92+
}
93+
7594
@Test
7695
void testTruncation() {
7796
String underOneThousandBytes = "1".repeat(999);
@@ -107,7 +126,7 @@ void testOneEventPublishedPerReason() {
107126
Set<String> capturedReasons = new HashSet<>();
108127
KubernetesRestartEventPublisher capturingPublisher = new KubernetesRestartEventPublisher(client, "op") {
109128
@Override
110-
protected void publishEvent(MicroTime eventTime, ObjectReference podReference, String reason, String type, String note) {
129+
protected void publishEvent(MicroTime eventTime, ObjectReference resourceReference, ObjectReference podReference, String reason, String type, String note) {
111130
capturedReasons.add(reason);
112131
}
113132
};
@@ -117,7 +136,7 @@ protected void publishEvent(MicroTime eventTime, ObjectReference podReference, S
117136
RestartReasons reasons = new RestartReasons().add(RestartReason.FILE_SYSTEM_RESIZE_NEEDED)
118137
.add(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED);
119138

120-
capturingPublisher.publishRestartEvents(mockPod, reasons);
139+
capturingPublisher.publishRestartEvents(Reconciliation.DUMMY_RECONCILIATION, mockPod, reasons);
121140

122141
assertThat(capturedReasons, is(expectedReasons));
123142

@@ -159,22 +178,26 @@ void testPopulatesExpectedFields() {
159178
KubernetesRestartEventPublisher eventPublisher = new KubernetesRestartEventPublisher(client, "cluster-operator-id", clock);
160179

161180
RestartReasons reasons = new RestartReasons().add(RestartReason.FILE_SYSTEM_RESIZE_NEEDED);
162-
eventPublisher.publishRestartEvents(pod, reasons);
181+
eventPublisher.publishRestartEvents(new Reconciliation("test", Kafka.RESOURCE_KIND, NAMESPACE, CLUSTER_NAME), pod, reasons);
163182

164183
verify(mockEventResource, times(1)).create();
165184

166185
Event publishedEvent = eventCaptor.getValue();
167-
assertThat(publishedEvent.getRegarding().getKind(), is("Pod"));
168-
assertThat(publishedEvent.getRegarding().getName(), is(POD_NAME));
186+
assertThat(publishedEvent.getRegarding().getKind(), is(Kafka.RESOURCE_KIND));
187+
assertThat(publishedEvent.getRegarding().getName(), is(CLUSTER_NAME));
169188
assertThat(publishedEvent.getRegarding().getNamespace(), is(NAMESPACE));
170189

190+
assertThat(publishedEvent.getRelated().getKind(), is("Pod"));
191+
assertThat(publishedEvent.getRelated().getName(), is(POD_NAME));
192+
assertThat(publishedEvent.getRelated().getNamespace(), is(NAMESPACE));
193+
171194
assertThat(publishedEvent.getReportingController(), is("strimzi.io/cluster-operator"));
172195
assertThat(publishedEvent.getReportingInstance(), is("cluster-operator-id"));
173196

174197
assertThat(publishedEvent.getReason(), is("FileSystemResizeNeeded"));
175198
assertThat(publishedEvent.getAction(), is("StrimziInitiatedPodRestart"));
176199
assertThat(publishedEvent.getType(), is("Normal"));
177-
assertThat(publishedEvent.getNote(), is(RestartReason.FILE_SYSTEM_RESIZE_NEEDED.getDefaultNote()));
200+
assertThat(publishedEvent.getNote(), is("Rolling Pod " + POD_NAME + " due to " + RestartReason.FILE_SYSTEM_RESIZE_NEEDED.getDefaultNote()));
178201
assertThat(publishedEvent.getEventTime().getTime(), is("2020-10-11T00:00:00.000000Z"));
179202

180203
}

cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventsMockTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,9 @@ private <T> Handler<AsyncResult<T>> verifyEventPublished(RestartReason expectedR
555555
}
556556

557557
Event restartEvent = maybeEvent.get();
558-
assertThat(restartEvent.getRegarding().getName(), is(kafkaPod().getMetadata().getName()));
558+
assertThat(restartEvent.getRelated().getName(), is(kafkaPod().getMetadata().getName()));
559+
assertThat(restartEvent.getRegarding().getName(), is(CLUSTER_NAME));
560+
assertThat(restartEvent.getRegarding().getKind(), is(Kafka.RESOURCE_KIND));
559561
context.completeNow();
560562
}));
561563
}

documentation/modules/deploying/proc-operator-restart-events.adoc

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ kubectl -n kafka get events --field-selector reportingController=strimzi.io/clus
2828
.Example showing events returned
2929
[source,shell]
3030
----
31-
LAST SEEN TYPE REASON OBJECT MESSAGE
32-
2m Normal CaCertRenewed pod/strimzi-cluster-kafka-0 CA certificate renewed
33-
58m Normal PodForceRestartOnError pod/strimzi-cluster-kafka-1 Pod needs to be forcibly restarted due to an error
34-
5m47s Normal ManualRollingUpdate pod/strimzi-cluster-kafka-2 Pod was manually annotated to be rolled
31+
LAST SEEN TYPE REASON OBJECT MESSAGE
32+
2m Normal CaCertRenewed kafka/strimzi-cluster Rolling Pod strimzi-cluster-kafka-0 due to CA certificate renewed
33+
58m Normal PodForceRestartOnError kafka/strimzi-cluster Rolling Pod strimzi-cluster-kafka-1 due to Pod needs to be forcibly restarted due to an error
34+
5m47s Normal ManualRollingUpdate kafka/strimzi-cluster Rolling Pod strimzi-cluster-kafka-2 due to Pod was manually annotated to be rolled
3535
----
3636
+
3737
You can also specify a `reason` or other `field-selector` options to constrain the events returned.
@@ -60,12 +60,12 @@ items:
6060
eventTime: "2022-05-13T00:22:34.168086Z"
6161
firstTimestamp: null
6262
involvedObject:
63-
kind: Pod
64-
name: strimzi-cluster-kafka-1
63+
kind: Kafka
64+
name: strimzi-cluster
6565
namespace: kafka
6666
kind: Event
6767
lastTimestamp: null
68-
message: Pod needs to be forcibly restarted due to an error
68+
message: Rolling Pod strimzi-cluster-kafka-1 due to Pod needs to be forcibly restarted due to an error
6969
metadata:
7070
creationTimestamp: "2022-05-13T00:22:34Z"
7171
generateName: strimzi-event
@@ -74,6 +74,10 @@ items:
7474
resourceVersion: "432961"
7575
uid: 29fcdb9e-f2cf-4c95-a165-a5efcd48edfc
7676
reason: PodForceRestartOnError
77+
related:
78+
kind: Pod
79+
name: strimzi-cluster-kafka-1
80+
namespace: kafka
7781
reportingController: strimzi.io/cluster-operator
7882
reportingInstance: strimzi-cluster-operator-6458cfb4c6-6bpdp
7983
source: {}

documentation/modules/deploying/ref-operator-restart-events-fields.adoc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ When checking restart events from the command line, you can specify a `field-sel
1010

1111
The following fields are available when filtering events with `field-selector`.
1212

13-
`regardingObject.kind`:: The object that was restarted, and for restart events, the kind is always `Pod`.
14-
`regarding.namespace`:: The namespace that the pod belongs to.
15-
`regardingObject.name`:: The pod's name, for example, `strimzi-cluster-kafka-0`.
16-
`regardingObject.uid`:: The unique ID of the pod.
17-
`reason`:: The reason the pod was restarted, for example, `JbodVolumesChanged`.
13+
`regardingObject.kind`:: The kind of the resource that owns the Pod being restarted. For restart events, the kind is always `Kafka`.
14+
`regarding.namespace`:: The namespace that the resource belongs to.
15+
`regardingObject.name`:: The resource's name, for example, `strimzi-cluster`.
16+
`regardingObject.uid`:: The unique ID of the resource.
17+
`reason`:: The reason the Pod was restarted, for example, `JbodVolumesChanged`.
1818
`reportingController`:: The reporting component is always `strimzi.io/cluster-operator` for Strimzi restart events.
1919
`source`:: `source` is an older version of `reportingController`. The reporting component is always `strimzi.io/cluster-operator` for Strimzi restart events.
2020
`type`:: The event type, which is either `Warning` or `Normal`. For Strimzi restart events, the type is `Normal`.

0 commit comments

Comments
 (0)