Skip to content

Commit 273aa29

Browse files
poulok and netopyr authored
chore: wait for process termination (#21986) (#22217)
Signed-off-by: Michael Heinrichs <[email protected]> Co-authored-by: Michael Heinrichs <[email protected]>
1 parent 99677b9 commit 273aa29

File tree

5 files changed

+48
-12
lines changed

5 files changed

+48
-12
lines changed

platform-sdk/consensus-otter-docker-app/src/testFixtures/java/org/hiero/consensus/otter/docker/app/ConsensusNodeMain.java

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
package org.hiero.consensus.otter.docker.app;
33

4+
import static com.swirlds.logging.legacy.LogMarker.STARTUP;
45
import static org.hiero.otter.fixtures.container.utils.ContainerConstants.CONTAINER_APP_WORKING_DIR;
56

67
import com.hedera.hapi.platform.state.NodeId;
@@ -39,6 +40,11 @@ public class ConsensusNodeMain {
3940
/** Logger */
4041
private static final Logger log = LogManager.getLogger(ConsensusNodeMain.class);
4142

43+
/**
44+
* Main method to start the Consensus Node application.
45+
*
46+
* @param args command line arguments; expects a single argument representing the node's ID
47+
*/
4248
public static void main(final String[] args) {
4349
if (args.length != 1) {
4450
throw new IllegalArgumentException("Usage: ConsensusNodeMain <selfId>");
@@ -50,7 +56,7 @@ public static void main(final String[] args) {
5056

5157
final NodeCommunicationService nodeCommunicationService = new NodeCommunicationService(selfId);
5258

53-
log.info("Starting ConsensusNodeMain");
59+
log.info(STARTUP.getMarker(), "Starting ConsensusNodeMain");
5460
// Start the consensus node manager gRPC server
5561
final Server nodeGrpcServer = ServerBuilder.forPort(NODE_COMM_SERVICE_PORT)
5662
.addService(nodeCommunicationService)
@@ -60,11 +66,11 @@ public static void main(final String[] args) {
6066
writeStartedMarkerFile();
6167
nodeGrpcServer.awaitTermination();
6268
} catch (final IOException ie) {
63-
log.error("Failed to start the gRPC server for the consensus node manager", ie);
69+
log.error(STARTUP.getMarker(), "Failed to start the gRPC server for the consensus node manager", ie);
6470
System.exit(-1);
6571
} catch (final InterruptedException e) {
6672
// Only warn, because we expect this exception when we interrupt the thread on a kill request
67-
log.warn("Interrupted while running the consensus node manager gRPC server", e);
73+
log.warn(STARTUP.getMarker(), "Interrupted while running the consensus node manager gRPC server", e);
6874
Thread.currentThread().interrupt();
6975
System.exit(-1);
7076
}
@@ -76,12 +82,18 @@ public static void main(final String[] args) {
7682
private static void writeStartedMarkerFile() {
7783
try {
7884
if (new File(STARTED_MARKER_FILE.toString()).createNewFile()) {
79-
log.info("Node Communication Service marker file written to {}", STARTED_MARKER_FILE);
85+
log.info(
86+
STARTUP.getMarker(),
87+
"Node Communication Service marker file written to {}",
88+
STARTED_MARKER_FILE);
8089
} else {
81-
log.info("Node Communication Service marker file already exists at {}", STARTED_MARKER_FILE);
90+
log.info(
91+
STARTUP.getMarker(),
92+
"Node Communication Service marker file already exists at {}",
93+
STARTED_MARKER_FILE);
8294
}
8395
} catch (final IOException e) {
84-
log.error("Failed to write Node Communication Service marker file", e);
96+
log.error(STARTUP.getMarker(), "Failed to write Node Communication Service marker file", e);
8597
throw new RuntimeException("Failed to write Node Communication Service marker file", e);
8698
}
8799
}

platform-sdk/consensus-otter-docker-app/src/testFixtures/java/org/hiero/consensus/otter/docker/app/DockerManager.java

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,11 @@ public synchronized void init(
120120
responseObserver.onNext(Empty.getDefaultInstance());
121121
responseObserver.onCompleted();
122122
} else {
123-
log.error("Consensus node process started, but marker file was not detected in the allowed time");
123+
if (!process.isAlive()) {
124+
log.error("Consensus node stopped prematurely. Errorcode: {}", process.exitValue());
125+
} else {
126+
log.error("Consensus node process started, but marker file was not detected in the allowed time");
127+
}
124128
responseObserver.onError(new IllegalStateException(
125129
"Consensus node process started, but marker file was not detected in the allowed time"));
126130
}
@@ -188,9 +192,25 @@ public synchronized void killImmediately(
188192
log.info("Received kill request: {}", request);
189193
if (process != null) {
190194
process.destroyForcibly();
195+
try {
196+
if (process.waitFor(request.getTimeoutSeconds(), TimeUnit.SECONDS)) {
197+
responseObserver.onNext(Empty.getDefaultInstance());
198+
responseObserver.onCompleted();
199+
} else {
200+
log.error("Failed to terminate the consensus node process within the timeout period.");
201+
responseObserver.onError(new IllegalStateException(
202+
"Failed to terminate the consensus node process within the timeout period."));
203+
}
204+
} catch (final InterruptedException e) {
205+
log.error("Interrupted while waiting for the consensus node process to terminate.", e);
206+
Thread.currentThread().interrupt();
207+
responseObserver.onError(new InterruptedException(
208+
"Interrupted while waiting for the consensus node process to terminate."));
209+
}
210+
} else {
211+
responseObserver.onNext(Empty.getDefaultInstance());
212+
responseObserver.onCompleted();
191213
}
192-
responseObserver.onNext(Empty.getDefaultInstance());
193-
responseObserver.onCompleted();
194214
log.info("Kill request completed.");
195215
}
196216

platform-sdk/consensus-otter-docker-app/src/testFixtures/java/org/hiero/consensus/otter/docker/app/logging/DockerLogConfigBuilder.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
package org.hiero.consensus.otter.docker.app.logging;
33

4+
import static com.swirlds.logging.legacy.LogMarker.STARTUP;
45
import static java.util.Objects.requireNonNull;
56
import static org.hiero.otter.fixtures.logging.internal.LogConfigHelper.DEFAULT_PATTERN;
67
import static org.hiero.otter.fixtures.logging.internal.LogConfigHelper.combineFilters;
@@ -100,6 +101,7 @@ public static void configure(@NonNull final Path baseDir, @NonNull final NodeId
100101

101102
Configurator.reconfigure(builder.build());
102103

103-
LogManager.getLogger(DockerLogConfigBuilder.class).info("Logging configuration (re)initialized");
104+
LogManager.getLogger(DockerLogConfigBuilder.class)
105+
.info(STARTUP.getMarker(), "Logging configuration (re)initialized");
104106
}
105107
}

platform-sdk/consensus-otter-tests/src/testFixtures/java/org/hiero/otter/fixtures/container/ContainerNode.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,9 @@ protected void doKillImmediately(@NonNull final Duration timeout) {
265265
// conditions with the stream observer receiving an error.
266266
lifeCycle = SHUTDOWN;
267267

268-
final KillImmediatelyRequest request = KillImmediatelyRequest.getDefaultInstance();
268+
final KillImmediatelyRequest request = KillImmediatelyRequest.newBuilder()
269+
.setTimeoutSeconds((int) timeout.getSeconds())
270+
.build();
269271
// Unary call – will throw if server returns an error.
270272
containerControlBlockingStub.withDeadlineAfter(timeout).killImmediately(request);
271273
platformStatus = null;

platform-sdk/consensus-otter-tests/src/testFixtures/proto/container_communication.proto

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ message InitRequest {
1616

1717
// Request to kill the application immediately.
1818
message KillImmediatelyRequest {
19-
google.protobuf.Empty ignore = 1;
19+
uint32 timeout_seconds = 1;
2020
}
2121

2222
message PingResponse {

0 commit comments

Comments
 (0)