Skip to content

Commit 47a6579

Browse files
authored
feat: pipe system logs to nucleus log path (#1661)
1 parent 00caf83 commit 47a6579

22 files changed

+180
-23
lines changed

scripts/greengrass.service.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ PIDFile=REPLACE_WITH_GG_LOADER_PID_FILE
88
RemainAfterExit=no
99
Restart=on-failure
1010
RestartSec=10
11-
ExecStart=/bin/sh REPLACE_WITH_GG_LOADER_FILE
11+
ExecStart=/bin/sh -c "REPLACE_WITH_GG_LOADER_FILE >> REPLACE_WITH_LOADER_LOG_FILE 2>&1"
1212
KillMode=mixed
1313

1414
[Install]

src/main/java/com/aws/greengrass/deployment/DeviceConfiguration.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ public class DeviceConfiguration {
129129
public static final String FALLBACK_VERSION = "0.0.0";
130130
private final Configuration config;
131131
private final KernelCommandLine kernelCommandLine;
132-
133132
private final Validator deTildeValidator;
134133
private final Validator regionValidator;
135134
private final AtomicBoolean rootCA3Downloaded = new AtomicBoolean(false);

src/main/java/com/aws/greengrass/deployment/KernelUpdateDeploymentTask.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919
import com.aws.greengrass.lifecyclemanager.Kernel;
2020
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
2121
import com.aws.greengrass.logging.api.Logger;
22+
import com.aws.greengrass.util.LoaderLogsSummarizer;
2223
import com.aws.greengrass.util.Pair;
2324
import com.aws.greengrass.util.Utils;
2425

2526
import java.io.IOException;
27+
import java.nio.charset.StandardCharsets;
2628
import java.nio.file.Files;
29+
import java.nio.file.Path;
2730
import java.util.Collections;
2831
import java.util.List;
2932
import java.util.concurrent.CancellationException;
@@ -46,6 +49,7 @@ public class KernelUpdateDeploymentTask implements DeploymentTask {
4649
private final Deployment deployment;
4750
private final ComponentManager componentManager;
4851
private final CompletableFuture<DeploymentResult> deploymentResultCompletableFuture;
52+
private final Path loaderLogsPath;
4953

5054
/**
5155
* Constructor for KernelUpdateDeploymentTask.
@@ -62,6 +66,7 @@ public KernelUpdateDeploymentTask(Kernel kernel, Logger logger, Deployment deplo
6266
this.logger = logger.dfltKv(DEPLOYMENT_ID_LOG_KEY, deployment.getGreengrassDeploymentId());
6367
this.componentManager = componentManager;
6468
this.deploymentResultCompletableFuture = new CompletableFuture<>();
69+
this.loaderLogsPath = kernel.getNucleusPaths().loaderLogsPath();
6570
}
6671

6772
@SuppressWarnings({"PMD.AvoidDuplicateLiterals"})
@@ -138,6 +143,7 @@ private void waitForServicesToStart() {
138143
getDeploymentStatusDetails());
139144
}
140145
}
146+
141147
deploymentResultCompletableFuture.complete(result);
142148
}
143149

@@ -156,9 +162,20 @@ private DeploymentException getDeploymentStatusDetails() {
156162
if (Files.deleteIfExists(
157163
kernel.getNucleusPaths().workPath(DEFAULT_NUCLEUS_COMPONENT_NAME)
158164
.resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath())) {
159-
return new DeploymentException(
160-
"Nucleus update workflow failed to restart Nucleus. See loader logs for more details",
165+
String loaderLogs;
166+
try {
167+
loaderLogs = new String(Files.readAllBytes(this.loaderLogsPath), StandardCharsets.UTF_8);
168+
return new DeploymentException(
169+
String.format("Nucleus update workflow failed to restart Nucleus.%n%s",
170+
LoaderLogsSummarizer.summarizeLogs(loaderLogs)),
171+
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
172+
} catch (IOException e) {
173+
logger.atWarn().log("Unable to read Nucleus logs for restart failure", e);
174+
return new DeploymentException(
175+
"Nucleus update workflow failed to restart Nucleus. Please look at the device and loader "
176+
+ "logs for more info.",
161177
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
178+
}
162179
} else {
163180
return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an "
164181
+ "unexpected device IO error",
@@ -170,7 +187,7 @@ private DeploymentException getDeploymentStatusDetails() {
170187
DeploymentErrorCode.IO_WRITE_ERROR);
171188
}
172189
}
173-
190+
174191
List<DeploymentErrorCode> errorStack = deployment.getErrorStack() == null ? Collections.emptyList()
175192
: deployment.getErrorStack().stream().map(DeploymentErrorCode::valueOf).collect(Collectors.toList());
176193

src/main/java/com/aws/greengrass/easysetup/DeviceProvisioningHelper.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -301,9 +301,9 @@ public void updateKernelConfigWithIotConfiguration(Kernel kernel, ThingInfo thin
301301
Path certFilePath = certPath.resolve("thingCert.crt");
302302
Files.write(certFilePath, thing.certificatePem.getBytes(StandardCharsets.UTF_8));
303303

304-
new DeviceConfiguration(kernel.getConfig(), kernel.getKernelCommandLine(), thing.thingName, thing.dataEndpoint,
305-
thing.credEndpoint, privKeyFilePath.toString(), certFilePath.toString(), caFilePath.toString(),
306-
awsRegion, roleAliasName);
304+
new DeviceConfiguration(kernel.getConfig(), kernel.getKernelCommandLine(),
305+
thing.thingName, thing.dataEndpoint, thing.credEndpoint, privKeyFilePath.toString(),
306+
certFilePath.toString(), caFilePath.toString(), awsRegion, roleAliasName);
307307
// Make sure tlog persists the device configuration
308308
kernel.getContext().waitForPublishQueueToClear();
309309
outStream.println("Created device configuration");

src/main/java/com/aws/greengrass/easysetup/GreengrassSetup.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ void performSetup() throws IOException, DeviceConfigurationException, URISyntaxE
334334
if (setupSystemService) {
335335
kernel.getContext().get(KernelLifecycle.class).softShutdown(30);
336336
boolean ok = kernel.getContext().get(SystemServiceUtilsFactory.class).getInstance()
337-
.setupSystemService(kernel.getContext().get(KernelAlternatives.class), kernelStart);
337+
.setupSystemService(kernel.getContext().get(KernelAlternatives.class), kernel.getNucleusPaths(),
338+
kernelStart);
338339
if (ok) {
339340
outStream.println("Successfully set up Nucleus as a system service");
340341
// Nucleus will be launched by OS as a service

src/main/java/com/aws/greengrass/lifecyclemanager/Kernel.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ public Kernel() {
181181
this.shutdown(-1);
182182
}));
183183

184-
nucleusPaths = new NucleusPaths();
184+
nucleusPaths = new NucleusPaths(Platform.getPlatformLoaderLogsFileName());
185185
context.put(NucleusPaths.class, nucleusPaths);
186186
kernelCommandLine = new KernelCommandLine(this);
187187
kernelLifecycle = new KernelLifecycle(this, kernelCommandLine, nucleusPaths);

src/main/java/com/aws/greengrass/lifecyclemanager/KernelAlternatives.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@
2626
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
2727

2828
import java.io.File;
29+
import java.io.FileOutputStream;
2930
import java.io.IOException;
3031
import java.net.URISyntaxException;
32+
import java.nio.channels.FileChannel;
33+
import java.nio.channels.FileLock;
3134
import java.nio.file.Files;
3235
import java.nio.file.Path;
3336
import java.util.Collections;
@@ -394,6 +397,37 @@ public void prepareBootstrap(String deploymentId) throws IOException {
394397
setupLinkToDirectory(getCurrentDir(), newLaunchDir);
395398
Files.delete(getNewDir());
396399
logger.atInfo().log("Finished setup of launch directory for new Nucleus");
400+
401+
cleanupLoaderLogs();
402+
}
403+
404+
/**
405+
* Cleans up loader logs dumped in loader.log by acquiring a lock on the file first as
406+
* Windows FS does not allow a brute force truncate.
407+
*/
408+
@SuppressWarnings("PMD.AvoidFileStream")
409+
protected void cleanupLoaderLogs() {
410+
logger.atDebug().kv("logs-path", getLoaderLogsPath().toAbsolutePath()).log("Cleaning up Nucleus logs");
411+
try (FileOutputStream fos = new FileOutputStream(getLoaderLogsPath().toAbsolutePath().toString());
412+
FileChannel channel = fos.getChannel()) {
413+
// Try to acquire a lock
414+
FileLock lock = channel.tryLock();
415+
416+
if (lock == null) {
417+
logger.atWarn().log("Cannot clean Nucleus logs, the log file is locked by another process");
418+
} else {
419+
try {
420+
// Truncate the file
421+
channel.truncate(0);
422+
} finally {
423+
// Release and close the lock
424+
lock.close();
425+
logger.atDebug().log("Finished cleaning up Nucleus logs");
426+
}
427+
}
428+
} catch (IOException e) {
429+
logger.atError().setCause(e).log("Error while cleaning the Nucleus logs file");
430+
}
397431
}
398432

399433
/**
@@ -529,4 +563,8 @@ private void cleanupLaunchDirectorySingleLevel(File filePath) throws IOException
529563
}
530564
Files.deleteIfExists(filePath.toPath());
531565
}
566+
567+
public Path getLoaderLogsPath() {
568+
return nucleusPaths.loaderLogsPath().toAbsolutePath();
569+
}
532570
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package com.aws.greengrass.util;
7+
8+
import java.util.Scanner;
import java.util.regex.Pattern;
9+
10+
/**
 * Extracts the relevant part of the loader logs so that it can be attached to a deployment failure message.
 */
public final class LoaderLogsSummarizer {
    /** Loader line that announces the second launch attempt; the summary starts after it. */
    public static final String STARTING_SUBSEQUENCE_REGEX =
            "^Nucleus exited ([0-9])*\\.\\s*(Attempt 2 out of 3|Retrying 2 times)$";
    /** Loader line that announces the third launch attempt; the summary ends with it. */
    public static final String ENDING_SUBSEQUENCE_REGEX =
            "^Nucleus exited ([0-9])*\\.\\s*(Attempt 3 out of 3|Retrying 3 times)$";

    // Compiled once; String.matches would recompile the expression for every log line.
    private static final Pattern STARTING_SUBSEQUENCE = Pattern.compile(STARTING_SUBSEQUENCE_REGEX);
    private static final Pattern ENDING_SUBSEQUENCE = Pattern.compile(ENDING_SUBSEQUENCE_REGEX);

    private LoaderLogsSummarizer() {
    }

    /**
     * Summarizes loader logs that can be published as part of the deployment status FSS message when deployment
     * fails with NRF.
     *
     * <p>The summary consists of the lines that follow the first line matching
     * {@link #STARTING_SUBSEQUENCE_REGEX}, up to and including the first subsequent line matching
     * {@link #ENDING_SUBSEQUENCE_REGEX}. Lines starting with {@code +} are dropped. Every kept line except the
     * ending marker is followed by the platform line separator. If the starting marker is never seen the summary
     * is empty; if the ending marker is never seen the summary runs to the end of the input.
     *
     * @param blob string blob containing loader logs; {@code null} is treated as empty
     * @return string containing summarized logs, never {@code null}
     */
    public static String summarizeLogs(String blob) {
        if (blob == null || blob.isEmpty()) {
            return "";
        }

        StringBuilder summary = new StringBuilder();
        try (Scanner scanner = new Scanner(blob)) {
            // Skip everything up to and including the marker that opens the final launch attempt.
            while (scanner.hasNextLine()) {
                if (STARTING_SUBSEQUENCE.matcher(scanner.nextLine()).matches()) {
                    break;
                }
            }

            // Collect the output of the final launch attempt.
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();

                if (ENDING_SUBSEQUENCE.matcher(line).matches()) {
                    summary.append(line);
                    break;
                }

                // Lines with a leading '+' are skipped (presumably shell trace output - verify against loader).
                if (line.startsWith("+")) {
                    continue;
                }

                summary.append(line).append(System.lineSeparator());
            }
        }
        return summary.toString();
    }
}

src/main/java/com/aws/greengrass/util/NucleusPaths.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package com.aws.greengrass.util;
77

88
import com.aws.greengrass.componentmanager.models.ComponentIdentifier;
9+
import com.aws.greengrass.logging.impl.LogManager;
910

1011
import java.io.IOException;
1112
import java.nio.file.Path;
@@ -16,6 +17,7 @@
1617

1718
@SuppressWarnings("checkstyle:MissingJavadocMethod")
1819
public class NucleusPaths {
20+
private final String loaderLogFileName;
1921
private Path rootPath;
2022
private Path workPath;
2123
private Path componentStorePath;
@@ -25,6 +27,10 @@ public class NucleusPaths {
2527
private Path cliIpcInfoPath;
2628
private Path binPath;
2729

30+
/**
 * Creates the path holder and remembers the loader log file name used by {@code loaderLogsPath()}.
 *
 * @param loaderLogFileName file name of the loader log, resolved against the log store directory
 */
public NucleusPaths(String loaderLogFileName) {
    this.loaderLogFileName = loaderLogFileName;
}
33+
2834
public void initPaths(Path root, Path workPath, Path componentStorePath, Path configPath, Path kernelAlts,
2935
Path deployment, Path cliIpcInfo, Path binPath) throws IOException {
3036
setRootPath(root);
@@ -191,4 +197,9 @@ public static void setLoggerPath(Path p) throws IOException {
191197
Utils.createPaths(p);
192198
Permissions.setLoggerPermission(p);
193199
}
200+
201+
public Path loaderLogsPath() {
202+
return LogManager.getRootLogConfiguration().getStoreDirectory()
203+
.resolve(this.loaderLogFileName).toAbsolutePath();
204+
}
194205
}

src/main/java/com/aws/greengrass/util/orchestration/InitUtils.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
99
import com.aws.greengrass.logging.api.Logger;
1010
import com.aws.greengrass.logging.impl.LogManager;
11+
import com.aws.greengrass.util.NucleusPaths;
1112

1213
public class InitUtils implements SystemServiceUtils {
1314
protected static final Logger logger = LogManager.getLogger(InitUtils.class);
1415

1516
@Override
16-
public boolean setupSystemService(KernelAlternatives kernelAlternatives, boolean start) {
17+
public boolean setupSystemService(KernelAlternatives kernelAlternatives, NucleusPaths nucleusPaths, boolean start) {
1718
logger.atError().log("System service registration is not implemented for this device");
1819
return false;
1920
}

0 commit comments

Comments
 (0)