Skip to content

Commit 65ffe14

Browse files
committed
fix(debug): live-list check, atomic tarball, e2e per-run name
Signed-off-by: Tinson Lai <tinsonl@nvidia.com>
1 parent 5c545c8 commit 65ffe14

6 files changed

Lines changed: 92 additions & 16 deletions

File tree

src/commands/debug.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ function buildDebugCommandDeps(rootDir: string): RunDebugCommandDeps {
6161

6262
const isSandboxKnown = (name: string): boolean => {
6363
const { sandboxes } = registry.listSandboxes();
64-
if (sandboxes.find((sandbox) => sandbox.name === name)) return true;
64+
if (!sandboxes.find((sandbox) => sandbox.name === name)) return false;
6565
const liveList = captureOpenshell(rootDir, ["sandbox", "list"]);
66-
if (liveList.status === 0 && parseLiveSandboxNames(liveList.output).has(name)) return true;
67-
return false;
66+
if (liveList.status === 0 && !parseLiveSandboxNames(liveList.output).has(name)) {
67+
return false;
68+
}
69+
return true;
6870
};
6971

7072
return {

src/lib/diagnostics/debug-command.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ describe("debug command", () => {
8282
},
8383
),
8484
).toThrow("exit");
85+
expect(exit).toHaveBeenCalledWith(1);
8586
expect(runDebug).not.toHaveBeenCalled();
8687
expect(errorLines[0]).toContain("ghost");
8788
expect(errorLines[0]).toContain("NEMOCLAW_SANDBOX_NAME");

src/lib/diagnostics/debug.test.ts

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
import { existsSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
4+
import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
55
import { tmpdir } from "node:os";
66
import { join } from "node:path";
77
import { afterEach, beforeEach, describe, expect, it } from "vitest";
@@ -82,20 +82,27 @@ describe("createTarball", () => {
8282
expect(process.exitCode).toBe(1);
8383
});
8484

85-
it("removes any partial tarball when tar fails", () => {
85+
it("leaves pre-existing user output untouched and removes the temp sibling when tar fails", () => {
8686
tempDir = mkdtempSync(join(tmpdir(), "debug-test-"));
8787
writeFileSync(join(tempDir, "payload.txt"), "test data");
8888
outputDir = mkdtempSync(join(tmpdir(), "debug-test-out-"));
8989
const output = join(outputDir, "partial.tar.gz");
90-
// Pre-create a stub file so createTarball can prove it removed a partial.
91-
writeFileSync(output, "stub partial tarball");
90+
// Pre-existing user file must NOT be clobbered when tar fails.
91+
const previous = "pre-existing user content";
92+
writeFileSync(output, previous);
9293
// Removing the source dir forces tar to fail without racing in-progress
9394
// collection.
9495
rmSync(tempDir, { recursive: true, force: true });
9596
const ok = createTarball(tempDir, output);
9697
expect(ok).toBe(false);
9798
expect(process.exitCode).toBe(1);
98-
expect(existsSync(output)).toBe(false);
99+
expect(existsSync(output)).toBe(true);
100+
expect(readFileSync(output, "utf-8")).toBe(previous);
101+
// No .partial sibling should remain after cleanup.
102+
const partials = readdirSync(outputDir).filter(
103+
(name) => name.endsWith(".partial") || name.includes(".partial."),
104+
);
105+
expect(partials).toEqual([]);
99106
});
100107

101108
it("creates tarball successfully and returns true for valid output path", () => {

src/lib/diagnostics/debug.ts

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
import { execFileSync, spawnSync } from "node:child_process";
5-
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
5+
import { existsSync, mkdtempSync, readFileSync, renameSync, rmSync, writeFileSync } from "node:fs";
66
import { platform, tmpdir } from "node:os";
77
import { basename, dirname, join } from "node:path";
88

@@ -508,23 +508,44 @@ function collectKernelMessages(collectDir: string): void {
508508
* guidance that goes with the generated file.
509509
*/
510510
export function createTarball(collectDir: string, output: string): boolean {
511-
const result = spawnSync("tar", ["czf", output, "-C", dirname(collectDir), basename(collectDir)], {
512-
stdio: "inherit",
513-
timeout: 60_000,
514-
});
511+
// Write to a sibling temp path so a tar failure cannot clobber a
512+
// pre-existing user file at `output`. Rename atomically on success.
513+
const partial = `${output}.partial.${process.pid}`;
514+
const result = spawnSync(
515+
"tar",
516+
["czf", partial, "-C", dirname(collectDir), basename(collectDir)],
517+
{
518+
stdio: "inherit",
519+
timeout: 60_000,
520+
},
521+
);
515522
if (result.status !== 0 || result.signal) {
516523
const reason = result.signal
517524
? `killed by signal ${result.signal}`
518525
: `exited with code ${result.status ?? "unknown"}`;
519526
error(`Failed to create tarball at ${output} (tar ${reason})`);
520527
try {
521-
rmSync(output, { force: true });
528+
rmSync(partial, { force: true });
522529
} catch {
523530
/* best-effort cleanup of partial tarball */
524531
}
525532
process.exitCode = 1;
526533
return false;
527534
}
535+
try {
536+
renameSync(partial, output);
537+
} catch (err) {
538+
error(
539+
`Failed to move tarball into place at ${output}: ${err instanceof Error ? err.message : String(err)}`,
540+
);
541+
try {
542+
rmSync(partial, { force: true });
543+
} catch {
544+
/* best-effort */
545+
}
546+
process.exitCode = 1;
547+
return false;
548+
}
528549
info(`Tarball written to ${output}`);
529550
warn(
530551
"Known secrets are auto-redacted, but please review for any remaining sensitive data before sharing.",

test/cli.test.ts

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,12 +307,14 @@ function createDebugCommandTestEnv(
307307
}
308308
fs.writeFileSync(registryPath, JSON.stringify(current), { mode: 0o600 });
309309
}
310+
const registeredNames = [sandboxName, ...(options.extraSandboxNames ?? [])];
311+
const listLines = ["NAME", ...registeredNames.map((name) => `${name} Ready`)];
310312
fs.writeFileSync(
311313
path.join(localBin, "openshell"),
312314
[
313315
"#!/bin/sh",
314316
'if [ "$1" = "sandbox" ] && [ "$2" = "list" ]; then',
315-
" echo 'NAME'",
317+
...listLines.map((line) => ` echo ${JSON.stringify(line)}`),
316318
" exit 0",
317319
"fi",
318320
"echo 'openshell ok'",
@@ -1534,6 +1536,46 @@ describe("CLI dispatch", () => {
15341536
expect(fs.existsSync(tarball)).toBe(false);
15351537
});
15361538

1539+
it(
1540+
"debug --sandbox NAME rejects a stale registry entry missing from the live gateway",
1541+
testTimeoutOptions(30_000),
1542+
() => {
1543+
// Same fixture pattern as createDebugCommandTestEnv but with an openshell
1544+
// stub whose live list intentionally omits the registry name, mirroring
1545+
// the bug where the local registry kept a name the gateway no longer
1546+
// serves.
1547+
const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-debug-stale-"));
1548+
const localBin = path.join(home, "bin");
1549+
fs.mkdirSync(localBin, { recursive: true });
1550+
writeSandboxRegistry(home, "stale-box");
1551+
fs.writeFileSync(
1552+
path.join(localBin, "openshell"),
1553+
[
1554+
"#!/bin/sh",
1555+
'if [ "$1" = "sandbox" ] && [ "$2" = "list" ]; then',
1556+
" echo 'NAME'",
1557+
" exit 0",
1558+
"fi",
1559+
"exit 0",
1560+
].join("\n"),
1561+
{ mode: 0o755 },
1562+
);
1563+
const tarball = path.join(home, "out.tar.gz");
1564+
const r = runWithEnv(
1565+
`debug --sandbox stale-box --output ${tarball} 2>&1`,
1566+
{
1567+
HOME: home,
1568+
PATH: `${localBin}:${process.env.PATH || ""}`,
1569+
},
1570+
30000,
1571+
);
1572+
expect(r.code).not.toBe(0);
1573+
expect(r.out).toContain("stale-box");
1574+
expect(r.out).toContain("not registered");
1575+
expect(fs.existsSync(tarball)).toBe(false);
1576+
},
1577+
);
1578+
15371579
it("debug --sandbox without a name exits 1", () => {
15381580
const r = run("debug --sandbox");
15391581
expect(r.code).not.toBe(0);

test/e2e/test-diagnostics.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,10 @@ test_diag_06_debug_sandbox_validation() {
314314
fail "TC-DIAG-06: Registered name" "exit=$good_rc, output=${good_log:0:300}"
315315
fi
316316

317-
local bad_name="nemoclaw-e2e-does-not-exist"
317+
# Unique per-run name avoids collisions when another e2e job leaves a
318+
# sandbox with a shared "does-not-exist" placeholder behind.
319+
local bad_name
320+
bad_name="nemoclaw-e2e-missing-$$-$(date +%s)-${RANDOM}"
318321
local bad_output="${debug_dir}/unknown.tar.gz"
319322
local bad_rc=0 bad_log=""
320323
bad_log=$(${TIMEOUT_CMD:+$TIMEOUT_CMD 30} nemoclaw debug --quick --sandbox "$bad_name" --output "$bad_output" 2>&1) || bad_rc=$?

0 commit comments

Comments
 (0)