fix: address PR review comments

lukekim · lukekim · commit 261a943496f3 · 2026-05-02T17:19:29.000-07:00
- Explicitly classify dataset statuses into ok/error/pending buckets so
  `shuttingdown` and any unrecognized values are treated as still-pending
  rather than slipping through as a false-positive "loaded". Only
  `ready`/`disabled`/`refreshing` count as terminal-ok; only `error`
  short-circuits with `DatasetReadinessError`. Timeout messages now
  include each non-terminal-ok dataset's status so the user can tell
  `initializing` from `shuttingdown` from a typo.
- Make dataset readiness failures always fatal once the check is opted in
  (`dataset-ready-timeout-seconds > 0`), independent of
  `fail-on-test-error` (which only governs runtime-probe results). Match
  action.yml/README/CHANGELOG to that behavior. The opt-out is
  `dataset-ready-timeout-seconds: 0`.
- Add deploy + runtime tests covering each new path: terminal-ok values
  beyond `ready`, `shuttingdown`/unknown handling, and the
  fail-on-test-error-false-but-dataset-error-still-fatal case.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Added
 - Auto-capture a `repository` tag from `GITHUB_REPOSITORY` when set, sanitized to fit the API's tag-value rule (`/` → `_`). Users can override by setting `repository:` explicitly in the `tags` input.
-- New post-deploy dataset readiness check: poll `GET /v1/datasets?status=true` until every dataset leaves `initializing`, fail immediately on `error`. Configured via `dataset-ready-timeout-seconds` (default `300`, set to `0` to skip). Dataset states are surfaced as a `datasets` action output and as a table in the GitHub job step summary.
+- New post-deploy dataset readiness check: poll `GET /v1/datasets?status=true` until every dataset reaches a terminal-ok state (`ready`, `disabled`, or `refreshing`); fail the job immediately on `error` or on timeout-while-pending — regardless of `fail-on-test-error`, which still only governs runtime-probe results. Statuses like `shuttingdown` and any unrecognized values are treated as still-pending so the loop never returns a false-positive "loaded". Configured via `dataset-ready-timeout-seconds` (default `300`, set `0` to skip). Dataset states are surfaced as a `datasets` action output and as a table in the GitHub job step summary.
 
 ### Changed
 - `parseTags` now validates tag values against the Spice Cloud API rule (alphanumeric plus `_@-`). Previously the action only enforced length, so values like `repo: foo/bar` would round-trip to the API and fail there with a generic 400.
diff --git a/README.md b/README.md
@@ -105,7 +105,7 @@ Grant exactly the scopes for the features you use. The "All-in" row at the botto
 | `test-mcp-arguments`    | no | `{}` | JSON-encoded arguments for the MCP tool call. |
 | `test-warmup-seconds`   | no | `60` | Max wait for `isSpiceReady()` before running probes. |
 | `test-timeout-seconds`  | no | `30` | Per-probe HTTP timeout. |
-| `dataset-ready-timeout-seconds` | no | `300` | Max wait for every dataset to leave `initializing` (via `GET /v1/datasets?status=true`). Action fails immediately if any dataset enters `error`. Set to `0` to skip. |
+| `dataset-ready-timeout-seconds` | no | `300` | Max wait for every dataset (via `GET /v1/datasets?status=true`) to reach a terminal-ok state (`ready`/`disabled`/`refreshing`). The job fails the moment any dataset enters `error`, or when the timeout elapses while any dataset is still pending — independent of `fail-on-test-error`. Set to `0` to skip the check. |
 | `runtime-url`           | no | derived | Override probe base URL. By default derived from the app's region as `https://<region>-prod-aws-data.spiceai.io`. |
 | `fail-on-test-error`    | no | `true` | Fail the job when any probe fails. |
 | `api-url`               | no | `https://api.spice.ai` | Management API base URL. |
diff --git a/__tests__/deploy.test.ts b/__tests__/deploy.test.ts
@@ -385,6 +385,37 @@ describe("runDeploy", () => {
     expect(probeSql).not.toHaveBeenCalled();
   });
 
+  it("dataset readiness failures are fatal even when fail-on-test-error is false", async () => {
+    // The opt-out for the dataset check is `dataset-ready-timeout-seconds: 0`.
+    // `fail-on-test-error` only governs runtime-probe results.
+    const listApps = vi.fn().mockResolvedValue([sampleApp]);
+    const createDeployment = vi.fn().mockResolvedValue(succeededDeployment);
+    const getApiKeys = vi.fn().mockResolvedValue({ api_key: "rk", api_key_2: null });
+    const api = fakeApi({ listApps, createDeployment, getApiKeys });
+
+    const fakeRuntime = {
+      waitForReady: vi.fn().mockResolvedValue(undefined),
+      waitForDatasetsReady: vi.fn().mockRejectedValue(
+        Object.assign(new Error("1 dataset(s) failed to load: foo: bad creds"), {
+          name: "DatasetReadinessError",
+          datasets: [{ name: "foo", status: "Error", error_message: "bad creds" }],
+        }),
+      ),
+    } as unknown as RuntimeClient;
+
+    await expect(
+      runDeploy(
+        api,
+        {
+          ...baseInputs,
+          datasetReadyTimeoutSeconds: 60,
+          failOnTestError: false,
+        },
+        { runtimeFactory: () => fakeRuntime },
+      ),
+    ).rejects.toThrow(/dataset.*failed to load/);
+  });
+
   it("returns dataset states when all datasets are ready", async () => {
     const listApps = vi.fn().mockResolvedValue([sampleApp]);
     const createDeployment = vi.fn().mockResolvedValue(succeededDeployment);
diff --git a/__tests__/runtime.test.ts b/__tests__/runtime.test.ts
@@ -263,6 +263,55 @@ describe("RuntimeClient", () => {
     await expect(rt.waitForDatasetsReady(5)).rejects.toThrow(/did not finish loading/);
   });
 
+  it("waitForDatasetsReady accepts disabled and refreshing as terminal-ok", async () => {
+    const fetchImpl = vi.fn().mockResolvedValue(
+      new Response(
+        JSON.stringify([
+          { name: "a", status: "Ready" },
+          { name: "b", status: "Disabled" },
+          { name: "c", status: "Refreshing" },
+        ]),
+        { status: 200, headers: { "content-type": "application/json" } },
+      ),
+    );
+    const rt = new RuntimeClient({
+      apiKey: "k",
+      baseUrl: "https://x.example",
+      warmupSeconds: 0,
+      timeoutSeconds: 5,
+      sdkFactory: () => makeSdk(),
+      fetchImpl,
+    });
+    const datasets = await rt.waitForDatasetsReady(60);
+    expect(datasets.map((d) => d.name)).toEqual(["a", "b", "c"]);
+  });
+
+  it("waitForDatasetsReady treats shuttingdown and unknown statuses as still-pending (does not return early)", async () => {
+    const fetchImpl = vi.fn().mockResolvedValue(
+      new Response(
+        JSON.stringify([
+          { name: "a", status: "Ready" },
+          { name: "b", status: "ShuttingDown" },
+          { name: "c", status: "QuantumFlux" },
+        ]),
+        { status: 200, headers: { "content-type": "application/json" } },
+      ),
+    );
+    const clock = makeClock();
+    const rt = new RuntimeClient({
+      apiKey: "k",
+      baseUrl: "https://x.example",
+      warmupSeconds: 0,
+      timeoutSeconds: 5,
+      sdkFactory: () => makeSdk(),
+      fetchImpl,
+      clock,
+    });
+    await expect(rt.waitForDatasetsReady(5)).rejects.toThrow(
+      /did not finish loading.*ShuttingDown.*QuantumFlux/s,
+    );
+  });
+
   it("waitForDatasetsReady is a no-op when timeout is 0", async () => {
     const fetchImpl = vi.fn();
     const rt = new RuntimeClient({
diff --git a/action.yml b/action.yml
@@ -152,8 +152,11 @@ inputs:
   dataset-ready-timeout-seconds:
     description: |
       Maximum seconds to wait for every dataset reported by `GET /v1/datasets?status=true`
-      to leave the `initializing` state before running runtime probes. The action fails
-      immediately if any dataset enters the `error` state. Set to `0` to skip the check.
+      to reach a terminal-ok state (`ready`, `disabled`, or `refreshing`) before running
+      runtime probes. The action fails the job immediately if any dataset enters `error`,
+      or if the timeout elapses while datasets are still pending — regardless of
+      `fail-on-test-error`, which only governs runtime-probe results. To disable the
+      dataset check entirely, set this to `0`.
     required: false
     default: "300"
   runtime-url:
diff --git a/dist/index.js b/dist/index.js
diff --git a/dist/index.js.map b/dist/index.js.map
diff --git a/src/deploy.ts b/src/deploy.ts
@@ -243,10 +243,11 @@ async function runPostDeployChecks(
       try {
         datasets = await runtime.waitForDatasetsReady(inputs.datasetReadyTimeoutSeconds);
       } catch (err) {
+        // Dataset readiness failures are always fatal once the check is opted in
+        // (i.e. `dataset-ready-timeout-seconds > 0`) — `fail-on-test-error` only
+        // governs runtime-probe results. The opt-out is `dataset-ready-timeout-seconds: 0`.
         if (err instanceof DatasetReadinessError) datasets = err.datasets;
-        const message = (err as Error).message;
-        if (inputs.failOnTestError) throw new Error(message);
-        core.warning(message);
+        throw err;
       }
     }
 
diff --git a/src/runtime.ts b/src/runtime.ts
@@ -177,10 +177,18 @@ export class RuntimeClient {
   }
 
   /**
-   * Poll `/v1/datasets?status=true` until every dataset is in a terminal-ready state
-   * (`ready`, `disabled`, or `refreshing`). Throws `DatasetReadinessError` immediately
-   * if any dataset reports `error`, or if the timeout expires while datasets are still
-   * `initializing`.
+   * Poll `/v1/datasets?status=true` until every dataset is in a terminal-ok state.
+   *
+   * State machine (statuses are matched case-insensitively):
+   *   - `error`                          → throw `DatasetReadinessError` immediately.
+   *   - `ready` / `disabled` / `refreshing`
+   *                                      → terminal-ok; counts toward "all loaded".
+   *   - `initializing`, `shuttingdown`,
+   *     or any unknown value             → still pending; keep polling.
+   *
+   * On timeout, throws `DatasetReadinessError` listing each non-terminal-ok dataset
+   * with its current status so the user can tell `initializing` from `shuttingdown`
+   * or an unrecognized value.
    */
   async waitForDatasetsReady(timeoutSeconds: number): Promise<DatasetState[]> {
     if (timeoutSeconds <= 0) return [];
@@ -197,7 +205,7 @@ export class RuntimeClient {
         continue;
       }
 
-      const errored = last.filter((d) => normalizeStatus(d.status) === "error");
+      const errored = last.filter((d) => classifyStatus(d.status) === "error");
       if (errored.length > 0) {
         const detail = errored
           .map((d) => `${d.name}: ${d.error_message ?? d.error?.code ?? "<no message>"}`)
@@ -208,23 +216,23 @@ export class RuntimeClient {
         );
       }
 
-      const pending = last.filter((d) => normalizeStatus(d.status) === "initializing");
+      const pending = last.filter((d) => classifyStatus(d.status) === "pending");
       if (pending.length === 0) {
         core.info(`All ${last.length} dataset(s) loaded.`);
         return last;
       }
 
       core.info(
-        `${last.length - pending.length}/${last.length} dataset(s) loaded (still initializing: ${pending.map((d) => d.name).join(", ")})`,
+        `${last.length - pending.length}/${last.length} dataset(s) loaded (waiting on: ${pending.map((d) => `${d.name} [${d.status}]`).join(", ")})`,
       );
       await this.clock.sleep(3_000);
     }
 
     const stillPending = last
-      .filter((d) => normalizeStatus(d.status) === "initializing")
-      .map((d) => d.name);
+      .filter((d) => classifyStatus(d.status) === "pending")
+      .map((d) => `${d.name} [${d.status}]`);
     throw new DatasetReadinessError(
-      `Datasets did not finish loading within ${timeoutSeconds}s (still initializing: ${stillPending.join(", ") || "<unknown>"}).`,
+      `Datasets did not finish loading within ${timeoutSeconds}s (waiting on: ${stillPending.join(", ") || "<unknown>"}).`,
       last,
     );
   }
@@ -313,8 +321,19 @@ export function truncate(value: string, max: number): string {
   return `${value.slice(0, max)}…`;
 }
 
-function normalizeStatus(status: string): string {
-  return (status ?? "").trim().toLowerCase();
+const TERMINAL_OK_DATASET_STATUSES = new Set(["ready", "disabled", "refreshing"]);
+
+/**
+ * Bucket a dataset status string into one of three classes the readiness loop
+ * needs to make decisions. Anything that isn't an explicit terminal-ok or `error`
+ * value (e.g. `shuttingdown`, future-added states, typos) is treated as `pending`
+ * so the loop keeps polling rather than declaring a misclassified success.
+ */
+function classifyStatus(status: string): "ok" | "error" | "pending" {
+  const s = (status ?? "").trim().toLowerCase();
+  if (s === "error") return "error";
+  if (TERMINAL_OK_DATASET_STATUSES.has(s)) return "ok";
+  return "pending";
 }
 
 function summarizeChat(body: string): string | undefined {

Original file line number	Diff line number	Diff line change
`@@ -243,10 +243,11 @@ async function runPostDeployChecks(`
`243`	`243`	`try {`
`244`	`244`	`datasets = await runtime.waitForDatasetsReady(inputs.datasetReadyTimeoutSeconds);`
`245`	`245`	`} catch (err) {`
	`246`	`+ // Dataset readiness failures are always fatal once the check is opted in`
	`247`	``+ // (i.e. `dataset-ready-timeout-seconds > 0`) — `fail-on-test-error` only``
	`248`	``+ // governs runtime-probe results. The opt-out is `dataset-ready-timeout-seconds: 0`.``
`246`	`249`	`if (err instanceof DatasetReadinessError) datasets = err.datasets;`
`247`		`- const message = (err as Error).message;`
`248`		`- if (inputs.failOnTestError) throw new Error(message);`
`249`		`- core.warning(message);`
	`250`	`+ throw err;`
`250`	`251`	`}`
`251`	`252`	`}`
`252`	`253`