diff --git a/packages/ui/src/cloud/public-pages/lib/steward-session.ts b/packages/ui/src/cloud/public-pages/lib/steward-session.ts index 73d8c774e0553..dc3f619f016da 100644 --- a/packages/ui/src/cloud/public-pages/lib/steward-session.ts +++ b/packages/ui/src/cloud/public-pages/lib/steward-session.ts @@ -15,13 +15,7 @@ import { type StewardNonceExchangeResponse, StewardSessionError, } from "@elizaos/shared/steward-session-client"; - -const ELIZA_CLOUD_AUTH_BASES: Record = { - "elizacloud.ai": "https://api.elizacloud.ai", - "www.elizacloud.ai": "https://api.elizacloud.ai", - "dev.elizacloud.ai": "https://api.elizacloud.ai", - "staging.elizacloud.ai": "https://api-staging.elizacloud.ai", -}; +import { ELIZA_CLOUD_DIRECT_API_BY_HOST } from "../../shell/steward-url"; export function resolveStewardAuthEndpoint( path: string, @@ -29,7 +23,7 @@ export function resolveStewardAuthEndpoint( ? "" : window.location.hostname.toLowerCase(), ): string { - const base = ELIZA_CLOUD_AUTH_BASES[hostname.toLowerCase()]; + const base = ELIZA_CLOUD_DIRECT_API_BY_HOST[hostname.toLowerCase()]; return base ? 
`${base}${path}` : path; } diff --git a/packages/ui/src/cloud/shell/StewardProviderRuntime.test.tsx b/packages/ui/src/cloud/shell/StewardProviderRuntime.test.tsx new file mode 100644 index 0000000000000..c800d5079a407 --- /dev/null +++ b/packages/ui/src/cloud/shell/StewardProviderRuntime.test.tsx @@ -0,0 +1,166 @@ +// @vitest-environment jsdom + +import { STEWARD_TOKEN_KEY } from "@elizaos/shared/steward-session-client"; +import { cleanup, render, waitFor } from "@testing-library/react"; +import type { ReactNode } from "react"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +// AuthTokenSync's 401 handling is the load-bearing fix for the re-login loop: +// a 401 from session-sync or refresh must NOT wipe a still-valid token (a +// misrouted/stale control plane 401s valid sessions), but MUST still clear +// once the token is expired — and an exp-less token counts as expired, or no +// 401 could ever clear it. These tests exercise the real AuthTokenSync against +// a stubbed fetch; only the @stwd SDK boundary is mocked. 
+ +vi.mock("@stwd/react", () => ({ + StewardProvider: ({ children }: { children: ReactNode }) => children, + useAuth: () => ({ + isAuthenticated: false, + isLoading: false, + user: null, + session: null, + signOut: () => {}, + getToken: () => "", + verifyEmailCallback: async () => ({ token: "" }), + }), +})); +vi.mock("@stwd/react/styles.css", () => ({})); +vi.mock("@stwd/sdk", () => ({ + StewardClient: class {}, +})); + +import StewardAuthRuntimeProvider from "./StewardProviderRuntime"; + +function makeJwt(payload: Record): string { + const b64url = (value: object) => + btoa(JSON.stringify(value)) + .replace(/\+/g, "-") + .replace(/\//g, "_") + .replace(/=+$/, ""); + return `${b64url({ alg: "HS256", typ: "JWT" })}.${b64url(payload)}.sig`; +} + +type RecordedCall = { url: string; method: string }; +let calls: RecordedCall[] = []; + +// Node ≥22 ships a bare `localStorage` global that is non-functional without +// --localstorage-file and shadows jsdom's Storage (its methods throw), and in +// this vitest setup even window.localStorage resolves to it. The code under +// test reads via both the bare global and window.localStorage, so install one +// in-memory Storage on both access paths. +function createMemoryStorage(): Storage { + const store = new Map(); + return { + get length() { + return store.size; + }, + clear: () => store.clear(), + getItem: (key: string) => store.get(key) ?? null, + key: (index: number) => [...store.keys()][index] ?? null, + removeItem: (key: string) => { + store.delete(key); + }, + setItem: (key: string, value: string) => { + store.set(key, String(value)); + }, + }; +} + +let storage: Storage = createMemoryStorage(); + +function stubFetchWith401s(): void { + vi.stubGlobal( + "fetch", + vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { + const url = String(input); + const method = init?.method ?? 
"GET"; + calls.push({ url, method }); + if (method === "DELETE") return new Response(null, { status: 200 }); + return new Response(JSON.stringify({}), { status: 401 }); + }), + ); +} + +function postsTo(endpoint: string): RecordedCall[] { + return calls.filter((c) => c.method === "POST" && c.url.includes(endpoint)); +} + +function mount() { + return render( + +
+ , + ); +} + +beforeEach(() => { + calls = []; + storage = createMemoryStorage(); + vi.stubGlobal("localStorage", storage); + Object.defineProperty(window, "localStorage", { + configurable: true, + value: storage, + }); + // Neutralize any configured API base so endpoints resolve to the relative + // paths (unknown jsdom host) — the handlers under test are endpoint-agnostic. + vi.stubEnv("VITE_API_URL", ""); + vi.stubEnv("NEXT_PUBLIC_API_URL", ""); + stubFetchWith401s(); +}); + +afterEach(() => { + cleanup(); + vi.unstubAllGlobals(); + vi.unstubAllEnvs(); +}); + +describe("AuthTokenSync 401 handling", () => { + it("keeps a still-valid token when session-sync and refresh both 401 (no re-login loop), then retries the cookie sync on the next trigger", async () => { + // exp 60s out: valid, but inside the 120s refresh-ahead window so the + // mount-time checkAndRefresh actually POSTs the refresh endpoint. + const token = makeJwt({ + sub: "u1", + exp: Math.floor(Date.now() / 1000) + 60, + }); + storage.setItem(STEWARD_TOKEN_KEY, token); + + mount(); + + await waitFor(() => { + expect(postsTo("steward-session").length).toBeGreaterThanOrEqual(1); + expect(postsTo("steward-refresh").length).toBeGreaterThanOrEqual(1); + }); + + // Both endpoints 401'd — pre-fix this wiped the token and looped /login. + expect(storage.getItem(STEWARD_TOKEN_KEY)).toBe(token); + + // The keep-path resets the sync dedupe marker, so the next trigger + // re-attempts the cookie POST for the SAME token (the endpoint may have + // healed). Without the reset this second POST never happens. 
+ const before = postsTo("steward-session").length; + document.dispatchEvent(new Event("visibilitychange")); + await waitFor(() => + expect(postsTo("steward-session").length).toBeGreaterThan(before), + ); + expect(storage.getItem(STEWARD_TOKEN_KEY)).toBe(token); + }); + + it("clears an expired token on a refresh 401 (genuine end-of-session still self-heals)", async () => { + storage.setItem( + STEWARD_TOKEN_KEY, + makeJwt({ sub: "u1", exp: Math.floor(Date.now() / 1000) - 60 }), + ); + + mount(); + + await waitFor(() => expect(storage.getItem(STEWARD_TOKEN_KEY)).toBeNull()); + }); + + it("clears an exp-less token on a refresh 401 (it can never age out, so it must not be keepable)", async () => { + storage.setItem(STEWARD_TOKEN_KEY, makeJwt({ sub: "u1" })); + + mount(); + + await waitFor(() => expect(storage.getItem(STEWARD_TOKEN_KEY)).toBeNull()); + }); +}); diff --git a/packages/ui/src/cloud/shell/StewardProviderRuntime.tsx b/packages/ui/src/cloud/shell/StewardProviderRuntime.tsx index 170b9340ed5c4..63a72323950f5 100644 --- a/packages/ui/src/cloud/shell/StewardProviderRuntime.tsx +++ b/packages/ui/src/cloud/shell/StewardProviderRuntime.tsx @@ -126,6 +126,11 @@ function AuthTokenSync({ children }: { children: ReactNode }) { // token is actually expired, so a stale staging proxy can't loop us. const current = readStoredToken(); if (current && !tokenIsExpired(current)) { + // Reset the dedupe marker so the next sync trigger (visibility, + // storage, re-render) retries the cookie POST for this same token + // once the endpoint recovers — otherwise the session would ride + // out its lifetime with no HttpOnly cookie ever established. 
+ lastSyncedToken.current = null; console.warn( "[steward] Session-sync 401 but stored token still valid — keeping it (likely a stale control-plane proxy)", ); diff --git a/packages/ui/src/cloud/shell/StewardProviderShared.test.ts b/packages/ui/src/cloud/shell/StewardProviderShared.test.ts index 33fd382425493..908b44643dd1e 100644 --- a/packages/ui/src/cloud/shell/StewardProviderShared.test.ts +++ b/packages/ui/src/cloud/shell/StewardProviderShared.test.ts @@ -2,6 +2,8 @@ import { afterEach, describe, expect, it, vi } from "vitest"; +import { tokenIsExpired } from "./StewardProviderShared"; + // The Steward auth endpoints are resolved per browser host: co-hosted cloud // surfaces bypass the Pages/Worker proxy and call the matching API worker // directly. The regression this guards: `staging.elizacloud.ai` used to have no @@ -68,3 +70,34 @@ describe("Steward auth endpoint resolution", () => { expect(configuredRefreshEndpoint()).toBe("/api/auth/steward-refresh"); }); }); + +function makeJwt(payload: Record): string { + const b64url = (value: object) => + btoa(JSON.stringify(value)) + .replace(/\+/g, "-") + .replace(/\//g, "_") + .replace(/=+$/, ""); + return `${b64url({ alg: "HS256", typ: "JWT" })}.${b64url(payload)}.sig`; +} + +describe("tokenIsExpired", () => { + it("keeps a token with a future exp", () => { + expect( + tokenIsExpired(makeJwt({ exp: Math.floor(Date.now() / 1000) + 600 })), + ).toBe(false); + }); + + it("treats a past exp as expired", () => { + expect( + tokenIsExpired(makeJwt({ exp: Math.floor(Date.now() / 1000) - 600 })), + ).toBe(true); + }); + + it("treats a token WITHOUT exp as expired — the 401 handlers keep any non-expired token, so an exp-less one would otherwise be uncloseable", () => { + expect(tokenIsExpired(makeJwt({ sub: "u1" }))).toBe(true); + }); + + it("treats an undecodable token as expired", () => { + expect(tokenIsExpired("not-a-jwt")).toBe(true); + }); +}); diff --git a/packages/ui/src/cloud/shell/StewardProviderShared.ts 
b/packages/ui/src/cloud/shell/StewardProviderShared.ts index d0722a10ad2ec..66205609616c2 100644 --- a/packages/ui/src/cloud/shell/StewardProviderShared.ts +++ b/packages/ui/src/cloud/shell/StewardProviderShared.ts @@ -8,6 +8,7 @@ import { createContext } from "react"; import { scrubPersistedAgentProfileTokens } from "../../state/agent-profiles"; import { scrubPersistedActiveServerToken } from "../../state/persistence"; import { decodeJwtPayload } from "../lib/jwt"; +import { ELIZA_CLOUD_DIRECT_API_BY_HOST } from "./steward-url"; export function isPlaceholderValue(value: string | undefined): boolean { if (!value) return true; @@ -25,21 +26,9 @@ function trimTrailingSlash(value: string): string { return value.replace(/\/+$/, ""); } -// Hosts where the SPA is co-hosted with a Cloudflare Pages/Worker deployment -// that proxies the Steward auth endpoints to the API worker. We bypass that -// proxy and hit the matching API worker directly so session-sync + refresh keep -// working even when the Pages Functions bundle / FRONTEND_ALIAS proxy is stale. -// Per-host base — staging MUST resolve to api-staging, NOT prod api. When it -// fell through to the same-origin relative path (staging absent here), a stale -// worker proxy 401'd a valid session and clearStaleStewardSession wiped it → -// the sign-in loop. Mirrors steward-url.ts's ELIZA_CLOUD_DIRECT_API_BY_HOST. -const ELIZA_CLOUD_DIRECT_API_BY_HOST: Record = { - "elizacloud.ai": "https://api.elizacloud.ai", - "www.elizacloud.ai": "https://api.elizacloud.ai", - "dev.elizacloud.ai": "https://api.elizacloud.ai", - "staging.elizacloud.ai": "https://api-staging.elizacloud.ai", -}; - +// On co-hosted elizacloud.ai surfaces, session-sync + refresh bypass the +// Pages/Worker proxy and call each host's OWN API worker directly (the shared +// host → worker map in steward-url.ts). Everywhere else they stay same-origin. 
function directCloudApiBase(): string | undefined { if (typeof window === "undefined") return undefined; return ELIZA_CLOUD_DIRECT_API_BY_HOST[window.location.hostname.toLowerCase()]; @@ -152,7 +141,11 @@ export function readStoredToken(): string | null { export function tokenIsExpired(token: string): boolean { const payload = decodeJwtPayload(token); if (!payload) return true; - if (!payload.exp) return false; + // No usable exp claim (absent, non-numeric, non-finite) ⇒ treat as expired. + // Steward always mints exp; anything else is foreign/malformed, and since the + // 401 handlers keep any NON-expired token, such a token would otherwise be + // uncloseable (no 401 clears it, it never ages out): NaN < Date.now() is false. + if (typeof payload.exp !== "number" || !Number.isFinite(payload.exp)) return true; return payload.exp * 1000 < Date.now(); } diff --git a/packages/ui/src/cloud/shell/steward-url.ts b/packages/ui/src/cloud/shell/steward-url.ts index 35343efb442df..9ef4005656fe1 100644 --- a/packages/ui/src/cloud/shell/steward-url.ts +++ b/packages/ui/src/cloud/shell/steward-url.ts @@ -36,8 +36,14 @@ function getBrowserHostname(): string | undefined { * proxies `/steward/*` to the Workers API. We bypass the proxy and call the * matching API worker directly so login keeps working even when the Pages * Functions bundle is missing or broken. + * + * Single source of truth for the browser host → API worker map. Every host + * must map to its OWN env's worker (staging → api-staging, never prod). The + * Steward auth endpoints (StewardProviderShared, steward-session) resolve off + * this same map — a host missing here silently downgrades its auth calls to + * the co-hosted proxy. */ -const ELIZA_CLOUD_DIRECT_API_BY_HOST: Record = { +export const ELIZA_CLOUD_DIRECT_API_BY_HOST: Record = { "elizacloud.ai": "https://api.elizacloud.ai", "www.elizacloud.ai": "https://api.elizacloud.ai", "dev.elizacloud.ai": "https://api.elizacloud.ai",