kaito-project
diff --git a/agents.md b/agents.md
Lines changed: 5 additions & 2 deletions
diff --git a/backend/src/routes/vllmRecipes.ts b/backend/src/routes/vllmRecipes.ts
Lines changed: 27 additions & 10 deletions
diff --git a/backend/src/services/vllmRecipesClient.test.ts b/backend/src/services/vllmRecipesClient.test.ts
Lines changed: 72 additions & 1 deletion
@@ -2,7 +2,7 @@
 
 ## WHY: Project Purpose
 
-**AI Runway** is a platform for deploying and managing machine learning models on Kubernetes. It provides a unified CRD abstraction (`ModelDeployment`) that works across multiple inference providers (KAITO, Dynamo, KubeRay, llm-d, etc.).
+**AI Runway** is a platform for deploying and managing machine learning models on Kubernetes. It provides a unified CRD abstraction (`ModelDeployment`) that works across multiple inference providers (KAITO, Dynamo, KubeRay, llm-d, Direct vLLM, etc.).
 
 ## WHAT: Tech Stack & Structure
 
@@ -18,6 +18,7 @@
   - `controller/config/` - Kustomize manifests for CRDs/RBAC
 - `frontend/src/` - React components, hooks, pages
 - `backend/src/` - Hono app, providers, services
+- `providers/` - Standalone provider controllers/shims (`dynamo`, `kaito`, `kuberay`, `llmd`, `vllm`); each renders `ModelDeployment` into its upstream resource. `providers/vllm` is the in-repo Direct vLLM provider (renders native `Deployment`+`Service`, selected via `provider.name: vllm`).
 - `shared/types/` - Shared TypeScript definitions
 - `plugins/headlamp/` - Headlamp dashboard plugin
 - `docs/` - Detailed documentation (read as needed; also the source rendered on the website)
@@ -103,7 +104,9 @@ Unified API for deploying ML models. Key fields:
 - `spec.model.id` - HuggingFace model ID or custom identifier
 - `spec.model.source` - `huggingface` or `custom`
 - `spec.engine.type` - `vllm`, `sglang`, `trtllm`, or `llamacpp` (optional, auto-selected from provider capabilities)
-- `spec.provider.name` - Optional explicit provider selection
+- `spec.engine.image` - Optional engine-specific container image override (preferred over legacy top-level `spec.image`; used by Direct vLLM/custom images)
+- `spec.engine.extraArgs` - Optional list of raw engine flags appended verbatim
+- `spec.provider.name` - Optional explicit provider selection (`kaito`, `dynamo`, `kuberay`, `llmd`, `vllm`)
 - `spec.serving.mode` - `aggregated` (default) or `disaggregated`
 - `spec.resources.gpu.count` - GPU count for aggregated mode
 - `spec.scaling.prefill/decode` - Component scaling for disaggregated mode
 
@@ -2,11 +2,34 @@ import { Hono } from 'hono';
 import { zValidator } from '@hono/zod-validator';
 import { z } from 'zod';
 import { HTTPException } from 'hono/http-exception';
+import type { ContentfulStatusCode } from 'hono/utils/http-status';
 import type { VllmRecipeResolveRequest } from '@airunway/shared';
-import { vllmRecipesClient } from '../services/vllmRecipesClient';
+import {
+  vllmRecipesClient,
+  VllmRecipeValidationError,
+  VllmRecipeTimeoutError,
+} from '../services/vllmRecipesClient';
 import { vllmRecipeResolver } from '../services/vllmRecipeResolver';
 import logger from '../lib/logger';
 
+/**
+ * Picks the HTTP status for a failed recipe operation, so API clients can
+ * tell invalid input (4xx, retrying will not help) apart from a failure of
+ * the upstream recipes.vllm.ai service (5xx).
+ *
+ * @param error - Whatever was caught around the recipe call.
+ * @returns 400 for a VllmRecipeValidationError, 504 for a
+ *   VllmRecipeTimeoutError, and 502 for anything else.
+ */
+function recipeErrorStatus(error: unknown): ContentfulStatusCode {
+  // The validation check comes first; 502 is the catch-all.
+  return error instanceof VllmRecipeValidationError
+    ? 400
+    : error instanceof VllmRecipeTimeoutError
+      ? 504
+      : 502;
+}
+
+/**
+ * Wraps a recipe failure in an HTTPException ready to be thrown from a route.
+ *
+ * @param error - The caught value; its message is reused when it is an Error.
+ * @param fallbackMessage - Message used when `error` is not an Error instance.
+ * @returns An HTTPException whose status is chosen by recipeErrorStatus.
+ */
+function recipeHttpException(error: unknown, fallbackMessage: string): HTTPException {
+  const status = recipeErrorStatus(error);
+  const message = error instanceof Error ? error.message : fallbackMessage;
+  return new HTTPException(status, { message });
+}
+
 const imageChoiceSchema = z.discriminatedUnion('type', [
   z.object({ type: z.literal('recipe') }),
   z.object({ type: z.literal('custom'), imageRef: z.string().min(1, 'imageRef is required') }),
@@ -34,9 +57,7 @@ const vllmRecipes = new Hono()
       return c.json(result);
     } catch (error) {
       logger.error({ error }, 'Failed to list vLLM recipes');
-      throw new HTTPException(502, {
-        message: error instanceof Error ? error.message : 'Failed to list vLLM recipes',
-      });
+      throw recipeHttpException(error, 'Failed to list vLLM recipes');
     }
   })
 
@@ -52,9 +73,7 @@ const vllmRecipes = new Hono()
       return c.json(result);
     } catch (error) {
       logger.error({ error, modelId: request.modelId }, 'Failed to resolve vLLM recipe');
-      throw new HTTPException(502, {
-        message: error instanceof Error ? error.message : 'Failed to resolve vLLM recipe',
-      });
+      throw recipeHttpException(error, 'Failed to resolve vLLM recipe');
     }
   })
 
@@ -76,9 +95,7 @@ const vllmRecipes = new Hono()
       return c.json(result);
     } catch (error) {
       logger.error({ error, org, model }, 'Failed to fetch vLLM recipe');
-      throw new HTTPException(502, {
-        message: error instanceof Error ? error.message : 'Failed to fetch vLLM recipe',
-      });
+      throw recipeHttpException(error, 'Failed to fetch vLLM recipe');
     }
   });
 
 
@@ -1,5 +1,10 @@
 import { describe, expect, test } from 'bun:test';
-import { VllmRecipesClient } from './vllmRecipesClient';
+import {
+  VllmRecipesClient,
+  VllmRecipeValidationError,
+  VllmRecipeTimeoutError,
+  VllmRecipeUpstreamError,
+} from './vllmRecipesClient';
 
 describe('VllmRecipesClient', () => {
   test('resolves relative recipe references under the configured recipe base URL', () => {
@@ -59,5 +64,71 @@ describe('VllmRecipesClient', () => {
         `Invalid Hugging Face model ID: ${modelId}`
       );
     });
+
+    test('throws a typed validation error for bad input (mapped to 4xx by the route)', async () => {
+      // The rejection must be the typed validation error, not just any Error.
+      const lookup = client.getByModelId('acme/foo/bar');
+      await expect(lookup).rejects.toBeInstanceOf(VllmRecipeValidationError);
+    });
+  });
+
+  describe('upstream error classification + caching', () => {
+    const originalFetch = globalThis.fetch;
+
+    /**
+     * Runs `run` against a fresh client while `fetchImpl` stands in for the
+     * global fetch, then puts the original fetch back.
+     *
+     * Declared async with try/finally so the real fetch is restored even when
+     * constructing the client or calling `run` throws synchronously; chaining
+     * `.finally()` on the returned promise alone would leave the stub
+     * installed in that case.
+     *
+     * @param fetchImpl - Stub installed as globalThis.fetch for the duration of the run.
+     * @param run - Test body; receives a client built for https://recipes.vllm.ai.
+     * @returns Whatever `run` resolves to; failures surface as a rejected promise.
+     */
+    async function withFetch<T>(fetchImpl: typeof fetch, run: (client: VllmRecipesClient) => Promise<T>): Promise<T> {
+      globalThis.fetch = fetchImpl;
+      try {
+        const client = new VllmRecipesClient('https://recipes.vllm.ai');
+        return await run(client);
+      } finally {
+        globalThis.fetch = originalFetch;
+      }
+    }
+
+    test('maps an aborted fetch to a timeout error', async () => {
+      // Stub fetch so every request rejects with a plain Error named
+      // 'AbortError'.
+      const abortingFetch = (async () => {
+        throw Object.assign(new Error('aborted'), { name: 'AbortError' });
+      }) as unknown as typeof fetch;
+
+      await withFetch(abortingFetch, async (recipes) => {
+        const lookup = recipes.getByModelId('acme/model');
+        await expect(lookup).rejects.toBeInstanceOf(VllmRecipeTimeoutError);
+      });
+    });
+
+    test('maps a non-ok upstream response to an upstream error', async () => {
+      // Every request is answered with a 503 response.
+      const unavailableFetch = (async () =>
+        new Response('nope', { status: 503, statusText: 'Service Unavailable' })) as unknown as typeof fetch;
+
+      await withFetch(unavailableFetch, async (recipes) => {
+        const lookup = recipes.getByModelId('acme/model');
+        await expect(lookup).rejects.toBeInstanceOf(VllmRecipeUpstreamError);
+      });
+    });
+
+    test('rejects an oversized recipe payload', async () => {
+      // A 200 response whose JSON body carries a 6 MiB string field.
+      // NOTE(review): the client's size limit is not visible in this hunk;
+      // presumably it is below this size — confirm against vllmRecipesClient.
+      const blobLength = 6 * 1024 * 1024;
+      const oversizedBody = JSON.stringify({ blob: 'x'.repeat(blobLength) });
+      const oversizedFetch = (async () =>
+        new Response(oversizedBody, { status: 200, headers: { 'content-type': 'application/json' } })) as unknown as typeof fetch;
+
+      await withFetch(oversizedFetch, async (recipes) => {
+        const lookup = recipes.getByModelId('acme/model');
+        await expect(lookup).rejects.toBeInstanceOf(VllmRecipeUpstreamError);
+      });
+    });
+
+    test('caches a model recipe so a second call does not refetch', async () => {
+      // Count how many times the client actually reaches fetch.
+      let fetchCount = 0;
+      const countingFetch = (async () => {
+        fetchCount += 1;
+        const body = JSON.stringify({ recipe: true });
+        return new Response(body, { status: 200, headers: { 'content-type': 'application/json' } });
+      }) as unknown as typeof fetch;
+
+      await withFetch(countingFetch, async (recipes) => {
+        // Same model ID twice on the same client: only the first lookup may hit fetch.
+        for (let attempt = 0; attempt < 2; attempt += 1) {
+          await recipes.getByModelId('acme/model');
+        }
+        expect(fetchCount).toBe(1);
+      });
+    });
   });
 });