87 changes: 87 additions & 0 deletions .github/workflows/integration-test-docker.yml
@@ -182,6 +182,93 @@ jobs:
echo "⚠️ Response may not contain expected fields, but request succeeded"
fi

- name: Test Response API - Create Response
run: |
echo "Testing Response API: POST /v1/responses..."

response=$(curl -s -X POST http://localhost:8801/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "qwen3",
"input": "What is 2 + 2?",
"store": true
}')

echo "Response: $response"

# Extract response ID for subsequent tests
response_id=$(echo "$response" | jq -r '.id // empty')
if [ -n "$response_id" ] && [[ "$response_id" == resp_* ]]; then
echo "✅ Response API create test passed (id=$response_id)"
echo "RESPONSE_ID=$response_id" >> $GITHUB_ENV
else
echo "❌ Response API create test failed - invalid or missing response ID"
exit 1
fi

- name: Test Response API - Get Response
run: |
echo "Testing Response API: GET /v1/responses/$RESPONSE_ID..."

response=$(curl -s -X GET "http://localhost:8801/v1/responses/$RESPONSE_ID" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify response ID matches
got_id=$(echo "$response" | jq -r '.id // empty')
if [ "$got_id" = "$RESPONSE_ID" ]; then
echo "✅ Response API get test passed"
else
echo "❌ Response API get test failed - ID mismatch (expected=$RESPONSE_ID, got=$got_id)"
exit 1
fi

- name: Test Response API - Get Input Items
run: |
echo "Testing Response API: GET /v1/responses/$RESPONSE_ID/input_items..."

response=$(curl -s -X GET "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify it's a list
object_type=$(echo "$response" | jq -r '.object // empty')
if [ "$object_type" = "list" ]; then
echo "✅ Response API input_items test passed"
else
echo "❌ Response API input_items test failed - expected object=list, got=$object_type"
exit 1
fi

- name: Test Response API - Delete Response
run: |
echo "Testing Response API: DELETE /v1/responses/$RESPONSE_ID..."

response=$(curl -s -X DELETE "http://localhost:8801/v1/responses/$RESPONSE_ID" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify deletion
deleted=$(echo "$response" | jq -r '.deleted // empty')
if [ "$deleted" = "true" ]; then
echo "✅ Response API delete test passed"
else
echo "❌ Response API delete test failed - expected deleted=true"
exit 1
fi

# Verify 404 on subsequent get
get_response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8801/v1/responses/$RESPONSE_ID")
if [ "$get_response" = "404" ]; then
echo "✅ Response API delete verification passed (404 on get)"
else
echo "❌ Response API delete verification failed - expected 404, got $get_response"
exit 1
fi

- name: Show service logs on failure
if: failure()
run: |
8 changes: 8 additions & 0 deletions config/config.yaml
@@ -3,6 +3,14 @@ bert_model:
threshold: 0.6
use_cpu: true

# Response API Configuration
# Enables OpenAI Response API support with conversation chaining
response_api:
enabled: true
store_backend: "memory" # Options: "memory", "milvus", "redis"
ttl_seconds: 86400 # 24 hours
max_responses: 1000

semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
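
For readers of the `response_api` block above: conversation chaining is normally exercised by creating a response and then referencing its ID in a follow-up request. A minimal sketch against the workflow's local endpoint (port 8801), assuming the router follows the OpenAI `previous_response_id` convention; that field is not shown anywhere else in this PR:

```bash
# Create an initial response; "store": true keeps it in the configured backend.
first=$(curl -s -X POST http://localhost:8801/v1/responses \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3", "input": "What is 2 + 2?", "store": true}')
first_id=$(echo "$first" | jq -r '.id')

# Chain a follow-up turn onto the stored response.
# NOTE: previous_response_id follows the OpenAI Responses API convention and is
# an assumption here; the workflow in this PR does not exercise chaining directly.
curl -s -X POST http://localhost:8801/v1/responses \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"qwen3\", \"input\": \"Now multiply that by 3.\", \"previous_response_id\": \"$first_id\", \"store\": true}" | jq .
```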
10 changes: 10 additions & 0 deletions e2e/README.md
@@ -18,6 +18,7 @@ The framework follows a **separation of concerns** design:
- **istio**: Tests Semantic Router with Istio service mesh integration
- **production-stack**: Tests vLLM Production Stack configurations
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
- **response-api**: Tests Response API endpoints (POST/GET/DELETE /v1/responses)
- **dynamo**: Tests with Nvidia Dynamo (future)

## Directory Structure
@@ -82,6 +83,15 @@ The framework includes the following test cases (all in `e2e/testcases/`):
| `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate |
| `jailbreak-detection` | Jailbreak attack detection | 10 attack types, detection rate, block rate |

### Response API Tests

| Test Case | Description | Metrics |
|-----------|-------------|---------|
| `response-api-create` | POST /v1/responses - Create a new response | Response ID validation, status check |
| `response-api-get` | GET /v1/responses/{id} - Retrieve a response | Response retrieval, ID matching |
| `response-api-delete` | DELETE /v1/responses/{id} - Delete a response | Deletion confirmation, 404 verification |
| `response-api-input-items` | GET /v1/responses/{id}/input_items - List input items | Input items list, pagination |
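
The `response-api-input-items` case lists a response's stored input items, and its metrics column mentions pagination. A minimal sketch of how that paging might be driven, assuming OpenAI-style `limit` and `after` query parameters, which this PR does not exercise directly:

```bash
# List input items for a stored response one page at a time.
# The CI workflow only checks that .object == "list"; limit/after are assumed
# to follow the OpenAI list-pagination convention.
page=$(curl -s "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items?limit=2")
echo "$page" | jq '{object, count: (.data | length), has_more}'

# If has_more is true, pass the last item's id as the cursor for the next page.
last_id=$(echo "$page" | jq -r '.data[-1].id')
curl -s "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items?limit=2&after=$last_id" | jq '.data[].id'
```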

### Signal-Decision Engine Tests

| Test Case | Description | Metrics |
177 changes: 177 additions & 0 deletions e2e/profiles/response-api/profile.go
@@ -0,0 +1,177 @@
package responseapi

import (
"context"
"fmt"
"time"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"

"github.com/vllm-project/semantic-router/e2e/pkg/framework"
"github.com/vllm-project/semantic-router/e2e/pkg/helm"
"github.com/vllm-project/semantic-router/e2e/pkg/helpers"

// Import testcases package to register all test cases via their init() functions
_ "github.com/vllm-project/semantic-router/e2e/testcases"
)

// Profile implements the Response API test profile
type Profile struct {
verbose bool
kubeConfig string
}

// NewProfile creates a new Response API profile
func NewProfile() *Profile {
return &Profile{}
}

// Name returns the profile name
func (p *Profile) Name() string {
return "response-api"
}

// Description returns the profile description
func (p *Profile) Description() string {
return "Tests Response API endpoints (POST/GET/DELETE /v1/responses)"
}

// Setup deploys all required components for Response API testing
func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error {
p.verbose = opts.Verbose
p.kubeConfig = opts.KubeConfig
p.log("Setting up Response API test environment")

deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)

// Step 1: Deploy Semantic Router with Response API enabled
p.log("Step 1/3: Deploying Semantic Router with Response API")
if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil {
return fmt.Errorf("failed to deploy semantic router: %w", err)
}

// Step 2: Deploy Envoy Gateway
p.log("Step 2/3: Deploying Envoy Gateway")
if err := p.deployEnvoyGateway(ctx, deployer); err != nil {
return fmt.Errorf("failed to deploy envoy gateway: %w", err)
}

// Step 3: Verify all components are ready
p.log("Step 3/3: Verifying all components are ready")
if err := p.verifyEnvironment(ctx, opts); err != nil {
return fmt.Errorf("failed to verify environment: %w", err)
}

p.log("Response API test environment setup complete")
return nil
}

// Teardown cleans up all deployed resources
func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
p.verbose = opts.Verbose
p.log("Tearing down Response API test environment")

deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)

p.log("Uninstalling Envoy Gateway")
_ = deployer.Uninstall(ctx, "eg", "envoy-gateway-system")

p.log("Uninstalling Semantic Router")
_ = deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system")

p.log("Response API test environment teardown complete")
return nil
}

// GetTestCases returns the list of test cases for this profile
func (p *Profile) GetTestCases() []string {
return []string{
// Response API basic operations
"response-api-create",
"response-api-get",
"response-api-delete",
"response-api-input-items",
}
}

// GetServiceConfig returns the service configuration for accessing the deployed service
func (p *Profile) GetServiceConfig() framework.ServiceConfig {
return framework.ServiceConfig{
LabelSelector: "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router",
Namespace: "envoy-gateway-system",
PortMapping: "8080:80",
}
}

func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error {
imageTag := opts.ImageTag
if imageTag == "" {
imageTag = "latest"
}

return deployer.Install(ctx, helm.InstallOptions{
ReleaseName: "semantic-router",
Chart: "deploy/helm/semantic-router",
Namespace: "vllm-semantic-router-system",
ValuesFiles: []string{"e2e/profiles/response-api/values.yaml"},
Set: map[string]string{
"image.repository": "ghcr.io/vllm-project/semantic-router/extproc",
"image.tag": imageTag,
},
Wait: true,
Timeout: "300s",
})
}

func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer) error {
return deployer.Install(ctx, helm.InstallOptions{
ReleaseName: "eg",
Chart: "oci://docker.io/envoyproxy/gateway-helm",
Namespace: "envoy-gateway-system",
Wait: true,
Timeout: "300s",
})
}

func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error {
config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig)
if err != nil {
return fmt.Errorf("failed to build kubeconfig: %w", err)
}

client, err := kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("failed to create kubernetes client: %w", err)
}

// Wait for semantic router deployment
p.log("Waiting for Semantic Router deployment...")
if err := p.waitForDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router"); err != nil {
return fmt.Errorf("semantic router deployment not ready: %w", err)
}

p.log("All components are ready")
return nil
}

func (p *Profile) waitForDeployment(ctx context.Context, client *kubernetes.Clientset, namespace, name string) error {
timeout := 5 * time.Minute
interval := 5 * time.Second
deadline := time.Now().Add(timeout)

for time.Now().Before(deadline) {
if err := helpers.CheckDeployment(ctx, client, namespace, name, p.verbose); err == nil {
return nil
}
time.Sleep(interval)
}

return fmt.Errorf("timeout waiting for deployment %s/%s", namespace, name)
}

func (p *Profile) log(msg string) {
if p.verbose {
fmt.Printf("[response-api] %s\n", msg)
}
}
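
For manual debugging outside the framework, the environment that `Setup` deploys can be reached with a port-forward derived from `GetServiceConfig`. A rough sketch, assuming the Envoy Gateway service carries the same owning-gateway labels as the selector above and that the first matching service is the right one:

```bash
# Locate the Envoy service created for the semantic-router Gateway using the same
# label selector as GetServiceConfig, then forward local 8080 to service port 80
# (mirroring the profile's "8080:80" PortMapping).
svc=$(kubectl -n envoy-gateway-system get svc \
  -l "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router" \
  -o jsonpath='{.items[0].metadata.name}')
kubectl -n envoy-gateway-system port-forward "svc/$svc" 8080:80 &

# Smoke-test the Response API through the gateway; "MoM" is the profile's defaultModel.
curl -s -X POST http://localhost:8080/v1/responses \
  -H "Content-Type: application/json" \
  -d '{"model": "MoM", "input": "ping", "store": true}' | jq '.id'
```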
82 changes: 82 additions & 0 deletions e2e/profiles/response-api/values.yaml
@@ -0,0 +1,82 @@
# Response API E2E Test Profile Values
# This configuration enables Response API for testing

replicaCount: 1

image:
repository: ghcr.io/vllm-project/semantic-router/extproc
tag: latest
pullPolicy: IfNotPresent

# Response API Configuration
responseApi:
enabled: true
storeBackend: "memory"
ttlSeconds: 86400
maxResponses: 1000

# Semantic Cache (required for some tests)
semanticCache:
enabled: true
backendType: "memory"
similarityThreshold: 0.8
maxEntries: 1000
ttlSeconds: 3600

# vLLM Endpoints - use mock backend for testing
vllmEndpoints:
- name: "test-endpoint"
address: "mock-vllm"
port: 8000
weight: 1

# Model configuration
modelConfig:
"MoM":
useReasoning: false
preferredEndpoints: ["test-endpoint"]

# Minimal classifier configuration
classifier:
categoryModel:
modelId: "models/all-MiniLM-L12-v2"
threshold: 0.6
useCpu: true

# Categories
categories:
- name: other
description: "General knowledge and miscellaneous topics"

# Strategy
strategy: "priority"

# Decisions
decisions:
- name: "default_decision"
description: "Default catch-all decision"
priority: 1
rules:
operator: "OR"
conditions:
- type: "domain"
name: "other"
modelRefs:
- model: "MoM"
useReasoning: false

defaultModel: "MoM"

# Service configuration
service:
type: ClusterIP
port: 8080

# Resources
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 100m
memory: 128Mi