e2e: add Response API basic operations tests

tao12345666333 · tao12345666333 · commit a19c6299b251 · 2025-12-14T22:06:20.000+08:00
Add E2E tests for Response API basic operations:
- POST /v1/responses - Create a new response
- GET /v1/responses/{id} - Retrieve a response
- DELETE /v1/responses/{id} - Delete a response
- GET /v1/responses/{id}/input_items - List input items

Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
diff --git a/e2e/README.md b/e2e/README.md
@@ -18,6 +18,7 @@ The framework follows a **separation of concerns** design:
 - **istio**: Tests Semantic Router with Istio service mesh integration
 - **production-stack**: Tests vLLM Production Stack configurations
 - **llm-d**: Tests Semantic Router with LLM-D distributed inference
+- **response-api**: Tests Response API endpoints (POST/GET/DELETE /v1/responses)
 - **dynamo**: Tests with Nvidia Dynamo (future)
 
 ## Directory Structure
@@ -82,6 +83,15 @@ The framework includes the following test cases (all in `e2e/testcases/`):
 | `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate |
 | `jailbreak-detection` | Jailbreak attack detection | 10 attack types, detection rate, block rate |
 
+### Response API Tests
+
+| Test Case | Description | Metrics |
+|-----------|-------------|---------|
+| `response-api-create` | POST /v1/responses - Create a new response | Response ID validation, status check |
+| `response-api-get` | GET /v1/responses/{id} - Retrieve a response | Response retrieval, ID matching |
+| `response-api-delete` | DELETE /v1/responses/{id} - Delete a response | Deletion confirmation, 404 verification |
+| `response-api-input-items` | GET /v1/responses/{id}/input_items - List input items | Input items list, pagination |
+
 ### Signal-Decision Engine Tests
 
 | Test Case | Description | Metrics |
diff --git a/e2e/profiles/response-api/profile.go b/e2e/profiles/response-api/profile.go
@@ -0,0 +1,177 @@
+package responseapi
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+
+	"github.com/vllm-project/semantic-router/e2e/pkg/framework"
+	"github.com/vllm-project/semantic-router/e2e/pkg/helm"
+	"github.com/vllm-project/semantic-router/e2e/pkg/helpers"
+
+	// Import testcases package to register all test cases via their init() functions
+	_ "github.com/vllm-project/semantic-router/e2e/testcases"
+)
+
+// Profile implements the Response API test profile
+type Profile struct {
+	verbose    bool   // copied from the Setup/Teardown options; gates output in log
+	kubeConfig string // copied from SetupOptions.KubeConfig during Setup
+}
+
+// NewProfile returns a zero-valued Response API profile; its fields are
+// populated later from the options passed to Setup and Teardown.
+func NewProfile() *Profile {
+	return new(Profile)
+}
+
+// Name returns the profile name
+func (p *Profile) Name() string {
+	return "response-api"
+}
+
+// Description returns the profile description
+func (p *Profile) Description() string {
+	return "Tests Response API endpoints (POST/GET/DELETE /v1/responses)"
+}
+
+// Setup prepares the cluster for the Response API tests. It records the
+// verbosity and kubeconfig from opts, then runs three steps in order:
+// install Semantic Router, install Envoy Gateway, and verify the environment.
+// The first failing step aborts setup and its error is returned wrapped.
+func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error {
+	p.verbose, p.kubeConfig = opts.Verbose, opts.KubeConfig
+	p.log("Setting up Response API test environment")
+
+	deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
+
+	// Each step is announced, executed, and on failure wrapped with its own message.
+	steps := []struct {
+		announce string
+		errFmt   string
+		run      func() error
+	}{
+		{
+			announce: "Step 1/3: Deploying Semantic Router with Response API",
+			errFmt:   "failed to deploy semantic router: %w",
+			run:      func() error { return p.deploySemanticRouter(ctx, deployer, opts) },
+		},
+		{
+			announce: "Step 2/3: Deploying Envoy Gateway",
+			errFmt:   "failed to deploy envoy gateway: %w",
+			run:      func() error { return p.deployEnvoyGateway(ctx, deployer) },
+		},
+		{
+			announce: "Step 3/3: Verifying all components are ready",
+			errFmt:   "failed to verify environment: %w",
+			run:      func() error { return p.verifyEnvironment(ctx, opts) },
+		},
+	}
+
+	for _, step := range steps {
+		p.log(step.announce)
+		if err := step.run(); err != nil {
+			return fmt.Errorf(step.errFmt, err)
+		}
+	}
+
+	p.log("Response API test environment setup complete")
+	return nil
+}
+
+// Teardown removes the Helm releases installed by Setup: Envoy Gateway first,
+// then Semantic Router. Cleanup is best-effort: an uninstall failure is
+// reported through the verbose log instead of being silently discarded, the
+// remaining release is still removed, and Teardown always returns nil.
+func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
+	p.verbose = opts.Verbose
+	p.log("Tearing down Response API test environment")
+
+	deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
+
+	p.log("Uninstalling Envoy Gateway")
+	if err := deployer.Uninstall(ctx, "eg", "envoy-gateway-system"); err != nil {
+		// Keep going: a missing or half-installed release must not block the rest of the cleanup.
+		p.log(fmt.Sprintf("Failed to uninstall Envoy Gateway (continuing): %v", err))
+	}
+
+	p.log("Uninstalling Semantic Router")
+	if err := deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system"); err != nil {
+		p.log(fmt.Sprintf("Failed to uninstall Semantic Router (continuing): %v", err))
+	}
+
+	p.log("Response API test environment teardown complete")
+	return nil
+}
+
+// GetTestCases lists, in order, the names of the test cases run under this profile.
+func (p *Profile) GetTestCases() []string {
+	// Response API basic operations
+	basicOps := [...]string{
+		"response-api-create",
+		"response-api-get",
+		"response-api-delete",
+		"response-api-input-items",
+	}
+	return basicOps[:]
+}
+
+// GetServiceConfig returns the service configuration for accessing the deployed service
+func (p *Profile) GetServiceConfig() framework.ServiceConfig {
+	return framework.ServiceConfig{
+		LabelSelector: "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router",
+		Namespace:     "envoy-gateway-system",
+		PortMapping:   "8080:80",
+	}
+}
+
+// deploySemanticRouter installs the "semantic-router" Helm release from the
+// in-repo chart using this profile's values file, overriding the image
+// repository and tag. The tag comes from opts.ImageTag and falls back to
+// "latest" when that is empty.
+func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error {
+	tag := "latest"
+	if opts.ImageTag != "" {
+		tag = opts.ImageTag
+	}
+
+	overrides := map[string]string{
+		"image.repository": "ghcr.io/vllm-project/semantic-router/extproc",
+		"image.tag":        tag,
+	}
+
+	release := helm.InstallOptions{
+		ReleaseName: "semantic-router",
+		Chart:       "deploy/helm/semantic-router",
+		Namespace:   "vllm-semantic-router-system",
+		ValuesFiles: []string{"e2e/profiles/response-api/values.yaml"},
+		Set:         overrides,
+		Wait:        true,
+		Timeout:     "300s",
+	}
+	return deployer.Install(ctx, release)
+}
+
+// deployEnvoyGateway installs the "eg" Helm release from the Envoy Gateway OCI
+// chart into the envoy-gateway-system namespace and returns the install error, if any.
+func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer) error {
+	release := helm.InstallOptions{
+		ReleaseName: "eg",
+		Chart:       "oci://docker.io/envoyproxy/gateway-helm",
+		Namespace:   "envoy-gateway-system",
+		Wait:        true,
+		Timeout:     "300s",
+	}
+	return deployer.Install(ctx, release)
+}
+
+func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error {
+	config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig)
+	if err != nil {
+		return fmt.Errorf("failed to build kubeconfig: %w", err)
+	}
+
+	client, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("failed to create kubernetes client: %w", err)
+	}
+
+	// Wait for semantic router deployment
+	p.log("Waiting for Semantic Router deployment...")
+	if err := p.waitForDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router"); err != nil {
+		return fmt.Errorf("semantic router deployment not ready: %w", err)
+	}
+
+	p.log("All components are ready")
+	return nil
+}
+
+func (p *Profile) waitForDeployment(ctx context.Context, client *kubernetes.Clientset, namespace, name string) error {
+	timeout := 5 * time.Minute
+	interval := 5 * time.Second
+	deadline := time.Now().Add(timeout)
+
+	for time.Now().Before(deadline) {
+		if err := helpers.CheckDeployment(ctx, client, namespace, name, p.verbose); err == nil {
+			return nil
+		}
+		time.Sleep(interval)
+	}
+
+	return fmt.Errorf("timeout waiting for deployment %s/%s", namespace, name)
+}
+
+// log prints msg to standard output with a "[response-api]" prefix, but only
+// when the profile is in verbose mode; otherwise it does nothing.
+func (p *Profile) log(msg string) {
+	if !p.verbose {
+		return
+	}
+	fmt.Printf("[response-api] %s\n", msg)
+}
diff --git a/e2e/profiles/response-api/values.yaml b/e2e/profiles/response-api/values.yaml
@@ -0,0 +1,82 @@
+# Response API E2E Test Profile Values
+# This configuration enables Response API for testing
+
+replicaCount: 1
+
+image:
+  repository: ghcr.io/vllm-project/semantic-router/extproc
+  tag: latest
+  pullPolicy: IfNotPresent
+
+# Response API Configuration
+responseApi:
+  enabled: true
+  storeBackend: "memory"
+  ttlSeconds: 86400
+  maxResponses: 1000
+
+# Semantic Cache (required for some tests)
+semanticCache:
+  enabled: true
+  backendType: "memory"
+  similarityThreshold: 0.8
+  maxEntries: 1000
+  ttlSeconds: 3600
+
+# vLLM Endpoints - use mock backend for testing
+vllmEndpoints:
+  - name: "test-endpoint"
+    address: "mock-vllm"
+    port: 8000
+    weight: 1
+
+# Model configuration
+modelConfig:
+  "MoM":
+    useReasoning: false
+    preferredEndpoints: ["test-endpoint"]
+
+# Minimal classifier configuration
+classifier:
+  categoryModel:
+    modelId: "models/all-MiniLM-L12-v2"
+    threshold: 0.6
+    useCpu: true
+
+# Categories
+categories:
+  - name: other
+    description: "General knowledge and miscellaneous topics"
+
+# Strategy
+strategy: "priority"
+
+# Decisions
+decisions:
+  - name: "default_decision"
+    description: "Default catch-all decision"
+    priority: 1
+    rules:
+      operator: "OR"
+      conditions:
+        - type: "domain"
+          name: "other"
+    modelRefs:
+      - model: "MoM"
+        useReasoning: false
+
+defaultModel: "MoM"
+
+# Service configuration
+service:
+  type: ClusterIP
+  port: 8080
+
+# Resources
+resources:
+  limits:
+    cpu: 500m
+    memory: 512Mi
+  requests:
+    cpu: 100m
+    memory: 128Mi
diff --git a/e2e/testcases/response_api_basic.go b/e2e/testcases/response_api_basic.go