87 changes: 87 additions & 0 deletions .github/workflows/integration-test-docker.yml
@@ -182,6 +182,93 @@ jobs:
echo "⚠️ Response may not contain expected fields, but request succeeded"
fi

- name: Test Response API - Create Response
run: |
echo "Testing Response API: POST /v1/responses..."

response=$(curl -s -X POST http://localhost:8801/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "qwen3",
"input": "What is 2 + 2?",
"store": true
}')

echo "Response: $response"

# Extract response ID for subsequent tests
response_id=$(echo "$response" | jq -r '.id // empty')
if [ -n "$response_id" ] && [[ "$response_id" == resp_* ]]; then
echo "✅ Response API create test passed (id=$response_id)"
echo "RESPONSE_ID=$response_id" >> $GITHUB_ENV
else
echo "❌ Response API create test failed - invalid or missing response ID"
exit 1
fi

- name: Test Response API - Get Response
run: |
echo "Testing Response API: GET /v1/responses/$RESPONSE_ID..."

response=$(curl -s -X GET "http://localhost:8801/v1/responses/$RESPONSE_ID" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify response ID matches
got_id=$(echo "$response" | jq -r '.id // empty')
if [ "$got_id" = "$RESPONSE_ID" ]; then
echo "✅ Response API get test passed"
else
echo "❌ Response API get test failed - ID mismatch (expected=$RESPONSE_ID, got=$got_id)"
exit 1
fi

- name: Test Response API - Get Input Items
run: |
echo "Testing Response API: GET /v1/responses/$RESPONSE_ID/input_items..."

response=$(curl -s -X GET "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify it's a list
object_type=$(echo "$response" | jq -r '.object // empty')
if [ "$object_type" = "list" ]; then
echo "✅ Response API input_items test passed"
else
echo "❌ Response API input_items test failed - expected object=list, got=$object_type"
exit 1
fi

- name: Test Response API - Delete Response
run: |
echo "Testing Response API: DELETE /v1/responses/$RESPONSE_ID..."

response=$(curl -s -X DELETE "http://localhost:8801/v1/responses/$RESPONSE_ID" \
-H "Content-Type: application/json")

echo "Response: $response"

# Verify deletion
deleted=$(echo "$response" | jq -r '.deleted // empty')
if [ "$deleted" = "true" ]; then
echo "✅ Response API delete test passed"
else
echo "❌ Response API delete test failed - expected deleted=true"
exit 1
fi

# Verify 404 on subsequent get
get_response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8801/v1/responses/$RESPONSE_ID")
if [ "$get_response" = "404" ]; then
echo "✅ Response API delete verification passed (404 on get)"
else
echo "❌ Response API delete verification failed - expected 404, got $get_response"
exit 1
fi

- name: Show service logs on failure
if: failure()
run: |
8 changes: 8 additions & 0 deletions config/config.yaml
@@ -3,6 +3,14 @@ bert_model:
threshold: 0.6
use_cpu: true

# Response API Configuration
# Enables OpenAI Response API support with conversation chaining
response_api:
enabled: true
store_backend: "memory" # Options: "memory", "milvus", "redis"
ttl_seconds: 86400 # 24 hours
max_responses: 1000

semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
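
For readers of the `response_api` block above: conversation chaining is normally exercised by creating a response and then referencing its ID in a follow-up request. A minimal sketch against the workflow's local endpoint (port 8801), assuming the router follows the OpenAI `previous_response_id` convention; that field is not shown anywhere else in this PR:

```bash
# Create an initial response; "store": true keeps it in the configured backend.
first=$(curl -s -X POST http://localhost:8801/v1/responses \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3", "input": "What is 2 + 2?", "store": true}')
first_id=$(echo "$first" | jq -r '.id')

# Chain a follow-up turn onto the stored response.
# NOTE: previous_response_id follows the OpenAI Responses API convention and is
# an assumption here; the workflow in this PR does not exercise chaining directly.
curl -s -X POST http://localhost:8801/v1/responses \
  -H "Content-Type: application/json" \
  -d "{\"model\": \"qwen3\", \"input\": \"Now multiply that by 3.\", \"previous_response_id\": \"$first_id\", \"store\": true}" | jq .
```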
10 changes: 10 additions & 0 deletions e2e/README.md
@@ -18,6 +18,7 @@ The framework follows a **separation of concerns** design:
- **istio**: Tests Semantic Router with Istio service mesh integration
- **production-stack**: Tests vLLM Production Stack configurations
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
- **response-api**: Tests Response API endpoints (POST/GET/DELETE /v1/responses)
- **dynamo**: Tests with Nvidia Dynamo (future)

## Directory Structure
@@ -82,6 +83,15 @@ The framework includes the following test cases (all in `e2e/testcases/`):
| `pii-detection` | PII detection and blocking | 10 PII types, detection rate, block rate |
| `jailbreak-detection` | Jailbreak attack detection | 10 attack types, detection rate, block rate |

### Response API Tests

| Test Case | Description | Metrics |
|-----------|-------------|---------|
| `response-api-create` | POST /v1/responses - Create a new response | Response ID validation, status check |
| `response-api-get` | GET /v1/responses/{id} - Retrieve a response | Response retrieval, ID matching |
| `response-api-delete` | DELETE /v1/responses/{id} - Delete a response | Deletion confirmation, 404 verification |
| `response-api-input-items` | GET /v1/responses/{id}/input_items - List input items | Input items list, pagination |
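
The `response-api-input-items` case lists a response's stored input items, and its metrics column mentions pagination. A minimal sketch of how that paging might be driven, assuming OpenAI-style `limit` and `after` query parameters, which this PR does not exercise directly:

```bash
# List input items for a stored response one page at a time.
# The CI workflow only checks that .object == "list"; limit/after are assumed
# to follow the OpenAI list-pagination convention.
page=$(curl -s "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items?limit=2")
echo "$page" | jq '{object, count: (.data | length), has_more}'

# If has_more is true, pass the last item's id as the cursor for the next page.
last_id=$(echo "$page" | jq -r '.data[-1].id')
curl -s "http://localhost:8801/v1/responses/$RESPONSE_ID/input_items?limit=2&after=$last_id" | jq '.data[].id'
```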

### Signal-Decision Engine Tests

| Test Case | Description | Metrics |
177 changes: 177 additions & 0 deletions e2e/profiles/response-api/profile.go
@@ -0,0 +1,177 @@
package responseapi

import (
"context"
"fmt"
"time"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"

"github.com/vllm-project/semantic-router/e2e/pkg/framework"
"github.com/vllm-project/semantic-router/e2e/pkg/helm"
"github.com/vllm-project/semantic-router/e2e/pkg/helpers"

// Import testcases package to register all test cases via their init() functions
_ "github.com/vllm-project/semantic-router/e2e/testcases"
)

// Profile implements the Response API test profile
type Profile struct {
verbose bool
kubeConfig string
}

// NewProfile creates a new Response API profile
func NewProfile() *Profile {
return &Profile{}
}

// Name returns the profile name
func (p *Profile) Name() string {
return "response-api"
}

// Description returns the profile description
func (p *Profile) Description() string {
return "Tests Response API endpoints (POST/GET/DELETE /v1/responses)"
}

// Setup deploys all required components for Response API testing
func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error {
p.verbose = opts.Verbose
p.kubeConfig = opts.KubeConfig
p.log("Setting up Response API test environment")

deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)

// Step 1: Deploy Semantic Router with Response API enabled
p.log("Step 1/3: Deploying Semantic Router with Response API")
if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil {
return fmt.Errorf("failed to deploy semantic router: %w", err)
}

// Step 2: Deploy Envoy Gateway
p.log("Step 2/3: Deploying Envoy Gateway")
if err := p.deployEnvoyGateway(ctx, deployer); err != nil {
return fmt.Errorf("failed to deploy envoy gateway: %w", err)
}

// Step 3: Verify all components are ready
p.log("Step 3/3: Verifying all components are ready")
if err := p.verifyEnvironment(ctx, opts); err != nil {
return fmt.Errorf("failed to verify environment: %w", err)
}

p.log("Response API test environment setup complete")
return nil
}

// Teardown cleans up all deployed resources
func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
p.verbose = opts.Verbose
p.log("Tearing down Response API test environment")

deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)

p.log("Uninstalling Envoy Gateway")
_ = deployer.Uninstall(ctx, "eg", "envoy-gateway-system")

p.log("Uninstalling Semantic Router")
_ = deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system")

p.log("Response API test environment teardown complete")
return nil
}

// GetTestCases returns the list of test cases for this profile
func (p *Profile) GetTestCases() []string {
return []string{
// Response API basic operations
"response-api-create",
"response-api-get",
"response-api-delete",
"response-api-input-items",
}
}

// GetServiceConfig returns the service configuration for accessing the deployed service
func (p *Profile) GetServiceConfig() framework.ServiceConfig {
return framework.ServiceConfig{
LabelSelector: "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router",
Namespace: "envoy-gateway-system",
PortMapping: "8080:80",
}
}

func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error {
imageTag := opts.ImageTag
if imageTag == "" {
imageTag = "latest"
}

return deployer.Install(ctx, helm.InstallOptions{
ReleaseName: "semantic-router",
Chart: "deploy/helm/semantic-router",
Namespace: "vllm-semantic-router-system",
ValuesFiles: []string{"e2e/profiles/response-api/values.yaml"},
Set: map[string]string{
"image.repository": "ghcr.io/vllm-project/semantic-router/extproc",
"image.tag": imageTag,
},
Wait: true,
Timeout: "300s",
})
}

func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer) error {
return deployer.Install(ctx, helm.InstallOptions{
ReleaseName: "eg",
Chart: "oci://docker.io/envoyproxy/gateway-helm",
Namespace: "envoy-gateway-system",
Wait: true,
Timeout: "300s",
})
}

func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOptions) error {
config, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfig)
if err != nil {
return fmt.Errorf("failed to build kubeconfig: %w", err)
}

client, err := kubernetes.NewForConfig(config)
if err != nil {
return fmt.Errorf("failed to create kubernetes client: %w", err)
}

// Wait for semantic router deployment
p.log("Waiting for Semantic Router deployment...")
if err := p.waitForDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router"); err != nil {
return fmt.Errorf("semantic router deployment not ready: %w", err)
}

p.log("All components are ready")
return nil
}

func (p *Profile) waitForDeployment(ctx context.Context, client *kubernetes.Clientset, namespace, name string) error {
timeout := 5 * time.Minute
interval := 5 * time.Second
deadline := time.Now().Add(timeout)

for time.Now().Before(deadline) {
if err := helpers.CheckDeployment(ctx, client, namespace, name, p.verbose); err == nil {
return nil
}
time.Sleep(interval)
}

return fmt.Errorf("timeout waiting for deployment %s/%s", namespace, name)
}

func (p *Profile) log(msg string) {
if p.verbose {
fmt.Printf("[response-api] %s\n", msg)
}
}
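
For manual debugging outside the framework, the environment that `Setup` deploys can be reached with a port-forward derived from `GetServiceConfig`. A rough sketch, assuming the Envoy Gateway service carries the same owning-gateway labels as the selector above and that the first matching service is the right one:

```bash
# Locate the Envoy service created for the semantic-router Gateway using the same
# label selector as GetServiceConfig, then forward local 8080 to service port 80
# (mirroring the profile's "8080:80" PortMapping).
svc=$(kubectl -n envoy-gateway-system get svc \
  -l "gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router" \
  -o jsonpath='{.items[0].metadata.name}')
kubectl -n envoy-gateway-system port-forward "svc/$svc" 8080:80 &

# Smoke-test the Response API through the gateway; "MoM" is the profile's defaultModel.
curl -s -X POST http://localhost:8080/v1/responses \
  -H "Content-Type: application/json" \
  -d '{"model": "MoM", "input": "ping", "store": true}' | jq '.id'
```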
82 changes: 82 additions & 0 deletions e2e/profiles/response-api/values.yaml
@@ -0,0 +1,82 @@
# Response API E2E Test Profile Values
# This configuration enables Response API for testing

replicaCount: 1

image:
repository: ghcr.io/vllm-project/semantic-router/extproc
tag: latest
pullPolicy: IfNotPresent

# Response API Configuration
responseApi:
enabled: true
storeBackend: "memory"
ttlSeconds: 86400
maxResponses: 1000

# Semantic Cache (required for some tests)
semanticCache:
enabled: true
backendType: "memory"
similarityThreshold: 0.8
maxEntries: 1000
ttlSeconds: 3600

# vLLM Endpoints - use mock backend for testing
vllmEndpoints:
- name: "test-endpoint"
address: "mock-vllm"
port: 8000
weight: 1

# Model configuration
modelConfig:
"MoM":
useReasoning: false
preferredEndpoints: ["test-endpoint"]

# Minimal classifier configuration
classifier:
categoryModel:
modelId: "models/all-MiniLM-L12-v2"
threshold: 0.6
useCpu: true

# Categories
categories:
- name: other
description: "General knowledge and miscellaneous topics"

# Strategy
strategy: "priority"

# Decisions
decisions:
- name: "default_decision"
description: "Default catch-all decision"
priority: 1
rules:
operator: "OR"
conditions:
- type: "domain"
name: "other"
modelRefs:
- model: "MoM"
useReasoning: false

defaultModel: "MoM"

# Service configuration
service:
type: ClusterIP
port: 8080

# Resources
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 100m
memory: 128Mi