ai-on-eks/blueprints/gateways/envoy-ai-gateway/multi-model-routing/client.py at main · devfloor9/ai-on-eks · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
"""
Multi-Model AI Gateway Test Client
Tests self-hosted models (Qwen3, GPT-OSS) and AWS Bedrock Claude
"""

import requests
import json
import subprocess
import sys

def get_gateway_url():
    """Auto-detect the AI Gateway URL by querying the cluster with kubectl.

    Runs ``kubectl get gateway ai-gateway`` and reads the first entry of the
    Gateway's ``status.addresses``.

    Returns:
        ``"http://<address>"`` when kubectl reports an address; ``None`` when
        kubectl succeeds but reports no address; the
        ``"http://localhost:8080"`` fallback when kubectl is missing, cannot
        be executed, exits non-zero, or does not answer within the timeout.
    """
    fallback_url = "http://localhost:8080"
    try:
        result = subprocess.run(
            [
                'kubectl', 'get', 'gateway', 'ai-gateway',
                '-o', 'jsonpath={.status.addresses[0].value}',
            ],
            capture_output=True,
            text=True,
            check=True,
            # Bound the call so an unreachable cluster cannot hang the script.
            timeout=30,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as e:
        # OSError covers a missing or non-executable kubectl binary
        # (FileNotFoundError / PermissionError), which would otherwise crash
        # the script instead of using the fallback.
        print(f"Error getting gateway URL: {e}")
        return fallback_url

    address = result.stdout.strip()
    if not address:
        print("Gateway address not found. Make sure the Gateway is deployed and has an address.")
        return None
    return f"http://{address}"

def test_qwen3_model(gateway_url):
    """Test the Qwen3 model via /v1/chat/completions.

    Posts a single short chat request with the ``x-ai-eg-model`` header set
    to the Qwen3 model name and prints the outcome.

    Args:
        gateway_url: Base URL of the gateway; the request path is appended
            directly to it.

    Returns:
        True when the gateway answers HTTP 200 with a JSON body, False on any
        other status, on a request error, or when the body is not valid JSON.
    """
    print("=== Testing Qwen3 1.7B ===")
    try:
        response = requests.post(
            f"{gateway_url}/v1/chat/completions",
            headers={
                'Content-Type': 'application/json',
                'x-ai-eg-model': 'Qwen/Qwen3-1.7B'
            },
            json={
                'model': 'Qwen/Qwen3-1.7B',
                'max_tokens': 50,
                'messages': [{'role': 'user', 'content': 'Hello from Qwen3!'}]
            },
            timeout=30
        )

        print(f"Status Code: {response.status_code}")
        if response.status_code != 200:
            print(f"❌ ERROR: Qwen3 - {response.text}")
            return False

        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: older requests releases raise a plain JSON decode error
        # that is not a RequestException.
        print(f"❌ ERROR: Qwen3 - {e}")
        return False

    # A 200 response may still carry an empty 'choices' list or a null
    # 'content'; neither should crash the test run.
    try:
        content = data['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        content = None
    if content is None:
        content = 'No content'

    print(f"✅ SUCCESS: Qwen3 - {str(content)[:100]}...")
    return True

def test_gpt_model(gateway_url):
    """Test the self-hosted GPT model via /v1/chat/completions.

    Posts a single short chat request with the ``x-ai-eg-model`` header set
    to the GPT-OSS model name and prints the outcome.

    Args:
        gateway_url: Base URL of the gateway; the request path is appended
            directly to it.

    Returns:
        True when the gateway answers HTTP 200 with a JSON body, False on any
        other status, on a request error, or when the body is not valid JSON.
    """
    print("\n=== Testing Self-hosted GPT ===")
    try:
        response = requests.post(
            f"{gateway_url}/v1/chat/completions",
            headers={
                'Content-Type': 'application/json',
                'x-ai-eg-model': 'openai/gpt-oss-20b'
            },
            json={
                'model': 'openai/gpt-oss-20b',
                'max_tokens': 50,
                'messages': [{'role': 'user', 'content': 'Hello from GPT!'}]
            },
            timeout=30
        )

        print(f"Status Code: {response.status_code}")
        if response.status_code != 200:
            print(f"❌ ERROR: GPT - {response.text}")
            return False

        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: older requests releases raise a plain JSON decode error
        # that is not a RequestException.
        print(f"❌ ERROR: GPT - {e}")
        return False

    # A 200 response may still carry an empty 'choices' list or a null
    # 'content'; neither should crash the test run.
    try:
        content = data['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        content = None
    if content is None:
        content = 'No content'

    print(f"✅ SUCCESS: GPT - {str(content)[:100]}...")
    return True

def test_bedrock_claude(gateway_url):
    """Test Bedrock Claude via /anthropic/v1/messages.

    Posts a single short messages request with the ``x-ai-eg-model`` and
    ``anthropic-version`` headers set and prints the outcome.

    Args:
        gateway_url: Base URL of the gateway; the request path is appended
            directly to it.

    Returns:
        True when the gateway answers HTTP 200 with a JSON body, False on any
        other status, on a request error, or when the body is not valid JSON.
    """
    print("\n=== Testing Bedrock Claude ===")
    try:
        response = requests.post(
            f"{gateway_url}/anthropic/v1/messages",
            headers={
                'Content-Type': 'application/json',
                'x-ai-eg-model': 'anthropic.claude-3-haiku-20240307-v1:0',
                'anthropic-version': 'bedrock-2023-05-31'
            },
            json={
                'model': 'anthropic.claude-3-haiku-20240307-v1:0',
                'max_tokens': 50,
                'messages': [{'role': 'user', 'content': 'Hello from Bedrock!'}]
            },
            timeout=30
        )

        print(f"Status Code: {response.status_code}")
        if response.status_code != 200:
            print(f"❌ ERROR: Bedrock Claude - {response.text}")
            return False

        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: older requests releases raise a plain JSON decode error
        # that is not a RequestException.
        print(f"❌ ERROR: Bedrock Claude - {e}")
        return False

    # A 200 response may carry an empty 'content' list or a first block
    # without 'text'; neither should crash the test run.
    try:
        content = data['content'][0]['text']
    except (KeyError, IndexError, TypeError):
        content = None
    if content is None:
        content = 'No content'

    print(f"✅ SUCCESS: Bedrock Claude - {str(content)[:100]}...")
    return True

def main():
    """Run every model check against the detected gateway and print a report.

    Exits with status 1 when no gateway URL can be determined or when every
    check fails; exits with status 0 as soon as at least one check passed.
    """
    divider = "=" * 60
    print("🚀 AI Gateway Multi-Model Routing Test")
    print(divider)

    gateway_url = get_gateway_url()
    if not gateway_url:
        print("❌ Could not determine Gateway URL. Exiting.")
        sys.exit(1)

    print(f"Gateway URL: {gateway_url}")

    # (report label, check function) pairs, executed in this order.
    checks = (
        ("Qwen3 1.7B", test_qwen3_model),
        ("GPT OSS 20B", test_gpt_model),
        ("Bedrock Claude", test_bedrock_claude),
    )
    outcomes = [check(gateway_url) for _, check in checks]

    print("\n" + divider)
    print("🎯 Final Results:")
    for (label, _), ok in zip(checks, outcomes):
        verdict = '✅ PASS' if ok else '❌ FAIL'
        print(f"• {label}: {verdict}")

    passed = sum(outcomes)
    print(f"\n📊 Summary: {passed}/3 models working")
    print("📋 Routing: Header-based using 'x-ai-eg-model'")
    print("🔗 All models accessible through single Gateway endpoint")

    if passed <= 0:
        print("\n❌ All tests failed")
        sys.exit(1)

    print(f"\n🎉 SUCCESS! {passed} model(s) working through AI Gateway!")
    sys.exit(0)

# Script entry point: run the checks only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()