katanemo · adilhafeez · Mar 20, 2025
diff --git a/demos/use_cases/llm_routing/parallel_calls/README.md b/demos/use_cases/llm_routing/parallel_calls/README.md
@@ -0,0 +1 @@
+This demo shows how to make parallel calls to the Arch gateway using Python.
diff --git a/demos/use_cases/llm_routing/parallel_calls/main.py b/demos/use_cases/llm_routing/parallel_calls/main.py
@@ -0,0 +1,47 @@
+import asyncio
+import aiohttp
+import time
+
+# Chat-completions endpoint that every request in this demo is POSTed to.
+# NOTE(review): presumably the locally running arch gateway listener — confirm host/port.
+API_URL = "http://localhost:12000/v1/chat/completions"
+
+
+async def fetch_response(
+    session: aiohttp.ClientSession, prompt: str
+) -> tuple[str, dict, float]:
+    """Send one chat-completion request and measure how long it takes.
+
+    Args:
+        session: Open aiohttp session used to issue the POST.
+        prompt: Text sent as the single user message of the request.
+
+    Returns:
+        A ``(prompt, result, elapsed_time)`` tuple: the prompt that was sent,
+        the decoded JSON body of the reply, and the number of seconds between
+        issuing the request and finishing reading the body.
+
+    Note:
+        The HTTP status is not checked here; whatever JSON body the server
+        sends back is returned as-is.
+    """
+    headers = {
+        "Content-Type": "application/json",
+    }
+    payload = {"messages": [{"role": "user", "content": prompt}]}
+
+    # A monotonic clock is unaffected by wall-clock adjustments, so the
+    # difference of two readings is a reliable duration.
+    start_time = time.monotonic()
+    async with session.post(API_URL, json=payload, headers=headers) as response:
+        result = await response.json()
+        elapsed_time = time.monotonic() - start_time
+        return prompt, result, elapsed_time
+
+
+async def main():
+    """Send every demo prompt concurrently and print each reply as it completes.
+
+    All requests share one aiohttp session. Replies are printed in completion
+    order, not prompt order, together with the measured request time and the
+    model name reported in the payload.
+    """
+    prompts = [
+        "Hello!",
+        "Tell me a joke.",
+        "Who was the president of the United States in the 1990?",
+    ]
+
+    async with aiohttp.ClientSession() as session:
+        # Explicit Task objects can be cancelled and drained if one request fails.
+        tasks = [
+            asyncio.create_task(fetch_response(session, prompt)) for prompt in prompts
+        ]
+
+        try:
+            for completed in asyncio.as_completed(tasks):
+                prompt, result, elapsed_time = await completed
+                print("user prompt: ", prompt)
+                # A payload without "choices" (e.g. an error body) must not
+                # abort the remaining replies.
+                choices = result.get("choices") or []
+                if choices:
+                    resp = choices[0].get("message", {}).get("content", "")
+                    print("assistant response: ", resp)
+                else:
+                    print("assistant response unavailable, raw payload: ", result)
+                print(
+                    f"logs: request time: {elapsed_time:.3f}s, model name: {result.get('model', '')}"
+                )
+                print()
+        finally:
+            # Cancelling a finished task is a no-op; unfinished ones are stopped
+            # and drained here so none is still using the session when it closes.
+            for task in tasks:
+                task.cancel()
+            await asyncio.gather(*tasks, return_exceptions=True)
+
+
+# Run the demo only when executed as a script, so importing this module
+# does not fire network requests.
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/demos/use_cases/llm_routing/parallel_calls/pyproject.toml b/demos/use_cases/llm_routing/parallel_calls/pyproject.toml
@@ -0,0 +1,9 @@
+[project]
+name = "parallel-calls"
+version = "0.1.0"
+description = "Demo of making parallel chat-completion calls against the Arch gateway with asyncio and aiohttp"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "aiohttp>=3.11.14",
+]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		This demo shows how to make parallel calls to the Arch gateway using Python.