bare api agent chat stream deploy works

MRGuziX · MRGuziX · commit a7bc7f9b87ff · 2026-03-06T15:02:45.000+01:00
diff --git a/agents/base/openai_responses_agent/README.md b/agents/base/openai_responses_agent/README.md
@@ -174,13 +174,32 @@ oc get route openai-responses-agent -o jsonpath='{.spec.host}'
 ```
 
 Send a test request:
+#### `/chat` endpoint
 
 ```bash
 curl -X POST https://<YOUR_ROUTE_URL>/chat \
   -H "Content-Type: application/json" \
   -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}'
 ```
 
+#### `/stream` endpoint
+Raw SSE output:
+
+```bash
+curl -N -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}'
+```
+
+Pretty-printed stream (requires `jq`; prints only the final answer text):
+
+```bash
+curl -N -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}' |
+   jq -R -r -j --stream --unbuffered 'scan("^data:(.*)")[] | fromjson.content // empty'
+```
+
 ---
 
 ## Agent-Specific Documentation
diff --git a/agents/base/openai_responses_agent/main.py b/agents/base/openai_responses_agent/main.py
@@ -1,10 +1,16 @@
+import asyncio
+import json
+import logging
 from contextlib import asynccontextmanager
 from os import getenv
 
 from fastapi import FastAPI, HTTPException
-from openai_responses_agent_base.agent import get_agent_closure
+from fastapi.responses import StreamingResponse
+from openai_responses_agent_base.agent import get_agent_closure, AIAgent
 from pydantic import BaseModel
 
+logger = logging.getLogger(__name__)
+
 
 # Request/Response models
 class ChatRequest(BaseModel):
@@ -81,6 +87,75 @@ async def chat(request: ChatRequest):
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """
+    Streaming chat endpoint that accepts a message and returns the agent's
+    response as Server-Sent Events (SSE).
+
+    Event types:
+        - tool_call: tool invocation by the agent
+        - tool_result: result returned by a tool (observation)
+        - token: final answer text
+        - done: signals the stream is complete
+        - error: the agent run or the stream itself failed
+
+    Args:
+        request: ChatRequest containing the user message
+    """
+    if get_agent is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    async def event_generator():
+        try:
+            loop = asyncio.get_running_loop()
+            queue: asyncio.Queue = asyncio.Queue()
+
+            def on_event(event_type: str, data: dict):
+                # Called from the executor thread; asyncio.Queue is not thread-safe.
+                loop.call_soon_threadsafe(queue.put_nowait, (event_type, data))
+
+            def run_agent():
+                adapter = get_agent()
+                agent = AIAgent(
+                    model=adapter._model_id,
+                    base_url=adapter._base_url,
+                    api_key=adapter._api_key,
+                )
+                for name, func in adapter._tools:
+                    agent.register_tool(name, func)
+                return agent.query(request.message, on_event=on_event)
+
+            task = loop.run_in_executor(None, run_agent)  # query() is synchronous
+            while not task.done():
+                try:
+                    event_type, data = await asyncio.wait_for(queue.get(), timeout=0.1)
+                    yield f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+                except asyncio.TimeoutError:
+                    continue
+
+            # Drain remaining events
+            while not queue.empty():
+                event_type, data = queue.get_nowait()
+                yield f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+
+            answer = task.result()
+            if answer:
+                yield f"event: token\ndata: {json.dumps({'content': answer})}\n\n"
+
+            yield "event: done\ndata: {}\n\n"
+
+        except Exception:
+            logger.exception("Error in stream event_generator")
+            yield f"event: error\ndata: {json.dumps({'detail': 'Internal server error'})}\n\n"
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )
+
+
 @app.get("/health")
 async def health():
     """Return service health and whether the agent has been initialized."""
diff --git a/agents/base/openai_responses_agent/src/openai_responses_agent_base/agent.py b/agents/base/openai_responses_agent/src/openai_responses_agent_base/agent.py
@@ -233,13 +233,14 @@ def _execute(self) -> str:
         )
         return _get_output_text_from_response(response)
 
-    def query(self, question: str, max_turns: int = 10) -> Optional[str]:
+    def query(self, question: str, max_turns: int = 10, on_event: Optional[Callable] = None) -> Optional[str]:
         """
         Process a question through multiple turns until getting final answer.
 
         Args:
             question: The input question to process.
             max_turns: Maximum number of turns before timing out.
+            on_event: Optional callback(event_type, data) for streaming events.
 
         Returns:
             The final answer or None if no answer found.
@@ -268,11 +269,18 @@ def query(self, question: str, max_turns: int = 10) -> Optional[str]:
                     action, args_str = actions[0].groups()
                     action_inputs = self._parse_arguments(args_str)
 
+                    if on_event:
+                        on_event("tool_call", {"name": action, "args": action_inputs})
+
                     tool = self.tools.get(action)
                     if not tool:
                         raise ValueError(f"Unknown action: {action}")
 
                     observation = tool(*action_inputs)
+
+                    if on_event:
+                        on_event("tool_result", {"name": action, "output": str(observation)})
+
                     next_prompt = f"Observation: {observation}"
                 else:
                     # No Action: line – treat the whole response as the final answer