red-hat-data-services
diff --git a/‎agents/base/langgraph_react_agent/README.md‎
Lines changed: 9 additions & 1 deletion b/‎agents/base/langgraph_react_agent/README.md‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎agents/base/langgraph_react_agent/main.py‎
Lines changed: 69 additions & 0 deletions b/‎agents/base/langgraph_react_agent/main.py‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎agents/base/llamaindex_websearch_agent/README.md‎
Lines changed: 20 additions & 0 deletions b/‎agents/base/llamaindex_websearch_agent/README.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎agents/base/llamaindex_websearch_agent/main.py‎
Lines changed: 65 additions & 0 deletions b/‎agents/base/llamaindex_websearch_agent/main.py‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎agents/base/openai_responses_agent/README.md‎
Lines changed: 19 additions & 0 deletions b/‎agents/base/openai_responses_agent/README.md‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎agents/base/openai_responses_agent/main.py‎
Lines changed: 76 additions & 1 deletion b/‎agents/base/openai_responses_agent/main.py‎
Lines changed: 76 additions & 1 deletion
diff --git a/‎agents/base/openai_responses_agent/src/openai_responses_agent_base/agent.py‎
Lines changed: 9 additions & 1 deletion b/‎agents/base/openai_responses_agent/src/openai_responses_agent_base/agent.py‎
Lines changed: 9 additions & 1 deletion
@@ -182,14 +182,22 @@ COPY the route URL and PASTE into the CURL below
 oc get route langgraph-react-agent -o jsonpath='{.spec.host}'
 ```
 
-Send a test request:
+Send a test request to the `/chat` endpoint:
 
 ```bash
 curl -X POST https://<YOUR_ROUTE_URL>/chat \
   -H "Content-Type: application/json" \
   -d '{"message": "What is the best company? Answer with the first correct answer."}'
 ```
 
+Send a test request to the `/stream` endpoint (the response is a Server-Sent Events stream):
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "What is the best company? Answer with the first correct answer."}'
+```
+
 ## Agent-Specific Documentation
 
 Each agent has detailed documentation for setup and deployment:
 
@@ -1,11 +1,15 @@
 import json
+import logging
 from contextlib import asynccontextmanager
 from os import getenv
 
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from pydantic import BaseModel
 
+logger = logging.getLogger(__name__)
+
 from langgraph_react_agent_base.agent import get_graph_closure
 
 
@@ -137,6 +141,71 @@ async def chat(request: ChatRequest):
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """
+    Stream the agent's response to a chat message as Server-Sent Events (SSE).
+
+    Event types:
+        - token: streamed text token from the LLM
+        - tool_call: tool invocation by the agent
+        - tool_result: result returned by a tool
+        - done: signals the stream is complete
+        - error: emitted in place of ``done`` when processing fails; the
+          payload is a generic message, the traceback goes to the log
+
+    Args:
+        request: ChatRequest containing the user message
+
+    Returns:
+        StreamingResponse with media type ``text/event-stream``.
+
+    Raises:
+        HTTPException: 503 if the agent graph has not been initialized.
+    """
+    global agent_graph
+
+    if agent_graph is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    def sse(event_type: str, payload: dict) -> str:
+        """Format one SSE frame: event name, JSON data line, blank-line terminator."""
+        return f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"
+
+    async def event_generator():
+        try:
+            messages = [HumanMessage(content=request.message)]
+
+            async for event in agent_graph.astream_events(
+                {"messages": messages},
+                config={"recursion_limit": 10},
+                version="v2",
+            ):
+                kind = event["event"]
+
+                # LLM streaming tokens
+                if kind == "on_chat_model_stream":
+                    chunk = event["data"]["chunk"]
+                    if chunk.content:
+                        yield sse("token", {"content": chunk.content})
+
+                # Complete tool call (after LLM finishes generating the call)
+                elif kind == "on_chat_model_end":
+                    message = event["data"]["output"]
+                    for tc in getattr(message, "tool_calls", None) or []:
+                        yield sse("tool_call", {"name": tc["name"], "args": tc["args"]})
+
+                # Tool execution results
+                elif kind == "on_tool_end":
+                    output = event["data"].get("output", "")
+                    # Extract content from ToolMessage if present
+                    if hasattr(output, "content"):
+                        output = output.content
+                    yield sse("tool_result", {"name": event.get("name", ""), "output": str(output)})
+
+            yield sse("done", {})
+
+        except Exception:
+            # The response has already started, so the HTTP status can no
+            # longer change; report the failure in-band without leaking details.
+            logger.exception("Error in stream event_generator")
+            yield sse("error", {"detail": "Internal server error"})
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )
+
+
 @app.get("/health")
 async def health():
     """Return service health and whether the agent graph has been initialized."""
 
@@ -183,12 +183,32 @@ oc get route llamaindex-websearch-agent -o jsonpath='{.spec.host}'
 
 Send a test request:
 
+`/chat` endpoint:
+
 ```bash
 curl -X POST https://<YOUR_ROUTE_URL>/chat \
   -H "Content-Type: application/json" \
   -d '{"message": "Which company is considered the best?"}'
 ```
 
+`/stream` endpoint
+
+Classic print (raw SSE output):
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "Which company is considered the best?"}'
+```
+
+Pretty Printed Stream
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "Which company is considered the best?"}' |
+   jq -R -r -j --stream 'scan("^data:(.*)")[] | fromjson.content // empty'
+```
+
 ---
 
 ## Agent-Specific Documentation
 
@@ -1,11 +1,16 @@
 import json
+import logging
 from contextlib import asynccontextmanager
 from os import getenv
 
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
 from llama_index_workflow_agent_base.agent import get_workflow_closure
+from llama_index_workflow_agent_base.workflow import ToolCallEvent, InputEvent
 from pydantic import BaseModel
 
+logger = logging.getLogger(__name__)
+
 
 # Request/Response models
 class ChatRequest(BaseModel):
@@ -189,6 +194,66 @@ async def chat(request: ChatRequest):
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """
+    Stream the agent's response to a chat message as Server-Sent Events (SSE).
+
+    Event types:
+        - tool_call: tool invocation by the agent
+        - tool_result: result returned by a tool
+        - token: final answer text (sent once, after the workflow finishes)
+        - done: signals the stream is complete
+        - error: emitted in place of ``done`` when processing fails; the
+          payload is a generic message, the traceback goes to the log
+
+    Args:
+        request: ChatRequest containing the user message
+
+    Returns:
+        StreamingResponse with media type ``text/event-stream``.
+
+    Raises:
+        HTTPException: 503 if the workflow closure has not been initialized.
+    """
+    global get_agent
+
+    if get_agent is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    def sse(event_type: str, payload: dict) -> str:
+        """Format one SSE frame: event name, JSON data line, blank-line terminator."""
+        return f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"
+
+    async def event_generator():
+        try:
+            agent = get_agent()
+            messages = [{"role": "user", "content": request.message}]
+
+            handler = agent.run(input=messages)
+
+            async for event in handler.stream_events():
+                if isinstance(event, ToolCallEvent):
+                    for tc in event.tool_calls:
+                        yield sse("tool_call", {"name": tc.tool_name, "args": tc.tool_kwargs})
+
+                elif isinstance(event, InputEvent):
+                    # Check if the last message is a tool result
+                    if event.input:
+                        last_msg = event.input[-1]
+                        if getattr(last_msg, "role", None) == "tool":
+                            additional = getattr(last_msg, "additional_kwargs", {}) or {}
+                            yield sse(
+                                "tool_result",
+                                {"name": additional.get("name", ""), "output": _get_message_content(last_msg)},
+                            )
+
+            # Awaiting the handler yields the workflow's final result.
+            result = await handler
+            # Extract final answer from the result
+            if result and "response" in result:
+                content = _get_message_content(result["response"].message)
+                if content:
+                    yield sse("token", {"content": content})
+
+            yield sse("done", {})
+
+        except Exception:
+            # The response has already started, so the HTTP status can no
+            # longer change; report the failure in-band without leaking details.
+            logger.exception("Error in stream event_generator")
+            yield sse("error", {"detail": "Internal server error"})
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )
+
+
 @app.get("/health")
 async def health():
     """Return service health and whether the workflow closure has been initialized."""
 
@@ -174,13 +174,32 @@ oc get route openai-responses-agent -o jsonpath='{.spec.host}'
 ```
 
 Send a test request:
+
+`/chat` endpoint:
 
 ```bash
 curl -X POST https://<YOUR_ROUTE_URL>/chat \
   -H "Content-Type: application/json" \
   -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}'
 ```
 
+`/stream` endpoint
+
+Classic print (raw SSE output):
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}'
+```
+
+Pretty Printed Stream
+
+```bash
+curl -X POST https://<YOUR_ROUTE_URL>/stream \
+  -H "Content-Type: application/json" \
+  -d '{"message": "How much does a Lenovo Laptop cost and what are the reviews?"}' |
+   jq -R -r -j --stream 'scan("^data:(.*)")[] | fromjson.content // empty'
+```
+
 ---
 
 ## Agent-Specific Documentation
 
@@ -1,10 +1,16 @@
+import asyncio
+import json
+import logging
 from contextlib import asynccontextmanager
 from os import getenv
 
 from fastapi import FastAPI, HTTPException
-from openai_responses_agent_base.agent import get_agent_closure
+from fastapi.responses import StreamingResponse
+from openai_responses_agent_base.agent import get_agent_closure, AIAgent
 from pydantic import BaseModel
 
+logger = logging.getLogger(__name__)
+
 
 # Request/Response models
 class ChatRequest(BaseModel):
@@ -81,6 +87,75 @@ async def chat(request: ChatRequest):
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """
+    Stream the agent's response to a chat message as Server-Sent Events (SSE).
+
+    The agent's ``query`` call is synchronous, so it runs in the default
+    executor and reports progress through an ``on_event`` callback. Events are
+    handed back to the event loop through an ``asyncio.Queue``.
+
+    Event types:
+        - tool_call: tool invocation by the agent
+        - tool_result: result returned by a tool (observation)
+        - token: final answer text
+        - done: signals the stream is complete
+        - error: emitted in place of ``done`` when processing fails; the
+          payload is a generic message, the traceback goes to the log
+
+    Args:
+        request: ChatRequest containing the user message
+
+    Returns:
+        StreamingResponse with media type ``text/event-stream``.
+
+    Raises:
+        HTTPException: 503 if the agent closure has not been initialized.
+    """
+    global get_agent
+
+    if get_agent is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    def sse(event_type: str, payload: dict) -> str:
+        """Format one SSE frame: event name, JSON data line, blank-line terminator."""
+        return f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"
+
+    async def event_generator():
+        try:
+            loop = asyncio.get_running_loop()
+            queue: asyncio.Queue = asyncio.Queue()
+            # Unique marker the worker enqueues when it is finished, so the
+            # consumer can block on queue.get() instead of polling.
+            finished = object()
+
+            def on_event(event_type: str, data: dict):
+                # Runs in the executor thread. asyncio.Queue is not
+                # thread-safe, so the put must be scheduled on the loop.
+                loop.call_soon_threadsafe(queue.put_nowait, (event_type, data))
+
+            def run_agent():
+                try:
+                    adapter = get_agent()
+                    agent = AIAgent(
+                        model=adapter._model_id,
+                        base_url=adapter._base_url,
+                        api_key=adapter._api_key,
+                    )
+                    for name, func in adapter._tools:
+                        agent.register_tool(name, func)
+                    return agent.query(request.message, on_event=on_event)
+                finally:
+                    # Always wake the consumer, including on failure; the
+                    # exception itself is re-raised by `await task` below.
+                    loop.call_soon_threadsafe(queue.put_nowait, finished)
+
+            task = loop.run_in_executor(None, run_agent)
+
+            # Callbacks scheduled on the loop run in FIFO order, so every
+            # event is delivered before the sentinel.
+            while True:
+                item = await queue.get()
+                if item is finished:
+                    break
+                event_type, data = item
+                yield sse(event_type, data)
+
+            answer = await task
+            if answer:
+                yield sse("token", {"content": answer})
+
+            yield sse("done", {})
+
+        except Exception:
+            # The response has already started, so the HTTP status can no
+            # longer change; report the failure in-band without leaking details.
+            logger.exception("Error in stream event_generator")
+            yield sse("error", {"detail": "Internal server error"})
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
+    )
+
+
 @app.get("/health")
 async def health():
     """Return service health and whether the agent has been initialized."""
 
@@ -233,13 +233,14 @@ def _execute(self) -> str:
         )
         return _get_output_text_from_response(response)
 
-    def query(self, question: str, max_turns: int = 10) -> Optional[str]:
+    def query(self, question: str, max_turns: int = 10, on_event: Optional[Callable] = None) -> Optional[str]:
         """
         Process a question through multiple turns until getting final answer.
 
         Args:
             question: The input question to process.
             max_turns: Maximum number of turns before timing out.
+            on_event: Optional callback(event_type, data) for streaming events.
 
         Returns:
             The final answer or None if no answer found.
@@ -268,11 +269,18 @@ def query(self, question: str, max_turns: int = 10) -> Optional[str]:
                     action, args_str = actions[0].groups()
                     action_inputs = self._parse_arguments(args_str)
 
+                    if on_event:
+                        on_event("tool_call", {"name": action, "args": action_inputs})
+
                     tool = self.tools.get(action)
                     if not tool:
                         raise ValueError(f"Unknown action: {action}")
 
                     observation = tool(*action_inputs)
+
+                    if on_event:
+                        on_event("tool_result", {"name": action, "output": str(observation)})
+
                     next_prompt = f"Observation: {observation}"
                 else:
                     # No Action: line – treat the whole response as the final answer