red-hat-data-services
diff --git a/agents/base/crewai_websearch_agent/README.md b/agents/base/crewai_websearch_agent/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/agents/base/llamaindex_websearch_agent/README.md b/agents/base/llamaindex_websearch_agent/README.md
Lines changed: 9 additions & 10 deletions
diff --git a/agents/base/llamaindex_websearch_agent/main.py b/agents/base/llamaindex_websearch_agent/main.py
Lines changed: 163 additions & 58 deletions
@@ -177,7 +177,7 @@ Send a test request:
 Non-streaming
 
 ```bash
-curl -X POST https://crewai-websearch-agent-tguzik-agents.apps.rosa.ai-eng-gpu.socc.p3.openshiftapps.com/chat/completions \
+curl -X POST https://<YOUR_ROUTE_URL>/chat/completions \
   -H "Content-Type: application/json" \
   -d '{"messages": [{"role": "user", "content": "What is the best cluster hosting service?"}], "stream": false}'
 ```
 
@@ -183,30 +183,29 @@ oc get route llamaindex-websearch-agent -o jsonpath='{.spec.host}'
 
 Send a test request:
 
-/chat endpoint
+Non-streaming
 
 ```bash
-curl -X POST https://<YOUR_ROUTE_URL>/chat \
+curl -X POST https://<YOUR_ROUTE_URL>/chat/completions \
   -H "Content-Type: application/json" \
-  -d '{"message": "Which company is consider the best?"}'
+  -d '{"messages": [{"role": "user", "content": "Which company is considered the best?"}], "stream": false}'
 ```
 
-/stream endpoint
-Classic Print
+Streaming
 
 ```bash
-curl -X POST https://<YOUR_ROUTE_URL>/stream \
+curl -X POST https://<YOUR_ROUTE_URL>/chat/completions \
   -H "Content-Type: application/json" \
-  -d '{"message": "Which company is consider the best?"}'
+  -d '{"messages": [{"role": "user", "content": "Which company is considered the best?"}], "stream": true}'
 ```
 
 Pretty Printed Stream
 
 ```bash
-curl -X POST https://<YOUR_ROUTE_URL>/stream \
+curl -X POST https://<YOUR_ROUTE_URL>/chat/completions \
   -H "Content-Type: application/json" \
-  -d '{"message": "Which company is consider the best?"}' |
-   jq -R -r -j --stream 'scan("^data:(.*)")[] | fromjson.content // empty'
+  -d '{"messages": [{"role": "user", "content": "Which company is considered the best?"}], "stream": true}' |
+   jq -R -r -j --stream 'scan("^data:(.*)")[] | fromjson? | .choices[0].delta.content // empty'
 ```
 
 ---
 
@@ -1,5 +1,7 @@
 import json
 import logging
+import time
+import uuid
 from contextlib import asynccontextmanager
 from os import getenv
 
@@ -12,18 +14,18 @@
 logger = logging.getLogger(__name__)
 
 
-# Request/Response models
-class ChatRequest(BaseModel):
-    """Incoming chat request body for the /chat endpoint."""
+# OpenAI-compatible request/response models
+class ChatMessage(BaseModel):
+    role: str
+    content: str
 
-    message: str
 
+class ChatCompletionRequest(BaseModel):
+    """OpenAI-compatible chat completion request."""
 
-class ChatResponse(BaseModel):
-    """Structured chat response (answer and optional steps)."""
-
-    answer: str
-    steps: list[str]
+    messages: list[ChatMessage]
+    model: str | None = None
+    stream: bool = False
 
 
 # Global variable for workflow closure (get_agent callable)
@@ -32,27 +34,19 @@ class ChatResponse(BaseModel):
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Initialize the LlamaIndex workflow closure on startup and clear it on shutdown.
-
-    Reads BASE_URL and MODEL_ID from the environment, builds the workflow via
-    get_workflow_closure, and sets the global get_agent for the /chat endpoint.
-    """
+    """Initialize the LlamaIndex workflow closure on startup and clear it on shutdown."""
     global get_agent
 
-    # Get environment variables
     base_url = getenv("BASE_URL")
     model_id = getenv("MODEL_ID")
 
-    # Ensure base_url ends with /v1 if provided
     if base_url and not base_url.endswith("/v1"):
         base_url = base_url.rstrip("/") + "/v1"
 
-    # Get workflow closure (returns a callable that returns an agent)
     get_agent = get_workflow_closure(model_id=model_id, base_url=base_url)
 
     yield
 
-    # Cleanup on shutdown (if needed)
     get_agent = None
 
 
@@ -83,7 +77,7 @@ def _get_message_content(msg) -> str:
 
 
 def _message_to_response_dict(msg):
-    """Map a LlamaIndex ChatMessage to the same format as LangGraph (role, content, tool_calls, etc.)."""
+    """Map a LlamaIndex ChatMessage to OpenAI-compatible format."""
     role = getattr(msg, "role", "user")
     content = _get_message_content(msg)
 
@@ -154,98 +148,209 @@ def _message_to_response_dict(msg):
     return None  # skip system or unknown
 
 
-@app.post("/chat")
-async def chat(request: ChatRequest):
-    """
-    Chat endpoint that accepts a message and returns the agent's response.
+def _build_user_message(messages: list[ChatMessage]) -> str:
+    """Extract the last user message from the OpenAI-format messages list."""
+    for msg in reversed(messages):
+        if msg.role == "user":
+            return msg.content
+    raise ValueError("No user message found in messages list")
+
+
+def _make_completion_id() -> str:
+    return f"chatcmpl-{uuid.uuid4().hex[:12]}"
 
-    Args:
-        request: ChatRequest containing the user message
 
-    Returns:
-        JSON response with full conversation history including tool calls
+@app.post("/chat/completions")
+async def chat_completions(request: ChatCompletionRequest):
+    """
+    OpenAI-compatible chat completions endpoint.
+
+    When stream=false, returns a full chat.completion response.
+    When stream=true, returns SSE chat.completion.chunk events.
     """
     global get_agent
 
     if get_agent is None:
         raise HTTPException(status_code=503, detail="Agent not initialized")
 
+    user_message = _build_user_message(request.messages)
+    model_id = request.model or getenv("MODEL_ID", "model")
+
+    if request.stream:
+        return await _handle_stream(user_message, model_id)
+    else:
+        return await _handle_chat(user_message, model_id)
+
+
+async def _handle_chat(user_message: str, model_id: str):
+    """Handle non-streaming chat completion."""
+    global get_agent
+
     try:
         agent = get_agent()
-        messages = [{"role": "user", "content": request.message}]
+        messages = [{"role": "user", "content": user_message}]
 
         result = await agent.run(input=messages)
 
-        response_messages = []
+        # Extract the final assistant message content
+        assistant_content = ""
+        context_messages = []
 
         if result and "messages" in result and len(result["messages"]) > 0:
             for message in result["messages"]:
                 if getattr(message, "role", None) == "system":
                     continue
                 item = _message_to_response_dict(message)
                 if item is not None:
-                    response_messages.append(item)
+                    context_messages.append(item)
 
-        return {"messages": response_messages, "finish_reason": "stop"}
+            # Final assistant content is the last assistant message with content
+            for item in reversed(context_messages):
+                if item["role"] == "assistant" and item.get("content"):
+                    assistant_content = item["content"]
+                    break
+
+        return {
+            "id": _make_completion_id(),
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model_id,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": assistant_content,
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+            "context": context_messages,
+            "usage": None,
+        }
 
     except Exception as e:
         raise HTTPException(
             status_code=500, detail=f"Error processing request: {str(e)}"
         )
 
 
-@app.post("/stream")
-async def stream(request: ChatRequest):
-    """
-    Streaming chat endpoint that accepts a message and returns the agent's
-    response as Server-Sent Events (SSE).
-
-    Event types:
-        - tool_call: tool invocation by the agent
-        - tool_result: result returned by a tool
-        - token: final answer text
-        - done: signals the stream is complete
-
-    Args:
-        request: ChatRequest containing the user message
-    """
+async def _handle_stream(user_message: str, model_id: str):
+    """Handle streaming chat completion with OpenAI-compatible SSE chunks."""
     global get_agent
 
-    if get_agent is None:
-        raise HTTPException(status_code=503, detail="Agent not initialized")
+    completion_id = _make_completion_id()
+    created = int(time.time())
 
     async def event_generator():
         try:
             agent = get_agent()
-            messages = [{"role": "user", "content": request.message}]
+            messages = [{"role": "user", "content": user_message}]
 
             handler = agent.run(input=messages)
 
             async for event in handler.stream_events():
                 if isinstance(event, ToolCallEvent):
                     for tc in event.tool_calls:
-                        yield f"event: tool_call\ndata: {json.dumps({'name': tc.tool_name, 'args': tc.tool_kwargs})}\n\n"
+                        tool_calls_delta = [
+                            {
+                                "index": 0,
+                                "id": getattr(tc, "tool_id", ""),
+                                "type": "function",
+                                "function": {
+                                    "name": tc.tool_name,
+                                    "arguments": json.dumps(tc.tool_kwargs),
+                                },
+                            }
+                        ]
+                        data = {
+                            "id": completion_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model_id,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {
+                                        "role": "assistant",
+                                        "tool_calls": tool_calls_delta,
+                                    },
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(data)}\n\n"
 
                 elif isinstance(event, InputEvent):
-                    # Check if the last message is a tool result
                     if event.input:
                         last_msg = event.input[-1]
                         if getattr(last_msg, "role", None) == "tool":
                             additional = getattr(last_msg, "additional_kwargs", {}) or {}
-                            yield f"event: tool_result\ndata: {json.dumps({'name': additional.get('name', ''), 'output': _get_message_content(last_msg)})}\n\n"
+                            data = {
+                                "id": completion_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model_id,
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {
+                                            "role": "tool",
+                                            "content": _get_message_content(last_msg),
+                                            "name": additional.get("name", ""),
+                                        },
+                                        "finish_reason": None,
+                                    }
+                                ],
+                            }
+                            yield f"data: {json.dumps(data)}\n\n"
 
             result = await handler
             # Extract final answer from the result
             if result and "response" in result:
                 content = _get_message_content(result["response"].message)
                 if content:
-                    yield f"event: token\ndata: {json.dumps({'content': content})}\n\n"
-
-            yield "event: done\ndata: {}\n\n"
+                    data = {
+                        "id": completion_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model_id,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"content": content},
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    yield f"data: {json.dumps(data)}\n\n"
+
+            # Send final chunk with finish_reason
+            final_data = {
+                "id": completion_id,
+                "object": "chat.completion.chunk",
+                "created": created,
+                "model": model_id,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {},
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+            yield f"data: {json.dumps(final_data)}\n\n"
+            yield "data: [DONE]\n\n"
 
-        except Exception as e:
+        except Exception:
             logger.exception("Error in stream event_generator")
-            yield f"event: error\ndata: {json.dumps({'detail': 'Internal server error'})}\n\n"
+            error_data = {
+                "error": {
+                    "message": "Internal server error",
+                    "type": "server_error",
+                }
+            }
+            yield f"data: {json.dumps(error_data)}\n\n"
 
     return StreamingResponse(
         event_generator(),
@@ -264,4 +369,4 @@ async def health():
     import uvicorn
 
     port = int(getenv("PORT", 8000))
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)