Get the CrewAI agent working both locally and in deployment

MRGuziX · MRGuziX · commit bb1f94d6666e · 2026-03-09T12:51:28.000+01:00
diff --git a/agents/base/crewai-websearch-agent/examples/ai_service.py b/agents/base/crewai-websearch-agent/examples/ai_service.py
@@ -8,18 +8,17 @@
 def ai_stream_service(context, base_url=None, model_id=None):
     """Create a deployable AI service that runs the CrewAI web search crew.
 
-    Builds the LLM once, then returns two callables: one for a single
-    non-streaming response and one that returns a non-streaming response
-    (CrewAI does not support streaming).
+    Builds the LLM once, then returns two callables:
+      - generate: returns a single response dict
+      - generate_stream: yields streaming choice dicts via step_callback
 
     Args:
         context: Object with get_json() used to read the request payload.
         base_url: LLM API base URL (OpenAI-compatible / llama-stack).
         model_id: LLM model id; will be prefixed with 'openai/'.
 
     Returns:
-        Tuple (generate, generate). CrewAI does not support streaming,
-        so both entries return the same non-streaming callable.
+        Tuple (generate, generate_stream).
     """
     from os import getenv
 
@@ -40,45 +39,67 @@ def get_formatted_message(
     ) -> dict | None:
         """Turn a CrewAI step into a display dict (role + content) for the client."""
         if isinstance(crewai_step, AgentAction):
-            return {"role": "assistant", "content": crewai_step.result}
+            return {"role": "assistant", "content": str(crewai_step.result)}
         elif isinstance(crewai_step, AgentFinish):
             return {"role": "assistant", "content": crewai_step.output}
         elif isinstance(crewai_step, ToolResult):
-            return {"role": "tool", "content": f"\n🔧 Tool Output:\n {crewai_step.result}"}
+            return {"role": "tool", "content": str(crewai_step.result)}
         return None
 
-    def generate(context) -> dict:
-        """Run the crew on the context payload and return a response dict with choices."""
+    def _parse_inputs(context):
         payload = context.get_json()
         messages = payload.get("messages", [])
-
         user_question = messages[-1]["content"]
         custom_instruction = ""
         if messages and messages[0].get("role") == "system":
             custom_instruction = messages[0]["content"]
-
-        inputs = {
+        return {
             "user_prompt": user_question,
             "custom_instruction": custom_instruction,
         }
 
-        intermediate_steps: list = []
-        _ = (
-            AssistanceAgents(llm=llm, intermediate_steps=intermediate_steps)
+    def generate(context) -> dict:
+        """Run the crew and return a single response dict with choices."""
+        inputs = _parse_inputs(context)
+
+        result = AssistanceAgents(llm=llm).crew().kickoff(inputs=inputs)
+
+        return {
+            "headers": {"Content-Type": "application/json"},
+            "body": {
+                "choices": [
+                    {"index": 0, "message": {"role": "assistant", "content": str(result)}}
+                ]
+            },
+        }
+
+    def generate_stream(context):
+        """Run the crew to completion, then yield the collected steps and the final answer."""
+        inputs = _parse_inputs(context)
+        steps_collected = []
+
+        def _on_step(step_output):
+            steps_collected.append(step_output)
+
+        result = (
+            AssistanceAgents(llm=llm, step_callback=_on_step)
             .crew()
             .kickoff(inputs=inputs)
         )
 
-        choices = []
-        for i, step in enumerate(intermediate_steps):
+        # Yield the steps gathered by _on_step while kickoff() was running
+        for step in steps_collected:
             msg = get_formatted_message(step)
             if msg:
-                choices.append({"index": i, "message": msg})
-
-        return {
-            "headers": {"Content-Type": "application/json"},
-            "body": {"choices": choices},
+                yield {"choices": [{"index": 0, "delta": msg, "finish_reason": None}]}
+
+        # Yield final answer (NOTE(review): may repeat an AgentFinish step yielded above; confirm)
+        yield {
+            "choices": [{
+                "index": 0,
+                "delta": {"role": "assistant", "content": str(result)},
+                "finish_reason": "stop",
+            }]
         }
 
-    # CrewAI does not support streaming, so both entries point to generate
-    return generate, generate
+    return generate, generate_stream
diff --git a/agents/base/crewai-websearch-agent/examples/execute_ai_service_locally.py b/agents/base/crewai-websearch-agent/examples/execute_ai_service_locally.py
@@ -27,7 +27,7 @@ def get_headers(self):
 if base_url and not base_url.endswith("/v1"):
     base_url = base_url.rstrip("/") + "/v1"
 
-stream = False  # CrewAI does not support streaming
+stream = True
 context = SimpleContext()
 ai_service_resp_func = ai_stream_service(
     context=context, base_url=base_url, model_id=model_id
diff --git a/agents/base/crewai-websearch-agent/main.py b/agents/base/crewai-websearch-agent/main.py
@@ -1,29 +1,45 @@
 import asyncio
+import json
+import re
 from contextlib import asynccontextmanager
 from os import getenv
 
 from crewai import LLM
 from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 
 from crewai_web_search.crew import AssistanceAgents
 
 
 class ChatRequest(BaseModel):
-    """Incoming chat request body for the /chat endpoint."""
+    """Incoming chat request body for the /chat and /stream endpoints."""
 
     message: str
 
 
-class ChatResponse(BaseModel):
-    """Structured chat response."""
+# Global LLM instance
+llm = None
+
+# Patterns that indicate CrewAI internal scaffolding in the output
+_REACT_NOISE = re.compile(
+    r"(^|\n)\s*(Thought:\s*|Action:\s*|Action Input:\s*|Observation:\s*|Final Answer:\s*).*",
+    re.DOTALL,
+)
+_CREWAI_PROMPT_MARKER = "\n\n\nYou ONLY have access to"
 
-    answer: str
-    steps: list[str]
 
+def _clean_content(text: str) -> str:
+    """Return the user-facing answer with CrewAI prompt/ReAct noise removed."""
+    # Cut the appended retry prompt, then keep only what follows the last
+    # "Final Answer:" label; otherwise _REACT_NOISE would erase the answer too.
+    text = text.partition(_CREWAI_PROMPT_MARKER)[0]
+    text = text.rpartition("Final Answer:")[2]
 
-# Global LLM instance
-llm = None
+    # Truncate at any remaining scaffolding label (Thought:/Action:/Observation:)
+    text = _REACT_NOISE.sub("", text)
+
+    return text.strip()
 
 
 @asynccontextmanager
@@ -59,7 +75,7 @@ async def lifespan(app: FastAPI):
 
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    """Chat endpoint that runs the CrewAI crew and returns the response."""
+    """Non-streaming chat endpoint. Returns the final answer."""
     global llm
 
     if llm is None:
@@ -71,33 +87,111 @@ async def chat(request: ChatRequest):
             "custom_instruction": "",
         }
 
-        intermediate_steps: list = []
-        crew = AssistanceAgents(
-            llm=llm, intermediate_steps=intermediate_steps
-        ).crew()
-
+        crew = AssistanceAgents(llm=llm).crew()
         result = await asyncio.to_thread(crew.kickoff, inputs=inputs)
 
-        steps = []
-        for step in intermediate_steps:
-            from crewai.agents.parser import AgentAction, AgentFinish
-            from crewai.tools.tool_types import ToolResult
-
-            if isinstance(step, AgentAction):
-                steps.append(f"[action] {step.result}")
-            elif isinstance(step, AgentFinish):
-                steps.append(f"[finish] {step.output}")
-            elif isinstance(step, ToolResult):
-                steps.append(f"[tool] {step.result}")
+        response_messages = [
+            {"role": "user", "content": request.message},
+            {"role": "assistant", "content": _clean_content(str(result))},
+        ]
 
-        return ChatResponse(answer=str(result), steps=steps)
+        return {"messages": response_messages, "finish_reason": "stop"}
 
     except Exception as e:
         raise HTTPException(
             status_code=500, detail=f"Error processing request: {str(e)}"
         )
 
 
+@app.post("/stream")
+async def stream(request: ChatRequest):
+    """Streaming chat endpoint using CrewAI's native token-level streaming.
+
+    Uses Crew(stream=True) with kickoff_async() which returns a
+    CrewStreamingOutput that yields StreamChunk objects with real
+    token-by-token content from the LLM. Only text after the
+    "Final Answer:" marker is forwarded, as SSE ``data:`` events.
+
+    Errors raised once streaming has begun cannot change the HTTP status, so
+    they are reported in-band as an ``error`` event before the stop event.
+
+    Raises:
+        HTTPException: 503 if the LLM has not been initialized yet.
+    """
+    global llm
+
+    if llm is None:
+        raise HTTPException(status_code=503, detail="Agent not initialized")
+
+    def _sse(payload: dict) -> str:
+        """Serialize one payload as a server-sent event."""
+        return f"data: {json.dumps(payload)}\n\n"
+
+    def _delta(delta: dict, finish_reason=None) -> str:
+        """Build the SSE event for a single streaming choice."""
+        choice = {"index": 0, "delta": delta, "finish_reason": finish_reason}
+        return _sse({"choices": [choice]})
+
+    def _text(content: str) -> str:
+        """Build the SSE event for a piece of assistant text."""
+        return _delta({"role": "assistant", "content": content})
+
+    async def _event_generator():
+        inputs = {
+            "user_prompt": request.message,
+            "custom_instruction": "",
+        }
+
+        # Buffer tokens until we see "Final Answer:" — everything before
+        # that is internal ReAct reasoning (Thought/Action/Observation).
+        buffer = ""
+        emitting = False
+        marker = "Final Answer:"
+
+        try:
+            crew = AssistanceAgents(llm=llm, stream=True).crew()
+
+            # kickoff_async with stream=True returns CrewStreamingOutput
+            streaming_output = await crew.kickoff_async(inputs=inputs)
+
+            async for chunk in streaming_output:
+                if chunk.chunk_type.value != "text" or not chunk.content:
+                    continue
+
+                if emitting:
+                    # Already past "Final Answer:", emit tokens directly
+                    yield _text(chunk.content)
+                    continue
+
+                buffer += chunk.content
+                idx = buffer.find(marker)
+                if idx == -1:
+                    continue
+
+                # Reached the final answer: emit whatever followed the
+                # marker in the buffered text, then stream the rest live.
+                emitting = True
+                remainder = buffer[idx + len(marker):]
+                if remainder.strip():
+                    yield _text(remainder.lstrip())
+        except Exception as e:
+            # Mirror /chat's error text; the response has already started, so
+            # the failure has to travel in-band instead of as an HTTP 500.
+            message = f"Error processing request: {str(e)}"
+            yield _sse({"error": {"message": message}})
+        else:
+            # If no "Final Answer:" was found, send the cleaned full buffer
+            cleaned = "" if emitting else _clean_content(buffer)
+            if cleaned:
+                yield _text(cleaned)
+
+        # Send final stop event
+        yield _delta({}, finish_reason="stop")
+        yield "data: [DONE]\n\n"
+
+    return StreamingResponse(_event_generator(), media_type="text/event-stream")
+
+
 @app.get("/health")
 async def health():
     """Return service health status."""
diff --git a/agents/base/crewai-websearch-agent/src/crewai_web_search/config/agents.yaml b/agents/base/crewai-websearch-agent/src/crewai_web_search/config/agents.yaml
@@ -2,13 +2,15 @@ ai_assistant:
   role: >
     Senior Assistant
   goal: >
-    Provide a helpful answer to the user's question. Only use the Web Search tool when the user
-    asks a factual question that requires looking up external information. For simple messages
-    like greetings, casual conversation, or questions you can answer from your own knowledge,
-    respond directly without using any tools.
+    Provide a helpful answer to the user's question. You may use the Web Search tool
+    to look up factual information. After calling a tool once, always provide your
+    Final Answer — never call the same tool again.
   backstory: >
-    You are an experienced assistant. You respond directly to greetings and simple messages
-    without searching the web. You only use the Web Search tool when the user asks a specific
-    factual question that genuinely requires looking up current or external information.
-    If a tool returns unhelpful results, do not retry — just answer with your own knowledge.
-    {custom_instruction}
+    You are an experienced assistant with broad knowledge. Follow these rules strictly:
+    1. You may call the Web Search tool at most ONCE per question.
+    2. After receiving a tool result, immediately give your Final Answer.
+    3. If the tool result is not relevant to the question, ignore it and answer
+       from your own knowledge. Say "Based on my knowledge" when doing so.
+    4. For greetings or casual messages, respond directly without using any tools.
+    5. NEVER retry a tool call. One call maximum, then Final Answer.
+    {custom_instruction}
diff --git a/agents/base/crewai-websearch-agent/src/crewai_web_search/config/tasks.yaml b/agents/base/crewai-websearch-agent/src/crewai_web_search/config/tasks.yaml
@@ -1,9 +1,11 @@
 generate_response_task:
   description: >
     Respond to user prompt: {user_prompt}.
-    If the prompt is a greeting or casual message, respond directly without using any tools.
-    Only use the Web Search tool if the user is asking a factual question that requires
-    looking up external information. Never call a tool more than once for the same question.
+    You may call the Web Search tool once if the question needs external information.
+    After receiving a tool result, give your Final Answer immediately — do not call
+    the tool again. If the tool result does not answer the question, use your own
+    knowledge and say "Based on my knowledge".
+    For greetings or casual messages, respond directly without tools.
   expected_output: >
-    A concise and polite response to the user prompt: {user_prompt}
-  agent: ai_assistant
+    A concise, helpful, and polite response to: {user_prompt}
+  agent: ai_assistant
diff --git a/agents/base/crewai-websearch-agent/src/crewai_web_search/crew.py b/agents/base/crewai-websearch-agent/src/crewai_web_search/crew.py
@@ -13,7 +13,8 @@ class AssistanceAgents:
 
     def __init__(self, llm: LLM, **kwargs):
         self.llm = llm
-        self.intermediate_steps = kwargs.pop("intermediate_steps", None)
+        self.step_callback = kwargs.pop("step_callback", None)
+        self.enable_stream = kwargs.pop("stream", False)
 
     @after_kickoff  # Optional hook to be executed after the crew has finished
     def log_results(self, output):
@@ -42,13 +43,11 @@ def generate_response_task(self) -> Task:
     def crew(self) -> Crew:
         """Creates the AI Assistant crew"""
 
-        def task_callback(step_output):
-            self.intermediate_steps.append(step_output)
-
         return Crew(
             agents=self.agents,  # Automatically created by the @agent decorator
             tasks=self.tasks,  # Automatically created by the @task decorator
             process=Process.sequential,
             verbose=True,
-            step_callback=task_callback,
+            step_callback=self.step_callback,
+            stream=self.enable_stream,
         )