Skip to content

Commit bb1f94d

Browse files
committed
working locally and deployment for crew ai agent
1 parent 2ad98ac commit bb1f94d

6 files changed

Lines changed: 187 additions & 69 deletions

File tree

agents/base/crewai-websearch-agent/examples/ai_service.py

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,17 @@
88
def ai_stream_service(context, base_url=None, model_id=None):
99
"""Create a deployable AI service that runs the CrewAI web search crew.
1010
11-
Builds the LLM once, then returns two callables: one for a single
12-
non-streaming response and one that returns a non-streaming response
13-
(CrewAI does not support streaming).
11+
Builds the LLM once, then returns two callables:
12+
- generate: returns a single response dict
13+
- generate_stream: yields streaming choice dicts via step_callback
1414
1515
Args:
1616
context: Object with get_json() used to read the request payload.
1717
base_url: LLM API base URL (OpenAI-compatible / llama-stack).
1818
model_id: LLM model id; will be prefixed with 'openai/'.
1919
2020
Returns:
21-
Tuple (generate, generate). CrewAI does not support streaming,
22-
so both entries return the same non-streaming callable.
21+
Tuple (generate, generate_stream).
2322
"""
2423
from os import getenv
2524

@@ -40,45 +39,67 @@ def get_formatted_message(
4039
) -> dict | None:
4140
"""Turn a CrewAI step into a display dict (role + content) for the client."""
4241
if isinstance(crewai_step, AgentAction):
43-
return {"role": "assistant", "content": crewai_step.result}
42+
return {"role": "assistant", "content": str(crewai_step.result)}
4443
elif isinstance(crewai_step, AgentFinish):
4544
return {"role": "assistant", "content": crewai_step.output}
4645
elif isinstance(crewai_step, ToolResult):
47-
return {"role": "tool", "content": f"\n🔧 Tool Output:\n {crewai_step.result}"}
46+
return {"role": "tool", "content": str(crewai_step.result)}
4847
return None
4948

50-
def generate(context) -> dict:
51-
"""Run the crew on the context payload and return a response dict with choices."""
49+
def _parse_inputs(context):
    """Build the crew ``inputs`` dict from the request payload.

    Args:
        context: Object whose ``get_json()`` returns the request payload,
            a dict that may carry a ``"messages"`` list of message dicts.

    Returns:
        Dict with ``"user_prompt"`` (content of the last message) and
        ``"custom_instruction"`` (content of the first message when its
        role is ``"system"``, otherwise an empty string). Both values are
        empty strings when the payload has no messages.
    """
    payload = context.get_json() or {}
    # ``or []`` also covers an explicit ``"messages": null`` in the payload.
    messages = payload.get("messages") or []

    # Guard the empty case: indexing [-1] on an empty list raises IndexError.
    user_question = messages[-1]["content"] if messages else ""

    custom_instruction = ""
    if messages and messages[0].get("role") == "system":
        custom_instruction = messages[0]["content"]

    return {
        "user_prompt": user_question,
        "custom_instruction": custom_instruction,
    }
6460

65-
intermediate_steps: list = []
66-
_ = (
67-
AssistanceAgents(llm=llm, intermediate_steps=intermediate_steps)
61+
def generate(context) -> dict:
    """Run the crew once and wrap its final output in a response dict.

    Args:
        context: Request context passed through to ``_parse_inputs``.

    Returns:
        Dict with JSON ``headers`` and a ``body`` holding a single
        assistant choice whose content is ``str()`` of the crew result.
    """
    crew_inputs = _parse_inputs(context)
    crew = AssistanceAgents(llm=llm).crew()
    answer = crew.kickoff(inputs=crew_inputs)

    choice = {
        "index": 0,
        "message": {"role": "assistant", "content": str(answer)},
    }
    return {
        "headers": {"Content-Type": "application/json"},
        "body": {"choices": [choice]},
    }
75+
76+
def generate_stream(context):
    """Run the crew and yield streaming choice dicts as steps complete.

    The crew runs in a background thread. Each step reported through
    ``step_callback`` is passed to this generator over a queue and yielded
    right away, instead of being held back until ``kickoff()`` returns.

    Args:
        context: Request context passed through to ``_parse_inputs``.

    Yields:
        One ``{"choices": [...]}`` dict per step that
        ``get_formatted_message`` can format (``finish_reason`` is None),
        followed by a final dict carrying ``str()`` of the crew result
        with ``finish_reason`` set to ``"stop"``.

    Raises:
        Exception: Whatever ``kickoff()`` raised in the worker thread is
            re-raised here once the already queued steps were yielded.
    """
    # Function-scope imports keep this block self-contained.
    import queue
    import threading

    inputs = _parse_inputs(context)
    events = queue.Queue()
    finished = object()  # sentinel: the worker is done, successfully or not
    outcome = {}

    def _on_step(step_output):
        events.put(step_output)

    def _run_crew():
        try:
            outcome["result"] = (
                AssistanceAgents(llm=llm, step_callback=_on_step)
                .crew()
                .kickoff(inputs=inputs)
            )
        except Exception as exc:
            # Carry the failure over to the consuming thread.
            outcome["error"] = exc
        finally:
            events.put(finished)

    # Daemon thread: an abandoned generator must not block interpreter exit.
    threading.Thread(target=_run_crew, daemon=True).start()

    # Yield intermediate steps as soon as the crew reports them
    while True:
        step = events.get()
        if step is finished:
            break
        msg = get_formatted_message(step)
        if msg:
            yield {"choices": [{"index": 0, "delta": msg, "finish_reason": None}]}

    if "error" in outcome:
        raise outcome["error"]

    # Yield final answer
    yield {
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant", "content": str(outcome["result"])},
            "finish_reason": "stop",
        }]
    }
82104

83-
# CrewAI does not support streaming, so both entries point to generate
84-
return generate, generate
105+
return generate, generate_stream

agents/base/crewai-websearch-agent/examples/execute_ai_service_locally.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def get_headers(self):
2727
if base_url and not base_url.endswith("/v1"):
2828
base_url = base_url.rstrip("/") + "/v1"
2929

30-
stream = False # CrewAI does not support streaming
30+
stream = True
3131
context = SimpleContext()
3232
ai_service_resp_func = ai_stream_service(
3333
context=context, base_url=base_url, model_id=model_id

agents/base/crewai-websearch-agent/main.py

Lines changed: 119 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,45 @@
11
import asyncio
2+
import json
3+
import re
24
from contextlib import asynccontextmanager
35
from os import getenv
46

57
from crewai import LLM
68
from fastapi import FastAPI, HTTPException
9+
from fastapi.responses import StreamingResponse
710
from pydantic import BaseModel
811

912
from crewai_web_search.crew import AssistanceAgents
1013

1114

1215
class ChatRequest(BaseModel):
13-
"""Incoming chat request body for the /chat endpoint."""
16+
"""Incoming chat request body for the /chat and /stream endpoints."""
1417

1518
message: str
1619

1720

18-
class ChatResponse(BaseModel):
19-
"""Structured chat response."""
21+
# Global LLM instance
22+
llm = None
23+
24+
# Patterns that indicate CrewAI internal scaffolding in the output.
# NOTE: with re.DOTALL the trailing ".*" runs to the end of the string, so
# substituting this pattern drops everything from the first marker onward.
_REACT_NOISE = re.compile(
    r"(^|\n)\s*(Thought:\s*|Action:\s*|Action Input:\s*|Observation:\s*|Final Answer:\s*).*",
    re.DOTALL,
)
# "Final Answer:" label at the start of the text or of a line.
_FINAL_ANSWER = re.compile(r"(?:^|\n)\s*Final Answer:\s*")
_CREWAI_PROMPT_MARKER = "\n\n\nYou ONLY have access to"


def _clean_content(text: str) -> str:
    """Strip CrewAI internal ReAct scaffolding and prompt noise from output.

    Args:
        text: Raw text produced by the crew.

    Returns:
        The text after the last line-leading ``Final Answer:`` label when
        that text is non-empty. Otherwise the input with everything from
        the first scaffolding marker onward removed. Both are cut at the
        appended prompt marker and stripped of surrounding whitespace.
    """
    # Strip appended retry instructions
    cut = text.find(_CREWAI_PROMPT_MARKER)
    if cut != -1:
        text = text[:cut]

    # Keep the answer itself: _REACT_NOISE alone would delete it together
    # with the "Final Answer:" label, leaving an empty reply.
    labels = list(_FINAL_ANSWER.finditer(text))
    if labels:
        answer = text[labels[-1].end():].strip()
        if answer:
            return answer

    # No usable final answer: drop the ReAct scaffolding
    # (Thought:/Action:/Action Input:/Observation: and what follows).
    return _REACT_NOISE.sub("", text).strip()
2743

2844

2945
@asynccontextmanager
@@ -59,7 +75,7 @@ async def lifespan(app: FastAPI):
5975

6076
@app.post("/chat")
6177
async def chat(request: ChatRequest):
62-
"""Chat endpoint that runs the CrewAI crew and returns the response."""
78+
"""Non-streaming chat endpoint. Returns the final answer."""
6379
global llm
6480

6581
if llm is None:
@@ -71,33 +87,111 @@ async def chat(request: ChatRequest):
7187
"custom_instruction": "",
7288
}
7389

74-
intermediate_steps: list = []
75-
crew = AssistanceAgents(
76-
llm=llm, intermediate_steps=intermediate_steps
77-
).crew()
78-
90+
crew = AssistanceAgents(llm=llm).crew()
7991
result = await asyncio.to_thread(crew.kickoff, inputs=inputs)
8092

81-
steps = []
82-
for step in intermediate_steps:
83-
from crewai.agents.parser import AgentAction, AgentFinish
84-
from crewai.tools.tool_types import ToolResult
85-
86-
if isinstance(step, AgentAction):
87-
steps.append(f"[action] {step.result}")
88-
elif isinstance(step, AgentFinish):
89-
steps.append(f"[finish] {step.output}")
90-
elif isinstance(step, ToolResult):
91-
steps.append(f"[tool] {step.result}")
93+
response_messages = [
94+
{"role": "user", "content": request.message},
95+
{"role": "assistant", "content": _clean_content(str(result))},
96+
]
9297

93-
return ChatResponse(answer=str(result), steps=steps)
98+
return {"messages": response_messages, "finish_reason": "stop"}
9499

95100
except Exception as e:
96101
raise HTTPException(
97102
status_code=500, detail=f"Error processing request: {str(e)}"
98103
)
99104

100105

106+
@app.post("/stream")
async def stream(request: ChatRequest):
    """Streaming chat endpoint that relays the crew's answer as SSE events.

    The crew is built with ``stream=True`` and started via
    ``kickoff_async()``; the returned streaming output is iterated chunk
    by chunk. Text is held back until ``Final Answer:`` shows up, because
    everything before it is internal ReAct reasoning
    (Thought/Action/Observation). If the marker never shows up, the whole
    buffered text is cleaned and sent as a single event.

    Raises:
        HTTPException: 503 when the global ``llm`` has not been set.
    """
    global llm

    if llm is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    answer_marker = "Final Answer:"

    def _sse(delta: dict, finish_reason=None) -> str:
        """Serialize one choice delta as a server-sent event line."""
        payload = {
            "choices": [{
                "index": 0,
                "delta": delta,
                "finish_reason": finish_reason,
            }]
        }
        return f"data: {json.dumps(payload)}\n\n"

    def _assistant_event(content: str) -> str:
        """Build the event for a piece of assistant text."""
        return _sse({"role": "assistant", "content": content})

    async def _event_generator():
        inputs = {
            "user_prompt": request.message,
            "custom_instruction": "",
        }

        crew = AssistanceAgents(llm=llm, stream=True).crew()

        # kickoff_async with stream=True returns CrewStreamingOutput
        streaming_output = await crew.kickoff_async(inputs=inputs)

        pending = ""            # text seen before the marker was found
        answer_started = False  # True once "Final Answer:" has been seen

        async for chunk in streaming_output:
            if chunk.chunk_type.value != "text" or not chunk.content:
                continue

            if answer_started:
                # Past the marker: forward tokens untouched.
                yield _assistant_event(chunk.content)
                continue

            pending += chunk.content
            marker_at = pending.find(answer_marker)
            if marker_at == -1:
                continue

            answer_started = True
            # Emit whatever followed the marker within the buffered text.
            tail = pending[marker_at + len(answer_marker):]
            if tail.strip():
                yield _assistant_event(tail.lstrip())

        # Marker never appeared: fall back to the cleaned full buffer.
        if not answer_started and pending.strip():
            cleaned = _clean_content(pending)
            if cleaned:
                yield _assistant_event(cleaned)

        # Closing events: stop marker, then the [DONE] terminator.
        yield _sse({}, "stop")
        yield "data: [DONE]\n\n"

    return StreamingResponse(_event_generator(), media_type="text/event-stream")
193+
194+
101195
@app.get("/health")
102196
async def health():
103197
"""Return service health status."""

agents/base/crewai-websearch-agent/src/crewai_web_search/config/agents.yaml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ ai_assistant:
22
role: >
33
Senior Assistant
44
goal: >
5-
Provide a helpful answer to the user's question. Only use the Web Search tool when the user
6-
asks a factual question that requires looking up external information. For simple messages
7-
like greetings, casual conversation, or questions you can answer from your own knowledge,
8-
respond directly without using any tools.
5+
Provide a helpful answer to the user's question. You may use the Web Search tool
6+
to look up factual information. After calling a tool once, always provide your
7+
Final Answer — never call the same tool again.
98
backstory: >
10-
You are an experienced assistant. You respond directly to greetings and simple messages
11-
without searching the web. You only use the Web Search tool when the user asks a specific
12-
factual question that genuinely requires looking up current or external information.
13-
If a tool returns unhelpful results, do not retry — just answer with your own knowledge.
14-
{custom_instruction}
9+
You are an experienced assistant with broad knowledge. Follow these rules strictly:
10+
1. You may call the Web Search tool at most ONCE per question.
11+
2. After receiving a tool result, immediately give your Final Answer.
12+
3. If the tool result is not relevant to the question, ignore it and answer
13+
from your own knowledge. Say "Based on my knowledge" when doing so.
14+
4. For greetings or casual messages, respond directly without using any tools.
15+
5. NEVER retry a tool call. One call maximum, then Final Answer.
16+
{custom_instruction}
Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
generate_response_task:
22
description: >
33
Respond to user prompt: {user_prompt}.
4-
If the prompt is a greeting or casual message, respond directly without using any tools.
5-
Only use the Web Search tool if the user is asking a factual question that requires
6-
looking up external information. Never call a tool more than once for the same question.
4+
You may call the Web Search tool once if the question needs external information.
5+
After receiving a tool result, give your Final Answer immediately — do not call
6+
the tool again. If the tool result does not answer the question, use your own
7+
knowledge and say "Based on my knowledge".
8+
For greetings or casual messages, respond directly without tools.
79
expected_output: >
8-
A concise and polite response to the user prompt: {user_prompt}
9-
agent: ai_assistant
10+
A concise, helpful, and polite response to: {user_prompt}
11+
agent: ai_assistant

agents/base/crewai-websearch-agent/src/crewai_web_search/crew.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ class AssistanceAgents:
1313

1414
def __init__(self, llm: LLM, **kwargs):
    """Store the LLM and the optional crew settings found in ``kwargs``.

    Args:
        llm: LLM instance kept on ``self.llm``.
        **kwargs: ``step_callback`` (default ``None``) and ``stream``
            (default ``False``) are read; any other key is left unused.
    """
    step_callback = kwargs.pop("step_callback", None)
    stream_enabled = kwargs.pop("stream", False)

    self.llm = llm
    self.step_callback = step_callback
    self.enable_stream = stream_enabled
1718

1819
@after_kickoff # Optional hook to be executed after the crew has finished
1920
def log_results(self, output):
@@ -42,13 +43,11 @@ def generate_response_task(self) -> Task:
4243
def crew(self) -> Crew:
4344
"""Creates the AI Assistant crew"""
4445

45-
def task_callback(step_output):
46-
self.intermediate_steps.append(step_output)
47-
4846
return Crew(
4947
agents=self.agents, # Automatically created by the @agent decorator
5048
tasks=self.tasks, # Automatically created by the @task decorator
5149
process=Process.sequential,
5250
verbose=True,
53-
step_callback=task_callback,
51+
step_callback=self.step_callback,
52+
stream=self.enable_stream,
5453
)

0 commit comments

Comments
 (0)