Skip to content

Commit 788ec6d

Browse files
Merge pull request #37 from agentevals-dev/peterj/addci
add ci
2 parents 39430fb + c20e426 commit 788ec6d

46 files changed

Lines changed: 1408 additions & 1003 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
name: CI
2+
3+
on:
4+
pull_request:
5+
branches: [main]
6+
push:
7+
branches: [main]
8+
9+
permissions:
10+
contents: read
11+
12+
jobs:
13+
lint:
14+
runs-on: ubuntu-latest
15+
steps:
16+
- uses: actions/checkout@v6
17+
18+
- uses: astral-sh/setup-uv@v7
19+
with:
20+
enable-cache: true
21+
22+
- name: Install dependencies
23+
run: |
24+
uv venv
25+
uv pip install setuptools
26+
uv sync --dev
27+
28+
- name: Ruff check
29+
run: uv run ruff check .
30+
31+
- name: Ruff format check
32+
run: uv run ruff format --check .
33+
34+
test:
35+
runs-on: ubuntu-latest
36+
strategy:
37+
matrix:
38+
python-version: ["3.11", "3.12", "3.13"]
39+
steps:
40+
- uses: actions/checkout@v6
41+
42+
- uses: astral-sh/setup-uv@v7
43+
with:
44+
enable-cache: true
45+
46+
- name: Install Python ${{ matrix.python-version }}
47+
run: uv python install ${{ matrix.python-version }}
48+
49+
- name: Install dependencies
50+
run: |
51+
uv venv --python ${{ matrix.python-version }}
52+
uv pip install setuptools
53+
uv sync --dev --python ${{ matrix.python-version }}
54+
55+
- name: Run tests
56+
run: uv run pytest -m "not integration and not e2e" --tb=short -q

examples/custom_evaluators/response_quality.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,14 +35,10 @@ def response_quality(input: EvalInput) -> EvalResult:
3535
if len(inv.final_response.strip()) < min_len:
3636
score -= 0.3
3737
issues.append(
38-
f"{inv.invocation_id}: response too short "
39-
f"({len(inv.final_response.strip())} < {min_len} chars)"
38+
f"{inv.invocation_id}: response too short ({len(inv.final_response.strip())} < {min_len} chars)"
4039
)
4140

42-
if (
43-
inv.user_content
44-
and inv.final_response.strip().lower() == inv.user_content.strip().lower()
45-
):
41+
if inv.user_content and inv.final_response.strip().lower() == inv.user_content.strip().lower():
4642
score -= 0.5
4743
issues.append(f"{inv.invocation_id}: response is just the user input echoed back")
4844

examples/dice_agent/agent.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,7 @@ def roll_die(sides: int = 6) -> dict:
2222
return {"error": "Die must have at least 2 sides"}
2323

2424
result = random.randint(1, sides)
25-
return {
26-
"sides": sides,
27-
"result": result,
28-
"message": f"Rolled a {sides}-sided die and got {result}"
29-
}
25+
return {"sides": sides, "result": result, "message": f"Rolled a {sides}-sided die and got {result}"}
3026

3127

3228
def check_prime(nums: list[int]) -> dict:
@@ -38,6 +34,7 @@ def check_prime(nums: list[int]) -> dict:
3834
Returns:
3935
Dictionary mapping each number to whether it's prime
4036
"""
37+
4138
def is_prime(n: int) -> bool:
4239
if n < 2:
4340
return False
@@ -53,16 +50,12 @@ def is_prime(n: int) -> bool:
5350
results = {num: is_prime(num) for num in nums}
5451
prime_nums = [n for n, is_p in results.items() if is_p]
5552

56-
return {
57-
"results": results,
58-
"prime_count": len(prime_nums),
59-
"prime_numbers": prime_nums
60-
}
53+
return {"results": results, "prime_count": len(prime_nums), "prime_numbers": prime_nums}
6154

6255

6356
dice_agent = Agent(
6457
name="dice_agent",
65-
#model="gemini-2.5-flash",
58+
# model="gemini-2.5-flash",
6659
model="gemini-2.5-flash-lite",
6760
instruction="""You are a helpful assistant that can roll dice and check if numbers are prime.
6861

examples/dice_agent/main.py

Lines changed: 6 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -82,9 +82,7 @@ async def main():
8282
user_id = "demo_user"
8383

8484
runner = InMemoryRunner(agent=dice_agent, app_name=app_name)
85-
session = await runner.session_service.create_session(
86-
app_name=app_name, user_id=user_id
87-
)
85+
session = await runner.session_service.create_session(app_name=app_name, user_id=user_id)
8886

8987
test_queries = [
9088
"Hi! Can you help me?",
@@ -95,14 +93,10 @@ async def main():
9593
for i, query in enumerate(test_queries, 1):
9694
print(f"\n[{i}/{len(test_queries)}] User: {query}")
9795

98-
content = types.Content(
99-
role="user", parts=[types.Part.from_text(text=query)]
100-
)
96+
content = types.Content(role="user", parts=[types.Part.from_text(text=query)])
10197

10298
agent_response = ""
103-
async for event in runner.run_async(
104-
user_id=user_id, session_id=session.id, new_message=content
105-
):
99+
async for event in runner.run_async(user_id=user_id, session_id=session.id, new_message=content):
106100
if event.content.parts and event.content.parts[0].text:
107101
agent_response = event.content.parts[0].text
108102

@@ -125,17 +119,11 @@ async def main():
125119
app_name = "dice_agent_app"
126120
user_id = "demo_user"
127121
runner = InMemoryRunner(agent=dice_agent, app_name=app_name)
128-
session = await runner.session_service.create_session(
129-
app_name=app_name, user_id=user_id
130-
)
122+
session = await runner.session_service.create_session(app_name=app_name, user_id=user_id)
131123

132-
content = types.Content(
133-
role="user", parts=[types.Part.from_text(text="Roll a 6-sided die")]
134-
)
124+
content = types.Content(role="user", parts=[types.Part.from_text(text="Roll a 6-sided die")])
135125

136-
async for event in runner.run_async(
137-
user_id=user_id, session_id=session.id, new_message=content
138-
):
126+
async for event in runner.run_async(user_id=user_id, session_id=session.id, new_message=content):
139127
if event.content.parts and event.content.parts[0].text:
140128
print(f"Agent: {event.content.parts[0].text}")
141129

examples/langchain_agent/agent.py

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -17,18 +17,10 @@ def roll_die(sides: int = 6) -> dict:
1717
Dictionary with sides, result, and message
1818
"""
1919
if sides < 2:
20-
return {
21-
"sides": sides,
22-
"result": None,
23-
"message": "Error: Die must have at least 2 sides"
24-
}
20+
return {"sides": sides, "result": None, "message": "Error: Die must have at least 2 sides"}
2521

2622
result = random.randint(1, sides)
27-
return {
28-
"sides": sides,
29-
"result": result,
30-
"message": f"Rolled a {result} on a {sides}-sided die"
31-
}
23+
return {"sides": sides, "result": result, "message": f"Rolled a {result} on a {sides}-sided die"}
3224

3325

3426
@tool
@@ -41,6 +33,7 @@ def check_prime(nums: list[int]) -> dict:
4133
Returns:
4234
Dictionary with results, prime_count, and prime_numbers
4335
"""
36+
4437
def is_prime(n: int) -> bool:
4538
if n < 2:
4639
return False
@@ -56,14 +49,10 @@ def is_prime(n: int) -> bool:
5649
results = {n: is_prime(n) for n in nums}
5750
prime_numbers = [n for n, is_p in results.items() if is_p]
5851

59-
return {
60-
"results": results,
61-
"prime_count": len(prime_numbers),
62-
"prime_numbers": prime_numbers
63-
}
52+
return {"results": results, "prime_count": len(prime_numbers), "prime_numbers": prime_numbers}
6453

6554

66-
#def create_dice_agent(model: str = "gpt-3.5-turbo", temperature: float = 0.0):
55+
# def create_dice_agent(model: str = "gpt-3.5-turbo", temperature: float = 0.0):
6756
def create_dice_agent(model: str = "gpt-4o-mini", temperature: float = 0.0):
6857
llm = ChatOpenAI(model=model, temperature=temperature)
6958
tools = [roll_die, check_prime]

examples/langchain_agent/main.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -90,9 +90,7 @@ def run_loop_in_background():
9090
thread = threading.Thread(target=run_loop_in_background, daemon=True)
9191
thread.start()
9292

93-
future = asyncio.run_coroutine_threadsafe(
94-
processor.connect(eval_set_id=eval_set_id), loop
95-
)
93+
future = asyncio.run_coroutine_threadsafe(processor.connect(eval_set_id=eval_set_id), loop)
9694
future.result()
9795

9896
tracer_provider.add_span_processor(processor)
@@ -161,11 +159,7 @@ def main():
161159
selected_tool = {t.name: t for t in tools}.get(tool_name)
162160
if selected_tool:
163161
tool_result = selected_tool.invoke(tool_args)
164-
messages.append(
165-
ToolMessage(
166-
content=str(tool_result), tool_call_id=tool_call["id"]
167-
)
168-
)
162+
messages.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call["id"]))
169163
else:
170164
print(" Agent: [Max iterations reached]")
171165

examples/sdk_example/async_example.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -47,18 +47,12 @@ async def main():
4747
metadata={"model": dice_agent.model},
4848
):
4949
runner = InMemoryRunner(agent=dice_agent, app_name="dice_app")
50-
session = await runner.session_service.create_session(
51-
app_name="dice_app", user_id="demo_user"
52-
)
50+
session = await runner.session_service.create_session(app_name="dice_app", user_id="demo_user")
5351

5452
for query in ["Roll a 20-sided die", "Is that number prime?"]:
5553
print(f"User: {query}")
56-
content = types.Content(
57-
role="user", parts=[types.Part.from_text(text=query)]
58-
)
59-
async for event in runner.run_async(
60-
user_id="demo_user", session_id=session.id, new_message=content
61-
):
54+
content = types.Content(role="user", parts=[types.Part.from_text(text=query)])
55+
async for event in runner.run_async(user_id="demo_user", session_id=session.id, new_message=content):
6256
if event.content.parts and event.content.parts[0].text:
6357
print(f"Agent: {event.content.parts[0].text}")
6458

examples/strands_agent/agent.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ def check_prime(nums: list[int]) -> dict:
3434
Returns:
3535
Dictionary with primality results and list of prime numbers
3636
"""
37+
3738
def is_prime(n: int) -> bool:
3839
if n < 2:
3940
return False

examples/strands_agent/main.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,7 @@ def run_loop_in_background():
7171
thread = threading.Thread(target=run_loop_in_background, daemon=True)
7272
thread.start()
7373

74-
future = asyncio.run_coroutine_threadsafe(
75-
processor.connect(eval_set_id=eval_set_id), loop
76-
)
74+
future = asyncio.run_coroutine_threadsafe(processor.connect(eval_set_id=eval_set_id), loop)
7775
future.result()
7876

7977
telemetry.tracer_provider.add_span_processor(processor)

examples/zero-code-examples/langchain/run.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -50,22 +50,17 @@ def main():
5050

5151
os.environ.setdefault(
5252
"OTEL_RESOURCE_ATTRIBUTES",
53-
"agentevals.eval_set_id=langchain_agent_eval,"
54-
"agentevals.session_name=langchain-zero-code",
53+
"agentevals.eval_set_id=langchain_agent_eval,agentevals.session_name=langchain-zero-code",
5554
)
5655

5756
resource = Resource.create()
5857

5958
tracer_provider = TracerProvider(resource=resource)
60-
tracer_provider.add_span_processor(
61-
BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000)
62-
)
59+
tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(), schedule_delay_millis=1000))
6360
trace.set_tracer_provider(tracer_provider)
6461

6562
logger_provider = LoggerProvider(resource=resource)
66-
logger_provider.add_log_record_processor(
67-
BatchLogRecordProcessor(OTLPLogExporter(), schedule_delay_millis=1000)
68-
)
63+
logger_provider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter(), schedule_delay_millis=1000))
6964
set_logger_provider(logger_provider)
7065

7166
OpenAIInstrumentor().instrument()
@@ -101,11 +96,7 @@ def main():
10196
selected_tool = {t.name: t for t in tools}.get(tool_name)
10297
if selected_tool:
10398
tool_result = selected_tool.invoke(tool_args)
104-
messages.append(
105-
ToolMessage(
106-
content=str(tool_result), tool_call_id=tool_call["id"]
107-
)
108-
)
99+
messages.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call["id"]))
109100
else:
110101
print(" Agent: [Max iterations reached]")
111102

0 commit comments

Comments
 (0)