-
Notifications
You must be signed in to change notification settings - Fork 329
Expand file tree
/
Copy pathopensearch_test.py
More file actions
63 lines (54 loc) · 2.11 KB
/
opensearch_test.py
File metadata and controls
63 lines (54 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pytest
from mcp import ClientSession
from conftest import models
from utils import assert_mcp_eval, run_llm_tool_loop
# Module-level marker: applies the `anyio` mark to every test in this file,
# which is what lets the `async def` tests below be collected and awaited.
pytestmark = pytest.mark.anyio
@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_logs(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask for recent log entries and have the answer judged against a rubric.

    The test is parametrized over every entry in ``models`` and carries the
    ``flaky`` mark with ``reruns=2``. It sends a prompt requesting the last
    10 entries of the 'test-logs-2024' index through ``run_llm_tool_loop``
    and forwards the outcome to ``assert_mcp_eval`` together with a rubric
    and the expected tool name ``query_opensearch``.
    """
    prompt = (
        "Can you query the OpenSearch datasource for the last 10 log entries "
        "from the 'test-logs-2024' index? Show me the log messages and their severity levels."
    )
    # Evaluation criteria handed to assert_mcp_eval; kept as a named local so
    # the final call reads as a flat list of arguments.
    rubric = (
        "Does the response contain specific log data that could only come from an OpenSearch datasource? "
        "This could include log messages with levels like 'info', 'error', 'warn', or 'debug', "
        "service names like 'api-gateway' or 'auth-service', or HTTP status codes. "
        "The response should show evidence of real data rather than generic statements."
    )

    loop_outcome = await run_llm_tool_loop(model, mcp_client, mcp_transport, prompt)
    answer, invoked_tools, server = loop_outcome

    assert_mcp_eval(
        prompt,
        answer,
        invoked_tools,
        server,
        rubric,
        expected_tools="query_opensearch",
    )
@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_errors(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask for error-level logs and have the answer judged against a rubric.

    The test is parametrized over every entry in ``models`` and carries the
    ``flaky`` mark with ``reruns=2``. It sends a prompt that names the
    'test-logs-2024' index and the query 'level:error' through
    ``run_llm_tool_loop`` and forwards the outcome to ``assert_mcp_eval``
    together with a rubric and the expected tool name ``query_opensearch``.
    """
    prompt = (
        "Search for error-level logs in the OpenSearch datasource using the 'test-logs-2024' index. "
        "Use the query 'level:error' to find them. What errors occurred?"
    )
    # Evaluation criteria handed to assert_mcp_eval; kept as a named local so
    # the final call reads as a flat list of arguments.
    rubric = (
        "Does the response contain information about error-level log entries from OpenSearch? "
        "It should reference specific error messages such as database timeouts or JSON parsing failures. "
        "The response should show evidence of real error data rather than generic statements."
    )

    loop_outcome = await run_llm_tool_loop(model, mcp_client, mcp_transport, prompt)
    answer, invoked_tools, server = loop_outcome

    assert_mcp_eval(
        prompt,
        answer,
        invoked_tools,
        server,
        rubric,
        expected_tools="query_opensearch",
    )