mcp-grafana/tests/opensearch_test.py at 2375189c3891dd11079636f777976b1127421d70 · grafana/mcp-grafana · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pytest
from mcp import ClientSession

from conftest import models
from utils import assert_mcp_eval, run_llm_tool_loop


# Module-level mark: pytest applies it to every test in this file, so the
# ``async def`` tests below are collected under the anyio marker.
pytestmark = pytest.mark.anyio


@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_logs(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask for recent log entries from OpenSearch and grade the reply.

    The prompt is driven through ``run_llm_tool_loop`` for each
    parametrized ``model``; the resulting content, tool calls and server
    handle are then handed to ``assert_mcp_eval`` together with a rubric
    and the expectation that ``query_opensearch`` was used. The test is
    marked flaky with ``reruns=2``.
    """
    prompt = (
        "Can you query the OpenSearch datasource for the last 10 log entries "
        "from the 'test-logs-2024' index? Show me the log messages and their severity levels."
    )
    # Grading criterion: the answer must show concrete log data, not a
    # generic description.
    rubric = (
        "Does the response contain specific log data that could only come from an OpenSearch datasource? "
        "This could include log messages with levels like 'info', 'error', 'warn', or 'debug', "
        "service names like 'api-gateway' or 'auth-service', or HTTP status codes. "
        "The response should show evidence of real data rather than generic statements."
    )

    answer, invoked_tools, server = await run_llm_tool_loop(
        model,
        mcp_client,
        mcp_transport,
        prompt,
    )

    assert_mcp_eval(
        prompt,
        answer,
        invoked_tools,
        server,
        rubric,
        expected_tools="query_opensearch",
    )


@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_errors(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask for error-level logs from OpenSearch and grade the reply.

    The prompt is driven through ``run_llm_tool_loop`` for each
    parametrized ``model``; the resulting content, tool calls and server
    handle are then handed to ``assert_mcp_eval`` together with a rubric
    and the expectation that ``query_opensearch`` was used. The test is
    marked flaky with ``reruns=2``.
    """
    prompt = (
        "Search for error-level logs in the OpenSearch datasource using the 'test-logs-2024' index. "
        "Use the query 'level:error' to find them. What errors occurred?"
    )
    # Grading criterion: the answer must cite concrete error entries, not
    # a generic description.
    rubric = (
        "Does the response contain information about error-level log entries from OpenSearch? "
        "It should reference specific error messages such as database timeouts or JSON parsing failures. "
        "The response should show evidence of real error data rather than generic statements."
    )

    answer, invoked_tools, server = await run_llm_tool_loop(
        model,
        mcp_client,
        mcp_transport,
        prompt,
    )

    assert_mcp_eval(
        prompt,
        answer,
        invoked_tools,
        server,
        rubric,
        expected_tools="query_opensearch",
    )