Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
- name: Start MCP server in background
if: matrix.transport != 'stdio'
run: |
nohup ./dist/mcp-grafana -t ${{ matrix.transport }} --enabled-tools search,datasource,incident,prometheus,loki,elasticsearch,alerting,dashboard,folder,oncall,asserts,sift,pyroscope,navigation,proxied,annotations,rendering,admin,clickhouse,cloudwatch > mcp.log 2>&1 &
nohup ./dist/mcp-grafana -t ${{ matrix.transport }} --enabled-tools search,datasource,incident,prometheus,loki,elasticsearch,opensearch,alerting,dashboard,folder,oncall,asserts,sift,pyroscope,navigation,proxied,annotations,rendering,admin,clickhouse,cloudwatch > mcp.log 2>&1 &
sleep 2 # Give server time to start and create log file
if [ ! -f mcp.log ]; then
echo "Warning: mcp.log was not created. Server may have failed to start."
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ The dashboard tools now include several strategies to manage context window usag
### Datasources

- **List and fetch datasource information:** View all configured datasources and retrieve detailed information about each.
- _Supported datasource types: Prometheus, Loki, ClickHouse, CloudWatch, Elasticsearch._
- _Supported datasource types: Prometheus, Loki, ClickHouse, CloudWatch, Elasticsearch, OpenSearch._

### Query Examples

Expand Down Expand Up @@ -115,6 +115,12 @@ The dashboard tools now include several strategies to manage context window usag

- **Query Elasticsearch:** Execute search queries against Elasticsearch datasources using either Lucene query syntax or Elasticsearch Query DSL. Supports filtering by time range and retrieving logs, metrics, or any indexed data. Returns documents with their index, ID, source fields, and optional relevance score.

### OpenSearch Querying

> **Note:** OpenSearch tools are **disabled by default**. To enable them, add `opensearch` to your `--enabled-tools` flag.

- **Query OpenSearch:** Execute search queries against OpenSearch datasources using either Lucene query syntax or OpenSearch Query DSL. Supports filtering by time range and retrieving logs, metrics, or any indexed data. Returns documents with their index, ID, source fields, and optional relevance score.

### Incidents

- **Search, create, and update incidents:** Manage incidents in Grafana Incident, including searching, creating, and adding activities to incidents.
Expand Down Expand Up @@ -289,6 +295,7 @@ Scopes define the specific resources that permissions apply to. Each action requ
| `query_cloudwatch` | CloudWatch* | Execute CloudWatch metric queries | `datasources:query` | `datasources:uid:*` |
| `search_logs` | SearchLogs* | Search logs across ClickHouse and Loki | `datasources:query` | `datasources:uid:*` |
| `query_elasticsearch` | Elasticsearch* | Query Elasticsearch using Lucene syntax or Query DSL | `datasources:query` | `datasources:uid:elasticsearch-uid` |
| `query_opensearch` | OpenSearch* | Query OpenSearch using Lucene syntax or Query DSL | `datasources:query` | `datasources:uid:opensearch-uid` |
| `alerting_manage_rules` | Alerting | Manage alert rules (list, get, versions, create, update, delete) | `alert.rules:read` + `alert.rules:write` for mutations | `folders:*` or `folders:uid:alerts-folder` |
| `alerting_manage_routing` | Alerting | Manage notification policies, contact points, and time intervals | `alert.notifications:read` | Global scope |
| `list_oncall_schedules` | OnCall | List schedules from Grafana OnCall | `grafana-oncall-app.schedules:read` | Plugin-specific scopes |
Expand Down Expand Up @@ -345,6 +352,7 @@ The `mcp-grafana` binary supports various command-line flags for configuration:
- `--disable-write`: Disable write tools (create/update operations)
- `--disable-loki`: Disable loki tools
- `--disable-elasticsearch`: Disable elasticsearch tools
- `--disable-opensearch`: Disable opensearch tools
- `--disable-alerting`: Disable alerting tools
- `--disable-dashboard`: Disable dashboard tools
- `--disable-oncall`: Disable oncall tools
Expand Down
5 changes: 4 additions & 1 deletion cmd/mcp-grafana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ type disabledTools struct {
enabledTools string

search, datasource, incident,
prometheus, loki, elasticsearch, alerting,
prometheus, loki, elasticsearch, opensearch, alerting,
dashboard, folder, oncall, asserts, sift, admin,
pyroscope, navigation, proxied, annotations, rendering, cloudwatch, write,
examples, clickhouse, searchlogs,
Expand Down Expand Up @@ -71,6 +71,7 @@ func (dt *disabledTools) addFlags() {
flag.BoolVar(&dt.prometheus, "disable-prometheus", false, "Disable prometheus tools")
flag.BoolVar(&dt.loki, "disable-loki", false, "Disable loki tools")
flag.BoolVar(&dt.elasticsearch, "disable-elasticsearch", false, "Disable elasticsearch tools")
flag.BoolVar(&dt.opensearch, "disable-opensearch", false, "Disable opensearch tools")
flag.BoolVar(&dt.alerting, "disable-alerting", false, "Disable alerting tools")
flag.BoolVar(&dt.dashboard, "disable-dashboard", false, "Disable dashboard tools")
flag.BoolVar(&dt.folder, "disable-folder", false, "Disable folder tools")
Expand Down Expand Up @@ -113,6 +114,7 @@ func (dt *disabledTools) addTools(s *server.MCPServer) {
maybeAddTools(s, tools.AddPrometheusTools, enabledTools, dt.prometheus, "prometheus")
maybeAddTools(s, tools.AddLokiTools, enabledTools, dt.loki, "loki")
maybeAddTools(s, tools.AddElasticsearchTools, enabledTools, dt.elasticsearch, "elasticsearch")
maybeAddTools(s, tools.AddOpenSearchTools, enabledTools, dt.opensearch, "opensearch")
maybeAddTools(s, func(mcp *server.MCPServer) { tools.AddAlertingTools(mcp, enableWriteTools) }, enabledTools, dt.alerting, "alerting")
maybeAddTools(s, func(mcp *server.MCPServer) { tools.AddDashboardTools(mcp, enableWriteTools) }, enabledTools, dt.dashboard, "dashboard")
maybeAddTools(s, func(mcp *server.MCPServer) { tools.AddFolderTools(mcp, enableWriteTools) }, enabledTools, dt.folder, "folder")
Expand Down Expand Up @@ -183,6 +185,7 @@ Available Capabilities:
- Prometheus & Loki: Run PromQL and LogQL queries, retrieve metric/log metadata, and explore label names/values.
- ClickHouse: Query ClickHouse datasources via Grafana with macro and variable substitution support.
- Elasticsearch: Query Elasticsearch datasources using Lucene syntax or Query DSL for logs and metrics.
- OpenSearch: Query OpenSearch datasources using Lucene syntax or Query DSL for logs and metrics.
- Incidents: Search, create, update, and resolve incidents in Grafana Incident.
- Sift Investigations: Start and manage Sift investigations, analyze logs/traces, find error patterns, and detect slow requests.
- Alerting: List and fetch alert rules and notification contact points.
Expand Down
25 changes: 24 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ services:
image: grafana/grafana:12.4.0@sha256:b0ae311af06228bcfd4a620504b653db80f5b91e94dc3dc2a5b7dab202bcde20
environment:
GF_AUTH_ANONYMOUS_ENABLED: "false"
GF_INSTALL_PLUGINS: grafana-clickhouse-datasource
GF_INSTALL_PLUGINS: grafana-clickhouse-datasource,grafana-opensearch-datasource
GF_LOG_LEVEL: debug
GF_SERVER_ROUTER_LOGGING: "true"
GF_RENDERING_SERVER_URL: http://renderer:8081/render
Expand Down Expand Up @@ -105,6 +105,29 @@ services:
- ./testdata/elasticsearch-seed.sh:/seed.sh
entrypoint: ["sh", "/seed.sh"]

opensearch:
image: opensearchproject/opensearch:2.19.1
environment:
- discovery.type=single-node
- DISABLE_SECURITY_PLUGIN=true
- "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
ports:
- "9201:9200"
healthcheck:
test: ["CMD-SHELL", "curl -sf http://localhost:9200/_cluster/health | grep -q '\"status\":\"green\"\\|\"status\":\"yellow\"'"]
interval: 10s
timeout: 5s
retries: 10

opensearch-seed:
image: curlimages/curl:8.12.1
depends_on:
opensearch:
condition: service_healthy
volumes:
- ./testdata/opensearch-seed.sh:/seed.sh
entrypoint: ["sh", "/seed.sh"]

localstack:
image: localstack/localstack:4.13@sha256:46302bcb91a7e8008e6394be8afafdbfa40fb77a54d4046a38be35992042d5de
ports:
Expand Down
86 changes: 86 additions & 0 deletions testdata/opensearch-seed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/bin/bash
# OpenSearch data seeding script for integration tests.
# This script waits for OpenSearch to be ready, then indexes sample log data.
#
# NOTE(review): docker-compose invokes this via `sh /seed.sh`, so the shebang is
# not used at runtime — everything below is kept POSIX-sh compatible.

set -e

OS_URL="${OS_URL:-http://opensearch:9200}"

echo "Waiting for OpenSearch to be ready..."
until curl -sf "${OS_URL}/_cluster/health" > /dev/null 2>&1; do
  sleep 2
done
echo "OpenSearch is ready."

# Create an index template for test logs.
# -S is added alongside -s so that, if the request fails, curl prints an error
# message before `set -e` aborts the script (otherwise it would die silently).
curl -sfS -X PUT "${OS_URL}/_index_template/test-logs-template" \
  -H 'Content-Type: application/json' \
  -d '{
  "index_patterns": ["test-logs-*"],
  "template": {
    "settings": {
      "number_of_shards": 1,
      "number_of_replicas": 0
    },
    "mappings": {
      "properties": {
        "@timestamp": { "type": "date" },
        "message": { "type": "text" },
        "level": { "type": "keyword" },
        "service": { "type": "keyword" },
        "host": { "type": "keyword" },
        "status_code": { "type": "integer" },
        "duration_ms": { "type": "float" },
        "trace_id": { "type": "keyword" }
      }
    }
  }
}'
echo ""
echo "Created index template."

# Get current timestamp in milliseconds for realistic data
# (seconds from `date +%s` with "000" appended — millisecond precision is not needed).
NOW_MS=$(date +%s000)
# Offsets in milliseconds (going back from now, one per document, 1 minute apart)
O1=$((NOW_MS - 60000))
O2=$((NOW_MS - 120000))
O3=$((NOW_MS - 180000))
O4=$((NOW_MS - 240000))
O5=$((NOW_MS - 300000))
O6=$((NOW_MS - 360000))
O7=$((NOW_MS - 420000))
O8=$((NOW_MS - 480000))
O9=$((NOW_MS - 540000))
O10=$((NOW_MS - 600000))

# Bulk index sample log documents.
# --data-binary (not -d) is the canonical way to send NDJSON to _bulk: it
# guarantees newlines are preserved exactly, which the bulk format requires.
# The response is captured because _bulk returns HTTP 200 even when individual
# items fail, so curl -f alone cannot detect per-document indexing errors.
BULK_RESPONSE=$(curl -sfS -X POST "${OS_URL}/test-logs-2024/_bulk" \
  -H 'Content-Type: application/x-ndjson' \
  --data-binary '{"index":{}}
{"@timestamp":'"${O1}"',"message":"GET /api/users 200 OK","level":"info","service":"api-gateway","host":"server1","status_code":200,"duration_ms":12.5,"trace_id":"abc123"}
{"index":{}}
{"@timestamp":'"${O2}"',"message":"POST /api/login 401 Unauthorized","level":"warn","service":"auth-service","host":"server2","status_code":401,"duration_ms":45.2,"trace_id":"def456"}
{"index":{}}
{"@timestamp":'"${O3}"',"message":"Database connection timeout after 30s","level":"error","service":"user-service","host":"server1","status_code":500,"duration_ms":30000.0,"trace_id":"ghi789"}
{"index":{}}
{"@timestamp":'"${O4}"',"message":"GET /api/health 200 OK","level":"info","service":"api-gateway","host":"server1","status_code":200,"duration_ms":1.2,"trace_id":"jkl012"}
{"index":{}}
{"@timestamp":'"${O5}"',"message":"Cache miss for key user:1234","level":"debug","service":"cache-service","host":"server3","status_code":200,"duration_ms":0.5,"trace_id":"mno345"}
{"index":{}}
{"@timestamp":'"${O6}"',"message":"POST /api/orders 201 Created","level":"info","service":"order-service","host":"server2","status_code":201,"duration_ms":89.3,"trace_id":"pqr678"}
{"index":{}}
{"@timestamp":'"${O7}"',"message":"Failed to parse request body: invalid JSON","level":"error","service":"api-gateway","host":"server1","status_code":400,"duration_ms":2.1,"trace_id":"stu901"}
{"index":{}}
{"@timestamp":'"${O8}"',"message":"GET /api/products 200 OK","level":"info","service":"product-service","host":"server3","status_code":200,"duration_ms":23.7,"trace_id":"vwx234"}
{"index":{}}
{"@timestamp":'"${O9}"',"message":"Rate limit exceeded for IP 192.168.1.100","level":"warn","service":"api-gateway","host":"server1","status_code":429,"duration_ms":0.8,"trace_id":"yza567"}
{"index":{}}
{"@timestamp":'"${O10}"',"message":"Scheduled job completed: cleanup_sessions","level":"info","service":"scheduler","host":"server2","status_code":200,"duration_ms":1523.4,"trace_id":"bcd890"}
')
# Fail loudly if any individual document was rejected.
if echo "${BULK_RESPONSE}" | grep -q '"errors":true'; then
  echo "Error: bulk indexing reported item-level failures:" >&2
  echo "${BULK_RESPONSE}" >&2
  exit 1
fi
echo ""
echo "Indexed sample log data."

# Refresh the index to make documents searchable immediately
curl -sfS -X POST "${OS_URL}/test-logs-2024/_refresh"
echo ""
echo "OpenSearch seeding complete."
11 changes: 11 additions & 0 deletions testdata/provisioning/datasources/datasources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,17 @@ datasources:
index: test-logs-*
timeField: "@timestamp"
esVersion: 8.17.0
- name: OpenSearch
uid: opensearch
type: grafana-opensearch-datasource
access: proxy
url: http://opensearch:9200
jsonData:
database: test-logs-*
timeField: "@timestamp"
flavor: opensearch
version: 2.19.1
pplEnabled: false
- name: CloudWatch
id: 8
uid: cloudwatch
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def grafana_headers():
@pytest.fixture
async def mcp_client(mcp_transport, mcp_url, grafana_env, grafana_headers):
if mcp_transport == "stdio":
enabled_tools = "search,datasource,incident,prometheus,loki,elasticsearch,alerting,dashboard,folder,oncall,asserts,sift,pyroscope,navigation,proxied,annotations,rendering,admin,clickhouse,cloudwatch"
enabled_tools = "search,datasource,incident,prometheus,loki,elasticsearch,opensearch,alerting,dashboard,folder,oncall,asserts,sift,pyroscope,navigation,proxied,annotations,rendering,admin,clickhouse,cloudwatch"
params = StdioServerParameters(
command=os.environ.get("MCP_GRAFANA_PATH", "../dist/mcp-grafana"),
args=["--debug", "--log-level", "debug", "--enabled-tools", enabled_tools],
Expand Down
63 changes: 63 additions & 0 deletions tests/opensearch_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pytest
from mcp import ClientSession

from conftest import models
from utils import assert_mcp_eval, run_llm_tool_loop


pytestmark = pytest.mark.anyio


@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_logs(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask the LLM to fetch recent log entries from the OpenSearch datasource
    and verify (via LLM-as-judge) that real indexed data came back through the
    query_opensearch tool rather than a generic, hallucinated answer."""
    prompt = (
        "Can you query the OpenSearch datasource for the last 10 log entries "
        "from the 'test-logs-2024' index? Show me the log messages and their severity levels."
    )
    # Drive the full LLM tool-calling loop against the MCP server.
    loop_result = await run_llm_tool_loop(model, mcp_client, mcp_transport, prompt)
    final_content, tools_called, mcp_server = loop_result

    # Judge criterion: the answer must show evidence of concrete seeded data.
    criterion = (
        "Does the response contain specific log data that could only come from an OpenSearch datasource? "
        "This could include log messages with levels like 'info', 'error', 'warn', or 'debug', "
        "service names like 'api-gateway' or 'auth-service', or HTTP status codes. "
        "The response should show evidence of real data rather than generic statements."
    )
    assert_mcp_eval(
        prompt,
        final_content,
        tools_called,
        mcp_server,
        criterion,
        expected_tools="query_opensearch",
    )


@pytest.mark.parametrize("model", models)
@pytest.mark.flaky(reruns=2)
async def test_opensearch_query_errors(
    model: str,
    mcp_client: ClientSession,
    mcp_transport: str,
):
    """Ask the LLM to run a Lucene 'level:error' query against OpenSearch and
    verify (via LLM-as-judge) that the seeded error documents were surfaced
    through the query_opensearch tool."""
    prompt = (
        "Search for error-level logs in the OpenSearch datasource using the 'test-logs-2024' index. "
        "Use the query 'level:error' to find them. What errors occurred?"
    )
    # Drive the full LLM tool-calling loop against the MCP server.
    loop_result = await run_llm_tool_loop(model, mcp_client, mcp_transport, prompt)
    final_content, tools_called, mcp_server = loop_result

    # Judge criterion: the answer must reference the specific seeded errors.
    criterion = (
        "Does the response contain information about error-level log entries from OpenSearch? "
        "It should reference specific error messages such as database timeouts or JSON parsing failures. "
        "The response should show evidence of real error data rather than generic statements."
    )
    assert_mcp_eval(
        prompt,
        final_content,
        tools_called,
        mcp_server,
        criterion,
        expected_tools="query_opensearch",
    )
4 changes: 2 additions & 2 deletions tools/datasources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ func TestDatasourcesTools(t *testing.T) {
result, err := listDatasources(ctx, ListDatasourcesParams{})
require.NoError(t, err)

// Ten datasources are provisioned in the test environment (Prometheus, Prometheus Demo, Loki, Pyroscope, Tempo, Tempo Secondary, Alertmanager, ClickHouse and CloudWatch).
assert.Len(t, result.Datasources, 10)
// Eleven datasources are provisioned in the test environment (Prometheus, Prometheus Demo, Loki, Pyroscope, Tempo, Tempo Secondary, Alertmanager, ClickHouse, Elasticsearch, OpenSearch, and CloudWatch).
assert.Len(t, result.Datasources, 11)
})

t.Run("list datasources for type", func(t *testing.T) {
Expand Down
Loading
Loading