Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,65 @@ jobs:
go-version: "1.26"
cache: true

- name: Wait for Grafana server and Prometheus server to start and scrape
run: sleep 30
- name: Wait for Grafana API to be ready
  run: |
    set -euo pipefail
    # Poll Grafana's health endpoint: up to 60 attempts, 2 seconds apart.
    # --max-time bounds each probe so that a connection which is accepted but
    # never answered cannot stall the loop beyond its intended budget.
    ready=false
    for i in $(seq 1 60); do
      if curl -fsS --max-time 5 http://localhost:3000/api/health >/dev/null 2>&1; then
        echo "Grafana is up"
        ready=true
        break
      fi
      echo "Waiting for Grafana... attempt $i"
      sleep 2
    done
    # Fail the step explicitly when the loop ran out of attempts.
    if [ "$ready" != "true" ]; then
      echo "Timed out waiting for Grafana to become ready"
      exit 1
    fi

- name: Wait for datasources to be provisioned
  run: |
    set -euo pipefail
    # Poll the datasource list until at least 10 entries are reported:
    # up to 60 attempts, 2 seconds apart. --max-time bounds each request so a
    # hung connection cannot stall the loop beyond its intended budget.
    ready=false
    for i in $(seq 1 60); do
      # Any curl or jq failure is treated as "0 datasources so far".
      count="$(curl -fsS --max-time 5 -u admin:admin http://localhost:3000/api/datasources 2>/dev/null | jq 'length' 2>/dev/null || echo 0)"
      echo "datasource count=$count"
      if [ "${count}" -ge 10 ]; then
        echo "Datasources provisioned"
        ready=true
        break
      fi
      sleep 2
    done
    if [ "$ready" != "true" ]; then
      echo "Timed out waiting for datasources to be provisioned"
      # Best-effort dump of the current datasource list to aid debugging.
      curl -fsS --max-time 5 -u admin:admin http://localhost:3000/api/datasources || true
      exit 1
    fi

- name: Wait for Graphite metrics to be queryable
  run: |
    set -euo pipefail
    # Graphite port 80 is not exposed on the host; go through Grafana's datasource proxy.
    # Poll up to 60 times, 2 seconds apart. --max-time bounds each request so a
    # hung connection cannot stall the loop beyond its intended budget.
    find_url='http://localhost:3000/api/datasources/proxy/uid/graphite/metrics/find?query=test.*'
    ready=false
    for i in $(seq 1 60); do
      # Any curl or jq failure is treated as "0 nodes so far".
      count="$(curl -fsS --max-time 10 -u admin:admin \
        "$find_url" \
        2>/dev/null | jq 'length' 2>/dev/null || echo 0)"
      echo "Graphite test.* node count=$count"
      if [ "${count}" -ge 1 ]; then
        echo "Graphite metrics are seeded and queryable"
        ready=true
        break
      fi
      sleep 2
    done
    if [ "$ready" != "true" ]; then
      echo "Timed out waiting for Graphite metrics to be queryable"
      # Best-effort dump of the last response (error bodies included) to aid debugging.
      curl -sS --max-time 10 -u admin:admin "$find_url" || true
      exit 1
    fi

- name: Run integration tests
run: make test-integration
Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ The dashboard tools now include several strategies to manage context window usag

- **Search logs:** High-level log search across ClickHouse (OTel format) and Loki datasources.

### Graphite Querying

> **Note:** Graphite tools are **disabled by default**. To enable them, add `graphite` to your `--enabled-tools` flag.

- **Query Graphite:** Execute Graphite render API queries against a Graphite datasource.
- **List Graphite metrics:** Browse and discover Graphite metric paths.
- **List Graphite tags:** List available Graphite tags and tag values.
- **Query Graphite density:** Query Graphite metric density for a given pattern.

### Elasticsearch Querying

> **Note:** Elasticsearch tools are **disabled by default**. To enable them, add `elasticsearch` to your `--enabled-tools` flag.
Expand Down Expand Up @@ -339,7 +348,7 @@ The `mcp-grafana` binary supports various command-line flags for configuration:
- `--session-idle-timeout-minutes`: Session idle timeout in minutes. Sessions with no activity for this duration are automatically reaped - default: `30`. Set to `0` to disable session reaping. Only relevant for SSE and streamable-http transports.

**Tool Configuration:**
- `--enabled-tools`: Comma-separated list of enabled categories - default: all categories except `admin`, to enable admin tools, add `admin` to the list (e.g., `"search,datasource,...,admin"`)
- `--enabled-tools`: Comma-separated list of enabled categories - default: all categories except `admin`, `clickhouse`, `cloudwatch`, `elasticsearch`, `examples`, `graphite`, `runpanelquery`, and `searchlogs`. To enable disabled categories, add them to the list (e.g., `"search,datasource,...,graphite"`)
- `--max-loki-log-limit`: Maximum number of log lines returned per `query_loki_logs` call - default: `100`. Note: Set this at least 1 below Loki's server-side `max_entries_limit_per_query` to allow truncation detection (the tool requests `limit+1` internally to detect if more data exists).
- `--disable-search`: Disable search tools
- `--disable-datasource`: Disable datasource tools
Expand All @@ -362,6 +371,7 @@ The `mcp-grafana` binary supports various command-line flags for configuration:
- `--disable-clickhouse`: Disable ClickHouse tools
- `--disable-searchlogs`: Disable search_logs tool
- `--disable-runpanelquery`: Disable run panel query tools
- `--disable-graphite`: Disable Graphite tools

### Read-Only Mode

Expand Down
6 changes: 4 additions & 2 deletions cmd/mcp-grafana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type disabledTools struct {
prometheus, loki, elasticsearch, alerting,
dashboard, folder, oncall, asserts, sift, admin,
pyroscope, navigation, proxied, annotations, rendering, cloudwatch, write,
examples, clickhouse, searchlogs,
examples, clickhouse, searchlogs, graphite,
runpanelquery bool
}

Expand Down Expand Up @@ -89,6 +89,7 @@ func (dt *disabledTools) addFlags() {
flag.BoolVar(&dt.clickhouse, "disable-clickhouse", false, "Disable ClickHouse tools")
flag.BoolVar(&dt.searchlogs, "disable-searchlogs", false, "Disable search logs tools")
flag.BoolVar(&dt.runpanelquery, "disable-runpanelquery", false, "Disable run panel query tools")
flag.BoolVar(&dt.graphite, "disable-graphite", false, "Disable Graphite tools")
}

func (gc *grafanaConfig) addFlags() {
Expand Down Expand Up @@ -129,6 +130,7 @@ func (dt *disabledTools) addTools(s *server.MCPServer) {
maybeAddTools(s, tools.AddClickHouseTools, enabledTools, dt.clickhouse, "clickhouse")
maybeAddTools(s, tools.AddSearchLogsTools, enabledTools, dt.searchlogs, "searchlogs")
maybeAddTools(s, tools.AddRunPanelQueryTools, enabledTools, dt.runpanelquery, "runpanelquery")
maybeAddTools(s, tools.AddGraphiteTools, enabledTools, dt.graphite, "graphite")
}

func newServer(transport string, dt disabledTools, obs *observability.Observability, sessionIdleTimeoutMinutes int) (*server.MCPServer, *mcpgrafana.ToolManager, *mcpgrafana.SessionManager) {
Expand Down Expand Up @@ -473,4 +475,4 @@ func parseLevel(level string) slog.Level {
return slog.LevelInfo
}
return l
}
}
19 changes: 19 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,22 @@ services:
interval: 10s
timeout: 5s
retries: 5

# Graphite service used by the integration test environment (image pinned by digest).
graphite:
image: graphiteapp/graphite-statsd:latest@sha256:2d61228771119ddaee2f62d65739d3b5e903de36666e899703e47be1def571fe
healthcheck:
# Healthy once a metrics/find request to 127.0.0.1 inside the container succeeds.
test: ["CMD-SHELL", "wget -q -O /dev/null 'http://127.0.0.1/metrics/find?query=*'"]
interval: 10s
timeout: 5s
retries: 15

# Runs the mounted seed script against the graphite service once it is healthy.
graphite-seed:
image: alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d
depends_on:
graphite:
# Do not start seeding until the graphite healthcheck passes.
condition: service_healthy
volumes:
- ./testdata/graphite-seed.sh:/seed.sh
entrypoint: ["sh", "/seed.sh"]
environment:
# Host name the seed script connects to (read as GRAPHITE_HOST by the script).
GRAPHITE_HOST: graphite
38 changes: 38 additions & 0 deletions testdata/graphite-seed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/sh
# Graphite data seeding script for integration tests.
# Sends test metrics to Carbon via the plaintext protocol, one
# "<metric path> <value> <timestamp>" line per connection.
#
# Environment:
#   GRAPHITE_HOST           Carbon host (default: graphite)
#   GRAPHITE_CARBON_PORT    Carbon plaintext port (default: 2003)
#   GRAPHITE_WAIT_ATTEMPTS  Maximum number of readiness probes, 2 seconds
#                           apart, before giving up (default: 60)

set -e

GRAPHITE_HOST="${GRAPHITE_HOST:-graphite}"
GRAPHITE_CARBON_PORT="${GRAPHITE_CARBON_PORT:-2003}"
GRAPHITE_WAIT_ATTEMPTS="${GRAPHITE_WAIT_ATTEMPTS:-60}"

echo "Waiting for Graphite Carbon to be ready on ${GRAPHITE_HOST}:${GRAPHITE_CARBON_PORT}..."
# Bounded wait: without a limit, an unreachable Carbon port would keep this
# script (and anything waiting on it) running forever.
attempt=0
until nc -z "$GRAPHITE_HOST" "$GRAPHITE_CARBON_PORT" 2>/dev/null; do
  attempt=$((attempt + 1))
  if [ "$attempt" -ge "$GRAPHITE_WAIT_ATTEMPTS" ]; then
    echo "Timed out waiting for Graphite Carbon on ${GRAPHITE_HOST}:${GRAPHITE_CARBON_PORT}" >&2
    exit 1
  fi
  sleep 2
done
echo "Graphite Carbon is ready."

# All metrics share one timestamp so they land in the same time slot.
NOW=$(date +%s)

# send_metric PATH VALUE TIMESTAMP
# Writes a single plaintext-protocol line to Carbon. Because of `set -e`,
# a failed send aborts the script with a non-zero exit status.
send_metric() {
  printf "%s %s %s\n" "$1" "$2" "$3" | nc -w 3 "$GRAPHITE_HOST" "$GRAPHITE_CARBON_PORT"
}

# Hierarchical metrics for listGraphiteMetrics and queryGraphite tests.
send_metric "test.servers.web01.cpu.load5" "1.5" "$NOW"
send_metric "test.servers.web01.cpu.load15" "1.2" "$NOW"
send_metric "test.servers.web02.cpu.load5" "2.3" "$NOW"
send_metric "test.servers.web02.cpu.load15" "2.1" "$NOW"
send_metric "test.servers.db01.cpu.load5" "0.8" "$NOW"

# Tagged metrics for listGraphiteTags tests.
send_metric "test.tagged.cpu;server=web01;env=prod" "1.5" "$NOW"
send_metric "test.tagged.cpu;server=web02;env=prod" "2.3" "$NOW"

echo "Graphite metrics seeded successfully."

# Give Carbon a moment to process the received metrics into its cache
# so they are available via the render API before the tests run.
sleep 5
echo "Done."
7 changes: 7 additions & 0 deletions testdata/provisioning/datasources/datasources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,10 @@ datasources:
accessKey: test
secretKey: test
isDefault: false
# Graphite datasource for the test environment, accessed in proxy mode.
- name: Graphite
id: 9
# This uid is the one addressed by datasource-proxy URLs such as
# /api/datasources/proxy/uid/graphite/... and checked by the datasource tests.
uid: graphite
type: graphite
access: proxy
url: http://graphite:80
isDefault: false
12 changes: 10 additions & 2 deletions tools/datasources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,16 @@ func TestDatasourcesTools(t *testing.T) {
result, err := listDatasources(ctx, ListDatasourcesParams{})
require.NoError(t, err)

// Ten datasources are provisioned in the test environment (Prometheus, Prometheus Demo, Loki, Pyroscope, Tempo, Tempo Secondary, Alertmanager, ClickHouse and CloudWatch).
assert.Len(t, result.Datasources, 10)
// Verify the core datasources provisioned in the test environment are present.
uids := make(map[string]bool, len(result.Datasources))
for _, ds := range result.Datasources {
uids[ds.UID] = true
}
assert.True(t, uids["prometheus"], "prometheus datasource should be provisioned")
assert.True(t, uids["loki"], "loki datasource should be provisioned")
assert.True(t, uids["graphite"], "graphite datasource should be provisioned")
assert.True(t, uids["tempo"], "tempo datasource should be provisioned")
assert.True(t, uids["elasticsearch"], "elasticsearch datasource should be provisioned")
})

t.Run("list datasources for type", func(t *testing.T) {
Expand Down
7 changes: 5 additions & 2 deletions tools/fallback_transport.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,11 @@ func (t *datasourceFallbackTransport) RoundTrip(req *http.Request) (*http.Respon
return nil, retryErr
}

// If the fallback succeeded, remember it for future requests.
if retryResp.StatusCode != http.StatusForbidden && retryResp.StatusCode != http.StatusInternalServerError {
// Only cache the fallback path when the fallback actually returned a
// successful (2xx) response. Any other status (3xx, 4xx or 5xx) is not
// evidence that the fallback path works for this datasource; caching it
// would silently break all subsequent calls that would otherwise succeed
// via the primary path.
if retryResp.StatusCode >= 200 && retryResp.StatusCode < 300 {
fallbackEndpoints.Store(t.primaryBase, true)
}

Expand Down
Loading