diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 3869fab5..7ae6a36c 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -34,8 +34,65 @@ jobs: go-version: "1.26" cache: true - - name: Wait for Grafana server and Prometheus server to start and scrape - run: sleep 30 + - name: Wait for Grafana API to be ready + run: | + set -euo pipefail + ready=false + for i in $(seq 1 60); do + if curl -fsS http://localhost:3000/api/health >/dev/null 2>&1; then + echo "Grafana is up" + ready=true + break + fi + echo "Waiting for Grafana... attempt $i" + sleep 2 + done + if [ "$ready" != "true" ]; then + echo "Timed out waiting for Grafana to become ready" + exit 1 + fi + + - name: Wait for datasources to be provisioned + run: | + set -euo pipefail + ready=false + for i in $(seq 1 60); do + count="$(curl -fsS -u admin:admin http://localhost:3000/api/datasources 2>/dev/null | jq 'length' 2>/dev/null || echo 0)" + echo "datasource count=$count" + if [ "${count}" -ge 10 ]; then + echo "Datasources provisioned" + ready=true + break + fi + sleep 2 + done + if [ "$ready" != "true" ]; then + echo "Timed out waiting for datasources to be provisioned" + curl -fsS -u admin:admin http://localhost:3000/api/datasources || true + exit 1 + fi + + - name: Wait for Graphite metrics to be queryable + run: | + set -euo pipefail + # Graphite port 80 is not exposed on the host; go through Grafana's datasource proxy. 
+ ready=false + for i in $(seq 1 60); do + count="$(curl -fsS -u admin:admin \ + 'http://localhost:3000/api/datasources/proxy/uid/graphite/metrics/find?query=test.*' \ + 2>/dev/null | jq 'length' 2>/dev/null || echo 0)" + echo "Graphite test.* node count=$count" + if [ "${count}" -ge 1 ]; then + echo "Graphite metrics are seeded and queryable" + ready=true + break + fi + sleep 2 + done + if [ "$ready" != "true" ]; then + echo "Timed out waiting for Graphite metrics to be queryable" + exit 1 + fi - name: Run integration tests run: make test-integration diff --git a/README.md b/README.md index 8ef2b371..32919058 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,15 @@ The dashboard tools now include several strategies to manage context window usag - **Search logs:** High-level log search across ClickHouse (OTel format) and Loki datasources. +### Graphite Querying + +> **Note:** Graphite tools are **disabled by default**. To enable them, add `graphite` to your `--enabled-tools` flag. + +- **Query Graphite:** Execute Graphite render API queries against a Graphite datasource. +- **List Graphite metrics:** Browse and discover Graphite metric paths. +- **List Graphite tags:** List available Graphite tags and tag values. +- **Query Graphite density:** Query Graphite metric density for a given pattern. + ### Elasticsearch Querying > **Note:** Elasticsearch tools are **disabled by default**. To enable them, add `elasticsearch` to your `--enabled-tools` flag. @@ -339,7 +348,7 @@ The `mcp-grafana` binary supports various command-line flags for configuration: - `--session-idle-timeout-minutes`: Session idle timeout in minutes. Sessions with no activity for this duration are automatically reaped - default: `30`. Set to `0` to disable session reaping. Only relevant for SSE and streamable-http transports. 
**Tool Configuration:** -- `--enabled-tools`: Comma-separated list of enabled categories - default: all categories except `admin`, to enable admin tools, add `admin` to the list (e.g., `"search,datasource,...,admin"`) +- `--enabled-tools`: Comma-separated list of enabled categories - default: all categories except `admin`, `clickhouse`, `cloudwatch`, `elasticsearch`, `examples`, `graphite`, `runpanelquery`, and `searchlogs`. To enable disabled categories, add them to the list (e.g., `"search,datasource,...,graphite"`) - `--max-loki-log-limit`: Maximum number of log lines returned per `query_loki_logs` call - default: `100`. Note: Set this at least 1 below Loki's server-side `max_entries_limit_per_query` to allow truncation detection (the tool requests `limit+1` internally to detect if more data exists). - `--disable-search`: Disable search tools - `--disable-datasource`: Disable datasource tools @@ -362,6 +371,7 @@ The `mcp-grafana` binary supports various command-line flags for configuration: - `--disable-clickhouse`: Disable ClickHouse tools - `--disable-searchlogs`: Disable search_logs tool - `--disable-runpanelquery`: Disable run panel query tools +- `--disable-graphite`: Disable Graphite tools ### Read-Only Mode diff --git a/cmd/mcp-grafana/main.go b/cmd/mcp-grafana/main.go index 64c92bcf..6c895b2a 100644 --- a/cmd/mcp-grafana/main.go +++ b/cmd/mcp-grafana/main.go @@ -44,7 +44,7 @@ type disabledTools struct { prometheus, loki, elasticsearch, alerting, dashboard, folder, oncall, asserts, sift, admin, pyroscope, navigation, proxied, annotations, rendering, cloudwatch, write, - examples, clickhouse, searchlogs, + examples, clickhouse, searchlogs, graphite, runpanelquery bool } @@ -89,6 +89,7 @@ func (dt *disabledTools) addFlags() { flag.BoolVar(&dt.clickhouse, "disable-clickhouse", false, "Disable ClickHouse tools") flag.BoolVar(&dt.searchlogs, "disable-searchlogs", false, "Disable search logs tools") flag.BoolVar(&dt.runpanelquery, "disable-runpanelquery", 
false, "Disable run panel query tools") + flag.BoolVar(&dt.graphite, "disable-graphite", false, "Disable Graphite tools") } func (gc *grafanaConfig) addFlags() { @@ -129,6 +130,7 @@ func (dt *disabledTools) addTools(s *server.MCPServer) { maybeAddTools(s, tools.AddClickHouseTools, enabledTools, dt.clickhouse, "clickhouse") maybeAddTools(s, tools.AddSearchLogsTools, enabledTools, dt.searchlogs, "searchlogs") maybeAddTools(s, tools.AddRunPanelQueryTools, enabledTools, dt.runpanelquery, "runpanelquery") + maybeAddTools(s, tools.AddGraphiteTools, enabledTools, dt.graphite, "graphite") } func newServer(transport string, dt disabledTools, obs *observability.Observability, sessionIdleTimeoutMinutes int) (*server.MCPServer, *mcpgrafana.ToolManager, *mcpgrafana.SessionManager) { @@ -473,4 +475,4 @@ func parseLevel(level string) slog.Level { return slog.LevelInfo } return l -} +} \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 1fc1666d..4f4cbf4e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -134,3 +134,22 @@ services: interval: 10s timeout: 5s retries: 5 + + graphite: + image: graphiteapp/graphite-statsd:latest@sha256:2d61228771119ddaee2f62d65739d3b5e903de36666e899703e47be1def571fe + healthcheck: + test: ["CMD-SHELL", "wget -q -O /dev/null 'http://127.0.0.1/metrics/find?query=*'"] + interval: 10s + timeout: 5s + retries: 15 + + graphite-seed: + image: alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d + depends_on: + graphite: + condition: service_healthy + volumes: + - ./testdata/graphite-seed.sh:/seed.sh + entrypoint: ["sh", "/seed.sh"] + environment: + GRAPHITE_HOST: graphite diff --git a/testdata/graphite-seed.sh b/testdata/graphite-seed.sh new file mode 100644 index 00000000..a6c95cac --- /dev/null +++ b/testdata/graphite-seed.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# Graphite data seeding script for integration tests. +# Sends test metrics to Carbon via the plaintext protocol. 
+ +set -e + +GRAPHITE_HOST="${GRAPHITE_HOST:-graphite}" +GRAPHITE_CARBON_PORT="${GRAPHITE_CARBON_PORT:-2003}" + +echo "Waiting for Graphite Carbon to be ready on ${GRAPHITE_HOST}:${GRAPHITE_CARBON_PORT}..." +until nc -z "$GRAPHITE_HOST" "$GRAPHITE_CARBON_PORT" 2>/dev/null; do + sleep 2 +done +echo "Graphite Carbon is ready." + +NOW=$(date +%s) + +send_metric() { + printf "%s %s %s\n" "$1" "$2" "$3" | nc -w 3 "$GRAPHITE_HOST" "$GRAPHITE_CARBON_PORT" +} + +# Hierarchical metrics for listGraphiteMetrics and queryGraphite tests. +send_metric "test.servers.web01.cpu.load5" "1.5" "$NOW" +send_metric "test.servers.web01.cpu.load15" "1.2" "$NOW" +send_metric "test.servers.web02.cpu.load5" "2.3" "$NOW" +send_metric "test.servers.web02.cpu.load15" "2.1" "$NOW" +send_metric "test.servers.db01.cpu.load5" "0.8" "$NOW" + +# Tagged metrics for listGraphiteTags tests. +send_metric "test.tagged.cpu;server=web01;env=prod" "1.5" "$NOW" +send_metric "test.tagged.cpu;server=web02;env=prod" "2.3" "$NOW" + +echo "Graphite metrics seeded successfully." + +# Give Carbon a moment to process the received metrics into its cache +# so they are available via the render API before the tests run. +sleep 5 +echo "Done." 
diff --git a/testdata/provisioning/datasources/datasources.yaml b/testdata/provisioning/datasources/datasources.yaml index fd714408..aec3d192 100644 --- a/testdata/provisioning/datasources/datasources.yaml +++ b/testdata/provisioning/datasources/datasources.yaml @@ -87,3 +87,10 @@ datasources: accessKey: test secretKey: test isDefault: false + - name: Graphite + id: 9 + uid: graphite + type: graphite + access: proxy + url: http://graphite:80 + isDefault: false diff --git a/tools/datasources_test.go b/tools/datasources_test.go index 8f1397c3..befcebe6 100644 --- a/tools/datasources_test.go +++ b/tools/datasources_test.go @@ -18,8 +18,16 @@ func TestDatasourcesTools(t *testing.T) { result, err := listDatasources(ctx, ListDatasourcesParams{}) require.NoError(t, err) - // Ten datasources are provisioned in the test environment (Prometheus, Prometheus Demo, Loki, Pyroscope, Tempo, Tempo Secondary, Alertmanager, ClickHouse and CloudWatch). - assert.Len(t, result.Datasources, 10) + // Verify the core datasources provisioned in the test environment are present. 
+ uids := make(map[string]bool, len(result.Datasources)) + for _, ds := range result.Datasources { + uids[ds.UID] = true + } + assert.True(t, uids["prometheus"], "prometheus datasource should be provisioned") + assert.True(t, uids["loki"], "loki datasource should be provisioned") + assert.True(t, uids["graphite"], "graphite datasource should be provisioned") + assert.True(t, uids["tempo"], "tempo datasource should be provisioned") + assert.True(t, uids["elasticsearch"], "elasticsearch datasource should be provisioned") }) t.Run("list datasources for type", func(t *testing.T) { diff --git a/tools/fallback_transport.go b/tools/fallback_transport.go index 598848d8..88fa3d9d 100644 --- a/tools/fallback_transport.go +++ b/tools/fallback_transport.go @@ -75,8 +75,11 @@ func (t *datasourceFallbackTransport) RoundTrip(req *http.Request) (*http.Respon return nil, retryErr } - // If the fallback succeeded, remember it for future requests. - if retryResp.StatusCode != http.StatusForbidden && retryResp.StatusCode != http.StatusInternalServerError { + // Only cache the fallback path when the fallback actually returned a + // successful (2xx) response. A 4xx from the fallback means neither path + // is working for this particular request; caching it would silently break + // all subsequent calls that would otherwise succeed via the primary path. 
+ if retryResp.StatusCode >= 200 && retryResp.StatusCode < 300 { fallbackEndpoints.Store(t.primaryBase, true) } diff --git a/tools/graphite.go b/tools/graphite.go new file mode 100644 index 00000000..dd839681 --- /dev/null +++ b/tools/graphite.go @@ -0,0 +1,515 @@ +package tools + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strconv" + "strings" + "time" + + mcpgrafana "github.com/grafana/mcp-grafana" + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" +) + +const ( + // GraphiteDatasourceType is the type identifier for Graphite datasources + GraphiteDatasourceType = "graphite" + + graphiteResponseLimitBytes = 1024 * 1024 * 10 // 10MB +) + +// GraphiteClient handles queries to a Graphite datasource via Grafana proxy +type GraphiteClient struct { + httpClient *http.Client + baseURL string +} + +func newGraphiteClient(ctx context.Context, uid string) (*GraphiteClient, error) { + ds, err := getDatasourceByUID(ctx, GetDatasourceByUIDParams{UID: uid}) + if err != nil { + return nil, err + } + if ds.Type != GraphiteDatasourceType { + return nil, fmt.Errorf("datasource %s is of type %s, not %s", uid, ds.Type, GraphiteDatasourceType) + } + + cfg := mcpgrafana.GrafanaConfigFromContext(ctx) + grafanaURL := strings.TrimRight(cfg.URL, "/") + resourcesBase, proxyBase := datasourceProxyPaths(uid) + baseURL := grafanaURL + proxyBase + + transport, err := mcpgrafana.BuildTransport(&cfg, nil) + if err != nil { + return nil, fmt.Errorf("failed to create custom transport: %w", err) + } + transport = NewAuthRoundTripper(transport, cfg.AccessToken, cfg.IDToken, cfg.APIKey, cfg.BasicAuth) + transport = mcpgrafana.NewOrgIDRoundTripper(transport, cfg.OrgID) + + // Wrap with fallback transport: try /proxy first, fall back to /resources + // on 403/500 for compatibility with different managed Grafana deployments. 
+ var rt http.RoundTripper = mcpgrafana.NewUserAgentTransport(transport) + rt = newDatasourceFallbackTransport(rt, proxyBase, resourcesBase) + + client := &http.Client{Transport: rt} + return &GraphiteClient{httpClient: client, baseURL: baseURL}, nil +} + +// doGet performs a GET request to the Graphite API via the Grafana proxy +func (c *GraphiteClient) doGet(ctx context.Context, path string, params url.Values) ([]byte, error) { + fullURL := strings.TrimRight(c.baseURL, "/") + path + if len(params) > 0 { + fullURL += "?" + params.Encode() + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fullURL, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("executing request: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return nil, fmt.Errorf("graphite API returned status %d: %s", resp.StatusCode, string(body)) + } + + data, err := io.ReadAll(io.LimitReader(resp.Body, graphiteResponseLimitBytes)) + if err != nil { + return nil, fmt.Errorf("reading response body: %w", err) + } + return data, nil +} + +// GraphiteDatapoint is a single metric sample. Value is nil when Graphite +// reports no data for that timestamp (null in the JSON response). +type GraphiteDatapoint struct { + Value *float64 `json:"value"` + Timestamp int64 `json:"timestamp"` +} + +// GraphiteSeries is a metric series as returned by the Graphite render API. +type GraphiteSeries struct { + Target string `json:"target"` + Tags map[string]string `json:"tags,omitempty"` + Datapoints []GraphiteDatapoint `json:"datapoints"` +} + +// graphiteRawSeries is the wire format for the Graphite render API response. +// Each datapoint is [value_or_null, unix_timestamp]. 
+type graphiteRawSeries struct { + Target string `json:"target"` + Tags map[string]string `json:"tags,omitempty"` + Datapoints [][]json.RawMessage `json:"datapoints"` +} + +// parseGraphiteDatapoints converts the raw render API datapoints to typed values. +func parseGraphiteDatapoints(raw [][]json.RawMessage) []GraphiteDatapoint { + pts := make([]GraphiteDatapoint, 0, len(raw)) + for _, pair := range raw { + if len(pair) < 2 { + continue + } + var ts int64 + if err := json.Unmarshal(pair[1], &ts); err != nil { + continue + } + var val *float64 + if string(pair[0]) != "null" { + var f float64 + if err := json.Unmarshal(pair[0], &f); err == nil { + val = &f + } + } + pts = append(pts, GraphiteDatapoint{Value: val, Timestamp: ts}) + } + return pts +} + +// parseGraphiteTime converts a time string to a value Graphite's render API +// accepts for its `from`/`until` parameters. +// +// - Empty string → returned as-is (caller should supply a default). +// - Graphite relative formats ("-1h", "-24h", "now", …) → passed through unchanged. +// - RFC 3339 strings → converted to a Unix timestamp (integer seconds). +func parseGraphiteTime(s string) string { + if s == "" || s == "now" || strings.HasPrefix(s, "-") { + return s + } + t, err := time.Parse(time.RFC3339, s) + if err != nil { + // Unknown format — pass through and let Graphite decide. + return s + } + return strconv.FormatInt(t.Unix(), 10) +} + +// QueryGraphiteParams defines the parameters for querying a Graphite datasource. +type QueryGraphiteParams struct { + DatasourceUID string `json:"datasourceUid" jsonschema:"required,description=The UID of the Graphite datasource to query"` + Target string `json:"target" jsonschema:"required,description=The Graphite target expression to evaluate (e.g. 'servers.web*.cpu.load5'\\, 'sumSeries(app.*.requests)'\\, 'seriesByTag(\\'name=cpu.load\\')')"` + From string `json:"from,omitempty" jsonschema:"description=Start of the time range. Accepts RFC3339 (e.g. 
'2024-01-01T00:00:00Z') or Graphite relative times (e.g. '-1h'\\, '-24h'). Defaults to '-1h'."` + Until string `json:"until,omitempty" jsonschema:"description=End of the time range. Accepts RFC3339 or Graphite relative times (e.g. 'now'). Defaults to 'now'."` + MaxDataPoints int `json:"maxDataPoints,omitempty" jsonschema:"description=Optional maximum number of data points per series. Graphite consolidates data when the requested range exceeds this value."` +} + +// QueryGraphiteResult wraps a Graphite render query result with optional hints. +type QueryGraphiteResult struct { + Series []*GraphiteSeries `json:"series"` + Hints *EmptyResultHints `json:"hints,omitempty"` +} + +func queryGraphite(ctx context.Context, args QueryGraphiteParams) (*QueryGraphiteResult, error) { + client, err := newGraphiteClient(ctx, args.DatasourceUID) + if err != nil { + return nil, fmt.Errorf("creating graphite client: %w", err) + } + + from := args.From + if from == "" { + from = "-1h" + } + until := args.Until + if until == "" { + until = "now" + } + + params := url.Values{} + params.Set("target", args.Target) + params.Set("from", parseGraphiteTime(from)) + params.Set("until", parseGraphiteTime(until)) + params.Set("format", "json") + if args.MaxDataPoints > 0 { + params.Set("maxDataPoints", strconv.Itoa(args.MaxDataPoints)) + } + + data, err := client.doGet(ctx, "/render", params) + if err != nil { + return nil, fmt.Errorf("querying graphite render API: %w", err) + } + + var rawSeries []graphiteRawSeries + if err := json.Unmarshal(data, &rawSeries); err != nil { + return nil, fmt.Errorf("parsing graphite render response: %w", err) + } + + series := make([]*GraphiteSeries, 0, len(rawSeries)) + for _, rs := range rawSeries { + series = append(series, &GraphiteSeries{ + Target: rs.Target, + Tags: rs.Tags, + Datapoints: parseGraphiteDatapoints(rs.Datapoints), + }) + } + + result := &QueryGraphiteResult{Series: series} + if len(series) == 0 { + var startTime, endTime time.Time + if t, err 
:= time.Parse(time.RFC3339, args.From); err == nil { + startTime = t + } + if t, err := time.Parse(time.RFC3339, args.Until); err == nil { + endTime = t + } + result.Hints = GenerateEmptyResultHints(HintContext{ + DatasourceType: GraphiteDatasourceType, + Query: args.Target, + StartTime: startTime, + EndTime: endTime, + }) + } + return result, nil +} + +// QueryGraphite is the MCP tool for querying a Graphite datasource. +var QueryGraphite = mcpgrafana.MustTool( + "query_graphite", + "WORKFLOW: list_graphite_metrics -> query_graphite.\n\nExecutes a Graphite render API query against a Graphite datasource and returns matching metric series with their datapoints. Supports the full Graphite target expression language including wildcard patterns (e.g. 'servers.web*.cpu.load5'), aggregation functions (e.g. 'sumSeries(app.*.requests)'), and tag-based queries (e.g. 'seriesByTag(\\'name=cpu.load\\')'). Datapoints with no recorded value are returned with a null value field. Time range defaults to the last hour if not specified.", + queryGraphite, + mcp.WithTitleAnnotation("Query Graphite metrics"), + mcp.WithIdempotentHintAnnotation(true), + mcp.WithReadOnlyHintAnnotation(true), +) + +// GraphiteMetricNode is a node in the Graphite metric hierarchy as returned +// by the find API. +type GraphiteMetricNode struct { + // ID is the full dotted path of this node (e.g. "servers.web01.cpu"). + ID string `json:"id"` + // Text is the last segment of the path (e.g. "cpu"). + Text string `json:"text"` + // Leaf indicates whether this node is an actual metric (true) or a + // branch that can be expanded further (false). + Leaf bool `json:"leaf"` + // Expandable indicates whether this node has children. + Expandable bool `json:"expandable"` +} + +// graphiteRawMetricNode is the wire format returned by Graphite's find API; +// leaf and expandable are encoded as integers (0 or 1). 
+type graphiteRawMetricNode struct { + ID string `json:"id"` + Text string `json:"text"` + Leaf int `json:"leaf"` + Expandable int `json:"expandable"` +} + +// ListGraphiteMetricsParams defines the parameters for the list_graphite_metrics tool. +type ListGraphiteMetricsParams struct { + DatasourceUID string `json:"datasourceUid" jsonschema:"required,description=The UID of the Graphite datasource to query"` + Query string `json:"query" jsonschema:"required,description=Metric path pattern to search. Use '*' as a wildcard at any level (e.g. '*' lists top-level nodes\\, 'servers.*' lists all servers\\, 'servers.web01.*' lists all metrics under web01)."` +} + +func listGraphiteMetrics(ctx context.Context, args ListGraphiteMetricsParams) ([]GraphiteMetricNode, error) { + client, err := newGraphiteClient(ctx, args.DatasourceUID) + if err != nil { + return nil, fmt.Errorf("creating graphite client: %w", err) + } + + query := args.Query + if query == "" { + query = "*" + } + + params := url.Values{} + params.Set("query", query) + + data, err := client.doGet(ctx, "/metrics/find", params) + if err != nil { + return nil, fmt.Errorf("listing graphite metrics: %w", err) + } + + var rawNodes []graphiteRawMetricNode + if err := json.Unmarshal(data, &rawNodes); err != nil { + return nil, fmt.Errorf("parsing graphite metrics response: %w", err) + } + + nodes := make([]GraphiteMetricNode, 0, len(rawNodes)) + for _, rn := range rawNodes { + nodes = append(nodes, GraphiteMetricNode{ + ID: rn.ID, + Text: rn.Text, + Leaf: rn.Leaf == 1, + Expandable: rn.Expandable == 1, + }) + } + return nodes, nil +} + +// ListGraphiteMetrics is the MCP tool for browsing the Graphite metric tree. +var ListGraphiteMetrics = mcpgrafana.MustTool( + "list_graphite_metrics", + "Discover available metric paths in a Graphite datasource by browsing the metric tree. Returns nodes matching the query pattern\\, each indicating whether it is a leaf metric (has data) or an expandable branch (has children). 
Use '*' as a wildcard at any level to enumerate the tree (e.g. '*' → top-level nodes\\, 'servers.*' → all second-level nodes under 'servers'). Drill down progressively to find the full metric path before querying with query_graphite.", + listGraphiteMetrics, + mcp.WithTitleAnnotation("List Graphite metrics"), + mcp.WithIdempotentHintAnnotation(true), + mcp.WithReadOnlyHintAnnotation(true), +) + +// ListGraphiteTagsParams defines the parameters for the list_graphite_tags tool. +type ListGraphiteTagsParams struct { + DatasourceUID string `json:"datasourceUid" jsonschema:"required,description=The UID of the Graphite datasource to query"` + Prefix string `json:"prefix,omitempty" jsonschema:"description=Optional prefix to filter tag names (e.g. 'env' returns tags whose name starts with 'env')."` +} + +func listGraphiteTags(ctx context.Context, args ListGraphiteTagsParams) ([]string, error) { + client, err := newGraphiteClient(ctx, args.DatasourceUID) + if err != nil { + return nil, fmt.Errorf("creating graphite client: %w", err) + } + + params := url.Values{} + if args.Prefix != "" { + params.Set("tagPrefix", args.Prefix) + } + + data, err := client.doGet(ctx, "/tags", params) + if err != nil { + return nil, fmt.Errorf("listing graphite tags: %w", err) + } + + var raw []struct { + Tag string `json:"tag"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("parsing graphite tags response: %w", err) + } + tags := make([]string, 0, len(raw)) + for _, t := range raw { + if args.Prefix == "" || strings.HasPrefix(t.Tag, args.Prefix) { + tags = append(tags, t.Tag) + } + } + return tags, nil +} + +// ListGraphiteTags is the MCP tool for listing tag names in a tagged Graphite instance. +var ListGraphiteTags = mcpgrafana.MustTool( + "list_graphite_tags", + "List available tag names in a Graphite datasource that uses tag-based metrics. Returns a list of tag name strings (e.g. [\"name\"\\, \"env\"\\, \"region\"]). 
These tags can be used to build tag-based target expressions for query_graphite (e.g. seriesByTag('name=cpu.load\\,env=prod')). Optionally filter by a prefix. Requires Graphite to be configured with tag support.", + listGraphiteTags, + mcp.WithTitleAnnotation("List Graphite tags"), + mcp.WithIdempotentHintAnnotation(true), + mcp.WithReadOnlyHintAnnotation(true), +) + +// computeSeriesDensity derives data-density statistics from the parsed +// datapoints of a single Graphite series. +func computeSeriesDensity(target string, pts []GraphiteDatapoint) *GraphiteSeriesDensity { + total := len(pts) + + var stepSec int64 + if total >= 2 { + if d := pts[1].Timestamp - pts[0].Timestamp; d > 0 { + stepSec = d + } + } + + var nonNull int + var lastSeenTS int64 + var hasLastSeen bool + var currentGap, longestGap int + + for _, dp := range pts { + if dp.Value != nil { + nonNull++ + lastSeenTS = dp.Timestamp + hasLastSeen = true + currentGap = 0 + } else { + currentGap++ + if currentGap > longestGap { + longestGap = currentGap + } + } + } + + var fillRatio float64 + if total > 0 { + fillRatio = float64(nonNull) / float64(total) + } + + var lastSeen *int64 + if hasLastSeen { + cp := lastSeenTS + lastSeen = &cp + } + + return &GraphiteSeriesDensity{ + Target: target, + FillRatio: fillRatio, + TotalPoints: total, + NonNullPoints: nonNull, + LastSeen: lastSeen, + LongestGapSec: int64(longestGap) * stepSec, + EstimatedInterval: stepSec, + } +} + +// QueryGraphiteDensityParams defines the inputs for the query_graphite_density tool. +type QueryGraphiteDensityParams struct { + DatasourceUID string `json:"datasourceUid" jsonschema:"required,description=The UID of the Graphite datasource to query"` + Target string `json:"target" jsonschema:"required,description=Graphite target expression; supports wildcards (e.g. 'obox-cl*.sys.sessions')"` + From string `json:"from,omitempty" jsonschema:"description=Start of the time range. Accepts RFC3339 or Graphite relative times (e.g. '-1h'). 
Defaults to '-1h'."` + Until string `json:"until,omitempty" jsonschema:"description=End of the time range. Accepts RFC3339 or Graphite relative times (e.g. 'now'). Defaults to 'now'."` +} + +// GraphiteSeriesDensity holds data-density statistics for a single Graphite series. +type GraphiteSeriesDensity struct { + // Target is the resolved metric path. + Target string `json:"target"` + // FillRatio is the fraction of non-null datapoints (0.0–1.0). + FillRatio float64 `json:"fillRatio"` + // TotalPoints is the total datapoints returned in the window. + TotalPoints int `json:"totalPoints"` + // NonNullPoints is the count of datapoints that carry a value. + NonNullPoints int `json:"nonNullPoints"` + // LastSeen is the Unix timestamp of the most recent non-null datapoint, + // or null if no data was seen in the window. + LastSeen *int64 `json:"lastSeen"` + // LongestGapSec is the duration in seconds of the longest consecutive + // run of null datapoints. + LongestGapSec int64 `json:"longestGap"` + // EstimatedInterval is the inferred write interval in seconds derived + // from the spacing between consecutive datapoints. + EstimatedInterval int64 `json:"estimatedInterval"` +} + +// QueryGraphiteDensityResult is returned by the query_graphite_density tool. 
+type QueryGraphiteDensityResult struct { + Series []*GraphiteSeriesDensity `json:"series"` +} + +func queryGraphiteDensity(ctx context.Context, args QueryGraphiteDensityParams) (*QueryGraphiteDensityResult, error) { + client, err := newGraphiteClient(ctx, args.DatasourceUID) + if err != nil { + return nil, fmt.Errorf("creating graphite client: %w", err) + } + + from := parseGraphiteTime(args.From) + if from == "" { + from = "-1h" + } + until := parseGraphiteTime(args.Until) + if until == "" { + until = "now" + } + + rawParams := url.Values{} + rawParams.Set("target", args.Target) + rawParams.Set("from", from) + rawParams.Set("until", until) + rawParams.Set("format", "json") + + rawData, err := client.doGet(ctx, "/render", rawParams) + if err != nil { + return nil, fmt.Errorf("querying graphite render API: %w", err) + } + + var rawSeries []graphiteRawSeries + if err := json.Unmarshal(rawData, &rawSeries); err != nil { + return nil, fmt.Errorf("parsing graphite render response: %w", err) + } + + result := &QueryGraphiteDensityResult{ + Series: make([]*GraphiteSeriesDensity, 0, len(rawSeries)), + } + for _, rs := range rawSeries { + pts := parseGraphiteDatapoints(rs.Datapoints) + stats := computeSeriesDensity(rs.Target, pts) + result.Series = append(result.Series, stats) + } + return result, nil +} + +// QueryGraphiteDensity is the MCP tool for analysing metric data density. +var QueryGraphiteDensity = mcpgrafana.MustTool( + "query_graphite_density", + "Analyses metric data density for one or more Graphite series over a time window. "+ + "Returns per-series statistics: fillRatio (fraction of non-null datapoints, 0.0–1.0), "+ + "totalPoints, nonNullPoints, lastSeen (Unix timestamp of most recent non-null value, or null if none), "+ + "longestGap (longest consecutive null run in seconds), and estimatedInterval (inferred write interval in seconds). "+ + "Supports wildcard targets (e.g. 'obox-cl*.sys.sessions') to diagnose stale, sparse, or dead metrics across a cluster. 
"+ + "A fillRatio of 0 with lastSeen null means the series reported no data in the requested window.", + queryGraphiteDensity, + mcp.WithTitleAnnotation("Query Graphite metric density"), + mcp.WithIdempotentHintAnnotation(true), + mcp.WithReadOnlyHintAnnotation(true), +) + +// AddGraphiteTools registers all Graphite tools with the MCP server. +func AddGraphiteTools(mcp *server.MCPServer) { + QueryGraphite.Register(mcp) + ListGraphiteMetrics.Register(mcp) + ListGraphiteTags.Register(mcp) + QueryGraphiteDensity.Register(mcp) +} \ No newline at end of file diff --git a/tools/graphite_integration_test.go b/tools/graphite_integration_test.go new file mode 100644 index 00000000..f8735a5e --- /dev/null +++ b/tools/graphite_integration_test.go @@ -0,0 +1,170 @@ +//go:build integration + +package tools + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const graphiteTestDatasourceUID = "graphite" + +func TestGraphiteIntegration_ListMetrics(t *testing.T) { + t.Run("list top-level metrics", func(t *testing.T) { + ctx := newTestContext() + result, err := listGraphiteMetrics(ctx, ListGraphiteMetricsParams{ + DatasourceUID: graphiteTestDatasourceUID, + Query: "*", + }) + require.NoError(t, err) + require.NotEmpty(t, result, "should return at least one top-level node") + + // Verify node structure. + for _, node := range result { + assert.NotEmpty(t, node.ID, "node should have an ID") + assert.NotEmpty(t, node.Text, "node should have a text") + } + + // The seeded metrics all live under "test.*". 
+ ids := make(map[string]bool, len(result)) + for _, n := range result { + ids[n.ID] = true + } + assert.True(t, ids["test"], "top-level 'test' node should be present") + }) + + t.Run("list second-level metrics", func(t *testing.T) { + ctx := newTestContext() + result, err := listGraphiteMetrics(ctx, ListGraphiteMetricsParams{ + DatasourceUID: graphiteTestDatasourceUID, + Query: "test.*", + }) + require.NoError(t, err) + require.NotEmpty(t, result, "should return at least one node under 'test'") + + ids := make(map[string]bool, len(result)) + for _, n := range result { + ids[n.ID] = true + } + assert.True(t, ids["test.servers"], "'test.servers' node should be present") + }) + + t.Run("list leaf metrics", func(t *testing.T) { + ctx := newTestContext() + result, err := listGraphiteMetrics(ctx, ListGraphiteMetricsParams{ + DatasourceUID: graphiteTestDatasourceUID, + Query: "test.servers.web01.cpu.*", + }) + require.NoError(t, err) + require.NotEmpty(t, result, "should return leaf metrics under 'test.servers.web01.cpu'") + + // All returned nodes should be leaves. 
+ for _, node := range result { + assert.True(t, node.Leaf, "node %q should be a leaf", node.ID) + } + }) +} + +func TestGraphiteIntegration_QueryGraphite(t *testing.T) { + t.Run("query returns series with data", func(t *testing.T) { + ctx := newTestContext() + result, err := queryGraphite(ctx, QueryGraphiteParams{ + DatasourceUID: graphiteTestDatasourceUID, + Target: "test.servers.web01.cpu.load5", + From: "-1h", + Until: "now", + }) + require.NoError(t, err) + require.NotNil(t, result) + require.NotEmpty(t, result.Series, "should return at least one series") + assert.Nil(t, result.Hints, "hints should be absent when data is returned") + + series := result.Series[0] + assert.Equal(t, "test.servers.web01.cpu.load5", series.Target) + assert.NotEmpty(t, series.Datapoints, "series should have datapoints") + }) + + t.Run("query with wildcard returns multiple series", func(t *testing.T) { + ctx := newTestContext() + result, err := queryGraphite(ctx, QueryGraphiteParams{ + DatasourceUID: graphiteTestDatasourceUID, + Target: "test.servers.*.cpu.load5", + }) + require.NoError(t, err) + require.NotNil(t, result) + assert.GreaterOrEqual(t, len(result.Series), 2, "wildcard should match multiple servers") + }) + + t.Run("query with no matching target returns hints", func(t *testing.T) { + ctx := newTestContext() + result, err := queryGraphite(ctx, QueryGraphiteParams{ + DatasourceUID: graphiteTestDatasourceUID, + Target: "test.nonexistent.metric.xyz", + }) + require.NoError(t, err) + require.NotNil(t, result) + assert.Empty(t, result.Series, "non-matching target should return no series") + assert.NotNil(t, result.Hints, "hints should be present for empty results") + }) +} + +func TestGraphiteIntegration_QueryGraphiteDensity(t *testing.T) { + t.Run("density for specific series", func(t *testing.T) { + ctx := newTestContext() + result, err := queryGraphiteDensity(ctx, QueryGraphiteDensityParams{ + DatasourceUID: graphiteTestDatasourceUID, + Target: 
"test.servers.web01.cpu.load5", + From: "-1h", + }) + require.NoError(t, err) + require.NotNil(t, result) + require.NotEmpty(t, result.Series, "should return density for the seeded series") + + density := result.Series[0] + assert.Equal(t, "test.servers.web01.cpu.load5", density.Target) + assert.Greater(t, density.TotalPoints, 0, "should have datapoints in the window") + assert.GreaterOrEqual(t, density.FillRatio, 0.0, "fill ratio should be non-negative") + assert.LessOrEqual(t, density.FillRatio, 1.0, "fill ratio should not exceed 1.0") + assert.NotNil(t, density.LastSeen, "lastSeen should be set since we seeded data") + }) + + t.Run("density for wildcard matches multiple series", func(t *testing.T) { + ctx := newTestContext() + result, err := queryGraphiteDensity(ctx, QueryGraphiteDensityParams{ + DatasourceUID: graphiteTestDatasourceUID, + Target: "test.servers.*.cpu.load5", + }) + require.NoError(t, err) + require.NotNil(t, result) + assert.GreaterOrEqual(t, len(result.Series), 2, "wildcard should match multiple series") + }) +} + +func TestGraphiteIntegration_ListTags(t *testing.T) { + t.Run("list tags returns without error", func(t *testing.T) { + ctx := newTestContext() + result, err := listGraphiteTags(ctx, ListGraphiteTagsParams{ + DatasourceUID: graphiteTestDatasourceUID, + }) + require.NoError(t, err) + // Tags may or may not be present depending on whether Graphite's tag + // support has indexed the seeded tagged metrics yet. We only assert that + // the call succeeds and returns a non-nil slice. + assert.NotNil(t, result) + }) + + t.Run("list tags with prefix filter", func(t *testing.T) { + ctx := newTestContext() + result, err := listGraphiteTags(ctx, ListGraphiteTagsParams{ + DatasourceUID: graphiteTestDatasourceUID, + Prefix: "server", + }) + require.NoError(t, err) + // All returned tags must start with the requested prefix. 
+		for _, tag := range result {
+			assert.Equal(t, "server", tag[:min(len(tag), len("server"))], "all tags should match the prefix filter")
+		}
+	})
+}
diff --git a/tools/graphite_unit_test.go b/tools/graphite_unit_test.go
new file mode 100644
index 00000000..6c8d3907
--- /dev/null
+++ b/tools/graphite_unit_test.go
@@ -0,0 +1,577 @@
+package tools
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// --- parseGraphiteTime ---
+
+func TestParseGraphiteTime(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "empty string passes through",
+			input: "",
+			want:  "",
+		},
+		{
+			name:  "now passes through",
+			input: "now",
+			want:  "now",
+		},
+		{
+			name:  "relative -1h passes through",
+			input: "-1h",
+			want:  "-1h",
+		},
+		{
+			name:  "relative -24h passes through",
+			input: "-24h",
+			want:  "-24h",
+		},
+		{
+			name:  "RFC3339 is converted to unix timestamp",
+			input: "2024-01-01T00:00:00Z",
+			want:  "1704067200",
+		},
+		{
+			name:  "unknown format passes through",
+			input: "12:00_20240101",
+			want:  "12:00_20240101",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got := parseGraphiteTime(tc.input)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
+// --- parseGraphiteDatapoints ---
+
+func TestParseGraphiteDatapoints(t *testing.T) {
+	t.Run("normal values", func(t *testing.T) {
+		raw := [][]json.RawMessage{
+			{json.RawMessage("1.5"), json.RawMessage("1704067200")},
+			{json.RawMessage("2.0"), json.RawMessage("1704067260")},
+		}
+		pts := parseGraphiteDatapoints(raw)
+		require.Len(t, pts, 2)
+		require.NotNil(t, pts[0].Value)
+		assert.InDelta(t, 1.5, *pts[0].Value, 1e-9)
+		assert.Equal(t, int64(1704067200), pts[0].Timestamp)
+		require.NotNil(t, pts[1].Value)
+		assert.InDelta(t, 2.0, *pts[1].Value, 1e-9)
+	})
+
+	t.Run("null value becomes nil pointer", func(t *testing.T) {
+		raw :=
[][]json.RawMessage{ + {json.RawMessage("null"), json.RawMessage("1704067200")}, + } + pts := parseGraphiteDatapoints(raw) + require.Len(t, pts, 1) + assert.Nil(t, pts[0].Value) + assert.Equal(t, int64(1704067200), pts[0].Timestamp) + }) + + t.Run("mix of null and non-null values", func(t *testing.T) { + raw := [][]json.RawMessage{ + {json.RawMessage("null"), json.RawMessage("1704067200")}, + {json.RawMessage("3.14"), json.RawMessage("1704067260")}, + {json.RawMessage("null"), json.RawMessage("1704067320")}, + } + pts := parseGraphiteDatapoints(raw) + require.Len(t, pts, 3) + assert.Nil(t, pts[0].Value) + require.NotNil(t, pts[1].Value) + assert.InDelta(t, 3.14, *pts[1].Value, 1e-9) + assert.Nil(t, pts[2].Value) + }) + + t.Run("empty input returns empty slice", func(t *testing.T) { + pts := parseGraphiteDatapoints(nil) + assert.Empty(t, pts) + }) + + t.Run("malformed pairs are skipped", func(t *testing.T) { + raw := [][]json.RawMessage{ + {json.RawMessage("1.0")}, // only one element — no timestamp + {json.RawMessage("2.0"), json.RawMessage("1704067200")}, + } + pts := parseGraphiteDatapoints(raw) + require.Len(t, pts, 1) + assert.Equal(t, int64(1704067200), pts[0].Timestamp) + }) +} + +// --- queryGraphite handler (via doGet) --- + +func TestQueryGraphite_DoGet_ParsesRenderResponse(t *testing.T) { + renderResp := []graphiteRawSeries{ + { + Target: "servers.web01.cpu.load5", + Datapoints: [][]json.RawMessage{ + {json.RawMessage("0.5"), json.RawMessage("1704067200")}, + {json.RawMessage("null"), json.RawMessage("1704067260")}, + {json.RawMessage("1.2"), json.RawMessage("1704067320")}, + }, + }, + } + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/render", r.URL.Path) + assert.Equal(t, "GET", r.Method) + assert.Equal(t, "servers.web01.cpu.load5", r.URL.Query().Get("target")) + assert.Equal(t, "json", r.URL.Query().Get("format")) + w.Header().Set("Content-Type", "application/json") + _ = 
json.NewEncoder(w).Encode(renderResp) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{ + httpClient: http.DefaultClient, + baseURL: ts.URL, + } + + params := url.Values{} + params.Set("target", "servers.web01.cpu.load5") + params.Set("from", "-1h") + params.Set("until", "now") + params.Set("format", "json") + + data, err := client.doGet(context.Background(), "/render", params) + require.NoError(t, err) + + var series []graphiteRawSeries + require.NoError(t, json.Unmarshal(data, &series)) + require.Len(t, series, 1) + assert.Equal(t, "servers.web01.cpu.load5", series[0].Target) + + pts := parseGraphiteDatapoints(series[0].Datapoints) + require.Len(t, pts, 3) + require.NotNil(t, pts[0].Value) + assert.InDelta(t, 0.5, *pts[0].Value, 1e-9) + assert.Nil(t, pts[1].Value) + require.NotNil(t, pts[2].Value) + assert.InDelta(t, 1.2, *pts[2].Value, 1e-9) +} + +func TestQueryGraphite_EmptyResult_HasHints(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte("[]")) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{ + httpClient: http.DefaultClient, + baseURL: ts.URL, + } + + ctx := context.Background() + data, err := client.doGet(ctx, "/render", nil) + require.NoError(t, err) + + var rawSeries []graphiteRawSeries + require.NoError(t, json.Unmarshal(data, &rawSeries)) + assert.Empty(t, rawSeries) + + // Simulate the handler building hints for an empty result + hints := GenerateEmptyResultHints(HintContext{ + DatasourceType: GraphiteDatasourceType, + Query: "nonexistent.metric.*", + StartTime: time.Now().Add(-time.Hour), + EndTime: time.Now(), + }) + require.NotNil(t, hints) + assert.NotEmpty(t, hints.Summary) + assert.NotEmpty(t, hints.PossibleCauses) + assert.NotEmpty(t, hints.SuggestedActions) +} + +// --- listGraphiteMetrics handler --- + +func TestListGraphiteMetrics_ParsesNodes(t *testing.T) { + rawNodes := 
[]graphiteRawMetricNode{ + {ID: "servers", Text: "servers", Leaf: 0, Expandable: 1}, + {ID: "cpu.load5", Text: "load5", Leaf: 1, Expandable: 0}, + } + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/metrics/find", r.URL.Path) + assert.Equal(t, "servers.*", r.URL.Query().Get("query")) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(rawNodes) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{ + httpClient: http.DefaultClient, + baseURL: ts.URL, + } + + ctx := context.Background() + params := url.Values{} + params.Set("query", "servers.*") + + data, err := client.doGet(ctx, "/metrics/find", params) + require.NoError(t, err) + + var nodes []graphiteRawMetricNode + require.NoError(t, json.Unmarshal(data, &nodes)) + require.Len(t, nodes, 2) + + parsed := make([]GraphiteMetricNode, 0, len(nodes)) + for _, n := range nodes { + parsed = append(parsed, GraphiteMetricNode{ + ID: n.ID, + Text: n.Text, + Leaf: n.Leaf == 1, + Expandable: n.Expandable == 1, + }) + } + assert.False(t, parsed[0].Leaf) + assert.True(t, parsed[0].Expandable) + assert.True(t, parsed[1].Leaf) + assert.False(t, parsed[1].Expandable) +} + +// --- listGraphiteTags handler --- + +func TestListGraphiteTags_ReturnsTags(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/tags", r.URL.Path) + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode([]map[string]any{ + {"tag": "env", "count": 3}, + {"tag": "name", "count": 5}, + {"tag": "region", "count": 2}, + {"tag": "server", "count": 1}, + }) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{ + httpClient: http.DefaultClient, + baseURL: ts.URL, + } + + ctx := context.Background() + data, err := client.doGet(ctx, "/tags", nil) + require.NoError(t, err) + + var raw []struct { + Tag string `json:"tag"` + } + require.NoError(t, json.Unmarshal(data, 
&raw))
+	result := make([]string, len(raw))
+	for i, entry := range raw {
+		result[i] = entry.Tag
+	}
+	assert.Equal(t, []string{"env", "name", "region", "server"}, result)
+}
+
+func TestListGraphiteTags_WithPrefix(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		assert.Equal(t, "env", r.URL.Query().Get("tagPrefix"))
+		w.Header().Set("Content-Type", "application/json")
+		_ = json.NewEncoder(w).Encode([]map[string]any{
+			{"tag": "env", "count": 3},
+		})
+	}))
+	t.Cleanup(ts.Close)
+
+	client := &GraphiteClient{
+		httpClient: http.DefaultClient,
+		baseURL:    ts.URL,
+	}
+
+	ctx := context.Background()
+	params := url.Values{}
+	params.Set("tagPrefix", "env")
+
+	data, err := client.doGet(ctx, "/tags", params)
+	require.NoError(t, err)
+
+	var raw []struct {
+		Tag string `json:"tag"`
+	}
+	require.NoError(t, json.Unmarshal(data, &raw))
+	result := make([]string, len(raw))
+	for i, entry := range raw {
+		result[i] = entry.Tag
+	}
+	assert.Equal(t, []string{"env"}, result)
+}
+
+func TestListGraphiteTags_EmptyList(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte("[]"))
+	}))
+	t.Cleanup(ts.Close)
+
+	client := &GraphiteClient{
+		httpClient: http.DefaultClient,
+		baseURL:    ts.URL,
+	}
+
+	data, err := client.doGet(context.Background(), "/tags", nil)
+	require.NoError(t, err)
+
+	var raw []struct {
+		Tag string `json:"tag"`
+	}
+	require.NoError(t, json.Unmarshal(data, &raw))
+	result := make([]string, len(raw))
+	for i, entry := range raw {
+		result[i] = entry.Tag
+	}
+	assert.Empty(t, result)
+}
+
+// --- doGet error handling ---
+
+func TestGraphiteClient_DoGet_NonOKStatus(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		http.Error(w, "internal error", http.StatusInternalServerError)
+	}))
+	t.Cleanup(ts.Close)
+
+	client := &GraphiteClient{
+		
httpClient: http.DefaultClient, + baseURL: ts.URL, + } + + _, err := client.doGet(context.Background(), "/render", nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "500") +} + +// --- computeSeriesDensity --- + +func TestComputeSeriesDensity_AllNull(t *testing.T) { + pts := []GraphiteDatapoint{ + {Value: nil, Timestamp: 1704067200}, + {Value: nil, Timestamp: 1704067260}, + {Value: nil, Timestamp: 1704067320}, + } + s := computeSeriesDensity("my.metric", pts) + assert.Equal(t, "my.metric", s.Target) + assert.InDelta(t, 0.0, s.FillRatio, 1e-9) + assert.Equal(t, 3, s.TotalPoints) + assert.Equal(t, 0, s.NonNullPoints) + assert.Nil(t, s.LastSeen) + assert.Equal(t, int64(180), s.LongestGapSec) // 3 nulls × 60 s/step + assert.Equal(t, int64(60), s.EstimatedInterval) +} + +func TestComputeSeriesDensity_AllNonNull(t *testing.T) { + v1, v2, v3 := 1.0, 2.0, 3.0 + pts := []GraphiteDatapoint{ + {Value: &v1, Timestamp: 1704067200}, + {Value: &v2, Timestamp: 1704067260}, + {Value: &v3, Timestamp: 1704067320}, + } + s := computeSeriesDensity("my.metric", pts) + assert.InDelta(t, 1.0, s.FillRatio, 1e-9) + assert.Equal(t, 3, s.TotalPoints) + assert.Equal(t, 3, s.NonNullPoints) + require.NotNil(t, s.LastSeen) + assert.Equal(t, int64(1704067320), *s.LastSeen) + assert.Equal(t, int64(0), s.LongestGapSec) + assert.Equal(t, int64(60), s.EstimatedInterval) +} + +func TestComputeSeriesDensity_Mixed(t *testing.T) { + v := 5.5 + pts := []GraphiteDatapoint{ + {Value: nil, Timestamp: 1704067200}, + {Value: &v, Timestamp: 1704067260}, + {Value: nil, Timestamp: 1704067320}, + {Value: nil, Timestamp: 1704067380}, + } + s := computeSeriesDensity("my.metric", pts) + assert.InDelta(t, 0.25, s.FillRatio, 1e-9) + assert.Equal(t, 4, s.TotalPoints) + assert.Equal(t, 1, s.NonNullPoints) + require.NotNil(t, s.LastSeen) + assert.Equal(t, int64(1704067260), *s.LastSeen) + assert.Equal(t, int64(120), s.LongestGapSec) // trailing 2-null run × 60 s + assert.Equal(t, int64(60), 
s.EstimatedInterval) +} + +func TestComputeSeriesDensity_Empty(t *testing.T) { + s := computeSeriesDensity("my.metric", nil) + assert.Equal(t, "my.metric", s.Target) + assert.InDelta(t, 0.0, s.FillRatio, 1e-9) + assert.Equal(t, 0, s.TotalPoints) + assert.Equal(t, 0, s.NonNullPoints) + assert.Nil(t, s.LastSeen) + assert.Equal(t, int64(0), s.LongestGapSec) + assert.Equal(t, int64(0), s.EstimatedInterval) +} + +func TestComputeSeriesDensity_SingleNonNull(t *testing.T) { + v := 1.0 + pts := []GraphiteDatapoint{{Value: &v, Timestamp: 1704067200}} + s := computeSeriesDensity("my.metric", pts) + assert.InDelta(t, 1.0, s.FillRatio, 1e-9) + assert.Equal(t, 1, s.TotalPoints) + assert.Equal(t, 1, s.NonNullPoints) + require.NotNil(t, s.LastSeen) + assert.Equal(t, int64(1704067200), *s.LastSeen) + assert.Equal(t, int64(0), s.LongestGapSec) + assert.Equal(t, int64(0), s.EstimatedInterval) // can't infer from 1 point +} + +func TestComputeSeriesDensity_LongestGapInMiddle(t *testing.T) { + v := 1.0 + pts := []GraphiteDatapoint{ + {Value: &v, Timestamp: 1704067200}, + {Value: nil, Timestamp: 1704067260}, + {Value: nil, Timestamp: 1704067320}, + {Value: nil, Timestamp: 1704067380}, + {Value: &v, Timestamp: 1704067440}, + {Value: nil, Timestamp: 1704067500}, + } + s := computeSeriesDensity("my.metric", pts) + // Middle gap: 3 nulls = 180 s; trailing gap: 1 null = 60 s + assert.Equal(t, int64(180), s.LongestGapSec) + assert.Equal(t, 2, s.NonNullPoints) + require.NotNil(t, s.LastSeen) + assert.Equal(t, int64(1704067440), *s.LastSeen) +} + +// --- query_graphite_density full-flow via client --- + +func TestQueryGraphiteDensity_AllNullCluster(t *testing.T) { + // Primary use-case: wildcard target where every node is all-null. + // Verifies that the raw-series path returns fillRatio=0 and lastSeen=nil + // for every series. 
+ rawResp := []graphiteRawSeries{ + { + Target: "obox-cl1.sys.sessions", + Datapoints: [][]json.RawMessage{ + {json.RawMessage("null"), json.RawMessage("1704067200")}, + {json.RawMessage("null"), json.RawMessage("1704067260")}, + {json.RawMessage("null"), json.RawMessage("1704067320")}, + }, + }, + { + Target: "obox-cl2.sys.sessions", + Datapoints: [][]json.RawMessage{ + {json.RawMessage("null"), json.RawMessage("1704067200")}, + {json.RawMessage("null"), json.RawMessage("1704067260")}, + {json.RawMessage("null"), json.RawMessage("1704067320")}, + }, + }, + } + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(rawResp) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{httpClient: http.DefaultClient, baseURL: ts.URL} + + params := url.Values{} + params.Set("target", "obox-cl*.sys.sessions") + params.Set("from", "-1h") + params.Set("until", "now") + params.Set("format", "json") + + data, err := client.doGet(context.Background(), "/render", params) + require.NoError(t, err) + + var raw []graphiteRawSeries + require.NoError(t, json.Unmarshal(data, &raw)) + require.Len(t, raw, 2) + + for _, rs := range raw { + pts := parseGraphiteDatapoints(rs.Datapoints) + s := computeSeriesDensity(rs.Target, pts) + assert.InDelta(t, 0.0, s.FillRatio, 1e-9, "series %s", rs.Target) + assert.Equal(t, 0, s.NonNullPoints, "series %s", rs.Target) + assert.Nil(t, s.LastSeen, "series %s", rs.Target) + } +} + +func TestQueryGraphiteDensity_MixedCluster(t *testing.T) { + v5, v6 := 5.0, 6.0 + rawResp := []graphiteRawSeries{ + { + Target: "obox-cl1.sys.sessions", + Datapoints: [][]json.RawMessage{ + {json.RawMessage("null"), json.RawMessage("1704067200")}, + {json.RawMessage("null"), json.RawMessage("1704067260")}, + {json.RawMessage("null"), json.RawMessage("1704067320")}, + }, + }, + { + Target: "obox-cl2.sys.sessions", + Datapoints: [][]json.RawMessage{ + 
{json.RawMessage("5.0"), json.RawMessage("1704067200")}, + {json.RawMessage("null"), json.RawMessage("1704067260")}, + {json.RawMessage("6.0"), json.RawMessage("1704067320")}, + }, + }, + } + _ = v5 + _ = v6 + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(rawResp) + })) + t.Cleanup(ts.Close) + + client := &GraphiteClient{httpClient: http.DefaultClient, baseURL: ts.URL} + + params := url.Values{} + params.Set("target", "obox-cl*.sys.sessions") + params.Set("format", "json") + + data, err := client.doGet(context.Background(), "/render", params) + require.NoError(t, err) + + var raw []graphiteRawSeries + require.NoError(t, json.Unmarshal(data, &raw)) + require.Len(t, raw, 2) + + // cl1: all null + s1 := computeSeriesDensity(raw[0].Target, parseGraphiteDatapoints(raw[0].Datapoints)) + assert.Equal(t, "obox-cl1.sys.sessions", s1.Target) + assert.InDelta(t, 0.0, s1.FillRatio, 1e-9) + assert.Equal(t, 3, s1.TotalPoints) + assert.Equal(t, 0, s1.NonNullPoints) + assert.Nil(t, s1.LastSeen) + assert.Equal(t, int64(180), s1.LongestGapSec) + + // cl2: mixed + s2 := computeSeriesDensity(raw[1].Target, parseGraphiteDatapoints(raw[1].Datapoints)) + assert.Equal(t, "obox-cl2.sys.sessions", s2.Target) + assert.InDelta(t, 2.0/3.0, s2.FillRatio, 1e-9) + assert.Equal(t, 3, s2.TotalPoints) + assert.Equal(t, 2, s2.NonNullPoints) + require.NotNil(t, s2.LastSeen) + assert.Equal(t, int64(1704067320), *s2.LastSeen) + assert.Equal(t, int64(60), s2.LongestGapSec) + assert.Equal(t, int64(60), s2.EstimatedInterval) +} + diff --git a/tools/hints.go b/tools/hints.go index 499f0e73..1da23e99 100644 --- a/tools/hints.go +++ b/tools/hints.go @@ -73,6 +73,11 @@ func GenerateEmptyResultHints(ctx HintContext) *EmptyResultHints { hints.PossibleCauses = getCloudWatchCauses(ctx) hints.SuggestedActions = getCloudWatchActions(ctx) + case "graphite": + hints.Summary = "The Graphite 
query returned no metric series for the specified target and time range."
+		hints.PossibleCauses = getGraphiteCauses(ctx)
+		hints.SuggestedActions = getGraphiteActions(ctx)
+
 	default:
 		hints.Summary = "The query returned no data for the specified parameters."
 		hints.PossibleCauses = getGenericCauses()
@@ -200,6 +205,36 @@ func getCloudWatchActions(ctx HintContext) []string {
 	}
 }
 
+// getGraphiteCauses returns possible causes for empty Graphite results
+func getGraphiteCauses(ctx HintContext) []string {
+	causes := []string{
+		"The target expression may not match any metric paths",
+		"No data was recorded for the specified time range",
+		"The time range may be outside the data retention period for this metric",
+		"Wildcard patterns may not expand to any existing metrics",
+	}
+	if strings.Contains(ctx.Query, "seriesByTag") {
+		causes = append(causes, "Tag values in seriesByTag() may not match any tagged series")
+	}
+	if strings.Contains(ctx.Query, "sumSeries") || strings.Contains(ctx.Query, "averageSeries") {
+		causes = append(causes, "Aggregation functions return no data when the inner target matches nothing")
+	}
+	return causes
+}
+
+// getGraphiteActions returns suggested actions for empty Graphite results
+func getGraphiteActions(ctx HintContext) []string {
+	actions := []string{
+		"Use list_graphite_metrics to browse and verify the metric path exists",
+		"Try a simpler wildcard pattern (e.g. '*') to confirm the top-level namespace",
+		"Expand the time range — the metric may have data in a different period",
+	}
+	if strings.Contains(ctx.Query, "seriesByTag") {
+		actions = append(actions, "Use list_graphite_tags to verify tag names and values")
+	}
+	return actions
+}
+
 // getGenericCauses returns generic causes for empty results
 func getGenericCauses() []string {
 	return []string{