Skip to content

Commit 255e6b2

Browse files
authored
feat: switch list_metrics response from JSON to CSV (#709)
Closes #708 ## Summary Switches the `list_metrics` tool response from JSON to CSV format. This is unconventional, but for this use case — tabular data with potentially many rows — CSV offers substantial benefits over JSON as LLM context: - **67% smaller response**: 24,855 chars (JSON) → 8,139 chars (CSV) - **Up to 33% lower cost** to answer a question end-to-end vs JSON baseline - Null/empty fields are omitted dynamically: columns only appear when at least one metric has a value, keeping output compact by default - When the CSV still exceeds `DBT_MCP_SL_MAX_RESPONSE_CHARS`, optional columns (`description`, `metadata`) are stripped as a fallback ### Also included **Prompt improvements** — guide agents to skip unnecessary `get_dimensions` calls when `metric_time` is the only dimension needed, compounding the token savings from the smaller response. **Where clause fix** — strip surrounding double-quotes that LLMs sometimes add to the `where` parameter string, preventing `Invalid {{ delimiter` errors from the dbt Semantic Layer's Jinja parser.
1 parent c92b535 commit 255e6b2

15 files changed

Lines changed: 217 additions & 19 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
kind: Enhancement or New Feature
2+
body: Reduce list_metrics response size by switching from JSON to CSV format, cutting response size by ~67% and reducing agent cost by up to 33% per query
3+
time: 2026-04-10T13:30:00.000000-07:00

src/dbt_mcp/config/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ def load_config(enable_proxied_tools: bool = True) -> Config:
147147
credentials_provider=credentials_provider,
148148
admin_client=admin_client,
149149
metrics_related_max=settings.sl_metrics_related_max,
150+
max_response_chars=settings.sl_metrics_max_response_chars,
150151
)
151152
)
152153

@@ -181,6 +182,7 @@ def load_config(enable_proxied_tools: bool = True) -> Config:
181182
semantic_layer_config_provider = DefaultSemanticLayerConfigProvider(
182183
credentials_provider=credentials_provider,
183184
metrics_related_max=settings.sl_metrics_related_max,
185+
max_response_chars=settings.sl_metrics_max_response_chars,
184186
)
185187

186188
lsp_config = None

src/dbt_mcp/config/config_providers/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,4 @@ class SemanticLayerConfig:
5858
token_provider: TokenProvider
5959
headers_provider: HeadersProvider
6060
metrics_related_max: int = 10
61+
max_response_chars: int = 16000

src/dbt_mcp/config/config_providers/semantic_layer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ def __init__(
1414
credentials_provider: CredentialsProvider,
1515
*,
1616
metrics_related_max: int = 10,
17+
max_response_chars: int = 16000,
1718
):
1819
self.credentials_provider = credentials_provider
1920
self.metrics_related_max = metrics_related_max
21+
self.max_response_chars = max_response_chars
2022

2123
async def get_config(self) -> SemanticLayerConfig:
2224
settings, token_provider = await self.credentials_provider.get_credentials()
@@ -39,6 +41,7 @@ async def get_config(self) -> SemanticLayerConfig:
3941
token_provider=token_provider
4042
),
4143
metrics_related_max=self.metrics_related_max,
44+
max_response_chars=self.max_response_chars,
4245
)
4346

4447

@@ -51,10 +54,12 @@ def __init__(
5154
admin_client: DbtAdminAPIClient,
5255
*,
5356
metrics_related_max: int = 10,
57+
max_response_chars: int = 16000,
5458
):
5559
self.credentials_provider = credentials_provider
5660
self.admin_client = admin_client
5761
self.metrics_related_max = metrics_related_max
62+
self.max_response_chars = max_response_chars
5863

5964
async def get_config(self, project_id: int) -> SemanticLayerConfig:
6065
settings, token_provider = await self.credentials_provider.get_credentials()
@@ -86,4 +91,5 @@ async def get_config(self, project_id: int) -> SemanticLayerConfig:
8691
token_provider=token_provider
8792
),
8893
metrics_related_max=self.metrics_related_max,
94+
max_response_chars=self.max_response_chars,
8995
)

src/dbt_mcp/config/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ class DbtMcpSettings(BaseSettings):
108108
sl_metrics_related_max: int = Field(
109109
10, alias="DBT_MCP_SL_METRICS_RELATED_MAX", ge=0
110110
)
111+
sl_metrics_max_response_chars: int = Field(
112+
16000, alias="DBT_MCP_SL_MAX_RESPONSE_CHARS", ge=0
113+
)
111114

112115
def __repr__(self):
113116
"""Custom repr to bring most important settings to front. Redact sensitive info."""

src/dbt_mcp/prompts/semantic_layer/get_dimensions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
<instructions>
22
Get the dimensions for specified metrics
33

4+
Note: `metric_time` is a standard time dimension available on most metrics. You do not need to call this tool just to confirm time dimensions exist — call it only when you need categorical dimensions or specific granularity details. If this tool returns no results, proceed to query directly using `metric_time`.
5+
46
Dimensions are the attributes, features, or characteristics
57
that describe or categorize data.
68

src/dbt_mcp/prompts/semantic_layer/list_metrics.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
List metrics from the dbt Semantic Layer.
22

3+
The response is a CSV string with a header row, or an empty string when no metrics are returned. Columns are dynamic: `name` and `type` are always present; `label`, `description`, `metadata`, `dimensions`, and `entities` are each included only when at least one metric has a non-empty value for that column. The `dimensions` and `entities` cells contain comma-separated lists of names.
4+
35
When the number of metrics is below the configured threshold (default: 10), each metric includes the names of its available dimensions and entities. Use get_dimensions or get_entities for full details (types, granularities, descriptions) on specific metrics.
46

5-
When above the threshold, only metrics are returned. Use get_dimensions and get_entities with the specific metrics you need.
7+
When above the threshold, only metrics are returned. `metric_time` is a standard time dimension available on most metrics, so you can often query it directly without calling `get_dimensions` first. Call `get_dimensions` only when you need non-time dimensions or specific granularity details.
68

79
If the user is asking a data-related or business-related question, use this tool as a first step.
810

src/dbt_mcp/prompts/semantic_layer/query_metrics.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ and entity are referenced differently. For categorical dimensions,
1818
use `{{ Dimension('<name>') }}` and for time dimensions add the grain
1919
like `{{ TimeDimension('<name>', '<grain>') }}`. For entities,
2020
use `{{ Entity('<name>') }}`. When referencing dates in the `where`
21-
parameter, only use the format `yyyy-mm-dd`.
21+
parameter, only use the format `yyyy-mm-dd`. Pass the `where` value as a
22+
plain string — do not wrap it in additional quotes.
2223

2324
Don't call this tool if the user's question cannot be answered with the provided
2425
metrics, dimensions, and entities. Instead, clarify what metrics, dimensions,

src/dbt_mcp/semantic_layer/client.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,19 @@ def _format_get_metrics_compiled_sql_error(
296296
error=self._format_semantic_layer_error(compile_error)
297297
)
298298

299+
def _normalize_where(self, where: str | None) -> str | None:
300+
"""Strip surrounding quotes that LLMs sometimes add to where clause strings.
301+
302+
Returns None if the input is None or becomes empty/whitespace-only after
303+
stripping quotes — the caller should treat this as "no where clause".
304+
"""
305+
if where is None:
306+
return None
307+
where = where.strip()
308+
if len(where) >= 2 and where[0] == '"' and where[-1] == '"':
309+
where = where[1:-1]
310+
return where.strip() or None
311+
299312
# TODO: move this to the SDK
300313
def _format_query_failed_error(self, query_error: Exception) -> QueryMetricsError:
301314
if isinstance(query_error, QueryFailedError):
@@ -369,7 +382,9 @@ async def get_metrics_compiled_sql(
369382
metrics=metrics,
370383
group_by=group_by, # type: ignore
371384
order_by=parsed_order_by, # type: ignore
372-
where=[where] if where else None,
385+
where=[normalized_where]
386+
if (normalized_where := self._normalize_where(where))
387+
else None,
373388
limit=limit,
374389
read_cache=True,
375390
)
@@ -406,7 +421,9 @@ async def query_metrics(
406421
metrics=metrics,
407422
group_by=group_by, # type: ignore
408423
order_by=parsed_order_by, # type: ignore
409-
where=[where] if where else None,
424+
where=[normalized_where]
425+
if (normalized_where := self._normalize_where(where))
426+
else None,
410427
limit=limit,
411428
)
412429
except RetryTimeoutError as e:

src/dbt_mcp/semantic_layer/tools.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import csv
2+
import io
3+
import json
14
import logging
25
from dataclasses import dataclass
36

@@ -14,10 +17,11 @@
1417
DimensionToolResponse,
1518
EntityToolResponse,
1619
GetMetricsCompiledSqlSuccess,
20+
ListMetricsResponse,
21+
MetricToolResponse,
1722
OrderByParam,
1823
QueryMetricsSuccess,
1924
SavedQueryToolResponse,
20-
ListMetricsResponse,
2125
)
2226
from dbt_mcp.tools.definitions import dbt_mcp_tool
2327
from dbt_mcp.tools.register import register_tools
@@ -27,6 +31,48 @@
2731
logger = logging.getLogger(__name__)
2832

2933

34+
def _build_csv(metrics: list[MetricToolResponse], columns: list[str]) -> str:
    """Render ``metrics`` as CSV text with ``columns`` as the header row.

    Each column name is read from the metric with ``getattr``. Cell values
    are encoded as follows:

    * ``None`` becomes an empty cell.
    * A list becomes its items joined with ``","``.
    * A dict becomes compact JSON with sorted keys.
    * Anything else is passed through ``str()``.

    Quoting of cells that contain commas, quotes, or newlines is left to
    ``csv.writer``. Rows are separated by ``"\\n"`` and the final trailing
    newline is removed.
    """

    def _cell(m: MetricToolResponse, col: str) -> str:
        val = getattr(m, col)
        if val is None:
            return ""
        if isinstance(val, list):
            return ",".join(str(v) for v in val)
        if isinstance(val, dict):
            # Compact separators and sorted keys keep the cell small and stable.
            return json.dumps(val, separators=(",", ":"), sort_keys=True)
        return str(val)

    output = io.StringIO()
    writer = csv.writer(output, lineterminator="\n")
    writer.writerow(columns)
    writer.writerows([_cell(m, col) for col in columns] for m in metrics)
    return output.getvalue().rstrip("\n")


def metrics_to_csv(response: ListMetricsResponse, max_response_chars: int = 0) -> str:
    """Serialize a list-metrics response as compact CSV.

    ``name`` and ``type`` are always emitted. ``label``, ``description``,
    ``metadata``, ``dimensions`` and ``entities`` are emitted only when at
    least one metric has a truthy value for them, so columns that are
    entirely ``None``/empty are omitted.

    Args:
        response: Object exposing the metrics to serialize as ``.metrics``.
        max_response_chars: Soft size limit in characters. ``0`` (or any
            non-positive value) disables the limit. When the full CSV is
            longer than the limit, the ``description`` and ``metadata``
            columns are dropped and the CSV is rebuilt. The rebuilt text is
            returned even if it still exceeds the limit.

    Returns:
        The CSV text, or an empty string when the response has no metrics.
    """
    metrics = response.metrics
    if not metrics:
        return ""

    optional_columns = ("label", "description", "metadata", "dimensions", "entities")
    columns: list[str] = ["name", "type"]
    # Truthiness check: empty lists/dicts/strings count as "no data".
    columns.extend(
        col for col in optional_columns if any(getattr(m, col) for m in metrics)
    )

    result = _build_csv(metrics, columns)
    if max_response_chars <= 0 or len(result) <= max_response_chars:
        return result

    droppable_columns = ("description", "metadata")
    slim_columns = [c for c in columns if c not in droppable_columns]
    if len(slim_columns) == len(columns):
        # Nothing to drop: a rebuild would serialize every metric again only
        # to produce the exact same text.
        return result
    return _build_csv(metrics, slim_columns)
74+
75+
3076
@dataclass
3177
class SemanticLayerToolContext:
3278
config_provider: ConfigProvider[SemanticLayerConfig]
@@ -53,11 +99,12 @@ def __init__(
5399
async def list_metrics(
54100
context: SemanticLayerToolContext,
55101
search: str | None = None,
56-
) -> ListMetricsResponse:
102+
) -> str:
57103
config = await context.config_provider.get_config()
58-
return await context.semantic_layer_fetcher.list_metrics(
104+
response = await context.semantic_layer_fetcher.list_metrics(
59105
config=config, search=search
60106
)
107+
return metrics_to_csv(response, max_response_chars=config.max_response_chars)
61108

62109

63110
@dbt_mcp_tool(

0 commit comments

Comments
 (0)