-
Notifications
You must be signed in to change notification settings - Fork 122
feat(semantic-layer): add get_dimension_values tool #782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1f41d2b
1ca9011
09220b3
094634a
e8dfe38
19fa51b
14e35c1
09b1319
81f58a7
0f489f6
b685479
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| kind: Bug Fix | ||
| body: 'Fix get_dimension_values: stringify dimension values to maintain list[str] type, omit nulls, add error handling, add limit >= 1 validation' | ||
| time: 2026-05-19T14:21:46.951552+02:00 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| kind: Enhancement or New Feature | ||
| body: Add get_dimension_values tool to Semantic Layer toolset to retrieve distinct values for a dimension | ||
| time: 2026-05-19T14:14:00.872546+02:00 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| graph LR | ||
| %% Styling | ||
| classDef default stroke-width:2px,rx:8 | ||
|
|
||
| mcp[dbt MCP server] | ||
|
|
||
| %% Tools subgraph | ||
| subgraph tools[Tools] | ||
| direction TB | ||
|
|
||
| subgraph cli[dbt CLI - local executable] | ||
| cli_build[build] | ||
| cli_compile[compile] | ||
| cli_docs[docs] | ||
| cli_ls[ls] | ||
| cli_parse[parse] | ||
| cli_run[run] | ||
| cli_test[test] | ||
| cli_show[show] | ||
| end | ||
|
|
||
| subgraph discovery[Discovery - Metadata API] | ||
| disc_mart[get_mart_models] | ||
| disc_all[get_all_models] | ||
| disc_details[get_model_details] | ||
| disc_parents[get_model_parents] | ||
| disc_children[get_model_children] | ||
| disc_health[get_model_health] | ||
| disc_exp[get_exposures] | ||
| disc_exp_det[get_exposure_details] | ||
| end | ||
|
|
||
| subgraph sl[dbt Semantic Layer] | ||
| sl_metrics[list_metrics] | ||
| sl_dims[get_dimensions] | ||
| sl_dim_vals[get_dimension_values] | ||
| sl_ent[get_entities] | ||
| sl_query[query_metrics] | ||
| sl_sql[get_metrics_compiled_sql] | ||
| end | ||
|
|
||
| subgraph sql[SQL tools] | ||
| sql_text[text_to_sql] | ||
| sql_exec[execute_sql] | ||
| end | ||
|
|
||
| subgraph admin[Admin API tools] | ||
| admin_list[list_jobs] | ||
| admin_details[get_job_details] | ||
| admin_trigger[trigger_job_run] | ||
| admin_runs[list_jobs_runs] | ||
| admin_run_det[get_job_run_details] | ||
| admin_cancel[cancel_job_run] | ||
| admin_retry[retry_job_run] | ||
| admin_artifacts[list_job_run_artifacts] | ||
| admin_artifact[get_job_run_artifact] | ||
| admin_error[get_job_run_error] | ||
| end | ||
|
|
||
| subgraph codegen[dbt-codegen tools] | ||
| code_source[generate_source] | ||
| code_yaml[generate_model_yaml] | ||
| code_staging[generate_staging_model] | ||
| end | ||
| end | ||
|
|
||
| %% MCP Clients | ||
| subgraph clients[MCP Clients] | ||
| direction TB | ||
| client_claude[Claude] | ||
| client_cursor[Cursor] | ||
| client_any[Any MCP client] | ||
| end | ||
|
|
||
| %% Other components | ||
| other_mcp[Other MCP servers] | ||
| other_tools[Other tools] | ||
|
|
||
| %% Connections | ||
| mcp --> cli | ||
| mcp --> discovery | ||
| mcp --> sl | ||
| mcp --> sql | ||
| mcp --> admin | ||
| mcp --> codegen | ||
|
|
||
| clients --> mcp | ||
| clients --> other_mcp | ||
| other_mcp --> other_tools |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| <instructions> | ||
| Get the distinct values for a given dimension, optionally scoped to specific metrics. | ||
|
|
||
| Use this tool to discover what values exist for a dimension before building a `where` | ||
| filter in `query_metrics`. For example, to filter by country you first call this tool | ||
| to see which country values exist, then use those values in the `where` clause. | ||
|
|
||
| If the response includes `truncated: true`, there are more values than the current | ||
| `limit`. Increase `limit` to retrieve more, or add `metrics` to narrow the scope. | ||
| </instructions> | ||
|
|
||
| <examples> | ||
| <example> | ||
| Question: "I want to filter revenue for a specific country — what countries are available?" | ||
| Thinking step-by-step: | ||
| - The user wants to know valid values for a country dimension | ||
| - I should scope to the "revenue" metric for accurate results | ||
| Parameters: | ||
| dimension="customer__country" | ||
| metrics=["revenue"] | ||
| limit=100 | ||
| </example> | ||
|
|
||
| <example> | ||
| Question: "What are the possible order statuses?" | ||
| Thinking step-by-step: | ||
| - The user wants all distinct values for an order status dimension | ||
| - No specific metric context needed | ||
| Parameters: | ||
| dimension="order__status" | ||
| metrics=null | ||
| limit=100 | ||
| </example> | ||
| </examples> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| from dbt_mcp.semantic_layer.gql.gql_request import submit_request | ||
| from dbt_mcp.semantic_layer.types import ( | ||
| DimensionToolResponse, | ||
| DimensionValuesResponse, | ||
| EntityToolResponse, | ||
| GetMetricsCompiledSqlError, | ||
| GetMetricsCompiledSqlResult, | ||
|
|
@@ -112,6 +113,12 @@ def compile_sql( | |
| read_cache: bool = True, | ||
| ) -> str: ... | ||
|
|
||
| def dimension_values( | ||
| self, | ||
| metrics: list[str], | ||
| group_by: str, | ||
| ) -> Any: ... | ||
|
|
||
|
|
||
| class SemanticLayerClientProvider(Protocol): | ||
| async def get_client( | ||
|
|
@@ -350,6 +357,30 @@ async def get_entities( | |
| self.entities_cache[metrics_key] = entities | ||
| return self.entities_cache[metrics_key] | ||
|
|
||
| async def get_dimension_values( | ||
| self, | ||
| config: SemanticLayerConfig, | ||
| dimension: str, | ||
| metrics: list[str] | None = None, | ||
| limit: int = 100, | ||
| ) -> DimensionValuesResponse: | ||
| try: | ||
| sl_client = await self.client_provider.get_client(config=config) | ||
| with sl_client.session(): | ||
| raw_table: Any = await asyncio.to_thread( | ||
| sl_client.dimension_values, | ||
| metrics=metrics or [], | ||
| group_by=dimension, | ||
| ) | ||
| # SDK doesn't support server-side limiting; truncation is applied client-side. | ||
| raw: list[str] = [ | ||
| str(v) for v in raw_table.column(dimension).to_pylist() if v is not None | ||
| ] | ||
|
Comment on lines
+375
to
+378
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tested this locally and a bug came up: the SDK appears to return column names in uppercase, so if the dimension is passed in lowercase there is a mismatch and the column lookup raises an error. I was able to fix it locally by implementing a case-insensitive column lookup. |
||
| truncated = len(raw) > limit | ||
| return DimensionValuesResponse(values=raw[:limit], truncated=truncated) | ||
|
b-per marked this conversation as resolved.
|
||
| except Exception as e: | ||
| raise RuntimeError(self._format_semantic_layer_error(e)) from e | ||
|
|
||
| def _format_semantic_layer_error(self, error: Exception) -> str: | ||
| """Format semantic layer errors by cleaning up common error message patterns.""" | ||
| error_str = str(error) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
| ) | ||
| from dbt_mcp.semantic_layer.param_descriptions import ( | ||
| QUERY_RESULT_LIMIT, | ||
| SEMANTIC_DIMENSION, | ||
| SEMANTIC_DIMENSION_VALUES_LIMIT, | ||
| SEMANTIC_GROUP_BY, | ||
| SEMANTIC_METRICS, | ||
| SEMANTIC_ORDER_BY, | ||
|
|
@@ -28,6 +30,7 @@ | |
| ) | ||
| from dbt_mcp.semantic_layer.types import ( | ||
| DimensionToolResponse, | ||
| DimensionValuesResponse, | ||
| EntityToolResponse, | ||
| GetMetricsCompiledSqlSuccess, | ||
| ListMetricsResponse, | ||
|
Comment on lines
18
to
36
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we also need to add these imports and register the new tool elsewhere as well? (The file name referenced in the original comment was lost in extraction.) |
||
|
|
@@ -201,6 +204,30 @@ async def get_entities( | |
| ) | ||
|
|
||
|
|
||
| @dbt_mcp_tool( | ||
| description=get_prompt("semantic_layer/get_dimension_values"), | ||
| title="Get Dimension Values", | ||
| read_only_hint=True, | ||
| destructive_hint=False, | ||
| idempotent_hint=True, | ||
| ) | ||
| async def get_dimension_values( | ||
| context: SemanticLayerToolContext, | ||
| dimension: Annotated[str, Field(description=SEMANTIC_DIMENSION)], | ||
| metrics: Annotated[list[str] | None, Field(description=SEMANTIC_METRICS)] = None, | ||
| limit: Annotated[ | ||
| int, Field(ge=1, description=SEMANTIC_DIMENSION_VALUES_LIMIT) | ||
| ] = 100, | ||
| ) -> DimensionValuesResponse: | ||
| config = await context.config_provider.get_config() | ||
| return await context.semantic_layer_fetcher.get_dimension_values( | ||
| config=config, | ||
| dimension=dimension, | ||
| metrics=metrics, | ||
| limit=limit, | ||
| ) | ||
|
|
||
|
|
||
| @dbt_mcp_tool( | ||
| description=get_prompt("semantic_layer/query_metrics"), | ||
| title="Query Metrics", | ||
|
|
@@ -274,6 +301,7 @@ async def get_metrics_compiled_sql( | |
| list_saved_queries, | ||
| get_dimensions, | ||
| get_entities, | ||
| get_dimension_values, | ||
| query_metrics, | ||
| get_metrics_compiled_sql, | ||
| ] | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nearly all docs today are automated via existing scripts in `scripts/` and the use of `task docs:generate`.
This mermaid diagram is not included in that automation and will drift. Should we (1) include this mermaid generation in the docs automation in this PR, or (2) exclude the file from here and open another PR for this addition in particular?