diff --git a/.changes/unreleased/Bug Fix-20260519-142146.yaml b/.changes/unreleased/Bug Fix-20260519-142146.yaml new file mode 100644 index 000000000..4a5a5153d --- /dev/null +++ b/.changes/unreleased/Bug Fix-20260519-142146.yaml @@ -0,0 +1,3 @@ +kind: Bug Fix +body: 'Fix get_dimension_values: stringify dimension values to maintain list[str] type, omit nulls, add error handling, add limit >= 1 validation' +time: 2026-05-19T14:21:46.951552+02:00 diff --git a/.changes/unreleased/Enhancement or New Feature-20260519-141400.yaml b/.changes/unreleased/Enhancement or New Feature-20260519-141400.yaml new file mode 100644 index 000000000..9765574dd --- /dev/null +++ b/.changes/unreleased/Enhancement or New Feature-20260519-141400.yaml @@ -0,0 +1,3 @@ +kind: Enhancement or New Feature +body: Add get_dimension_values tool to Semantic Layer toolset to retrieve distinct values for a dimension +time: 2026-05-19T14:14:00.872546+02:00 diff --git a/README.md b/README.md index dec61ecde..5ab34d0ea 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Tools for executing and generating SQL on dbt Platform infrastructure. ### Semantic Layer To learn more about the dbt Semantic Layer, click [here](https://docs.getdbt.com/docs/use-dbt-semantic-layer/dbt-sl). +- `get_dimension_values`: Gets distinct values for a dimension; option to scope to specific metrics. - `get_dimensions`: Gets dimensions for specified metrics. - `get_entities`: Gets entities for specified metrics. - `get_metrics_compiled_sql`: Returns compiled SQL for metrics without executing the query. 
diff --git a/docs/diagram.mmd b/docs/diagram.mmd new file mode 100644 index 000000000..a62756bba --- /dev/null +++ b/docs/diagram.mmd @@ -0,0 +1,89 @@ +graph LR + %% Styling + classDef default stroke-width:2px,rx:8 + + mcp[dbt MCP server] + + %% Tools subgraph + subgraph tools[Tools] + direction TB + + subgraph cli[dbt CLI - local executable] + cli_build[build] + cli_compile[compile] + cli_docs[docs] + cli_ls[ls] + cli_parse[parse] + cli_run[run] + cli_test[test] + cli_show[show] + end + + subgraph discovery[Discovery - Metadata API] + disc_mart[get_mart_models] + disc_all[get_all_models] + disc_details[get_model_details] + disc_parents[get_model_parents] + disc_children[get_model_children] + disc_health[get_model_health] + disc_exp[get_exposures] + disc_exp_det[get_exposure_details] + end + + subgraph sl[dbt Semantic Layer] + sl_metrics[list_metrics] + sl_dims[get_dimensions] + sl_dim_vals[get_dimension_values] + sl_ent[get_entities] + sl_query[query_metrics] + sl_sql[get_metrics_compiled_sql] + end + + subgraph sql[SQL tools] + sql_text[text_to_sql] + sql_exec[execute_sql] + end + + subgraph admin[Admin API tools] + admin_list[list_jobs] + admin_details[get_job_details] + admin_trigger[trigger_job_run] + admin_runs[list_jobs_runs] + admin_run_det[get_job_run_details] + admin_cancel[cancel_job_run] + admin_retry[retry_job_run] + admin_artifacts[list_job_run_artifacts] + admin_artifact[get_job_run_artifact] + admin_error[get_job_run_error] + end + + subgraph codegen[dbt-codegen tools] + code_source[generate_source] + code_yaml[generate_model_yaml] + code_staging[generate_staging_model] + end + end + + %% MCP Clients + subgraph clients[MCP Clients] + direction TB + client_claude[Claude] + client_cursor[Cursor] + client_any[Any MCP client] + end + + %% Other components + other_mcp[Other MCP servers] + other_tools[Other tools] + + %% Connections + mcp --> cli + mcp --> discovery + mcp --> sl + mcp --> sql + mcp --> admin + mcp --> codegen + + clients --> mcp + clients 
--> other_mcp + other_mcp --> other_tools diff --git a/src/dbt_mcp/prompts/semantic_layer/get_dimension_values.md b/src/dbt_mcp/prompts/semantic_layer/get_dimension_values.md new file mode 100644 index 000000000..463793252 --- /dev/null +++ b/src/dbt_mcp/prompts/semantic_layer/get_dimension_values.md @@ -0,0 +1,34 @@ + +Get the distinct values for a given dimension, optionally scoped to specific metrics. + +Use this tool to discover what values exist for a dimension before building a `where` +filter in `query_metrics`. For example, to filter by country you first call this tool +to see which country values exist, then use those values in the `where` clause. + +If the response includes `truncated: true`, there are more values than the current +`limit`. Increase `limit` to retrieve more, or add `metrics` to narrow the scope. + + + + +Question: "I want to filter revenue for a specific country — what countries are available?" +Thinking step-by-step: + - The user wants to know valid values for a country dimension + - I should scope to the "revenue" metric for accurate results +Parameters: + dimension="customer__country" + metrics=["revenue"] + limit=100 + + + +Question: "What are the possible order statuses?" +Thinking step-by-step: + - The user wants all distinct values for an order status dimension + - No specific metric context needed +Parameters: + dimension="order__status" + metrics=null + limit=100 + + diff --git a/src/dbt_mcp/semantic_layer/client.py b/src/dbt_mcp/semantic_layer/client.py index feb836bdb..53d990064 100644 --- a/src/dbt_mcp/semantic_layer/client.py +++ b/src/dbt_mcp/semantic_layer/client.py @@ -26,6 +26,7 @@ from dbt_mcp.semantic_layer.gql.gql_request import submit_request from dbt_mcp.semantic_layer.types import ( DimensionToolResponse, + DimensionValuesResponse, EntityToolResponse, GetMetricsCompiledSqlError, GetMetricsCompiledSqlResult, @@ -112,6 +113,12 @@ def compile_sql( read_cache: bool = True, ) -> str: ... 
+    def dimension_values(
+        self,
+        metrics: list[str],
+        group_by: str,
+    ) -> Any: ...
+
 
 class SemanticLayerClientProvider(Protocol):
     async def get_client(
@@ -350,6 +357,48 @@ async def get_entities(
             self.entities_cache[metrics_key] = entities
         return self.entities_cache[metrics_key]
 
+    async def get_dimension_values(
+        self,
+        config: SemanticLayerConfig,
+        dimension: str,
+        metrics: list[str] | None = None,
+        limit: int = 100,
+    ) -> DimensionValuesResponse:
+        """Return up to ``limit`` distinct non-null values of ``dimension``.
+
+        Values are stringified so the response is always ``list[str]``; nulls
+        are dropped before the limit is applied. ``truncated`` is True when
+        more non-null values exist than ``limit``.
+
+        Raises:
+            ValueError: if ``limit`` is less than 1.
+            RuntimeError: if the Semantic Layer request or result parsing fails.
+        """
+        # The tool layer enforces ge=1, but a direct caller could pass 0 or a
+        # negative limit; slicing with it would silently drop values from the
+        # end while still reporting truncated=True.
+        if limit < 1:
+            raise ValueError("limit must be >= 1")
+        try:
+            sl_client = await self.client_provider.get_client(config=config)
+            with sl_client.session():
+                raw_table: Any = await asyncio.to_thread(
+                    sl_client.dimension_values,
+                    metrics=metrics or [],
+                    group_by=dimension,
+                )
+            # SDK doesn't support server-side limiting; truncation is applied client-side.
+            # NOTE(review): assumes the result column is named exactly like the
+            # requested dimension - confirm for warehouses that upper-case identifiers.
+            column = raw_table.column(dimension).drop_null()
+            truncated = len(column) > limit
+            # Slice before to_pylist() so only the values actually returned
+            # are converted to Python objects.
+            values = [str(v) for v in column.slice(0, limit).to_pylist()]
+            return DimensionValuesResponse(values=values, truncated=truncated)
+        except Exception as e:
+            raise RuntimeError(self._format_semantic_layer_error(e)) from e
+
     def _format_semantic_layer_error(self, error: Exception) -> str:
         """Format semantic layer errors by cleaning up common error message patterns."""
         error_str = str(error)
diff --git a/src/dbt_mcp/semantic_layer/param_descriptions.py b/src/dbt_mcp/semantic_layer/param_descriptions.py
index 00a757cd1..26f6c6a61 100644
--- a/src/dbt_mcp/semantic_layer/param_descriptions.py
+++ b/src/dbt_mcp/semantic_layer/param_descriptions.py
@@ -44,3 +44,14 @@
     "Semantic Layer filter; use {{ Dimension('name') }}, "
     "{{ TimeDimension('name', 'grain') }}, {{ Entity('name') }}; dates as yyyy-mm-dd"
 )
+
+SEMANTIC_DIMENSION = (
+    "The dimension name to retrieve values for "
+    "(e.g. `customer__country`, `order__status`)"
+)
+
+SEMANTIC_DIMENSION_VALUES_LIMIT = (
+    "Maximum number of distinct values to return (default 100). 
" + "If the result is truncated, the response `truncated` field will be True — " + "increase this limit to retrieve more values." +) diff --git a/src/dbt_mcp/semantic_layer/tools.py b/src/dbt_mcp/semantic_layer/tools.py index 8d0de6892..11b1c4b2b 100644 --- a/src/dbt_mcp/semantic_layer/tools.py +++ b/src/dbt_mcp/semantic_layer/tools.py @@ -17,6 +17,8 @@ ) from dbt_mcp.semantic_layer.param_descriptions import ( QUERY_RESULT_LIMIT, + SEMANTIC_DIMENSION, + SEMANTIC_DIMENSION_VALUES_LIMIT, SEMANTIC_GROUP_BY, SEMANTIC_METRICS, SEMANTIC_ORDER_BY, @@ -28,6 +30,7 @@ ) from dbt_mcp.semantic_layer.types import ( DimensionToolResponse, + DimensionValuesResponse, EntityToolResponse, GetMetricsCompiledSqlSuccess, ListMetricsResponse, @@ -201,6 +204,30 @@ async def get_entities( ) +@dbt_mcp_tool( + description=get_prompt("semantic_layer/get_dimension_values"), + title="Get Dimension Values", + read_only_hint=True, + destructive_hint=False, + idempotent_hint=True, +) +async def get_dimension_values( + context: SemanticLayerToolContext, + dimension: Annotated[str, Field(description=SEMANTIC_DIMENSION)], + metrics: Annotated[list[str] | None, Field(description=SEMANTIC_METRICS)] = None, + limit: Annotated[ + int, Field(ge=1, description=SEMANTIC_DIMENSION_VALUES_LIMIT) + ] = 100, +) -> DimensionValuesResponse: + config = await context.config_provider.get_config() + return await context.semantic_layer_fetcher.get_dimension_values( + config=config, + dimension=dimension, + metrics=metrics, + limit=limit, + ) + + @dbt_mcp_tool( description=get_prompt("semantic_layer/query_metrics"), title="Query Metrics", @@ -274,6 +301,7 @@ async def get_metrics_compiled_sql( list_saved_queries, get_dimensions, get_entities, + get_dimension_values, query_metrics, get_metrics_compiled_sql, ] diff --git a/src/dbt_mcp/semantic_layer/types.py b/src/dbt_mcp/semantic_layer/types.py index d0cb95c93..6f46faf4b 100644 --- a/src/dbt_mcp/semantic_layer/types.py +++ b/src/dbt_mcp/semantic_layer/types.py @@ -84,3 
+84,9 @@ class GetMetricsCompiledSqlError: GetMetricsCompiledSqlResult = GetMetricsCompiledSqlSuccess | GetMetricsCompiledSqlError + + +@dataclass +class DimensionValuesResponse: + values: list[str] + truncated: bool diff --git a/src/dbt_mcp/tools/readme_mappings.py b/src/dbt_mcp/tools/readme_mappings.py index f9b465224..c9e585cb5 100644 --- a/src/dbt_mcp/tools/readme_mappings.py +++ b/src/dbt_mcp/tools/readme_mappings.py @@ -21,6 +21,7 @@ ToolName.LIST_SAVED_QUERIES: "Retrieves all saved queries.", ToolName.GET_DIMENSIONS: "Gets dimensions for specified metrics.", ToolName.GET_ENTITIES: "Gets entities for specified metrics.", + ToolName.GET_DIMENSION_VALUES: "Gets distinct values for a dimension; option to scope to specific metrics.", ToolName.QUERY_METRICS: "Executes metric queries with filtering and grouping options.", ToolName.GET_METRICS_COMPILED_SQL: "Returns compiled SQL for metrics without executing the query.", # Discovery tools diff --git a/src/dbt_mcp/tools/tool_names.py b/src/dbt_mcp/tools/tool_names.py index e113491ef..8a1dc2542 100644 --- a/src/dbt_mcp/tools/tool_names.py +++ b/src/dbt_mcp/tools/tool_names.py @@ -22,6 +22,7 @@ class ToolName(Enum): LIST_SAVED_QUERIES = "list_saved_queries" GET_DIMENSIONS = "get_dimensions" GET_ENTITIES = "get_entities" + GET_DIMENSION_VALUES = "get_dimension_values" QUERY_METRICS = "query_metrics" GET_METRICS_COMPILED_SQL = "get_metrics_compiled_sql" diff --git a/src/dbt_mcp/tools/toolsets.py b/src/dbt_mcp/tools/toolsets.py index dde8c128a..ecc02e38b 100644 --- a/src/dbt_mcp/tools/toolsets.py +++ b/src/dbt_mcp/tools/toolsets.py @@ -53,6 +53,7 @@ class Toolset(Enum): ToolName.LIST_SAVED_QUERIES, ToolName.GET_DIMENSIONS, ToolName.GET_ENTITIES, + ToolName.GET_DIMENSION_VALUES, ToolName.QUERY_METRICS, ToolName.GET_METRICS_COMPILED_SQL, }, diff --git a/tests/unit/semantic_layer/test_client.py b/tests/unit/semantic_layer/test_client.py index 93982e774..de43d623f 100644 --- a/tests/unit/semantic_layer/test_client.py +++ 
b/tests/unit/semantic_layer/test_client.py @@ -19,7 +19,11 @@ from dbt_mcp.errors import InvalidParameterError from dbt_mcp.errors.semantic_layer import SemanticLayerQueryTimeoutError from dbt_mcp.semantic_layer.client import DEFAULT_RESULT_FORMATTER, SemanticLayerFetcher -from dbt_mcp.semantic_layer.types import OrderByParam, QueryMetricsError +from dbt_mcp.semantic_layer.types import ( + DimensionValuesResponse, + OrderByParam, + QueryMetricsError, +) def test_default_result_formatter_outputs_iso_dates() -> None: @@ -876,3 +880,182 @@ def test_get_order_bys_unknown_name_raises(fetcher) -> None: def test_get_order_bys_none_returns_empty(fetcher) -> None: assert fetcher._get_order_bys(None) == [] + + +# Tests for DimensionValuesResponse + + +def test_dimension_values_response_creation() -> None: + response = DimensionValuesResponse(values=["value1", "value2"], truncated=False) + assert response.values == ["value1", "value2"] + assert response.truncated is False + + +def test_dimension_values_response_truncated() -> None: + response = DimensionValuesResponse(values=["a", "b", "c"], truncated=True) + assert len(response.values) == 3 + assert response.truncated is True + + +@pytest.mark.asyncio +async def test_get_dimension_values_returns_values(mock_client_provider): + mock_sl_client = MagicMock() + session_ctx = MagicMock() + mock_sl_client.session.return_value = session_ctx + session_ctx.__enter__ = MagicMock(return_value=mock_sl_client) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_sl_client.dimension_values.return_value = pa.table( + {"customer__country": ["US", "UK", "FR"]} + ) + mock_client_provider.get_client.return_value = mock_sl_client + + token_p = MagicMock() + token_p.get_token.return_value = "tok" + headers_p = MagicMock() + headers_p.get_headers.return_value = {} + config = SemanticLayerConfig( + url="https://test-host/api/graphql", + host="test-host", + prod_environment_id=123, + token_provider=token_p, + headers_provider=headers_p, + 
) + fetcher = SemanticLayerFetcher(client_provider=mock_client_provider) + result = await fetcher.get_dimension_values( + config=config, dimension="customer__country", metrics=["revenue"], limit=100 + ) + + mock_sl_client.dimension_values.assert_called_once_with( + metrics=["revenue"], group_by="customer__country" + ) + assert result.values == ["US", "UK", "FR"] + assert result.truncated is False + + +@pytest.mark.asyncio +async def test_get_dimension_values_truncates_at_limit(mock_client_provider): + mock_sl_client = MagicMock() + session_ctx = MagicMock() + mock_sl_client.session.return_value = session_ctx + session_ctx.__enter__ = MagicMock(return_value=mock_sl_client) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_sl_client.dimension_values.return_value = pa.table( + {"status": ["a", "b", "c", "d", "e"]} + ) + mock_client_provider.get_client.return_value = mock_sl_client + + token_p = MagicMock() + token_p.get_token.return_value = "tok" + headers_p = MagicMock() + headers_p.get_headers.return_value = {} + config = SemanticLayerConfig( + url="https://test-host/api/graphql", + host="test-host", + prod_environment_id=123, + token_provider=token_p, + headers_provider=headers_p, + ) + fetcher = SemanticLayerFetcher(client_provider=mock_client_provider) + result = await fetcher.get_dimension_values( + config=config, dimension="status", limit=3 + ) + + assert result.values == ["a", "b", "c"] + assert result.truncated is True + + +@pytest.mark.asyncio +async def test_get_dimension_values_exact_limit_not_truncated(mock_client_provider): + mock_sl_client = MagicMock() + session_ctx = MagicMock() + mock_sl_client.session.return_value = session_ctx + session_ctx.__enter__ = MagicMock(return_value=mock_sl_client) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_sl_client.dimension_values.return_value = pa.table({"status": ["a", "b", "c"]}) + mock_client_provider.get_client.return_value = mock_sl_client + + token_p = MagicMock() + 
token_p.get_token.return_value = "tok" + headers_p = MagicMock() + headers_p.get_headers.return_value = {} + config = SemanticLayerConfig( + url="https://test-host/api/graphql", + host="test-host", + prod_environment_id=123, + token_provider=token_p, + headers_provider=headers_p, + ) + fetcher = SemanticLayerFetcher(client_provider=mock_client_provider) + result = await fetcher.get_dimension_values( + config=config, dimension="status", limit=3 + ) + + assert result.values == ["a", "b", "c"] + assert result.truncated is False + + +@pytest.mark.asyncio +async def test_get_dimension_values_no_metrics_passes_empty_list(mock_client_provider): + mock_sl_client = MagicMock() + session_ctx = MagicMock() + mock_sl_client.session.return_value = session_ctx + session_ctx.__enter__ = MagicMock(return_value=mock_sl_client) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_sl_client.dimension_values.return_value = pa.table( + {"customer__status": ["active", "inactive"]} + ) + mock_client_provider.get_client.return_value = mock_sl_client + + token_p = MagicMock() + token_p.get_token.return_value = "tok" + headers_p = MagicMock() + headers_p.get_headers.return_value = {} + config = SemanticLayerConfig( + url="https://test-host/api/graphql", + host="test-host", + prod_environment_id=123, + token_provider=token_p, + headers_provider=headers_p, + ) + fetcher = SemanticLayerFetcher(client_provider=mock_client_provider) + result = await fetcher.get_dimension_values( + config=config, dimension="customer__status", metrics=None, limit=100 + ) + + mock_sl_client.dimension_values.assert_called_once_with( + metrics=[], group_by="customer__status" + ) + assert result.values == ["active", "inactive"] + assert result.truncated is False + + +@pytest.mark.asyncio +async def test_get_dimension_values_omits_nulls(mock_client_provider): + mock_sl_client = MagicMock() + session_ctx = MagicMock() + mock_sl_client.session.return_value = session_ctx + session_ctx.__enter__ = 
MagicMock(return_value=mock_sl_client) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_sl_client.dimension_values.return_value = pa.table( + {"customer__country": pa.array(["US", None, "FR"], type=pa.string())} + ) + mock_client_provider.get_client.return_value = mock_sl_client + + token_p = MagicMock() + token_p.get_token.return_value = "tok" + headers_p = MagicMock() + headers_p.get_headers.return_value = {} + config = SemanticLayerConfig( + url="https://test-host/api/graphql", + host="test-host", + prod_environment_id=123, + token_provider=token_p, + headers_provider=headers_p, + ) + fetcher = SemanticLayerFetcher(client_provider=mock_client_provider) + result = await fetcher.get_dimension_values( + config=config, dimension="customer__country", metrics=None, limit=100 + ) + + assert result.values == ["US", "FR"] + assert result.truncated is False