Skip to content

Commit 940ad52

Browse files
committed
added sample collection to volatile tables
1 parent 83d6ad6 commit 940ad52

5 files changed

Lines changed: 318 additions & 99 deletions

File tree

docs/developer_guide/PROGRESSIVE_DISCLOSURE.md

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,23 @@
22

33
## Overview
44

5-
Progressive disclosure is an optimization technique for MCP servers with a large number of tools (100+). Instead of listing all tools in the `tools/list` response (which consumes significant context window space), tools are dynamically discovered and executed through a catalog system.
6-
7-
## Context Window Savings
8-
9-
- **Static Mode** (traditional): All 100+ tools listed → ~50,000 tokens
10-
- **Progressive Disclosure Mode**: 3 proxy tools + 1 core tool → ~500 tokens
11-
- **Savings**: 99% reduction in initial context window usage
5+
We enable progressive disclosure of MCP tools (and, in the future, other assets) to optimize context window usage.
6+
7+
This is particularly relevant considering the large number of tools that this server offers by default, and the pace at which new custom tools are created after deployment.
8+
9+
Instead of listing all tools in the `tools/list` response (which consumes significant context window space), tools are dynamically discovered and executed through a catalog system.
10+
11+
```mermaid
12+
%%{init: {'theme':'neutral', 'themeVariables': { 'fontSize':'12px'}, 'flowchart':{'htmlLabels':true, 'curve':'basis', 'nodeSpacing': 30, 'rankSpacing': 30}}}%%
13+
flowchart TD
14+
A[User Input] --> B[Current Context]
15+
B --> C[Start Reasoning]
16+
C --> D{Do I need more<br/>information?}
17+
D -->|No| G[Answer]
18+
D -->|Yes| E[Search for<br/>relevant tools]
19+
E --> F[Execute tool and<br/>add to context]
20+
F --> B
21+
```
1222

1323
## Usage
1424

docs/developer_guide/REGISTRY_IMPLEMENTATION.md

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -102,23 +102,37 @@ class RegistryLoader:
102102
- `DATE` → `str`
103103
- Default → `str`
104104

105-
#### 2. SQL Builder (`registry_tools.py`)
105+
#### 2. SQL Builder & Type Casting (`registry_tools.py`)
106106

107-
Generates SQL statements for tool execution:
107+
Generates SQL statements with named parameter placeholders and handles type casting:
108108

109109
```python
110-
def build_registry_sql(tool_def: Dict[str, Any], params: Dict[str, Any]) -> str:
111-
"""Build SQL statement to execute a database-registered tool (UDF or Macro)."""
112-
# For UDFs: SELECT * FROM db_object(param1, param2, ...)
113-
# For Macros: EXEC db_object(param1, param2, ...)
114-
# Handles parameter ordering, type formatting, NULL handling
110+
def build_registry_sql(tool_def: Dict[str, Any]) -> str:
111+
"""Build SQL with named parameter placeholders for safe binding."""
112+
# For UDFs: SELECT database.function_name(:param1, :param2)
113+
# For Macros: EXEC database.macro_name(:param1, :param2)
114+
# Parameters sorted by position, using named placeholders
115+
116+
def cast_parameters(params: Dict[str, Any], tool_def: Dict[str, Any]) -> Dict[str, Any]:
117+
"""Cast parameter values to correct Python types before SQLAlchemy binding."""
118+
# Ensures integer parameters stay as int, not string
119+
# Critical for Teradata operations like TOP N
115120
```
116121

117122
**SQL Generation**:
118-
- **UDF**: `SELECT * FROM database.function_name(param1, param2)`
119-
- **Macro**: `EXEC database.macro_name(param1, param2)`
123+
- **UDF**: `SELECT database.function_name(:param1, :param2)`
124+
- **Macro**: `EXEC database.macro_name(:param1, :param2)`
120125
- Parameters sorted by position
121-
- Values formatted by type (strings quoted, numbers unquoted, NULL handling)
126+
- Uses named placeholders (`:param_name`) for safe SQLAlchemy parameter binding
127+
- No value formatting needed - database handles escaping
128+
- Eliminates SQL injection risks
129+
130+
**Type Casting**:
131+
- Parameters received from clients (JSON/HTTP) are often strings
132+
- `cast_parameters()` converts them to correct Python types based on tool definition
133+
- Example: String `'10'` → integer `10` for TOP N parameters
134+
- Essential for Teradata type-sensitive operations
135+
- Handles int, float, str, bool, and None/NULL values
122136

123137
#### 3. Main Integration (`app.py`)
124138

@@ -167,28 +181,51 @@ def execute_db_tool_with_registry(*args, **kwargs):
167181

168182
### Tool Registration
169183

170-
Registry tools are registered using the same pattern as YAML tools:
184+
Registry tools are registered using the same pattern as YAML tools, with safe parameter binding:
171185

172186
```python
173-
# Create executor that generates SQL and executes
174-
def make_executor(tool_def_captured=tool_def, tool_name_captured=tool_name):
175-
def executor(**kwargs):
176-
sql = build_registry_sql(tool_def_captured, kwargs)
177-
return execute_db_tool(td.handle_base_readQuery, sql, tool_name=tool_name_captured, **kwargs)
178-
return executor
179-
180-
# Create MCP tool
181-
tool_func = create_mcp_tool(
182-
executor_func=make_executor(),
183-
signature=sig,
184-
validate_required=True,
185-
tool_name=tool_name,
186-
)
187+
# Create handler function (like handle_* functions)
188+
def handler(conn, tool_name=None, **kwargs):
189+
"""Registry-defined database tool handler."""
190+
# Build SQL with named parameter placeholders
191+
sql = build_registry_sql(tool_def)
192+
193+
# Extract tool parameters (excluding special params)
194+
special_params = {'persist', 'tool_name'}
195+
tool_params = {k: v for k, v in kwargs.items() if k not in special_params}
196+
197+
# Extract special params for handle_base_readQuery
198+
persist = kwargs.get('persist', False)
199+
200+
# Pass SQL with named placeholders and parameter values for safe binding
201+
return execute_db_tool(
202+
td.handle_base_readQuery,
203+
sql,
204+
tool_name=tool_name,
205+
persist=persist,
206+
**tool_params # These will be safely bound by SQLAlchemy
207+
)
208+
209+
# Wrap handler as MCP tool
210+
wrapped = make_tool_wrapper(handler)
187211

188212
# Register with FastMCP
189-
mcp.tool(name=tool_name, description=description)(tool_func)
213+
mcp.tool(name=tool_name, description=description)(wrapped)
190214
```
191215

216+
**Key Points**:
217+
- SQL is generated once with placeholders (`:param1`, `:param2`)
218+
- Parameter values are passed separately as kwargs
219+
- SQLAlchemy safely binds parameters, preventing SQL injection
220+
- Same secure pattern used by YAML tools
221+
222+
**Important Limitations**:
223+
- **Registry tools do not support the `persist` parameter**:
224+
- The `persist` parameter is not available for registry tools (both UDFs and Macros)
225+
- Macros: Cannot be wrapped in `CREATE VOLATILE TABLE` (invalid: `CREATE VOLATILE TABLE vt AS (EXEC mydb.my_macro(param)) WITH DATA`) ❌
226+
- UDFs: While technically possible, persist is disabled for consistency and simplicity
227+
- If you need to persist results from registry tools, use YAML tools or Python tools instead
228+
192229
## Configuration
193230

194231
### Profile Setup

src/teradata_mcp_server/app.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,14 +1013,13 @@ def create_registry_handler(tool_name, tool_def):
10131013

10141014
# Build docstring with parameters
10151015
docstring_parts = [description]
1016-
if param_defs or True: # Always show Arguments section to include persist
1016+
if param_defs:
10171017
docstring_parts.append("\nArguments:")
10181018
for param_name, p in sorted(param_defs.items(), key=lambda x: x[1].get('position', 0)):
10191019
param_desc = p.get("description", "")
10201020
docstring_parts.append(f" {param_name} - {param_desc}")
1021-
# Add persist parameter documentation
1022-
docstring_parts.append(f" persist - If True, materializes result as a volatile table and returns table name")
10231021
docstring_parts.append(f"\nRegistry tool: {tool_def['object_type']} {tool_def['db_object']}")
1022+
docstring_parts.append(f"Note: Registry tools do not support the 'persist' parameter")
10241023

10251024
# Add required 'conn' parameter at the beginning (for catalog compatibility)
10261025
parameters = [
@@ -1032,13 +1031,6 @@ def create_registry_handler(tool_name, tool_def):
10321031
inspect.Parameter("tool_name", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, default=None)
10331032
)
10341033

1035-
# Add persist parameter (for materializing results as volatile table)
1036-
persist_description = "If True, materializes result as a volatile table and returns table name"
1037-
parameters.append(
1038-
inspect.Parameter("persist", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
1039-
default=False, annotation=Annotated[bool, persist_description])
1040-
)
1041-
10421034
# Add registry parameters - separate required and optional
10431035
required_params = []
10441036
optional_params = []
@@ -1057,14 +1049,43 @@ def create_registry_handler(tool_name, tool_def):
10571049
else:
10581050
optional_params.append(param)
10591051

1060-
# Build signature with correct order: conn, required_params, tool_name, persist (both with defaults), optional_params
1061-
sig = inspect.Signature([parameters[0]] + required_params + [parameters[1], parameters[2]] + optional_params)
1052+
# Build signature: conn, required_params, tool_name (with default), optional_params
1053+
sig = inspect.Signature([parameters[0]] + required_params + [parameters[1]] + optional_params)
10621054

10631055
# Create the handler function (like handle_* functions)
10641056
def handler(conn, tool_name=None, **kwargs):
10651057
"""Registry-defined database tool handler."""
1066-
sql = build_registry_sql(tool_def, kwargs)
1067-
return execute_db_tool(td.handle_base_readQuery, sql, tool_name=tool_name or tool_name, **kwargs)
1058+
from teradata_mcp_server.tools.registry.registry_tools import cast_parameters, build_registry_sql_with_values
1059+
1060+
logger.info(f"[REGISTRY_HANDLER] Starting handler for tool '{tool_name}'")
1061+
logger.info(f"[REGISTRY_HANDLER] Received kwargs: {kwargs}")
1062+
1063+
# Extract tool parameters (excluding special params)
1064+
# Note: persist is not supported for registry tools
1065+
special_params = {'tool_name'}
1066+
tool_params = {k: v for k, v in kwargs.items() if k not in special_params}
1067+
logger.info(f"[REGISTRY_HANDLER] Tool params before casting: {tool_params}")
1068+
1069+
# Cast parameters to their correct types based on tool definition
1070+
# This ensures values are properly typed before formatting into SQL
1071+
cast_params = cast_parameters(tool_params, tool_def)
1072+
logger.info(f"[REGISTRY_HANDLER] Tool params after casting: {cast_params}")
1073+
1074+
# Build SQL with values formatted as literals
1075+
# This approach is necessary because SQLAlchemy parameter binding doesn't
1076+
# preserve type information correctly with the Teradata driver
1077+
sql = build_registry_sql_with_values(tool_def, cast_params)
1078+
logger.info(f"[REGISTRY_HANDLER] Generated SQL: {sql}")
1079+
1080+
# Execute the SQL without parameters (values already in SQL)
1081+
# Note: persist=False for registry tools (not supported)
1082+
return execute_db_tool(
1083+
td.handle_base_readQuery,
1084+
sql, # SQL string with values already formatted
1085+
tool_name=tool_name or tool_name,
1086+
persist=False # Registry tools do not support persist
1087+
# No **kwargs here - values are already in the SQL string
1088+
)
10681089

10691090
# Set metadata on the handler
10701091
handler.__name__ = f"handle_{tool_name}"

src/teradata_mcp_server/tools/base/base_tools.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def handle_base_readQuery(
2424
2525
Arguments:
2626
sql - SQL text, with optional bind-parameter placeholders
27-
persist - Set to True to materializes results as a table and reuse it later
27+
persist - Set to True to persist the results as a table and reuse it later. Recommended for large result sets.
2828
2929
Returns:
3030
ResponseType: formatted response with query results + metadata
@@ -41,6 +41,9 @@ def handle_base_readQuery(
4141

4242
# Strip trailing semicolons from the SQL
4343
sql_clean = sql.rstrip().rstrip(';')
44+
45+
#Remove the final ORDER BY clause if present
46+
sql_clean = re.sub(r'ORDER BY [\w\W\s\S]*$', '', sql_clean, flags=re.IGNORECASE).strip()
4447

4548
# Wrap in CREATE VOLATILE TABLE statement
4649
sql = f"CREATE VOLATILE TABLE {volatile_table_name} AS ({sql_clean}) WITH DATA ON COMMIT PRESERVE ROWS"
@@ -53,6 +56,11 @@ def handle_base_readQuery(
5356
result = conn.execute(stmt, kwargs) if kwargs else conn.execute(stmt)
5457

5558
# 3. Fetch rows & column metadata
59+
60+
# If we persisted to a volatile table, we won't get any rows back, so we sample the resulting volatile table instead
61+
if volatile_table_name:
62+
result = conn.execute(text(f'select top 10 * from {volatile_table_name}'))
63+
5664
cursor = result.cursor # underlying DB-API cursor
5765
raw_rows = cursor.fetchall() or []
5866
data = rows_to_json(cursor.description, raw_rows)
@@ -63,7 +71,7 @@ def handle_base_readQuery(
6371
}
6472
for col in (cursor.description or [])
6573
]
66-
74+
6775
# 4. Compile the statement with literal binds for “final SQL”
6876
# Fallback to DefaultDialect if conn has no `.dialect`
6977
dialect = getattr(conn, "dialect", default.DefaultDialect())
@@ -83,12 +91,11 @@ def handle_base_readQuery(
8391

8492
# Add volatile table name if persisted
8593
if volatile_table_name:
86-
metadata["columns"] = None
8794
metadata["row_count"] = None
95+
metadata["sample_size"] = 10
8896
metadata["volatile_table"] = volatile_table_name
8997
metadata["persist"] = True
9098
logger.info(f"Query results persisted to volatile table: {volatile_table_name}")
91-
data = [{"results stored in volatile_table": volatile_table_name}]
9299

93100
logger.debug(f"Tool: handle_base_readQuery: metadata: {metadata}")
94101
return create_response(data, metadata)

0 commit comments

Comments
 (0)